STOR 390
01/24/16
select
(select columns)filter
(select rows)mutate
(make new colmn)arrange
(rearrange rows)summarise
(many rows -> one number)select(bison)
select(bison, bisonID, eventDate)
sighting_dates <- select(bison, bisonID, eventDate)
select(bison, year, ITISscientificName)
What is the ITISscientificName of the 382nd row? https://goo.gl/forms/vEWGVC0EIAThz2e33
filter(bison, ITIScommonName == "Carolina Chickadee")
filter(bison, ITIScommonName == "Carolina Chickadee")
# A tibble: 1,478 × 37
bisonID basisOfRecord catalogNumber collectorNumber recordedBy
<int> <chr> <chr> <chr> <chr>
1 940795743 observation OBS182987414 <NA> obsr143470
2 943427969 observation OBS186127858 <NA> obsr143470
3 933987932 observation OBS174439934 <NA> obsr143470
4 933992451 observation OBS174556851 <NA> obsr143470
5 936922013 observation OBS178387679 <NA> obsr143470
6 971994869 observation OBS218011543 <NA> obsr143470
7 972287684 observation OBS218623899 <NA> obsr143470
8 973919771 observation OBS220479035 <NA> obsr143470
9 973929092 observation OBS220273149 <NA> obsr143470
10 974747631 observation OBS221371577 <NA> obsr143470
# ... with 1,468 more rows, and 32 more variables: providerID <int>,
# institutionCode <chr>, resourceID <dbl>,
# ownerInstitutionCollectionCode <chr>, eventDate <dttm>, year <int>,
# providedScientificName <chr>, scientificName <chr>,
# ITISscientificName <chr>, providedCommonName <chr>,
# ITIScommonName <chr>, ITIStsn <chr>, validAcceptedITIStsn <int>,
# providedTSN <chr>, decimalLatitude <dbl>, decimalLongitude <dbl>,
# geodeticDatum <chr>, coordinatePrecision <chr>,
# coordinateUncertaintyInMeters <chr>, verbatimElevation <chr>,
# verbatimDepth <chr>, centroid <chr>, higherGeographyID <chr>,
# computedCountyFips <int>, providedCounty <chr>,
# calculatedCounty <chr>, providedState <chr>, calculatedState <chr>,
# countryCode <chr>, institutionID <chr>, collectionID <chr>,
# relatedResourceID <chr>
arrange(bison, year)
arrange(bison, year)
# A tibble: 65,499 × 37
bisonID basisOfRecord catalogNumber collectorNumber
<int> <chr> <chr> <chr>
1 477258358 specimen 3526 <NA>
2 477258834 specimen 3488 <NA>
3 477259484 specimen 1824 <NA>
4 477259485 specimen 1825 <NA>
5 477259486 specimen 1826 <NA>
6 477236495 specimen 1827 <NA>
7 1216134054 observation 52-24774 <NA>
8 1051991657 specimen barcode-00596144 170
9 1051975572 specimen barcode-00596164 736
10 1042722599 specimen barcode-00512972 <NA>
# ... with 65,489 more rows, and 33 more variables: recordedBy <chr>,
# providerID <int>, institutionCode <chr>, resourceID <dbl>,
# ownerInstitutionCollectionCode <chr>, eventDate <dttm>, year <int>,
# providedScientificName <chr>, scientificName <chr>,
# ITISscientificName <chr>, providedCommonName <chr>,
# ITIScommonName <chr>, ITIStsn <chr>, validAcceptedITIStsn <int>,
# providedTSN <chr>, decimalLatitude <dbl>, decimalLongitude <dbl>,
# geodeticDatum <chr>, coordinatePrecision <chr>,
# coordinateUncertaintyInMeters <chr>, verbatimElevation <chr>,
# verbatimDepth <chr>, centroid <chr>, higherGeographyID <chr>,
# computedCountyFips <int>, providedCounty <chr>,
# calculatedCounty <chr>, providedState <chr>, calculatedState <chr>,
# countryCode <chr>, institutionID <chr>, collectionID <chr>,
# relatedResourceID <chr>
arrange(bison, ITISscientificName)
arrange(bison, ITISscientificName)
# A tibble: 65,499 × 37
bisonID basisOfRecord catalogNumber collectorNumber recordedBy
<int> <chr> <chr> <chr> <chr>
1 1218646356 observation <NA> <NA> <NA>
2 1218646464 observation <NA> <NA> <NA>
3 1218646669 observation <NA> <NA> <NA>
4 1218649006 observation <NA> <NA> <NA>
5 1218645940 observation <NA> <NA> <NA>
6 1218646007 observation <NA> <NA> <NA>
7 NA specimen <NA> <NA> <NA>
8 1220147222 observation <NA> <NA> Israel, Kimberly
9 NA specimen <NA> <NA> <NA>
10 1052571748 observation 860908 <NA> Milo Pyne
# ... with 65,489 more rows, and 32 more variables: providerID <int>,
# institutionCode <chr>, resourceID <dbl>,
# ownerInstitutionCollectionCode <chr>, eventDate <dttm>, year <int>,
# providedScientificName <chr>, scientificName <chr>,
# ITISscientificName <chr>, providedCommonName <chr>,
# ITIScommonName <chr>, ITIStsn <chr>, validAcceptedITIStsn <int>,
# providedTSN <chr>, decimalLatitude <dbl>, decimalLongitude <dbl>,
# geodeticDatum <chr>, coordinatePrecision <chr>,
# coordinateUncertaintyInMeters <chr>, verbatimElevation <chr>,
# verbatimDepth <chr>, centroid <chr>, higherGeographyID <chr>,
# computedCountyFips <int>, providedCounty <chr>,
# calculatedCounty <chr>, providedState <chr>, calculatedState <chr>,
# countryCode <chr>, institutionID <chr>, collectionID <chr>,
# relatedResourceID <chr>
mutate(bison,
latitude_rad = pi * decimalLatitude / 180,
longitude_rad = pi * decimalLongitude / 180,
eventDate = parse_datetime(eventDate))
mutate(bison,
latitude_rad = pi * decimalLatitude / 180,
longitude_rad = pi * decimalLongitude / 180,
eventDate = parse_datetime(eventDate))
# A tibble: 65,499 × 39
bisonID basisOfRecord catalogNumber collectorNumber recordedBy
<int> <chr> <chr> <chr> <chr>
1 844239256 observation OBS174171744 <NA> obsr143470
2 844267315 observation OBS173343892 <NA> obsr143470
3 846018158 observation OBS174269666 <NA> obsr143470
4 846916268 observation OBS173343881 <NA> obsr143470
5 846967932 observation OBS174171742 <NA> obsr143470
6 940794719 observation OBS182987416 <NA> obsr143470
7 940794958 observation OBS182987420 <NA> obsr143470
8 940795061 observation OBS182987419 <NA> obsr143470
9 940795118 observation OBS182987418 <NA> obsr143470
10 940795350 observation OBS182987421 <NA> obsr143470
# ... with 65,489 more rows, and 34 more variables: providerID <int>,
# institutionCode <chr>, resourceID <dbl>,
# ownerInstitutionCollectionCode <chr>, eventDate <dttm>, year <int>,
# providedScientificName <chr>, scientificName <chr>,
# ITISscientificName <chr>, providedCommonName <chr>,
# ITIScommonName <chr>, ITIStsn <chr>, validAcceptedITIStsn <int>,
# providedTSN <chr>, decimalLatitude <dbl>, decimalLongitude <dbl>,
# geodeticDatum <chr>, coordinatePrecision <chr>,
# coordinateUncertaintyInMeters <chr>, verbatimElevation <chr>,
# verbatimDepth <chr>, centroid <chr>, higherGeographyID <chr>,
# computedCountyFips <int>, providedCounty <chr>,
# calculatedCounty <chr>, providedState <chr>, calculatedState <chr>,
# countryCode <chr>, institutionID <chr>, collectionID <chr>,
# relatedResourceID <chr>, latitude_rad <dbl>, longitude_rad <dbl>
mutate(select(filter(bison, ITIScommonName == "Carolina Chickadee"), year, ITIScommonName, decimalLatitude, decimalLongitude, eventDate), latitude_rad = pi * decimalLatitude / 180, longitude_rad = pi * decimalLongitude / 180, eventDate = parse_datetime(eventDate))
temp <- filter(bison, ITIScommonName == "Carolina Chickadee")
temp <- select(temp, year, ITIScommonName, decimalLatitude, decimalLongitude, eventDate)
mutate(temp, latitude_rad = pi * decimalLatitude / 180, longitude_rad = pi * decimalLongitude / 180, eventDate = parse_datetime(eventDate))
bison %>%
filter(ITIScommonName == "Carolina Chickadee") %>%
select(year, ITIScommonName, decimalLatitude, decimalLongitude, eventDate) %>%
mutate(latitude_rad = pi * decimalLatitude / 180,
longitude_rad = pi * decimalLongitude / 180,
eventDate = parse_datetime(eventDate))
bison %>%
filter(ITIScommonName == "Carolina Chickadee") %>%
select(year, ITIScommonName, decimalLatitude, decimalLongitude, eventDate) %>%
mutate(latitude_rad = pi * decimalLatitude / 180,
longitude_rad = pi * decimalLongitude / 180,
eventDate = parse_datetime(eventDate))
# A tibble: 1,478 × 7
year ITIScommonName decimalLatitude decimalLongitude eventDate
<int> <chr> <dbl> <dbl> <dttm>
1 2013 Carolina Chickadee 35.9283 -79.084 2013-02-23
2 2013 Carolina Chickadee 35.9283 -79.084 2013-03-17
3 2012 Carolina Chickadee 35.9283 -79.084 2012-12-31
4 2013 Carolina Chickadee 35.9283 -79.084 2013-01-01
5 2013 Carolina Chickadee 35.9283 -79.084 2013-01-27
6 2013 Carolina Chickadee 35.9283 -79.084 2013-11-17
7 2013 Carolina Chickadee 35.9283 -79.084 2013-11-23
8 2013 Carolina Chickadee 35.9283 -79.084 2013-12-08
9 2013 Carolina Chickadee 35.9283 -79.084 2013-12-07
10 2013 Carolina Chickadee 35.9283 -79.084 2013-12-15
# ... with 1,468 more rows, and 2 more variables: latitude_rad <dbl>,
# longitude_rad <dbl>
Poecile carolinensis is the scientific name for Carolina Chickadee
bison %>%
filter(ITISscientificName == "Poecile carolinensis") %>%
summarise(num_obs = n(), most_recent = max(year))
Poecile carolinensis is the scientific name for Carolina Chickadee
bison %>%
filter(ITISscientificName == "Poecile carolinensis") %>%
summarise(num_obs = n(), most_recent = max(year))
# A tibble: 1 × 2
num_obs most_recent
<int> <int>
1 1478 2013
What is the decimalLongitude of the most recent Dumetella carolinensis sighting? https://goo.gl/forms/LYhbES0N9gdhWBtr2
bison %>%
group_by(ITISscientificName) %>%
summarise(num_obs = n(), most_recent = max(year))
bison %>%
group_by(ITISscientificName) %>%
summarise(num_obs = n(), most_recent = max(year))
# A tibble: 2,544 × 3
ITISscientificName num_obs most_recent
<chr> <int> <int>
1 Ablabesmyia 6 2003
2 Abutilon theophrasti 1 NA
3 Acalypha 1 2009
4 Acalypha gracilens 1 NA
5 Acalypha ostryifolia 1 2014
6 Acalypha rhomboidea 1 NA
7 Acalypha virginica 4 NA
8 Acanthodrilidae 1 1969
9 Acari 4 2003
10 Accipiter cooperii 58 2013
# ... with 2,534 more rows
Species seen at least 3 times in the entire datbase, but only observations in 2015
group_by(bison, ITISscientificName) %>%
filter(n() > 3, year == 2015)
Species seen at least 3 times in the entire datbase, but only observations in 2015
group_by(bison, ITISscientificName) %>%
filter(n() > 3, year == 2015)
Source: local data frame [132 x 37]
Groups: ITISscientificName [88]
bisonID basisOfRecord catalogNumber collectorNumber recordedBy
<int> <chr> <chr> <chr> <chr>
1 1092883743 observation 1502468 <NA> Suzanne Cadwell
2 1092883702 observation 1502460 <NA> Suzanne Cadwell
3 1143531724 observation 1916624 <NA> jptuttle
4 1088924502 observation 1309338 <NA> Milo Pyne
5 1088947111 observation 1391927 <NA> Milo Pyne
6 1143531759 observation 1916623 <NA> jptuttle
7 1092895216 observation 1537080 <NA> Suzanne Cadwell
8 1088952193 observation 1405741 <NA> Suzanne Cadwell
9 1134968123 observation 1825180 <NA> Suzanne Cadwell
10 1092905293 observation 1567347 <NA> Suzanne Cadwell
# ... with 122 more rows, and 32 more variables: providerID <int>,
# institutionCode <chr>, resourceID <dbl>,
# ownerInstitutionCollectionCode <chr>, eventDate <dttm>, year <int>,
# providedScientificName <chr>, scientificName <chr>,
# ITISscientificName <chr>, providedCommonName <chr>,
# ITIScommonName <chr>, ITIStsn <chr>, validAcceptedITIStsn <int>,
# providedTSN <chr>, decimalLatitude <dbl>, decimalLongitude <dbl>,
# geodeticDatum <chr>, coordinatePrecision <chr>,
# coordinateUncertaintyInMeters <chr>, verbatimElevation <chr>,
# verbatimDepth <chr>, centroid <chr>, higherGeographyID <chr>,
# computedCountyFips <int>, providedCounty <chr>,
# calculatedCounty <chr>, providedState <chr>, calculatedState <chr>,
# countryCode <chr>, institutionID <chr>, collectionID <chr>,
# relatedResourceID <chr>
Species seen at least 3 times in 2015
group_by(bison, ITISscientificName, year) %>%
filter(n() > 3, year == 2015)
Species seen at least 3 times in 2015
group_by(bison, ITISscientificName, year) %>%
filter(n() > 3, year == 2015)
Source: local data frame [20 x 37]
Groups: ITISscientificName, year [3]
bisonID basisOfRecord catalogNumber collectorNumber recordedBy
<int> <chr> <chr> <chr> <chr>
1 1088952193 observation 1405741 <NA> Suzanne Cadwell
2 1143534032 observation 1922784 <NA> Suzanne Cadwell
3 1092895077 observation 1536431 <NA> Suzanne Cadwell
4 1135211827 observation 1847559 <NA> Nicki Cagle
5 1088952154 observation 1405740 <NA> Suzanne Cadwell
6 1088951040 observation 1402023 <NA> Robert Graham
7 1088924555 observation 1309508 <NA> Suzanne Cadwell
8 1088924584 observation 1309510 <NA> Suzanne Cadwell
9 1088926416 observation 1333652 <NA> Suzanne Cadwell
10 1088924185 observation 1308312 <NA> mklein
11 1088924277 observation 1308649 <NA> Milo Pyne
12 1088898166 observation 1216125 <NA> Ann Prince
13 1099973107 observation 1727052 <NA> Suzanne Cadwell
14 1098913825 observation 1642366 <NA> fungjustin_112
15 1099973119 observation 1727085 <NA> Suzanne Cadwell
16 1088957892 observation 1422660 <NA> Suzanne Cadwell
17 1088957895 observation 1422661 <NA> Suzanne Cadwell
18 1092878779 observation 1469123 <NA> Suzanne Cadwell
19 1143541738 observation 1945328 <NA> Suzanne Cadwell
20 1088975979 observation 1474612 <NA> Suzanne Cadwell
# ... with 32 more variables: providerID <int>, institutionCode <chr>,
# resourceID <dbl>, ownerInstitutionCollectionCode <chr>,
# eventDate <dttm>, year <int>, providedScientificName <chr>,
# scientificName <chr>, ITISscientificName <chr>,
# providedCommonName <chr>, ITIScommonName <chr>, ITIStsn <chr>,
# validAcceptedITIStsn <int>, providedTSN <chr>, decimalLatitude <dbl>,
# decimalLongitude <dbl>, geodeticDatum <chr>,
# coordinatePrecision <chr>, coordinateUncertaintyInMeters <chr>,
# verbatimElevation <chr>, verbatimDepth <chr>, centroid <chr>,
# higherGeographyID <chr>, computedCountyFips <int>,
# providedCounty <chr>, calculatedCounty <chr>, providedState <chr>,
# calculatedState <chr>, countryCode <chr>, institutionID <chr>,
# collectionID <chr>, relatedResourceID <chr>
Which species has been observed the most by one collector?
https://goo.gl/forms/8qXaFbQGLkCSJXqx2
ITISscientificName
and collectorNumber