%cd /Users/alisonfowler/Docs/Jupityr-notebooks/
/Users/alisonfowler/Docs/Jupityr-notebooks
import pandas as pd
# import data and replace NaNs with empty values
surveys_df = pd.read_csv("data/surveys.csv",
keep_default_na=False, na_values=[""])
# Read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)
# Grab the last 10 rows
survey_sub_last10 = surveys_df.tail(10)
# Reset the index values so the second dataframe appends properly
survey_sub_last10 = survey_sub_last10.reset_index(drop=True)
# drop=True option avoids adding new index column with old index values
# Stack the DataFrames on top of each other
vertical_stack = pd.concat([survey_sub, survey_sub_last10], axis=0)
# Place the DataFrames side by side
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis=1)
print(vertical_stack)
record_id month day year plot_id species_id sex hindfoot_length \ 0 1 7 16 1977 2 NL M 32.0 1 2 7 16 1977 3 NL M 33.0 2 3 7 16 1977 2 DM F 37.0 3 4 7 16 1977 7 DM M 36.0 4 5 7 16 1977 3 DM M 35.0 5 6 7 16 1977 1 PF M 14.0 6 7 7 16 1977 2 PE F NaN 7 8 7 16 1977 1 DM M 37.0 8 9 7 16 1977 1 DM F 34.0 9 10 7 16 1977 6 PF F 20.0 0 35540 12 31 2002 15 PB F 26.0 1 35541 12 31 2002 15 PB F 24.0 2 35542 12 31 2002 15 PB F 26.0 3 35543 12 31 2002 15 PB F 27.0 4 35544 12 31 2002 15 US NaN NaN 5 35545 12 31 2002 15 AH NaN NaN 6 35546 12 31 2002 15 AH NaN NaN 7 35547 12 31 2002 10 RM F 15.0 8 35548 12 31 2002 7 DO M 36.0 9 35549 12 31 2002 5 NaN NaN NaN weight 0 NaN 1 NaN 2 NaN 3 NaN 4 NaN 5 NaN 6 NaN 7 NaN 8 NaN 9 NaN 0 23.0 1 31.0 2 29.0 3 34.0 4 NaN 5 NaN 6 NaN 7 14.0 8 51.0 9 NaN
print(horizontal_stack)
record_id month day year plot_id species_id sex hindfoot_length \ 0 1 7 16 1977 2 NL M 32.0 1 2 7 16 1977 3 NL M 33.0 2 3 7 16 1977 2 DM F 37.0 3 4 7 16 1977 7 DM M 36.0 4 5 7 16 1977 3 DM M 35.0 5 6 7 16 1977 1 PF M 14.0 6 7 7 16 1977 2 PE F NaN 7 8 7 16 1977 1 DM M 37.0 8 9 7 16 1977 1 DM F 34.0 9 10 7 16 1977 6 PF F 20.0 weight record_id month day year plot_id species_id sex \ 0 NaN 35540 12 31 2002 15 PB F 1 NaN 35541 12 31 2002 15 PB F 2 NaN 35542 12 31 2002 15 PB F 3 NaN 35543 12 31 2002 15 PB F 4 NaN 35544 12 31 2002 15 US NaN 5 NaN 35545 12 31 2002 15 AH NaN 6 NaN 35546 12 31 2002 15 AH NaN 7 NaN 35547 12 31 2002 10 RM F 8 NaN 35548 12 31 2002 7 DO M 9 NaN 35549 12 31 2002 5 NaN NaN hindfoot_length weight 0 26.0 23.0 1 24.0 31.0 2 26.0 29.0 3 27.0 34.0 4 NaN NaN 5 NaN NaN 6 NaN NaN 7 15.0 14.0 8 36.0 51.0 9 NaN NaN
# need to reindex the vertical stack data
vertical_stack.reset_index(drop=True)
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
| 1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
| 2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
| 3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
| 4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN |
| 5 | 6 | 7 | 16 | 1977 | 1 | PF | M | 14.0 | NaN |
| 6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN |
| 7 | 8 | 7 | 16 | 1977 | 1 | DM | M | 37.0 | NaN |
| 8 | 9 | 7 | 16 | 1977 | 1 | DM | F | 34.0 | NaN |
| 9 | 10 | 7 | 16 | 1977 | 6 | PF | F | 20.0 | NaN |
| 10 | 35540 | 12 | 31 | 2002 | 15 | PB | F | 26.0 | 23.0 |
| 11 | 35541 | 12 | 31 | 2002 | 15 | PB | F | 24.0 | 31.0 |
| 12 | 35542 | 12 | 31 | 2002 | 15 | PB | F | 26.0 | 29.0 |
| 13 | 35543 | 12 | 31 | 2002 | 15 | PB | F | 27.0 | 34.0 |
| 14 | 35544 | 12 | 31 | 2002 | 15 | US | NaN | NaN | NaN |
| 15 | 35545 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
| 16 | 35546 | 12 | 31 | 2002 | 15 | AH | NaN | NaN | NaN |
| 17 | 35547 | 12 | 31 | 2002 | 10 | RM | F | 15.0 | 14.0 |
| 18 | 35548 | 12 | 31 | 2002 | 7 | DO | M | 36.0 | 51.0 |
| 19 | 35549 | 12 | 31 | 2002 | 5 | NaN | NaN | NaN | NaN |
# Write DataFrame to CSV
vertical_stack.to_csv('data/output/out.csv', index=False)
# index=false prevents pandas from including the index number for each line
# For kicks, read our output back into Python and make sure all looks good
new_output = pd.read_csv('data/output/out.csv', keep_default_na=False, na_values=[""])
print(new_output)
record_id month day year plot_id species_id sex hindfoot_length \
0 1 7 16 1977 2 NL M 32.0
1 2 7 16 1977 3 NL M 33.0
2 3 7 16 1977 2 DM F 37.0
3 4 7 16 1977 7 DM M 36.0
4 5 7 16 1977 3 DM M 35.0
5 6 7 16 1977 1 PF M 14.0
6 7 7 16 1977 2 PE F NaN
7 8 7 16 1977 1 DM M 37.0
8 9 7 16 1977 1 DM F 34.0
9 10 7 16 1977 6 PF F 20.0
10 35540 12 31 2002 15 PB F 26.0
11 35541 12 31 2002 15 PB F 24.0
12 35542 12 31 2002 15 PB F 26.0
13 35543 12 31 2002 15 PB F 27.0
14 35544 12 31 2002 15 US NaN NaN
15 35545 12 31 2002 15 AH NaN NaN
16 35546 12 31 2002 15 AH NaN NaN
17 35547 12 31 2002 10 RM F 15.0
18 35548 12 31 2002 7 DO M 36.0
19 35549 12 31 2002 5 NaN NaN NaN
weight
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
5 NaN
6 NaN
7 NaN
8 NaN
9 NaN
10 23.0
11 31.0
12 29.0
13 34.0
14 NaN
15 NaN
16 NaN
17 14.0
18 51.0
19 NaN
# CHALLENGE - Combine data
# combine surveys2001.csv and surveys2002.csv
# create a plot of average plot weight by year grouped by sex
# export your results as a csv and make sure it reads back into python properly
surveys2001 = pd.read_csv("data/surveys2001.csv")
print(surveys2001)
Unnamed: 0 record_id month day year site_id species_id sex \
0 31710 31711 1 21 2001 1 PB F
1 31711 31712 1 21 2001 1 DM M
2 31712 31713 1 21 2001 1 PB M
3 31713 31714 1 21 2001 1 DO M
4 31714 31715 1 21 2001 2 OT M
... ... ... ... ... ... ... ... ..
1429 33303 33304 12 15 2001 24 RM M
1430 33304 33305 12 15 2001 7 PB M
1431 33305 33306 12 15 2001 7 OT M
1432 33306 33307 12 15 2001 7 OT M
1433 33307 33308 12 15 2001 7 PP M
hindfoot_length weight
0 26.0 25.0
1 37.0 43.0
2 29.0 44.0
3 34.0 53.0
4 20.0 27.0
... ... ...
1429 16.0 10.0
1430 29.0 44.0
1431 19.0 21.0
1432 20.0 19.0
1433 24.0 16.0
[1434 rows x 10 columns]
surveys2002 = pd.read_csv("data/surveys2002.csv")
print(surveys2002)
Unnamed: 0 record_id month day year site_id species_id sex \
0 33320 33321 1 12 2002 1 DM M
1 33321 33322 1 12 2002 1 DO M
2 33322 33323 1 12 2002 1 PB M
3 33324 33325 1 12 2002 1 DO M
4 33325 33326 1 12 2002 2 OT F
... ... ... ... ... ... ... ... ..
2073 35540 35541 12 31 2002 15 PB F
2074 35541 35542 12 31 2002 15 PB F
2075 35542 35543 12 31 2002 15 PB F
2076 35546 35547 12 31 2002 10 RM F
2077 35547 35548 12 31 2002 7 DO M
hindfoot_length weight
0 38.0 44.0
1 37.0 58.0
2 28.0 45.0
3 35.0 29.0
4 20.0 26.0
... ... ...
2073 24.0 31.0
2074 26.0 29.0
2075 27.0 34.0
2076 15.0 14.0
2077 36.0 51.0
[2078 rows x 10 columns]
# Reset the index values to the second dataframe appends properly
surveys2002 = surveys2002.reset_index(drop=True)
combined = pd.concat([surveys2001, surveys2002], axis=0)
print(combined)
Unnamed: 0 record_id month day year site_id species_id sex \
0 31710 31711 1 21 2001 1 PB F
1 31711 31712 1 21 2001 1 DM M
2 31712 31713 1 21 2001 1 PB M
3 31713 31714 1 21 2001 1 DO M
4 31714 31715 1 21 2001 2 OT M
... ... ... ... ... ... ... ... ..
2073 35540 35541 12 31 2002 15 PB F
2074 35541 35542 12 31 2002 15 PB F
2075 35542 35543 12 31 2002 15 PB F
2076 35546 35547 12 31 2002 10 RM F
2077 35547 35548 12 31 2002 7 DO M
hindfoot_length weight
0 26.0 25.0
1 37.0 43.0
2 29.0 44.0
3 34.0 53.0
4 20.0 27.0
... ... ...
2073 24.0 31.0
2074 26.0 29.0
2075 27.0 34.0
2076 15.0 14.0
2077 36.0 51.0
[3512 rows x 10 columns]
# create a plot of average plot weight by year grouped by sex
combined_sex = combined.groupby(['year','sex'])
mean_weights = combined_sex['weight'].mean()
print(mean_weights)
year sex
2001 F 36.034771
M 36.404196
2002 F 33.878761
M 37.490506
Name: weight, dtype: float64
mean_weights.unstack()
| sex | F | M |
|---|---|---|
| year | ||
| 2001 | 36.034771 | 36.404196 |
| 2002 | 33.878761 | 37.490506 |
to_plot = mean_weights.unstack()
print(to_plot)
sex F M year 2001 36.034771 36.404196 2002 33.878761 37.490506
to_plot.plot(kind='bar', stacked=True, title="Average plot weight by year and sex")
<matplotlib.axes._subplots.AxesSubplot at 0x1154288b0>
# export
to_plot.to_csv('data/output/mean_weights.csv', index=False)
# reimport to check
new_output = pd.read_csv('data/output/mean_weights.csv', keep_default_na=False, na_values=[""])
new_output
| F | M | |
|---|---|---|
| 0 | 36.034771 | 36.404196 |
| 1 | 33.878761 | 37.490506 |
# Read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)
survey_sub
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
| 1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN |
| 2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
| 3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN |
| 4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN |
| 5 | 6 | 7 | 16 | 1977 | 1 | PF | M | 14.0 | NaN |
| 6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN |
| 7 | 8 | 7 | 16 | 1977 | 1 | DM | M | 37.0 | NaN |
| 8 | 9 | 7 | 16 | 1977 | 1 | DM | F | 34.0 | NaN |
| 9 | 10 | 7 | 16 | 1977 | 6 | PF | F | 20.0 | NaN |
# Import a small subset of the species data designed for this part of the lesson.
# It is stored in the data folder.
species_sub = pd.read_csv('data/speciesSubset.csv', keep_default_na=False, na_values=[""])
species_sub
| species_id | genus | species | taxa | |
|---|---|---|---|---|
| 0 | DM | Dipodomys | merriami | Rodent |
| 1 | NL | Neotoma | albigula | Rodent |
| 2 | PE | Peromyscus | eremicus | Rodent |
>>> species_sub.columns
Index(['species_id', 'genus', 'species', 'taxa'], dtype='object')
>>> survey_sub.columns
Index(['record_id', 'month', 'day', 'year', 'plot_id', 'species_id', 'sex',
'hindfoot_length', 'weight'],
dtype='object')
# Inner Joins
# combines two dataframes based on a join key and returns a new dataframe that contains only those rows that have matching values
# in both of the original dataframes
merged_inner = pd.merge(left=survey_sub, right=species_sub, left_on='species_id', right_on='species_id')
# In this case `species_id` is the only column name in both dataframes, so if we skipped `left_on`
# And `right_on` arguments we would still get the same result
merged_inner.shape
(8, 12)
merged_inner
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | genus | species | taxa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN | Neotoma | albigula | Rodent |
| 1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN | Dipodomys | merriami | Rodent |
| 3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN | Dipodomys | merriami | Rodent |
| 4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN | Dipodomys | merriami | Rodent |
| 5 | 8 | 7 | 16 | 1977 | 1 | DM | M | 37.0 | NaN | Dipodomys | merriami | Rodent |
| 6 | 9 | 7 | 16 | 1977 | 1 | DM | F | 34.0 | NaN | Dipodomys | merriami | Rodent |
| 7 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN | Peromyscus | eremicus | Rodent |
# Left Joins
# combines two dataframes and includes ALL of the data frame from the left, and only certain rows from the right.
merged_left = pd.merge(left=survey_sub, right=species_sub, how='left', left_on='species_id', right_on='species_id')
merged_left
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | genus | species | taxa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN | Neotoma | albigula | Rodent |
| 1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN | Dipodomys | merriami | Rodent |
| 3 | 4 | 7 | 16 | 1977 | 7 | DM | M | 36.0 | NaN | Dipodomys | merriami | Rodent |
| 4 | 5 | 7 | 16 | 1977 | 3 | DM | M | 35.0 | NaN | Dipodomys | merriami | Rodent |
| 5 | 6 | 7 | 16 | 1977 | 1 | PF | M | 14.0 | NaN | NaN | NaN | NaN |
| 6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN | Peromyscus | eremicus | Rodent |
| 7 | 8 | 7 | 16 | 1977 | 1 | DM | M | 37.0 | NaN | Dipodomys | merriami | Rodent |
| 8 | 9 | 7 | 16 | 1977 | 1 | DM | F | 34.0 | NaN | Dipodomys | merriami | Rodent |
| 9 | 10 | 7 | 16 | 1977 | 6 | PF | F | 20.0 | NaN | NaN | NaN | NaN |
merged_left[ pd.isnull(merged_left.genus) ]
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | genus | species | taxa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 5 | 6 | 7 | 16 | 1977 | 1 | PF | M | 14.0 | NaN | NaN | NaN | NaN |
| 9 | 10 | 7 | 16 | 1977 | 6 | PF | F | 20.0 | NaN | NaN | NaN | NaN |
# other join types:
# right (outer) join: similar to a left join, except all rows from the right DataFrame are kept,
# while rows from the left DataFrame without matching join key(s) values are discarded.
# full (outer) join: This join type returns the all pairwise combinations of rows from both DataFrames;
# i.e., the result DataFrame will NaN where data is missing in one of the dataframes.
# This join type is very rarely used.
# Challenge - Distributions
# Create a new DataFrame by joining the contents of the surveys.csv and species.csv tables.
# Then calculate and plot the distribution of:
# taxa by plot
# taxa by sex by plot
spp = pd.read_csv("data/species.csv")
spp
| species_id | genus | species | taxa | |
|---|---|---|---|---|
| 0 | AB | Amphispiza | bilineata | Bird |
| 1 | AH | Ammospermophilus | harrisi | Rodent |
| 2 | AS | Ammodramus | savannarum | Bird |
| 3 | BA | Baiomys | taylori | Rodent |
| 4 | CB | Campylorhynchus | brunneicapillus | Bird |
| 5 | CM | Calamospiza | melanocorys | Bird |
| 6 | CQ | Callipepla | squamata | Bird |
| 7 | CS | Crotalus | scutalatus | Reptile |
| 8 | CT | Cnemidophorus | tigris | Reptile |
| 9 | CU | Cnemidophorus | uniparens | Reptile |
| 10 | CV | Crotalus | viridis | Reptile |
| 11 | DM | Dipodomys | merriami | Rodent |
| 12 | DO | Dipodomys | ordii | Rodent |
| 13 | DS | Dipodomys | spectabilis | Rodent |
| 14 | DX | Dipodomys | sp. | Rodent |
| 15 | EO | Eumeces | obsoletus | Reptile |
| 16 | GS | Gambelia | silus | Reptile |
| 17 | NL | Neotoma | albigula | Rodent |
| 18 | NX | Neotoma | sp. | Rodent |
| 19 | OL | Onychomys | leucogaster | Rodent |
| 20 | OT | Onychomys | torridus | Rodent |
| 21 | OX | Onychomys | sp. | Rodent |
| 22 | PB | Chaetodipus | baileyi | Rodent |
| 23 | PC | Pipilo | chlorurus | Bird |
| 24 | PE | Peromyscus | eremicus | Rodent |
| 25 | PF | Perognathus | flavus | Rodent |
| 26 | PG | Pooecetes | gramineus | Bird |
| 27 | PH | Perognathus | hispidus | Rodent |
| 28 | PI | Chaetodipus | intermedius | Rodent |
| 29 | PL | Peromyscus | leucopus | Rodent |
| 30 | PM | Peromyscus | maniculatus | Rodent |
| 31 | PP | Chaetodipus | penicillatus | Rodent |
| 32 | PU | Pipilo | fuscus | Bird |
| 33 | PX | Chaetodipus | sp. | Rodent |
| 34 | RF | Reithrodontomys | fulvescens | Rodent |
| 35 | RM | Reithrodontomys | megalotis | Rodent |
| 36 | RO | Reithrodontomys | montanus | Rodent |
| 37 | RX | Reithrodontomys | sp. | Rodent |
| 38 | SA | Sylvilagus | audubonii | Rabbit |
| 39 | SB | Spizella | breweri | Bird |
| 40 | SC | Sceloporus | clarki | Reptile |
| 41 | SF | Sigmodon | fulviventer | Rodent |
| 42 | SH | Sigmodon | hispidus | Rodent |
| 43 | SO | Sigmodon | ochrognathus | Rodent |
| 44 | SS | Spermophilus | spilosoma | Rodent |
| 45 | ST | Spermophilus | tereticaudus | Rodent |
| 46 | SU | Sceloporus | undulatus | Reptile |
| 47 | SX | Sigmodon | sp. | Rodent |
| 48 | UL | Lizard | sp. | Reptile |
| 49 | UP | Pipilo | sp. | Bird |
| 50 | UR | Rodent | sp. | Rodent |
| 51 | US | Sparrow | sp. | Bird |
| 52 | ZL | Zonotrichia | leucophrys | Bird |
| 53 | ZM | Zenaida | macroura | Bird |
# I guess we will use an inner join for this?
new_df = pd.merge(left=surveys_df, right=spp, left_on='species_id', right_on='species_id')
new_df.dtypes
record_id int64 month int64 day int64 year int64 plot_id int64 species_id object sex object hindfoot_length float64 weight float64 genus object species object taxa object dtype: object
new_df.shape
(34786, 12)
new_df
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | genus | species | taxa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN | Neotoma | albigula | Rodent |
| 1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 2 | 22 | 7 | 17 | 1977 | 15 | NL | F | 31.0 | NaN | Neotoma | albigula | Rodent |
| 3 | 38 | 7 | 17 | 1977 | 17 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 4 | 72 | 8 | 19 | 1977 | 2 | NL | M | 31.0 | NaN | Neotoma | albigula | Rodent |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 34781 | 28988 | 12 | 23 | 1998 | 6 | CT | NaN | NaN | NaN | Cnemidophorus | tigris | Reptile |
| 34782 | 35512 | 12 | 31 | 2002 | 11 | US | NaN | NaN | NaN | Sparrow | sp. | Bird |
| 34783 | 35513 | 12 | 31 | 2002 | 11 | US | NaN | NaN | NaN | Sparrow | sp. | Bird |
| 34784 | 35528 | 12 | 31 | 2002 | 13 | US | NaN | NaN | NaN | Sparrow | sp. | Bird |
| 34785 | 35544 | 12 | 31 | 2002 | 15 | US | NaN | NaN | NaN | Sparrow | sp. | Bird |
34786 rows × 12 columns
# plot distribution of taxa by plot_id
plot_plot_id = new_df.groupby(["plot_id"])
plot_plot_id.head()
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | genus | species | taxa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN | Neotoma | albigula | Rodent |
| 1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 2 | 22 | 7 | 17 | 1977 | 15 | NL | F | 31.0 | NaN | Neotoma | albigula | Rodent |
| 3 | 38 | 7 | 17 | 1977 | 17 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 4 | 72 | 8 | 19 | 1977 | 2 | NL | M | 31.0 | NaN | Neotoma | albigula | Rodent |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1291 | 59 | 7 | 18 | 1977 | 19 | DM | M | 33.0 | NaN | Dipodomys | merriami | Rodent |
| 1292 | 61 | 7 | 18 | 1977 | 23 | DM | M | 35.0 | NaN | Dipodomys | merriami | Rodent |
| 1338 | 130 | 8 | 21 | 1977 | 16 | DM | F | 34.0 | NaN | Dipodomys | merriami | Rodent |
| 2086 | 2207 | 11 | 18 | 1979 | 10 | DM | F | 34.0 | 36.0 | Dipodomys | merriami | Rodent |
| 2199 | 2477 | 2 | 25 | 1980 | 10 | DM | F | 34.0 | 40.0 | Dipodomys | merriami | Rodent |
120 rows × 12 columns
plot_taxa["taxa"].count()
plot_id taxa
1 Bird 11
Rabbit 2
Rodent 1976
2 Bird 17
Rabbit 3
...
23 Rabbit 8
Rodent 407
24 Bird 17
Rabbit 2
Rodent 1006
Name: taxa, Length: 80, dtype: int64
counts_to_plot = plot_taxa["taxa"].count()
counts_to_plot.plot(kind = "bar", stacked = True, title = "Taxa counts in each plot")
<matplotlib.axes._subplots.AxesSubplot at 0x116228f40>
# now, taxa by sex by plot
# I'm not sure how we could stack more than one variable (taxa and sex)?
plot_sex = new_df.groupby(["plot_id", "sex"])
plot_sex.head()
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | genus | species | taxa | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN | Neotoma | albigula | Rodent |
| 1 | 2 | 7 | 16 | 1977 | 3 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 2 | 22 | 7 | 17 | 1977 | 15 | NL | F | 31.0 | NaN | Neotoma | albigula | Rodent |
| 3 | 38 | 7 | 17 | 1977 | 17 | NL | M | 33.0 | NaN | Neotoma | albigula | Rodent |
| 4 | 72 | 8 | 19 | 1977 | 2 | NL | M | 31.0 | NaN | Neotoma | albigula | Rodent |
| 5 | 106 | 8 | 20 | 1977 | 12 | NL | NaN | NaN | NaN | Neotoma | albigula | Rodent |
| 6 | 107 | 8 | 20 | 1977 | 18 | NL | NaN | NaN | NaN | Neotoma | albigula | Rodent |
| 7 | 121 | 8 | 21 | 1977 | 15 | NL | NaN | NaN | NaN | Neotoma | albigula | Rodent |
| 8 | 171 | 9 | 11 | 1977 | 12 | NL | NaN | NaN | NaN | Neotoma | albigula | Rodent |
| 9 | 194 | 9 | 12 | 1977 | 11 | NL | NaN | NaN | NaN | Neotoma | albigula | Rodent |
| 37 | 588 | 2 | 18 | 1978 | 2 | NL | M | NaN | 218.0 | Neotoma | albigula | Rodent |
| 39 | 646 | 2 | 20 | 1978 | 18 | NL | M | 32.0 | 228.0 | Neotoma | albigula | Rodent |
| 40 | 655 | 3 | 11 | 1978 | 3 | NL | M | 32.0 | 232.0 | Neotoma | albigula | Rodent |
| 42 | 736 | 4 | 8 | 1978 | 6 | NL | M | NaN | NaN | Neotoma | albigula | Rodent |
| 44 | 755 | 4 | 8 | 1978 | 3 | NL | F | 32.0 | NaN | Neotoma | albigula | Rodent |
| 45 | 825 | 4 | 10 | 1978 | 18 | NL | M | NaN | 225.0 | Neotoma | albigula | Rodent |
| 46 | 845 | 5 | 6 | 1978 | 2 | NL | M | 32.0 | 204.0 | Neotoma | albigula | Rodent |
| 47 | 848 | 5 | 6 | 1978 | 22 | NL | M | 32.0 | 212.0 | Neotoma | albigula | Rodent |
| 48 | 869 | 5 | 17 | 1978 | 20 | NL | F | 33.0 | 174.0 | Neotoma | albigula | Rodent |
| 49 | 875 | 5 | 17 | 1978 | 5 | NL | F | 33.0 | 212.0 | Neotoma | albigula | Rodent |
| 50 | 886 | 5 | 18 | 1978 | 3 | NL | F | NaN | 182.0 | Neotoma | albigula | Rodent |
| 51 | 971 | 6 | 8 | 1978 | 17 | NL | F | 32.0 | 135.0 | Neotoma | albigula | Rodent |
| 52 | 990 | 6 | 9 | 1978 | 2 | NL | M | NaN | 200.0 | Neotoma | albigula | Rodent |
| 53 | 1029 | 6 | 9 | 1978 | 18 | NL | M | 32.0 | 232.0 | Neotoma | albigula | Rodent |
| 54 | 1081 | 7 | 8 | 1978 | 14 | NL | M | 33.0 | 113.0 | Neotoma | albigula | Rodent |
| 55 | 1083 | 7 | 8 | 1978 | 3 | NL | F | 34.0 | 115.0 | Neotoma | albigula | Rodent |
| 56 | 1087 | 7 | 8 | 1978 | 3 | NL | M | 34.0 | 190.0 | Neotoma | albigula | Rodent |
| 57 | 1119 | 8 | 4 | 1978 | 12 | NL | M | 33.0 | 189.0 | Neotoma | albigula | Rodent |
| 58 | 1131 | 8 | 4 | 1978 | 20 | NL | F | 33.0 | 179.0 | Neotoma | albigula | Rodent |
| 59 | 1146 | 8 | 4 | 1978 | 17 | NL | F | 32.0 | 149.0 | Neotoma | albigula | Rodent |
| 61 | 1168 | 8 | 5 | 1978 | 15 | NL | F | 31.0 | 196.0 | Neotoma | albigula | Rodent |
| 62 | 1197 | 9 | 3 | 1978 | 12 | NL | F | NaN | 158.0 | Neotoma | albigula | Rodent |
| 63 | 1204 | 9 | 3 | 1978 | 12 | NL | M | NaN | 189.0 | Neotoma | albigula | Rodent |
| 64 | 1220 | 9 | 3 | 1978 | 20 | NL | F | 31.0 | 181.0 | Neotoma | albigula | Rodent |
| 65 | 1250 | 9 | 4 | 1978 | 3 | NL | F | 34.0 | 141.0 | Neotoma | albigula | Rodent |
| 67 | 1285 | 9 | 4 | 1978 | 14 | NL | M | 32.0 | 118.0 | Neotoma | albigula | Rodent |
| 75 | 1465 | 11 | 5 | 1978 | 14 | NL | M | 32.0 | 153.0 | Neotoma | albigula | Rodent |
| 76 | 1490 | 12 | 2 | 1978 | 12 | NL | M | 32.0 | 223.0 | Neotoma | albigula | Rodent |
| 77 | 1512 | 12 | 2 | 1978 | 20 | NL | F | 31.0 | 181.0 | Neotoma | albigula | Rodent |
| 78 | 1530 | 12 | 3 | 1978 | 14 | NL | M | NaN | 169.0 | Neotoma | albigula | Rodent |
| 79 | 1729 | 4 | 28 | 1979 | 12 | NL | F | 31.0 | 86.0 | Neotoma | albigula | Rodent |
| 80 | 1731 | 4 | 28 | 1979 | 12 | NL | F | 32.0 | 239.0 | Neotoma | albigula | Rodent |
| 82 | 1763 | 4 | 29 | 1979 | 18 | NL | F | 30.0 | 126.0 | Neotoma | albigula | Rodent |
| 83 | 1776 | 4 | 29 | 1979 | 4 | NL | M | 32.0 | 101.0 | Neotoma | albigula | Rodent |
| 84 | 1782 | 5 | 29 | 1979 | 12 | NL | M | 27.0 | 66.0 | Neotoma | albigula | Rodent |
| 85 | 1784 | 5 | 29 | 1979 | 12 | NL | M | 30.0 | 72.0 | Neotoma | albigula | Rodent |
| 87 | 1807 | 5 | 29 | 1979 | 20 | NL | M | 27.0 | 40.0 | Neotoma | albigula | Rodent |
| 89 | 1863 | 7 | 3 | 1979 | 20 | NL | F | 30.0 | 94.0 | Neotoma | albigula | Rodent |
| 92 | 1928 | 7 | 25 | 1979 | 14 | NL | M | 36.0 | 151.0 | Neotoma | albigula | Rodent |
| 94 | 1982 | 8 | 22 | 1979 | 12 | NL | F | 29.0 | 72.0 | Neotoma | albigula | Rodent |
| 95 | 1993 | 8 | 22 | 1979 | 17 | NL | F | 30.0 | 105.0 | Neotoma | albigula | Rodent |
| 97 | 2078 | 10 | 24 | 1979 | 12 | NL | F | 32.0 | 92.0 | Neotoma | albigula | Rodent |
| 99 | 2102 | 10 | 24 | 1979 | 17 | NL | M | 32.0 | 133.0 | Neotoma | albigula | Rodent |
| 100 | 2107 | 10 | 24 | 1979 | 20 | NL | M | 32.0 | 171.0 | Neotoma | albigula | Rodent |
| 101 | 2133 | 10 | 25 | 1979 | 2 | NL | F | 33.0 | 274.0 | Neotoma | albigula | Rodent |
| 102 | 2174 | 11 | 17 | 1979 | 14 | NL | F | 31.0 | 109.0 | Neotoma | albigula | Rodent |
| 103 | 2184 | 11 | 17 | 1979 | 2 | NL | F | 30.0 | 186.0 | Neotoma | albigula | Rodent |
| 104 | 2191 | 11 | 17 | 1979 | 17 | NL | M | 31.0 | 153.0 | Neotoma | albigula | Rodent |
| 105 | 2211 | 11 | 18 | 1979 | 11 | NL | F | 33.0 | 122.0 | Neotoma | albigula | Rodent |
| 106 | 2216 | 11 | 18 | 1979 | 13 | NL | F | 31.0 | 168.0 | Neotoma | albigula | Rodent |
| 107 | 2231 | 11 | 18 | 1979 | 19 | NL | F | 33.0 | 122.0 | Neotoma | albigula | Rodent |
| 111 | 2329 | 1 | 15 | 1980 | 17 | NL | M | 35.0 | 187.0 | Neotoma | albigula | Rodent |
| 112 | 2346 | 1 | 16 | 1980 | 5 | NL | M | 35.0 | 239.0 | Neotoma | albigula | Rodent |
| 113 | 2350 | 1 | 16 | 1980 | 6 | NL | M | 35.0 | 241.0 | Neotoma | albigula | Rodent |
| 114 | 2362 | 1 | 16 | 1980 | 17 | NL | M | 32.0 | 168.0 | Neotoma | albigula | Rodent |
| 115 | 2395 | 1 | 16 | 1980 | 14 | NL | F | 33.0 | 120.0 | Neotoma | albigula | Rodent |
| 116 | 2406 | 1 | 16 | 1980 | 2 | NL | F | 33.0 | 184.0 | Neotoma | albigula | Rodent |
| 117 | 2498 | 2 | 25 | 1980 | 24 | NL | F | 34.0 | 170.0 | Neotoma | albigula | Rodent |
| 118 | 2522 | 2 | 25 | 1980 | 14 | NL | F | 32.0 | 147.0 | Neotoma | albigula | Rodent |
| 120 | 2694 | 3 | 9 | 1980 | 20 | NL | M | NaN | 177.0 | Neotoma | albigula | Rodent |
| 121 | 2728 | 3 | 9 | 1980 | 2 | NL | F | NaN | NaN | Neotoma | albigula | Rodent |
| 122 | 2732 | 3 | 9 | 1980 | 20 | NL | M | NaN | 212.0 | Neotoma | albigula | Rodent |
| 125 | 2904 | 4 | 18 | 1980 | 18 | NL | F | NaN | 110.0 | Neotoma | albigula | Rodent |
| 126 | 2927 | 5 | 17 | 1980 | 24 | NL | M | NaN | 109.0 | Neotoma | albigula | Rodent |
| 127 | 2932 | 5 | 17 | 1980 | 1 | NL | M | 30.0 | 119.0 | Neotoma | albigula | Rodent |
| 128 | 2952 | 5 | 17 | 1980 | 20 | NL | M | 32.0 | 207.0 | Neotoma | albigula | Rodent |
| 129 | 2992 | 5 | 18 | 1980 | 14 | NL | F | 33.0 | 189.0 | Neotoma | albigula | Rodent |
| 130 | 2994 | 5 | 18 | 1980 | 18 | NL | M | 32.0 | 73.0 | Neotoma | albigula | Rodent |
| 132 | 3000 | 5 | 18 | 1980 | 2 | NL | F | 31.0 | 87.0 | Neotoma | albigula | Rodent |
| 133 | 3001 | 5 | 18 | 1980 | 18 | NL | F | 30.0 | 75.0 | Neotoma | albigula | Rodent |
| 135 | 3032 | 5 | 25 | 1980 | 5 | NL | F | NaN | 135.0 | Neotoma | albigula | Rodent |
| 136 | 3058 | 6 | 22 | 1980 | 24 | NL | F | 34.0 | 187.0 | Neotoma | albigula | Rodent |
| 138 | 3087 | 6 | 23 | 1980 | 13 | NL | M | 35.0 | 241.0 | Neotoma | albigula | Rodent |
| 139 | 3089 | 6 | 23 | 1980 | 3 | NL | F | 32.0 | 150.0 | Neotoma | albigula | Rodent |
| 140 | 3123 | 7 | 21 | 1980 | 24 | NL | F | 29.0 | 75.0 | Neotoma | albigula | Rodent |
| 141 | 3133 | 7 | 21 | 1980 | 24 | NL | F | 34.0 | 191.0 | Neotoma | albigula | Rodent |
| 143 | 3162 | 7 | 22 | 1980 | 14 | NL | F | 33.0 | 80.0 | Neotoma | albigula | Rodent |
| 144 | 3195 | 8 | 13 | 1980 | 24 | NL | F | 31.0 | 108.0 | Neotoma | albigula | Rodent |
| 145 | 3212 | 8 | 13 | 1980 | 11 | NL | M | 31.0 | 83.0 | Neotoma | albigula | Rodent |
| 149 | 3272 | 9 | 7 | 1980 | 11 | NL | M | 31.0 | 143.0 | Neotoma | albigula | Rodent |
| 152 | 3343 | 10 | 11 | 1980 | 1 | NL | M | 31.0 | 171.0 | Neotoma | albigula | Rodent |
| 156 | 3426 | 11 | 8 | 1980 | 13 | NL | F | 31.0 | 124.0 | Neotoma | albigula | Rodent |
| 157 | 3429 | 11 | 8 | 1980 | 1 | NL | M | 33.0 | 153.0 | Neotoma | albigula | Rodent |
| 161 | 3557 | 12 | 15 | 1980 | 1 | NL | M | 33.0 | 174.0 | Neotoma | albigula | Rodent |
| 162 | 3649 | 12 | 16 | 1980 | 13 | NL | F | 31.0 | 120.0 | Neotoma | albigula | Rodent |
| 164 | 3669 | 12 | 16 | 1980 | 18 | NL | F | 35.0 | 189.0 | Neotoma | albigula | Rodent |
| 165 | 3679 | 12 | 16 | 1980 | 11 | NL | M | 31.0 | 116.0 | Neotoma | albigula | Rodent |
| 166 | 3691 | 1 | 11 | 1981 | 13 | NL | F | 29.0 | 118.0 | Neotoma | albigula | Rodent |
| 168 | 3747 | 1 | 12 | 1981 | 4 | NL | F | 33.0 | 200.0 | Neotoma | albigula | Rodent |
| 169 | 3777 | 1 | 12 | 1981 | 1 | NL | M | 35.0 | 173.0 | Neotoma | albigula | Rodent |
| 172 | 3816 | 1 | 31 | 1981 | 13 | NL | F | 30.0 | 135.0 | Neotoma | albigula | Rodent |
| 180 | 4302 | 4 | 6 | 1981 | 18 | NL | M | NaN | 220.0 | Neotoma | albigula | Rodent |
| 182 | 4395 | 5 | 3 | 1981 | 18 | NL | F | 32.0 | 56.0 | Neotoma | albigula | Rodent |
| 188 | 4568 | 6 | 5 | 1981 | 24 | NL | M | 33.0 | 136.0 | Neotoma | albigula | Rodent |
| 190 | 4604 | 7 | 7 | 1981 | 17 | NL | F | 31.0 | 126.0 | Neotoma | albigula | Rodent |
| 191 | 4618 | 7 | 7 | 1981 | 24 | NL | M | 34.0 | 137.0 | Neotoma | albigula | Rodent |
| 193 | 4649 | 7 | 8 | 1981 | 15 | NL | M | NaN | 158.0 | Neotoma | albigula | Rodent |
| 196 | 4692 | 7 | 30 | 1981 | 15 | NL | M | 33.0 | 174.0 | Neotoma | albigula | Rodent |
| 197 | 4713 | 7 | 30 | 1981 | 4 | NL | M | 34.0 | 175.0 | Neotoma | albigula | Rodent |
| 198 | 4756 | 8 | 30 | 1981 | 24 | NL | M | NaN | 193.0 | Neotoma | albigula | Rodent |
| 200 | 4782 | 8 | 30 | 1981 | 11 | NL | F | 32.0 | 172.0 | Neotoma | albigula | Rodent |
| 201 | 4798 | 8 | 31 | 1981 | 8 | NL | M | 31.0 | 173.0 | Neotoma | albigula | Rodent |
| 202 | 4803 | 8 | 31 | 1981 | 9 | NL | M | 31.0 | 156.0 | Neotoma | albigula | Rodent |
| 203 | 4831 | 9 | 29 | 1981 | 21 | NL | F | 32.0 | 165.0 | Neotoma | albigula | Rodent |
| 204 | 4837 | 9 | 30 | 1981 | 5 | NL | F | 32.0 | 185.0 | Neotoma | albigula | Rodent |
| 205 | 4838 | 9 | 30 | 1981 | 11 | NL | M | 31.0 | 155.0 | Neotoma | albigula | Rodent |
| 206 | 4846 | 9 | 30 | 1981 | 11 | NL | F | 32.0 | 160.0 | Neotoma | albigula | Rodent |
| 209 | 4902 | 10 | 24 | 1981 | 21 | NL | F | 34.0 | 165.0 | Neotoma | albigula | Rodent |
| 210 | 4914 | 10 | 25 | 1981 | 11 | NL | M | 34.0 | 162.0 | Neotoma | albigula | Rodent |
| 211 | 4923 | 10 | 25 | 1981 | 11 | NL | F | 33.0 | 157.0 | Neotoma | albigula | Rodent |
| 212 | 4935 | 10 | 25 | 1981 | 13 | NL | M | 35.0 | 177.0 | Neotoma | albigula | Rodent |
| 213 | 4940 | 10 | 26 | 1981 | 3 | NL | M | 34.0 | 195.0 | Neotoma | albigula | Rodent |
| 217 | 4986 | 11 | 22 | 1981 | 11 | NL | F | 33.0 | 169.0 | Neotoma | albigula | Rodent |
| 219 | 5024 | 11 | 23 | 1981 | 5 | NL | F | 33.0 | 186.0 | Neotoma | albigula | Rodent |
| 223 | 5069 | 11 | 23 | 1981 | 13 | NL | M | NaN | NaN | Neotoma | albigula | Rodent |
| 227 | 5127 | 12 | 31 | 1981 | 24 | NL | M | 34.0 | 157.0 | Neotoma | albigula | Rodent |
| 229 | 5161 | 1 | 1 | 1982 | 5 | NL | F | 33.0 | 193.0 | Neotoma | albigula | Rodent |
| 232 | 5195 | 1 | 1 | 1982 | 6 | NL | M | 35.0 | 152.0 | Neotoma | albigula | Rodent |
| 246 | 5403 | 2 | 23 | 1982 | 3 | NL | M | 35.0 | 211.0 | Neotoma | albigula | Rodent |
| 247 | 5458 | 2 | 23 | 1982 | 5 | NL | M | 34.0 | 235.0 | Neotoma | albigula | Rodent |
| 255 | 5632 | 3 | 30 | 1982 | 1 | NL | F | 31.0 | 109.0 | Neotoma | albigula | Rodent |
| 257 | 5740 | 4 | 28 | 1982 | 17 | NL | F | 32.0 | 146.0 | Neotoma | albigula | Rodent |
| 261 | 5797 | 4 | 29 | 1982 | 15 | NL | M | 31.0 | 129.0 | Neotoma | albigula | Rodent |
| 269 | 5920 | 5 | 22 | 1982 | 15 | NL | M | 33.0 | 164.0 | Neotoma | albigula | Rodent |
| 275 | 5995 | 6 | 28 | 1982 | 10 | NL | M | NaN | 237.0 | Neotoma | albigula | Rodent |
| 276 | 5999 | 6 | 28 | 1982 | 15 | NL | M | NaN | 183.0 | Neotoma | albigula | Rodent |
| 279 | 6022 | 6 | 28 | 1982 | 1 | NL | F | 33.0 | 143.0 | Neotoma | albigula | Rodent |
| 282 | 6044 | 6 | 28 | 1982 | 15 | NL | F | NaN | 112.0 | Neotoma | albigula | Rodent |
| 286 | 6110 | 6 | 29 | 1982 | 16 | NL | F | NaN | 99.0 | Neotoma | albigula | Rodent |
| 291 | 6187 | 7 | 25 | 1982 | 1 | NL | F | NaN | 145.0 | Neotoma | albigula | Rodent |
| 292 | 6202 | 7 | 25 | 1982 | 9 | NL | M | NaN | NaN | Neotoma | albigula | Rodent |
| 294 | 6225 | 7 | 26 | 1982 | 13 | NL | M | NaN | 60.0 | Neotoma | albigula | Rodent |
| 303 | 6291 | 7 | 27 | 1982 | 16 | NL | F | NaN | 109.0 | Neotoma | albigula | Rodent |
| 305 | 6311 | 7 | 27 | 1982 | 16 | NL | F | NaN | 108.0 | Neotoma | albigula | Rodent |
| 316 | 6488 | 8 | 16 | 1982 | 15 | NL | F | 30.0 | 137.0 | Neotoma | albigula | Rodent |
| 317 | 6490 | 8 | 16 | 1982 | 9 | NL | M | 32.0 | 117.0 | Neotoma | albigula | Rodent |
| 320 | 6515 | 9 | 18 | 1982 | 9 | NL | M | 33.0 | 159.0 | Neotoma | albigula | Rodent |
| 331 | 6962 | 11 | 21 | 1982 | 13 | NL | M | 34.0 | 160.0 | Neotoma | albigula | Rodent |
| 351 | 7902 | 5 | 15 | 1983 | 6 | NL | F | 31.0 | 116.0 | Neotoma | albigula | Rodent |
| 354 | 7955 | 6 | 17 | 1983 | 22 | NL | F | 30.0 | 88.0 | Neotoma | albigula | Rodent |
| 360 | 8033 | 6 | 18 | 1983 | 15 | NL | F | 29.0 | 117.0 | Neotoma | albigula | Rodent |
| 365 | 8104 | 7 | 16 | 1983 | 22 | NL | F | 31.0 | 110.0 | Neotoma | albigula | Rodent |
| 368 | 8129 | 7 | 17 | 1983 | 1 | NL | F | 31.0 | 135.0 | Neotoma | albigula | Rodent |
| 369 | 8141 | 7 | 17 | 1983 | 9 | NL | M | 33.0 | 132.0 | Neotoma | albigula | Rodent |
| 377 | 8207 | 8 | 15 | 1983 | 22 | NL | F | 32.0 | NaN | Neotoma | albigula | Rodent |
| 381 | 8233 | 8 | 16 | 1983 | 6 | NL | F | 33.0 | 120.0 | Neotoma | albigula | Rodent |
| 388 | 8285 | 9 | 10 | 1983 | 16 | NL | F | 32.0 | 152.0 | Neotoma | albigula | Rodent |
| 390 | 8288 | 9 | 10 | 1983 | 19 | NL | F | 31.0 | 139.0 | Neotoma | albigula | Rodent |
| 398 | 8357 | 9 | 11 | 1983 | 4 | NL | F | 32.0 | 196.0 | Neotoma | albigula | Rodent |
| 400 | 8376 | 9 | 11 | 1983 | 6 | NL | F | 32.0 | 139.0 | Neotoma | albigula | Rodent |
| 411 | 8470 | 10 | 15 | 1983 | 21 | NL | F | 31.0 | 131.0 | Neotoma | albigula | Rodent |
| 413 | 8484 | 10 | 16 | 1983 | 5 | NL | M | 33.0 | 184.0 | Neotoma | albigula | Rodent |
| 418 | 8516 | 10 | 16 | 1983 | 6 | NL | F | 32.0 | 158.0 | Neotoma | albigula | Rodent |
| 432 | 8739 | 12 | 8 | 1983 | 21 | NL | F | 32.0 | 144.0 | Neotoma | albigula | Rodent |
| 437 | 8783 | 12 | 9 | 1983 | 6 | NL | F | 34.0 | 178.0 | Neotoma | albigula | Rodent |
| 460 | 9265 | 5 | 28 | 1984 | 1 | NL | F | 32.0 | 134.0 | Neotoma | albigula | Rodent |
| 474 | 9398 | 7 | 31 | 1984 | 23 | NL | M | 31.0 | 123.0 | Neotoma | albigula | Rodent |
| 524 | 10668 | 8 | 19 | 1985 | 22 | NL | M | 31.0 | 113.0 | Neotoma | albigula | Rodent |
| 571 | 11567 | 6 | 4 | 1986 | 21 | NL | F | 32.0 | 98.0 | Neotoma | albigula | Rodent |
| 632 | 12691 | 4 | 26 | 1987 | 9 | NL | F | 34.0 | 117.0 | Neotoma | albigula | Rodent |
| 648 | 12873 | 5 | 28 | 1987 | 9 | NL | F | 31.0 | 177.0 | Neotoma | albigula | Rodent |
| 649 | 12876 | 5 | 28 | 1987 | 22 | NL | M | 30.0 | 55.0 | Neotoma | albigula | Rodent |
| 661 | 13094 | 7 | 25 | 1987 | 22 | NL | F | NaN | 84.0 | Neotoma | albigula | Rodent |
| 662 | 13108 | 7 | 26 | 1987 | 8 | NL | M | NaN | 134.0 | Neotoma | albigula | Rodent |
| 667 | 13176 | 7 | 26 | 1987 | 4 | NL | M | NaN | 187.0 | Neotoma | albigula | Rodent |
| 671 | 13233 | 8 | 25 | 1987 | 22 | NL | F | 29.0 | 110.0 | Neotoma | albigula | Rodent |
| 675 | 13259 | 8 | 26 | 1987 | 8 | NL | F | 32.0 | 120.0 | Neotoma | albigula | Rodent |
| 677 | 13270 | 8 | 26 | 1987 | 9 | NL | F | 31.0 | 90.0 | Neotoma | albigula | Rodent |
| 686 | 13362 | 9 | 26 | 1987 | 6 | NL | M | 34.0 | 205.0 | Neotoma | albigula | Rodent |
| 691 | 13404 | 9 | 26 | 1987 | 22 | NL | M | 34.0 | 139.0 | Neotoma | albigula | Rodent |
| 698 | 13451 | 9 | 27 | 1987 | 7 | NL | M | 32.0 | 154.0 | Neotoma | albigula | Rodent |
| 699 | 13461 | 9 | 27 | 1987 | 7 | NL | F | 33.0 | 170.0 | Neotoma | albigula | Rodent |
| 711 | 13548 | 10 | 24 | 1987 | 22 | NL | M | 30.0 | 146.0 | Neotoma | albigula | Rodent |
| 720 | 13639 | 10 | 25 | 1987 | 7 | NL | F | 35.0 | NaN | Neotoma | albigula | Rodent |
| 730 | 13782 | 11 | 22 | 1987 | 7 | NL | F | NaN | 169.0 | Neotoma | albigula | Rodent |
| 756 | 14327 | 3 | 21 | 1988 | 5 | NL | M | 32.0 | 176.0 | Neotoma | albigula | Rodent |
| 759 | 14384 | 4 | 18 | 1988 | 8 | NL | F | 30.0 | 60.0 | Neotoma | albigula | Rodent |
| 761 | 14404 | 4 | 18 | 1988 | 7 | NL | M | 34.0 | 235.0 | Neotoma | albigula | Rodent |
| 763 | 14442 | 4 | 19 | 1988 | 5 | NL | M | 33.0 | 131.0 | Neotoma | albigula | Rodent |
| 767 | 14495 | 5 | 15 | 1988 | 9 | NL | F | 31.0 | 112.0 | Neotoma | albigula | Rodent |
| 769 | 14524 | 5 | 15 | 1988 | 8 | NL | F | 33.0 | 178.0 | Neotoma | albigula | Rodent |
| 770 | 14532 | 5 | 15 | 1988 | 7 | NL | M | 34.0 | NaN | Neotoma | albigula | Rodent |
| 771 | 14539 | 5 | 15 | 1988 | 7 | NL | M | 32.0 | 133.0 | Neotoma | albigula | Rodent |
| 777 | 14595 | 6 | 12 | 1988 | 8 | NL | F | 34.0 | 172.0 | Neotoma | albigula | Rodent |
| 780 | 14634 | 6 | 12 | 1988 | 8 | NL | F | 31.0 | 100.0 | Neotoma | albigula | Rodent |
| 792 | 14721 | 7 | 15 | 1988 | 4 | NL | M | 33.0 | 126.0 | Neotoma | albigula | Rodent |
| 808 | 14870 | 9 | 11 | 1988 | 6 | NL | M | 34.0 | 215.0 | Neotoma | albigula | Rodent |
| 809 | 14874 | 9 | 11 | 1988 | 23 | NL | M | 34.0 | 112.0 | Neotoma | albigula | Rodent |
| 822 | 14999 | 10 | 9 | 1988 | 7 | NL | F | 34.0 | 161.0 | Neotoma | albigula | Rodent |
| 823 | 15028 | 10 | 9 | 1988 | 10 | NL | M | 33.0 | 143.0 | Neotoma | albigula | Rodent |
| 829 | 15127 | 11 | 6 | 1988 | 7 | NL | M | 33.0 | 180.0 | Neotoma | albigula | Rodent |
| 831 | 15159 | 11 | 6 | 1988 | 7 | NL | F | 33.0 | 156.0 | Neotoma | albigula | Rodent |
| 861 | 15981 | 5 | 9 | 1989 | 21 | NL | M | 33.0 | 100.0 | Neotoma | albigula | Rodent |
| 872 | 16120 | 6 | 3 | 1989 | 21 | NL | M | 32.0 | 121.0 | Neotoma | albigula | Rodent |
| 879 | 16232 | 7 | 3 | 1989 | 21 | NL | M | NaN | 145.0 | Neotoma | albigula | Rodent |
| 887 | 16350 | 7 | 29 | 1989 | 21 | NL | M | 32.0 | 77.0 | Neotoma | albigula | Rodent |
| 892 | 16490 | 10 | 7 | 1989 | 21 | NL | M | 33.0 | 167.0 | Neotoma | albigula | Rodent |
| 952 | 18823 | 7 | 12 | 1991 | 19 | NL | M | 32.0 | 99.0 | Neotoma | albigula | Rodent |
| 1261 | 16 | 7 | 16 | 1977 | 4 | DM | F | 36.0 | NaN | Dipodomys | merriami | Rodent |
| 1269 | 32 | 7 | 17 | 1977 | 10 | DM | F | 35.0 | NaN | Dipodomys | merriami | Rodent |
| 1276 | 41 | 7 | 18 | 1977 | 23 | DM | F | 34.0 | NaN | Dipodomys | merriami | Rodent |
| 1280 | 46 | 7 | 18 | 1977 | 19 | DM | M | 35.0 | NaN | Dipodomys | merriami | Rodent |
| 1288 | 55 | 7 | 18 | 1977 | 23 | DM | M | 36.0 | NaN | Dipodomys | merriami | Rodent |
| 1291 | 59 | 7 | 18 | 1977 | 19 | DM | M | 33.0 | NaN | Dipodomys | merriami | Rodent |
| 1292 | 61 | 7 | 18 | 1977 | 23 | DM | M | 35.0 | NaN | Dipodomys | merriami | Rodent |
| 1293 | 62 | 7 | 18 | 1977 | 19 | DM | M | 35.0 | NaN | Dipodomys | merriami | Rodent |
| 1296 | 65 | 8 | 19 | 1977 | 4 | DM | F | 34.0 | 29.0 | Dipodomys | merriami | Rodent |
| 1297 | 66 | 8 | 19 | 1977 | 4 | DM | F | 35.0 | 46.0 | Dipodomys | merriami | Rodent |
| 1304 | 84 | 8 | 20 | 1977 | 19 | DM | F | 35.0 | 43.0 | Dipodomys | merriami | Rodent |
| 1305 | 85 | 8 | 20 | 1977 | 23 | DM | F | 35.0 | 41.0 | Dipodomys | merriami | Rodent |
| 1312 | 95 | 8 | 20 | 1977 | 23 | DM | M | 38.0 | 46.0 | Dipodomys | merriami | Rodent |
| 1318 | 102 | 8 | 20 | 1977 | 23 | DM | F | 35.0 | 40.0 | Dipodomys | merriami | Rodent |
| 1320 | 105 | 8 | 20 | 1977 | 19 | DM | M | 35.0 | 39.0 | Dipodomys | merriami | Rodent |
| 1338 | 130 | 8 | 21 | 1977 | 16 | DM | F | 34.0 | NaN | Dipodomys | merriami | Rodent |
| 1346 | 141 | 8 | 21 | 1977 | 16 | DM | M | 36.0 | NaN | Dipodomys | merriami | Rodent |
| 1375 | 186 | 9 | 11 | 1977 | 19 | DM | F | 34.0 | 35.0 | Dipodomys | merriami | Rodent |
| 1381 | 198 | 9 | 12 | 1977 | 4 | DM | M | 35.0 | 39.0 | Dipodomys | merriami | Rodent |
| 1435 | 302 | 10 | 17 | 1977 | 19 | DM | F | 36.0 | 44.0 | Dipodomys | merriami | Rodent |
| 1439 | 306 | 10 | 17 | 1977 | 16 | DM | M | 33.0 | 23.0 | Dipodomys | merriami | Rodent |
| 1454 | 342 | 10 | 18 | 1977 | 16 | DM | M | 34.0 | 25.0 | Dipodomys | merriami | Rodent |
| 1584 | 649 | 2 | 20 | 1978 | 16 | DM | M | 35.0 | 48.0 | Dipodomys | merriami | Rodent |
| 1617 | 737 | 4 | 8 | 1978 | 9 | DM | F | 36.0 | NaN | Dipodomys | merriami | Rodent |
| 1637 | 778 | 4 | 9 | 1978 | 8 | DM | M | 36.0 | 36.0 | Dipodomys | merriami | Rodent |
| 1674 | 880 | 5 | 18 | 1978 | 16 | DM | M | NaN | 48.0 | Dipodomys | merriami | Rodent |
| 1948 | 1634 | 2 | 24 | 1979 | 8 | DM | M | 36.0 | 42.0 | Dipodomys | merriami | Rodent |
| 1975 | 1715 | 3 | 31 | 1979 | 8 | DM | M | 37.0 | 45.0 | Dipodomys | merriami | Rodent |
| 2086 | 2207 | 11 | 18 | 1979 | 10 | DM | F | 34.0 | 36.0 | Dipodomys | merriami | Rodent |
| 2154 | 2366 | 1 | 16 | 1980 | 23 | DM | F | 36.0 | 44.0 | Dipodomys | merriami | Rodent |
| 2199 | 2477 | 2 | 25 | 1980 | 10 | DM | F | 34.0 | 40.0 | Dipodomys | merriami | Rodent |
| 2200 | 2478 | 2 | 25 | 1980 | 10 | DM | M | 36.0 | 49.0 | Dipodomys | merriami | Rodent |
| 2491 | 3422 | 11 | 8 | 1980 | 10 | DM | M | 35.0 | 40.0 | Dipodomys | merriami | Rodent |
| 2557 | 3581 | 12 | 15 | 1980 | 10 | DM | M | 36.0 | 41.0 | Dipodomys | merriami | Rodent |
| 2588 | 3638 | 12 | 16 | 1980 | 23 | DM | F | 35.0 | 56.0 | Dipodomys | merriami | Rodent |
| 13820 | 12675 | 4 | 26 | 1987 | 10 | PE | F | 18.0 | 30.0 | Peromyscus | eremicus | Rodent |
| 13919 | 13770 | 11 | 22 | 1987 | 10 | PE | F | 20.0 | 18.0 | Peromyscus | eremicus | Rodent |
# why didn't it group by sex like I thought it would?
plot_sex.shape
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-278-8cd8c3f9cbc9> in <module> ----> 1 plot_sex.shape ~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in __getattr__(self, attr) 578 return self[attr] 579 --> 580 raise AttributeError( 581 f"'{type(self).__name__}' object has no attribute '{attr}'" 582 ) AttributeError: 'DataFrameGroupBy' object has no attribute 'shape'
# why doesn't it allow us to look at the shape of this dataframe?
sex_counts = plot_sex["taxa"].count()
sex_counts
plot_id sex
1 F 848
M 1095
2 F 970
M 1144
3 F 893
M 840
4 F 872
M 1030
5 F 527
M 586
6 F 737
M 764
7 F 335
M 307
8 F 840
M 983
9 F 853
M 1004
10 F 141
M 142
11 F 822
M 1029
12 F 1060
M 1200
13 F 628
M 783
14 F 709
M 1070
15 F 481
M 409
16 F 222
M 273
17 F 892
M 1053
18 F 755
M 626
19 F 522
M 581
20 F 650
M 613
21 F 611
M 451
22 F 671
M 673
23 F 165
M 207
24 F 486
M 485
Name: taxa, dtype: int64
sex_counts_to_plot = sex_counts.unstack()
sex_counts_to_plot.plot(kind = "bar", stacked = True, title = "Number of taxa in each plot")
<matplotlib.axes._subplots.AxesSubplot at 0x11a0249a0>
# Challenge - Diversity Index
# In the data folder, there is a plots.csv file that contains information about the type associated with each plot.
# Use that data to summarize the number of plots by plot type.
plots = pd.read_csv("data/plots.csv")
plots
| plot_id | plot_type | |
|---|---|---|
| 0 | 1 | Spectab exclosure |
| 1 | 2 | Control |
| 2 | 3 | Long-term Krat Exclosure |
| 3 | 4 | Control |
| 4 | 5 | Rodent Exclosure |
| 5 | 6 | Short-term Krat Exclosure |
| 6 | 7 | Rodent Exclosure |
| 7 | 8 | Control |
| 8 | 9 | Spectab exclosure |
| 9 | 10 | Rodent Exclosure |
| 10 | 11 | Control |
| 11 | 12 | Control |
| 12 | 13 | Short-term Krat Exclosure |
| 13 | 14 | Control |
| 14 | 15 | Long-term Krat Exclosure |
| 15 | 16 | Rodent Exclosure |
| 16 | 17 | Control |
| 17 | 18 | Short-term Krat Exclosure |
| 18 | 19 | Long-term Krat Exclosure |
| 19 | 20 | Short-term Krat Exclosure |
| 20 | 21 | Long-term Krat Exclosure |
| 21 | 22 | Control |
| 22 | 23 | Rodent Exclosure |
| 23 | 24 | Rodent Exclosure |
types = plots.groupby(["plot_type"])
types.head()
| plot_id | plot_type | |
|---|---|---|
| 0 | 1 | Spectab exclosure |
| 1 | 2 | Control |
| 2 | 3 | Long-term Krat Exclosure |
| 3 | 4 | Control |
| 4 | 5 | Rodent Exclosure |
| 5 | 6 | Short-term Krat Exclosure |
| 6 | 7 | Rodent Exclosure |
| 7 | 8 | Control |
| 8 | 9 | Spectab exclosure |
| 9 | 10 | Rodent Exclosure |
| 10 | 11 | Control |
| 11 | 12 | Control |
| 12 | 13 | Short-term Krat Exclosure |
| 14 | 15 | Long-term Krat Exclosure |
| 15 | 16 | Rodent Exclosure |
| 17 | 18 | Short-term Krat Exclosure |
| 18 | 19 | Long-term Krat Exclosure |
| 19 | 20 | Short-term Krat Exclosure |
| 20 | 21 | Long-term Krat Exclosure |
| 22 | 23 | Rodent Exclosure |
types_counts = types["plot_type"].count()
types_counts
plot_type Control 8 Long-term Krat Exclosure 4 Rodent Exclosure 6 Short-term Krat Exclosure 4 Spectab exclosure 2 Name: plot_type, dtype: int64
# Calculate a diversity index of your choice for control vs. rodent exclosure plots
# The index should consider both species abundance and number of species.
# You might choose to use the simple biodiversity index described as:
# number of species in the plot / total number of individuals in the plot
# OK first let's figure out which plots are control plots:
# I just looked at the dataframe, but there's probably a quicker way to see...
# plot_id 2, 4, 8, 11, 12, 14, 17, 22
# let's make a subset of the original data with just thost plots
controls = surveys_df[surveys_df['plot_id'].isin(["2","4","8","11","12","14","17","22"])]
controls
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
| 2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
| 6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN |
| 13 | 14 | 7 | 16 | 1977 | 8 | DM | NaN | NaN | NaN |
| 15 | 16 | 7 | 16 | 1977 | 4 | DM | F | 36.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 35532 | 35533 | 12 | 31 | 2002 | 14 | DM | F | 36.0 | 48.0 |
| 35533 | 35534 | 12 | 31 | 2002 | 14 | DM | M | 37.0 | 56.0 |
| 35534 | 35535 | 12 | 31 | 2002 | 14 | DM | M | 37.0 | 53.0 |
| 35535 | 35536 | 12 | 31 | 2002 | 14 | DM | F | 35.0 | 42.0 |
| 35536 | 35537 | 12 | 31 | 2002 | 14 | DM | F | 36.0 | 46.0 |
15660 rows × 9 columns
by_plots = controls.groupby(["plot_id"])
by_plots.head()
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
| 2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
| 6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN |
| 13 | 14 | 7 | 16 | 1977 | 8 | DM | NaN | NaN | NaN |
| 15 | 16 | 7 | 16 | 1977 | 4 | DM | F | 36.0 | NaN |
| 17 | 18 | 7 | 16 | 1977 | 2 | PP | M | 22.0 | NaN |
| 18 | 19 | 7 | 16 | 1977 | 4 | PF | NaN | NaN | NaN |
| 19 | 20 | 7 | 17 | 1977 | 11 | DS | F | 48.0 | NaN |
| 20 | 21 | 7 | 17 | 1977 | 14 | DM | F | 34.0 | NaN |
| 27 | 28 | 7 | 17 | 1977 | 11 | DM | M | 38.0 | NaN |
| 28 | 29 | 7 | 17 | 1977 | 11 | PP | M | NaN | NaN |
| 32 | 33 | 7 | 17 | 1977 | 11 | DM | F | 36.0 | NaN |
| 33 | 34 | 7 | 17 | 1977 | 17 | DM | NaN | NaN | NaN |
| 36 | 37 | 7 | 17 | 1977 | 11 | DM | F | 35.0 | NaN |
| 37 | 38 | 7 | 17 | 1977 | 17 | NL | M | 33.0 | NaN |
| 44 | 45 | 7 | 18 | 1977 | 22 | DM | M | 36.0 | NaN |
| 48 | 49 | 7 | 18 | 1977 | 12 | PP | F | 17.0 | NaN |
| 51 | 52 | 7 | 18 | 1977 | 12 | DM | F | 26.0 | NaN |
| 52 | 53 | 7 | 18 | 1977 | 22 | DM | M | 36.0 | NaN |
| 56 | 57 | 7 | 18 | 1977 | 22 | DM | NaN | NaN | NaN |
| 57 | 58 | 7 | 18 | 1977 | 12 | DS | M | 45.0 | NaN |
| 64 | 65 | 8 | 19 | 1977 | 4 | DM | F | 34.0 | 29.0 |
| 65 | 66 | 8 | 19 | 1977 | 4 | DM | F | 35.0 | 46.0 |
| 67 | 68 | 8 | 19 | 1977 | 8 | DO | F | 32.0 | 52.0 |
| 68 | 69 | 8 | 19 | 1977 | 2 | PF | M | 15.0 | 8.0 |
| 73 | 74 | 8 | 19 | 1977 | 8 | PF | M | 12.0 | 7.0 |
| 74 | 75 | 8 | 19 | 1977 | 8 | DM | F | 32.0 | 22.0 |
| 76 | 77 | 8 | 19 | 1977 | 4 | SS | NaN | NaN | NaN |
| 88 | 89 | 8 | 20 | 1977 | 12 | PP | F | 20.0 | 15.0 |
| 95 | 96 | 8 | 20 | 1977 | 12 | DM | M | 36.0 | 41.0 |
| 117 | 118 | 8 | 21 | 1977 | 17 | DM | M | 33.0 | NaN |
| 121 | 122 | 8 | 21 | 1977 | 22 | DM | F | 33.0 | NaN |
| 122 | 123 | 8 | 21 | 1977 | 17 | DM | M | 37.0 | NaN |
| 123 | 124 | 8 | 21 | 1977 | 14 | DM | M | 36.0 | NaN |
| 124 | 125 | 8 | 21 | 1977 | 22 | DM | M | 35.0 | NaN |
| 126 | 127 | 8 | 21 | 1977 | 14 | DM | M | 33.0 | NaN |
| 128 | 129 | 8 | 21 | 1977 | 14 | DM | F | 34.0 | NaN |
| 133 | 134 | 8 | 21 | 1977 | 17 | DS | F | 45.0 | NaN |
| 138 | 139 | 8 | 21 | 1977 | 14 | PF | M | 15.0 | NaN |
| 222 | 223 | 9 | 13 | 1977 | 8 | DS | F | 50.0 | NaN |
# why didn't this group like I thought it would?
by_plots['species_id'].value_counts()
plot_id species_id
2 DM 578
DO 313
NL 201
OT 194
PP 187
...
22 CQ 1
DX 1
SF 1
SU 1
UP 1
Name: species_id, Length: 192, dtype: int64
spp_counts = by_plots['species_id'].value_counts()
# this output object is not a dataframe, it's a "series"
# OPTION FOR DISPLAYING ENTIRE DATA FRAME:
pd.set_option("display.max_rows", None, "display.max_columns", None)
print(spp_counts)
plot_id species_id
2 DM 578
DO 313
NL 201
OT 194
PP 187
PB 171
PE 146
DS 137
RM 71
OL 68
PM 36
PF 21
SH 15
AB 14
SO 8
AH 7
SS 7
DX 4
SA 3
OX 2
BA 1
CM 1
CQ 1
RF 1
RO 1
UL 1
UP 1
UR 1
4 DM 1075
DS 277
PF 136
PP 115
PB 97
OT 76
DO 62
OL 57
SS 22
RM 19
NL 6
PM 4
AB 3
DX 3
AH 2
SH 2
CB 1
PC 1
PE 1
PI 1
8 DM 569
DO 488
PP 217
DS 212
PB 104
OT 97
PF 56
OL 55
SS 15
RM 13
AH 12
PE 11
NL 8
PM 7
AB 6
DX 3
CB 2
CM 1
PC 1
PI 1
SA 1
UL 1
UR 1
11 DM 912
DO 209
PP 156
DS 130
OT 119
RM 71
PB 54
OL 52
PE 51
NL 43
PF 32
PM 17
SS 14
SH 11
AH 7
SF 6
DX 5
SA 5
PC 4
UR 3
AB 2
US 2
CB 1
CS 1
PG 1
PH 1
RF 1
SO 1
SU 1
12 DM 1020
DO 272
PP 201
OT 185
NL 166
DS 120
RM 94
OL 77
PE 50
PB 36
AB 28
PF 24
AH 22
PM 13
SF 12
RF 9
SA 8
SS 8
SH 7
DX 3
PI 3
OX 2
AS 1
PG 1
UP 1
14 DM 1215
OL 98
DS 90
OT 83
PP 78
NL 62
DO 61
AH 37
RM 36
PF 33
PE 22
PM 17
SS 16
AB 8
CB 6
DX 4
PB 4
SA 4
PC 2
OX 1
PG 1
PH 1
PL 1
17 DM 869
DO 261
OT 169
DS 121
PP 111
NL 106
RM 82
PB 67
PF 66
OL 43
PE 39
AH 34
PM 22
SS 16
SH 15
SA 6
DX 4
AB 3
CB 1
OX 1
UL 1
22 DM 522
DS 302
PB 256
OT 81
PF 56
PP 43
PE 23
OL 21
NL 18
RM 17
AB 12
PH 9
DO 5
SS 5
AH 4
PM 4
CB 2
SA 2
CQ 1
DX 1
SF 1
SU 1
UP 1
Name: species_id, dtype: int64
spp_counts.dtype
dtype('int64')
# let's subset just plot 2
plot2 = controls[controls.plot_id == 2]
plot2.head()
| record_id | month | day | year | plot_id | species_id | sex | hindfoot_length | weight | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 7 | 16 | 1977 | 2 | NL | M | 32.0 | NaN |
| 2 | 3 | 7 | 16 | 1977 | 2 | DM | F | 37.0 | NaN |
| 6 | 7 | 7 | 16 | 1977 | 2 | PE | F | NaN | NaN |
| 17 | 18 | 7 | 16 | 1977 | 2 | PP | M | 22.0 | NaN |
| 68 | 69 | 8 | 19 | 1977 | 2 | PF | M | 15.0 | 8.0 |
plot2['species_id'].value_counts()
DM 578 DO 313 NL 201 OT 194 PP 187 PB 171 PE 146 DS 137 RM 71 OL 68 PM 36 PF 21 SH 15 AB 14 SO 8 AH 7 SS 7 DX 4 SA 3 OX 2 UP 1 RF 1 BA 1 UR 1 CM 1 CQ 1 UL 1 RO 1 Name: species_id, dtype: int64
type(plot2)
pandas.core.frame.DataFrame
plot2.dtypes
record_id int64 month int64 day int64 year int64 plot_id int64 species_id object sex object hindfoot_length float64 weight float64 dtype: object
counts = plot2['species_id'].value_counts()
counts.shape
(28,)
spp_number = counts.shape[0]
spp_number
28
# now we need to add up all those counts to get total number of individuals
ind_number = counts.sum()
ind_number
2191
# biodiversity index for Plot 2:
biodiversity = spp_number / ind_number
biodiversity
0.012779552715654952