Data carpentry tutorials

Combining DataFrames with pandas

September 14, 2020

Alison Fowler

Instructions

In [2]:
%cd /Users/alisonfowler/Docs/Jupityr-notebooks/
/Users/alisonfowler/Docs/Jupityr-notebooks
In [3]:
import pandas as pd
In [4]:
# import data and replace NaNs with empty values
surveys_df = pd.read_csv("data/surveys.csv",
                         keep_default_na=False, na_values=[""])
In [5]:
# Read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)
In [6]:
# Grab the last 10 rows
survey_sub_last10 = surveys_df.tail(10)
In [7]:
# Reset the index values so the second dataframe appends properly
survey_sub_last10 = survey_sub_last10.reset_index(drop=True)
# drop=True option avoids adding new index column with old index values
In [8]:
# Stack the DataFrames on top of each other
vertical_stack = pd.concat([survey_sub, survey_sub_last10], axis=0)
In [9]:
# Place the DataFrames side by side
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis=1)
In [10]:
print(vertical_stack)
   record_id  month  day  year  plot_id species_id  sex  hindfoot_length  \
0          1      7   16  1977        2         NL    M             32.0   
1          2      7   16  1977        3         NL    M             33.0   
2          3      7   16  1977        2         DM    F             37.0   
3          4      7   16  1977        7         DM    M             36.0   
4          5      7   16  1977        3         DM    M             35.0   
5          6      7   16  1977        1         PF    M             14.0   
6          7      7   16  1977        2         PE    F              NaN   
7          8      7   16  1977        1         DM    M             37.0   
8          9      7   16  1977        1         DM    F             34.0   
9         10      7   16  1977        6         PF    F             20.0   
0      35540     12   31  2002       15         PB    F             26.0   
1      35541     12   31  2002       15         PB    F             24.0   
2      35542     12   31  2002       15         PB    F             26.0   
3      35543     12   31  2002       15         PB    F             27.0   
4      35544     12   31  2002       15         US  NaN              NaN   
5      35545     12   31  2002       15         AH  NaN              NaN   
6      35546     12   31  2002       15         AH  NaN              NaN   
7      35547     12   31  2002       10         RM    F             15.0   
8      35548     12   31  2002        7         DO    M             36.0   
9      35549     12   31  2002        5        NaN  NaN              NaN   

   weight  
0     NaN  
1     NaN  
2     NaN  
3     NaN  
4     NaN  
5     NaN  
6     NaN  
7     NaN  
8     NaN  
9     NaN  
0    23.0  
1    31.0  
2    29.0  
3    34.0  
4     NaN  
5     NaN  
6     NaN  
7    14.0  
8    51.0  
9     NaN  
In [11]:
print(horizontal_stack)
   record_id  month  day  year  plot_id species_id sex  hindfoot_length  \
0          1      7   16  1977        2         NL   M             32.0   
1          2      7   16  1977        3         NL   M             33.0   
2          3      7   16  1977        2         DM   F             37.0   
3          4      7   16  1977        7         DM   M             36.0   
4          5      7   16  1977        3         DM   M             35.0   
5          6      7   16  1977        1         PF   M             14.0   
6          7      7   16  1977        2         PE   F              NaN   
7          8      7   16  1977        1         DM   M             37.0   
8          9      7   16  1977        1         DM   F             34.0   
9         10      7   16  1977        6         PF   F             20.0   

   weight  record_id  month  day  year  plot_id species_id  sex  \
0     NaN      35540     12   31  2002       15         PB    F   
1     NaN      35541     12   31  2002       15         PB    F   
2     NaN      35542     12   31  2002       15         PB    F   
3     NaN      35543     12   31  2002       15         PB    F   
4     NaN      35544     12   31  2002       15         US  NaN   
5     NaN      35545     12   31  2002       15         AH  NaN   
6     NaN      35546     12   31  2002       15         AH  NaN   
7     NaN      35547     12   31  2002       10         RM    F   
8     NaN      35548     12   31  2002        7         DO    M   
9     NaN      35549     12   31  2002        5        NaN  NaN   

   hindfoot_length  weight  
0             26.0    23.0  
1             24.0    31.0  
2             26.0    29.0  
3             27.0    34.0  
4              NaN     NaN  
5              NaN     NaN  
6              NaN     NaN  
7             15.0    14.0  
8             36.0    51.0  
9              NaN     NaN  
In [12]:
# need to reindex the vertical stack data 
vertical_stack.reset_index(drop=True)
Out[12]:
record_id month day year plot_id species_id sex hindfoot_length weight
0 1 7 16 1977 2 NL M 32.0 NaN
1 2 7 16 1977 3 NL M 33.0 NaN
2 3 7 16 1977 2 DM F 37.0 NaN
3 4 7 16 1977 7 DM M 36.0 NaN
4 5 7 16 1977 3 DM M 35.0 NaN
5 6 7 16 1977 1 PF M 14.0 NaN
6 7 7 16 1977 2 PE F NaN NaN
7 8 7 16 1977 1 DM M 37.0 NaN
8 9 7 16 1977 1 DM F 34.0 NaN
9 10 7 16 1977 6 PF F 20.0 NaN
10 35540 12 31 2002 15 PB F 26.0 23.0
11 35541 12 31 2002 15 PB F 24.0 31.0
12 35542 12 31 2002 15 PB F 26.0 29.0
13 35543 12 31 2002 15 PB F 27.0 34.0
14 35544 12 31 2002 15 US NaN NaN NaN
15 35545 12 31 2002 15 AH NaN NaN NaN
16 35546 12 31 2002 15 AH NaN NaN NaN
17 35547 12 31 2002 10 RM F 15.0 14.0
18 35548 12 31 2002 7 DO M 36.0 51.0
19 35549 12 31 2002 5 NaN NaN NaN NaN
In [13]:
# Write DataFrame to CSV
vertical_stack.to_csv('data/output/out.csv', index=False)
# index=false prevents pandas from including the index number for each line 
In [14]:
# For kicks, read our output back into Python and make sure all looks good
new_output = pd.read_csv('data/output/out.csv', keep_default_na=False, na_values=[""])
In [15]:
print(new_output)
    record_id  month  day  year  plot_id species_id  sex  hindfoot_length  \
0           1      7   16  1977        2         NL    M             32.0   
1           2      7   16  1977        3         NL    M             33.0   
2           3      7   16  1977        2         DM    F             37.0   
3           4      7   16  1977        7         DM    M             36.0   
4           5      7   16  1977        3         DM    M             35.0   
5           6      7   16  1977        1         PF    M             14.0   
6           7      7   16  1977        2         PE    F              NaN   
7           8      7   16  1977        1         DM    M             37.0   
8           9      7   16  1977        1         DM    F             34.0   
9          10      7   16  1977        6         PF    F             20.0   
10      35540     12   31  2002       15         PB    F             26.0   
11      35541     12   31  2002       15         PB    F             24.0   
12      35542     12   31  2002       15         PB    F             26.0   
13      35543     12   31  2002       15         PB    F             27.0   
14      35544     12   31  2002       15         US  NaN              NaN   
15      35545     12   31  2002       15         AH  NaN              NaN   
16      35546     12   31  2002       15         AH  NaN              NaN   
17      35547     12   31  2002       10         RM    F             15.0   
18      35548     12   31  2002        7         DO    M             36.0   
19      35549     12   31  2002        5        NaN  NaN              NaN   

    weight  
0      NaN  
1      NaN  
2      NaN  
3      NaN  
4      NaN  
5      NaN  
6      NaN  
7      NaN  
8      NaN  
9      NaN  
10    23.0  
11    31.0  
12    29.0  
13    34.0  
14     NaN  
15     NaN  
16     NaN  
17    14.0  
18    51.0  
19     NaN  
In [ ]:
# CHALLENGE - Combine data 
# combine surveys2001.csv and surveys2002.csv
# create a plot of average plot weight by year grouped by sex
# export your results as a csv and make sure it reads back into python properly 
In [16]:
surveys2001 = pd.read_csv("data/surveys2001.csv")
In [17]:
print(surveys2001)
      Unnamed: 0  record_id  month  day  year  site_id species_id sex  \
0          31710      31711      1   21  2001        1         PB   F   
1          31711      31712      1   21  2001        1         DM   M   
2          31712      31713      1   21  2001        1         PB   M   
3          31713      31714      1   21  2001        1         DO   M   
4          31714      31715      1   21  2001        2         OT   M   
...          ...        ...    ...  ...   ...      ...        ...  ..   
1429       33303      33304     12   15  2001       24         RM   M   
1430       33304      33305     12   15  2001        7         PB   M   
1431       33305      33306     12   15  2001        7         OT   M   
1432       33306      33307     12   15  2001        7         OT   M   
1433       33307      33308     12   15  2001        7         PP   M   

      hindfoot_length  weight  
0                26.0    25.0  
1                37.0    43.0  
2                29.0    44.0  
3                34.0    53.0  
4                20.0    27.0  
...               ...     ...  
1429             16.0    10.0  
1430             29.0    44.0  
1431             19.0    21.0  
1432             20.0    19.0  
1433             24.0    16.0  

[1434 rows x 10 columns]
In [18]:
surveys2002 = pd.read_csv("data/surveys2002.csv")
In [19]:
print(surveys2002)
      Unnamed: 0  record_id  month  day  year  site_id species_id sex  \
0          33320      33321      1   12  2002        1         DM   M   
1          33321      33322      1   12  2002        1         DO   M   
2          33322      33323      1   12  2002        1         PB   M   
3          33324      33325      1   12  2002        1         DO   M   
4          33325      33326      1   12  2002        2         OT   F   
...          ...        ...    ...  ...   ...      ...        ...  ..   
2073       35540      35541     12   31  2002       15         PB   F   
2074       35541      35542     12   31  2002       15         PB   F   
2075       35542      35543     12   31  2002       15         PB   F   
2076       35546      35547     12   31  2002       10         RM   F   
2077       35547      35548     12   31  2002        7         DO   M   

      hindfoot_length  weight  
0                38.0    44.0  
1                37.0    58.0  
2                28.0    45.0  
3                35.0    29.0  
4                20.0    26.0  
...               ...     ...  
2073             24.0    31.0  
2074             26.0    29.0  
2075             27.0    34.0  
2076             15.0    14.0  
2077             36.0    51.0  

[2078 rows x 10 columns]
In [20]:
# Reset the index values to the second dataframe appends properly
surveys2002 = surveys2002.reset_index(drop=True)
In [21]:
combined = pd.concat([surveys2001, surveys2002], axis=0)
In [22]:
print(combined)
      Unnamed: 0  record_id  month  day  year  site_id species_id sex  \
0          31710      31711      1   21  2001        1         PB   F   
1          31711      31712      1   21  2001        1         DM   M   
2          31712      31713      1   21  2001        1         PB   M   
3          31713      31714      1   21  2001        1         DO   M   
4          31714      31715      1   21  2001        2         OT   M   
...          ...        ...    ...  ...   ...      ...        ...  ..   
2073       35540      35541     12   31  2002       15         PB   F   
2074       35541      35542     12   31  2002       15         PB   F   
2075       35542      35543     12   31  2002       15         PB   F   
2076       35546      35547     12   31  2002       10         RM   F   
2077       35547      35548     12   31  2002        7         DO   M   

      hindfoot_length  weight  
0                26.0    25.0  
1                37.0    43.0  
2                29.0    44.0  
3                34.0    53.0  
4                20.0    27.0  
...               ...     ...  
2073             24.0    31.0  
2074             26.0    29.0  
2075             27.0    34.0  
2076             15.0    14.0  
2077             36.0    51.0  

[3512 rows x 10 columns]
In [23]:
# create a plot of average plot weight by year grouped by sex 
In [24]:
combined_sex = combined.groupby(['year','sex'])
In [25]:
mean_weights = combined_sex['weight'].mean()
In [26]:
print(mean_weights)
year  sex
2001  F      36.034771
      M      36.404196
2002  F      33.878761
      M      37.490506
Name: weight, dtype: float64
In [27]:
mean_weights.unstack()
Out[27]:
sex F M
year
2001 36.034771 36.404196
2002 33.878761 37.490506
In [28]:
to_plot = mean_weights.unstack()
In [29]:
print(to_plot)
sex           F          M
year                      
2001  36.034771  36.404196
2002  33.878761  37.490506
In [30]:
to_plot.plot(kind='bar', stacked=True, title="Average plot weight by year and sex")
Out[30]:
<matplotlib.axes._subplots.AxesSubplot at 0x1154288b0>
In [31]:
# export 
to_plot.to_csv('data/output/mean_weights.csv', index=False)
In [32]:
# reimport to check
new_output = pd.read_csv('data/output/mean_weights.csv', keep_default_na=False, na_values=[""])
In [33]:
new_output
Out[33]:
F M
0 36.034771 36.404196
1 33.878761 37.490506
In [34]:
# Read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)
In [35]:
survey_sub
Out[35]:
record_id month day year plot_id species_id sex hindfoot_length weight
0 1 7 16 1977 2 NL M 32.0 NaN
1 2 7 16 1977 3 NL M 33.0 NaN
2 3 7 16 1977 2 DM F 37.0 NaN
3 4 7 16 1977 7 DM M 36.0 NaN
4 5 7 16 1977 3 DM M 35.0 NaN
5 6 7 16 1977 1 PF M 14.0 NaN
6 7 7 16 1977 2 PE F NaN NaN
7 8 7 16 1977 1 DM M 37.0 NaN
8 9 7 16 1977 1 DM F 34.0 NaN
9 10 7 16 1977 6 PF F 20.0 NaN
In [36]:
# Import a small subset of the species data designed for this part of the lesson.
# It is stored in the data folder.
species_sub = pd.read_csv('data/speciesSubset.csv', keep_default_na=False, na_values=[""])
In [37]:
species_sub
Out[37]:
species_id genus species taxa
0 DM Dipodomys merriami Rodent
1 NL Neotoma albigula Rodent
2 PE Peromyscus eremicus Rodent
In [38]:
>>> species_sub.columns
Out[38]:
Index(['species_id', 'genus', 'species', 'taxa'], dtype='object')
In [39]:
>>> survey_sub.columns
Out[39]:
Index(['record_id', 'month', 'day', 'year', 'plot_id', 'species_id', 'sex',
       'hindfoot_length', 'weight'],
      dtype='object')
In [40]:
# Inner Joins 
# combines two dataframes based on a join key and returns a new dataframe that contains only those rows that have matching values
# in both of the original dataframes 
In [41]:
merged_inner = pd.merge(left=survey_sub, right=species_sub, left_on='species_id', right_on='species_id')

# In this case `species_id` is the only column name in  both dataframes, so if we skipped `left_on`
# And `right_on` arguments we would still get the same result
In [42]:
merged_inner.shape
Out[42]:
(8, 12)
In [43]:
merged_inner
Out[43]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
0 1 7 16 1977 2 NL M 32.0 NaN Neotoma albigula Rodent
1 2 7 16 1977 3 NL M 33.0 NaN Neotoma albigula Rodent
2 3 7 16 1977 2 DM F 37.0 NaN Dipodomys merriami Rodent
3 4 7 16 1977 7 DM M 36.0 NaN Dipodomys merriami Rodent
4 5 7 16 1977 3 DM M 35.0 NaN Dipodomys merriami Rodent
5 8 7 16 1977 1 DM M 37.0 NaN Dipodomys merriami Rodent
6 9 7 16 1977 1 DM F 34.0 NaN Dipodomys merriami Rodent
7 7 7 16 1977 2 PE F NaN NaN Peromyscus eremicus Rodent
In [44]:
# Left Joins 
# combines two dataframes and includes ALL of the data frame from the left, and only certain rows from the right. 
merged_left = pd.merge(left=survey_sub, right=species_sub, how='left', left_on='species_id', right_on='species_id')
merged_left
Out[44]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
0 1 7 16 1977 2 NL M 32.0 NaN Neotoma albigula Rodent
1 2 7 16 1977 3 NL M 33.0 NaN Neotoma albigula Rodent
2 3 7 16 1977 2 DM F 37.0 NaN Dipodomys merriami Rodent
3 4 7 16 1977 7 DM M 36.0 NaN Dipodomys merriami Rodent
4 5 7 16 1977 3 DM M 35.0 NaN Dipodomys merriami Rodent
5 6 7 16 1977 1 PF M 14.0 NaN NaN NaN NaN
6 7 7 16 1977 2 PE F NaN NaN Peromyscus eremicus Rodent
7 8 7 16 1977 1 DM M 37.0 NaN Dipodomys merriami Rodent
8 9 7 16 1977 1 DM F 34.0 NaN Dipodomys merriami Rodent
9 10 7 16 1977 6 PF F 20.0 NaN NaN NaN NaN
In [45]:
merged_left[ pd.isnull(merged_left.genus) ]
Out[45]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
5 6 7 16 1977 1 PF M 14.0 NaN NaN NaN NaN
9 10 7 16 1977 6 PF F 20.0 NaN NaN NaN NaN
In [ ]:
# other join types: 
# right (outer) join: similar to a left join, except all rows from the right DataFrame are kept, 
# while rows from the left DataFrame without matching join key(s) values are discarded.

# full (outer) join: This join type returns the all pairwise combinations of rows from both DataFrames; 
# i.e., the result DataFrame will NaN where data is missing in one of the dataframes. 
# This join type is very rarely used.
In [ ]:
# Challenge - Distributions 

# Create a new DataFrame by joining the contents of the surveys.csv and species.csv tables. 
# Then calculate and plot the distribution of:
# taxa by plot
# taxa by sex by plot
In [84]:
spp = pd.read_csv("data/species.csv")
In [86]:
spp
Out[86]:
species_id genus species taxa
0 AB Amphispiza bilineata Bird
1 AH Ammospermophilus harrisi Rodent
2 AS Ammodramus savannarum Bird
3 BA Baiomys taylori Rodent
4 CB Campylorhynchus brunneicapillus Bird
5 CM Calamospiza melanocorys Bird
6 CQ Callipepla squamata Bird
7 CS Crotalus scutalatus Reptile
8 CT Cnemidophorus tigris Reptile
9 CU Cnemidophorus uniparens Reptile
10 CV Crotalus viridis Reptile
11 DM Dipodomys merriami Rodent
12 DO Dipodomys ordii Rodent
13 DS Dipodomys spectabilis Rodent
14 DX Dipodomys sp. Rodent
15 EO Eumeces obsoletus Reptile
16 GS Gambelia silus Reptile
17 NL Neotoma albigula Rodent
18 NX Neotoma sp. Rodent
19 OL Onychomys leucogaster Rodent
20 OT Onychomys torridus Rodent
21 OX Onychomys sp. Rodent
22 PB Chaetodipus baileyi Rodent
23 PC Pipilo chlorurus Bird
24 PE Peromyscus eremicus Rodent
25 PF Perognathus flavus Rodent
26 PG Pooecetes gramineus Bird
27 PH Perognathus hispidus Rodent
28 PI Chaetodipus intermedius Rodent
29 PL Peromyscus leucopus Rodent
30 PM Peromyscus maniculatus Rodent
31 PP Chaetodipus penicillatus Rodent
32 PU Pipilo fuscus Bird
33 PX Chaetodipus sp. Rodent
34 RF Reithrodontomys fulvescens Rodent
35 RM Reithrodontomys megalotis Rodent
36 RO Reithrodontomys montanus Rodent
37 RX Reithrodontomys sp. Rodent
38 SA Sylvilagus audubonii Rabbit
39 SB Spizella breweri Bird
40 SC Sceloporus clarki Reptile
41 SF Sigmodon fulviventer Rodent
42 SH Sigmodon hispidus Rodent
43 SO Sigmodon ochrognathus Rodent
44 SS Spermophilus spilosoma Rodent
45 ST Spermophilus tereticaudus Rodent
46 SU Sceloporus undulatus Reptile
47 SX Sigmodon sp. Rodent
48 UL Lizard sp. Reptile
49 UP Pipilo sp. Bird
50 UR Rodent sp. Rodent
51 US Sparrow sp. Bird
52 ZL Zonotrichia leucophrys Bird
53 ZM Zenaida macroura Bird
In [105]:
# I guess we will use an inner join for this? 

new_df = pd.merge(left=surveys_df, right=spp, left_on='species_id', right_on='species_id')
In [89]:
new_df.dtypes
Out[89]:
record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
genus               object
species             object
taxa                object
dtype: object
In [90]:
new_df.shape
Out[90]:
(34786, 12)
In [91]:
new_df
Out[91]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
0 1 7 16 1977 2 NL M 32.0 NaN Neotoma albigula Rodent
1 2 7 16 1977 3 NL M 33.0 NaN Neotoma albigula Rodent
2 22 7 17 1977 15 NL F 31.0 NaN Neotoma albigula Rodent
3 38 7 17 1977 17 NL M 33.0 NaN Neotoma albigula Rodent
4 72 8 19 1977 2 NL M 31.0 NaN Neotoma albigula Rodent
... ... ... ... ... ... ... ... ... ... ... ... ...
34781 28988 12 23 1998 6 CT NaN NaN NaN Cnemidophorus tigris Reptile
34782 35512 12 31 2002 11 US NaN NaN NaN Sparrow sp. Bird
34783 35513 12 31 2002 11 US NaN NaN NaN Sparrow sp. Bird
34784 35528 12 31 2002 13 US NaN NaN NaN Sparrow sp. Bird
34785 35544 12 31 2002 15 US NaN NaN NaN Sparrow sp. Bird

34786 rows × 12 columns

In [57]:
# plot distribution of taxa by plot_id 
In [172]:
plot_plot_id = new_df.groupby(["plot_id"])
In [173]:
plot_plot_id.head()
Out[173]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
0 1 7 16 1977 2 NL M 32.0 NaN Neotoma albigula Rodent
1 2 7 16 1977 3 NL M 33.0 NaN Neotoma albigula Rodent
2 22 7 17 1977 15 NL F 31.0 NaN Neotoma albigula Rodent
3 38 7 17 1977 17 NL M 33.0 NaN Neotoma albigula Rodent
4 72 8 19 1977 2 NL M 31.0 NaN Neotoma albigula Rodent
... ... ... ... ... ... ... ... ... ... ... ... ...
1291 59 7 18 1977 19 DM M 33.0 NaN Dipodomys merriami Rodent
1292 61 7 18 1977 23 DM M 35.0 NaN Dipodomys merriami Rodent
1338 130 8 21 1977 16 DM F 34.0 NaN Dipodomys merriami Rodent
2086 2207 11 18 1979 10 DM F 34.0 36.0 Dipodomys merriami Rodent
2199 2477 2 25 1980 10 DM F 34.0 40.0 Dipodomys merriami Rodent

120 rows × 12 columns

In [174]:
plot_taxa["taxa"].count()
Out[174]:
plot_id  taxa  
1        Bird        11
         Rabbit       2
         Rodent    1976
2        Bird        17
         Rabbit       3
                   ... 
23       Rabbit       8
         Rodent     407
24       Bird        17
         Rabbit       2
         Rodent    1006
Name: taxa, Length: 80, dtype: int64
In [176]:
counts_to_plot = plot_taxa["taxa"].count()
In [181]:
counts_to_plot.plot(kind = "bar", stacked = True, title = "Taxa counts in each plot")
Out[181]:
<matplotlib.axes._subplots.AxesSubplot at 0x116228f40>
In [ ]:
# now, taxa by sex by plot 
# I'm not sure how we could stack more than one variable (taxa and sex)? 
In [276]:
plot_sex = new_df.groupby(["plot_id", "sex"])
In [277]:
plot_sex.head()
Out[277]:
record_id month day year plot_id species_id sex hindfoot_length weight genus species taxa
0 1 7 16 1977 2 NL M 32.0 NaN Neotoma albigula Rodent
1 2 7 16 1977 3 NL M 33.0 NaN Neotoma albigula Rodent
2 22 7 17 1977 15 NL F 31.0 NaN Neotoma albigula Rodent
3 38 7 17 1977 17 NL M 33.0 NaN Neotoma albigula Rodent
4 72 8 19 1977 2 NL M 31.0 NaN Neotoma albigula Rodent
5 106 8 20 1977 12 NL NaN NaN NaN Neotoma albigula Rodent
6 107 8 20 1977 18 NL NaN NaN NaN Neotoma albigula Rodent
7 121 8 21 1977 15 NL NaN NaN NaN Neotoma albigula Rodent
8 171 9 11 1977 12 NL NaN NaN NaN Neotoma albigula Rodent
9 194 9 12 1977 11 NL NaN NaN NaN Neotoma albigula Rodent
37 588 2 18 1978 2 NL M NaN 218.0 Neotoma albigula Rodent
39 646 2 20 1978 18 NL M 32.0 228.0 Neotoma albigula Rodent
40 655 3 11 1978 3 NL M 32.0 232.0 Neotoma albigula Rodent
42 736 4 8 1978 6 NL M NaN NaN Neotoma albigula Rodent
44 755 4 8 1978 3 NL F 32.0 NaN Neotoma albigula Rodent
45 825 4 10 1978 18 NL M NaN 225.0 Neotoma albigula Rodent
46 845 5 6 1978 2 NL M 32.0 204.0 Neotoma albigula Rodent
47 848 5 6 1978 22 NL M 32.0 212.0 Neotoma albigula Rodent
48 869 5 17 1978 20 NL F 33.0 174.0 Neotoma albigula Rodent
49 875 5 17 1978 5 NL F 33.0 212.0 Neotoma albigula Rodent
50 886 5 18 1978 3 NL F NaN 182.0 Neotoma albigula Rodent
51 971 6 8 1978 17 NL F 32.0 135.0 Neotoma albigula Rodent
52 990 6 9 1978 2 NL M NaN 200.0 Neotoma albigula Rodent
53 1029 6 9 1978 18 NL M 32.0 232.0 Neotoma albigula Rodent
54 1081 7 8 1978 14 NL M 33.0 113.0 Neotoma albigula Rodent
55 1083 7 8 1978 3 NL F 34.0 115.0 Neotoma albigula Rodent
56 1087 7 8 1978 3 NL M 34.0 190.0 Neotoma albigula Rodent
57 1119 8 4 1978 12 NL M 33.0 189.0 Neotoma albigula Rodent
58 1131 8 4 1978 20 NL F 33.0 179.0 Neotoma albigula Rodent
59 1146 8 4 1978 17 NL F 32.0 149.0 Neotoma albigula Rodent
61 1168 8 5 1978 15 NL F 31.0 196.0 Neotoma albigula Rodent
62 1197 9 3 1978 12 NL F NaN 158.0 Neotoma albigula Rodent
63 1204 9 3 1978 12 NL M NaN 189.0 Neotoma albigula Rodent
64 1220 9 3 1978 20 NL F 31.0 181.0 Neotoma albigula Rodent
65 1250 9 4 1978 3 NL F 34.0 141.0 Neotoma albigula Rodent
67 1285 9 4 1978 14 NL M 32.0 118.0 Neotoma albigula Rodent
75 1465 11 5 1978 14 NL M 32.0 153.0 Neotoma albigula Rodent
76 1490 12 2 1978 12 NL M 32.0 223.0 Neotoma albigula Rodent
77 1512 12 2 1978 20 NL F 31.0 181.0 Neotoma albigula Rodent
78 1530 12 3 1978 14 NL M NaN 169.0 Neotoma albigula Rodent
79 1729 4 28 1979 12 NL F 31.0 86.0 Neotoma albigula Rodent
80 1731 4 28 1979 12 NL F 32.0 239.0 Neotoma albigula Rodent
82 1763 4 29 1979 18 NL F 30.0 126.0 Neotoma albigula Rodent
83 1776 4 29 1979 4 NL M 32.0 101.0 Neotoma albigula Rodent
84 1782 5 29 1979 12 NL M 27.0 66.0 Neotoma albigula Rodent
85 1784 5 29 1979 12 NL M 30.0 72.0 Neotoma albigula Rodent
87 1807 5 29 1979 20 NL M 27.0 40.0 Neotoma albigula Rodent
89 1863 7 3 1979 20 NL F 30.0 94.0 Neotoma albigula Rodent
92 1928 7 25 1979 14 NL M 36.0 151.0 Neotoma albigula Rodent
94 1982 8 22 1979 12 NL F 29.0 72.0 Neotoma albigula Rodent
95 1993 8 22 1979 17 NL F 30.0 105.0 Neotoma albigula Rodent
97 2078 10 24 1979 12 NL F 32.0 92.0 Neotoma albigula Rodent
99 2102 10 24 1979 17 NL M 32.0 133.0 Neotoma albigula Rodent
100 2107 10 24 1979 20 NL M 32.0 171.0 Neotoma albigula Rodent
101 2133 10 25 1979 2 NL F 33.0 274.0 Neotoma albigula Rodent
102 2174 11 17 1979 14 NL F 31.0 109.0 Neotoma albigula Rodent
103 2184 11 17 1979 2 NL F 30.0 186.0 Neotoma albigula Rodent
104 2191 11 17 1979 17 NL M 31.0 153.0 Neotoma albigula Rodent
105 2211 11 18 1979 11 NL F 33.0 122.0 Neotoma albigula Rodent
106 2216 11 18 1979 13 NL F 31.0 168.0 Neotoma albigula Rodent
107 2231 11 18 1979 19 NL F 33.0 122.0 Neotoma albigula Rodent
111 2329 1 15 1980 17 NL M 35.0 187.0 Neotoma albigula Rodent
112 2346 1 16 1980 5 NL M 35.0 239.0 Neotoma albigula Rodent
113 2350 1 16 1980 6 NL M 35.0 241.0 Neotoma albigula Rodent
114 2362 1 16 1980 17 NL M 32.0 168.0 Neotoma albigula Rodent
115 2395 1 16 1980 14 NL F 33.0 120.0 Neotoma albigula Rodent
116 2406 1 16 1980 2 NL F 33.0 184.0 Neotoma albigula Rodent
117 2498 2 25 1980 24 NL F 34.0 170.0 Neotoma albigula Rodent
118 2522 2 25 1980 14 NL F 32.0 147.0 Neotoma albigula Rodent
120 2694 3 9 1980 20 NL M NaN 177.0 Neotoma albigula Rodent
121 2728 3 9 1980 2 NL F NaN NaN Neotoma albigula Rodent
122 2732 3 9 1980 20 NL M NaN 212.0 Neotoma albigula Rodent
125 2904 4 18 1980 18 NL F NaN 110.0 Neotoma albigula Rodent
126 2927 5 17 1980 24 NL M NaN 109.0 Neotoma albigula Rodent
127 2932 5 17 1980 1 NL M 30.0 119.0 Neotoma albigula Rodent
128 2952 5 17 1980 20 NL M 32.0 207.0 Neotoma albigula Rodent
129 2992 5 18 1980 14 NL F 33.0 189.0 Neotoma albigula Rodent
130 2994 5 18 1980 18 NL M 32.0 73.0 Neotoma albigula Rodent
132 3000 5 18 1980 2 NL F 31.0 87.0 Neotoma albigula Rodent
133 3001 5 18 1980 18 NL F 30.0 75.0 Neotoma albigula Rodent
135 3032 5 25 1980 5 NL F NaN 135.0 Neotoma albigula Rodent
136 3058 6 22 1980 24 NL F 34.0 187.0 Neotoma albigula Rodent
138 3087 6 23 1980 13 NL M 35.0 241.0 Neotoma albigula Rodent
139 3089 6 23 1980 3 NL F 32.0 150.0 Neotoma albigula Rodent
140 3123 7 21 1980 24 NL F 29.0 75.0 Neotoma albigula Rodent
141 3133 7 21 1980 24 NL F 34.0 191.0 Neotoma albigula Rodent
143 3162 7 22 1980 14 NL F 33.0 80.0 Neotoma albigula Rodent
144 3195 8 13 1980 24 NL F 31.0 108.0 Neotoma albigula Rodent
145 3212 8 13 1980 11 NL M 31.0 83.0 Neotoma albigula Rodent
149 3272 9 7 1980 11 NL M 31.0 143.0 Neotoma albigula Rodent
152 3343 10 11 1980 1 NL M 31.0 171.0 Neotoma albigula Rodent
156 3426 11 8 1980 13 NL F 31.0 124.0 Neotoma albigula Rodent
157 3429 11 8 1980 1 NL M 33.0 153.0 Neotoma albigula Rodent
161 3557 12 15 1980 1 NL M 33.0 174.0 Neotoma albigula Rodent
162 3649 12 16 1980 13 NL F 31.0 120.0 Neotoma albigula Rodent
164 3669 12 16 1980 18 NL F 35.0 189.0 Neotoma albigula Rodent
165 3679 12 16 1980 11 NL M 31.0 116.0 Neotoma albigula Rodent
166 3691 1 11 1981 13 NL F 29.0 118.0 Neotoma albigula Rodent
168 3747 1 12 1981 4 NL F 33.0 200.0 Neotoma albigula Rodent
169 3777 1 12 1981 1 NL M 35.0 173.0 Neotoma albigula Rodent
172 3816 1 31 1981 13 NL F 30.0 135.0 Neotoma albigula Rodent
180 4302 4 6 1981 18 NL M NaN 220.0 Neotoma albigula Rodent
182 4395 5 3 1981 18 NL F 32.0 56.0 Neotoma albigula Rodent
188 4568 6 5 1981 24 NL M 33.0 136.0 Neotoma albigula Rodent
190 4604 7 7 1981 17 NL F 31.0 126.0 Neotoma albigula Rodent
191 4618 7 7 1981 24 NL M 34.0 137.0 Neotoma albigula Rodent
193 4649 7 8 1981 15 NL M NaN 158.0 Neotoma albigula Rodent
196 4692 7 30 1981 15 NL M 33.0 174.0 Neotoma albigula Rodent
197 4713 7 30 1981 4 NL M 34.0 175.0 Neotoma albigula Rodent
198 4756 8 30 1981 24 NL M NaN 193.0 Neotoma albigula Rodent
200 4782 8 30 1981 11 NL F 32.0 172.0 Neotoma albigula Rodent
201 4798 8 31 1981 8 NL M 31.0 173.0 Neotoma albigula Rodent
202 4803 8 31 1981 9 NL M 31.0 156.0 Neotoma albigula Rodent
203 4831 9 29 1981 21 NL F 32.0 165.0 Neotoma albigula Rodent
204 4837 9 30 1981 5 NL F 32.0 185.0 Neotoma albigula Rodent
205 4838 9 30 1981 11 NL M 31.0 155.0 Neotoma albigula Rodent
206 4846 9 30 1981 11 NL F 32.0 160.0 Neotoma albigula Rodent
209 4902 10 24 1981 21 NL F 34.0 165.0 Neotoma albigula Rodent
210 4914 10 25 1981 11 NL M 34.0 162.0 Neotoma albigula Rodent
211 4923 10 25 1981 11 NL F 33.0 157.0 Neotoma albigula Rodent
212 4935 10 25 1981 13 NL M 35.0 177.0 Neotoma albigula Rodent
213 4940 10 26 1981 3 NL M 34.0 195.0 Neotoma albigula Rodent
217 4986 11 22 1981 11 NL F 33.0 169.0 Neotoma albigula Rodent
219 5024 11 23 1981 5 NL F 33.0 186.0 Neotoma albigula Rodent
223 5069 11 23 1981 13 NL M NaN NaN Neotoma albigula Rodent
227 5127 12 31 1981 24 NL M 34.0 157.0 Neotoma albigula Rodent
229 5161 1 1 1982 5 NL F 33.0 193.0 Neotoma albigula Rodent
232 5195 1 1 1982 6 NL M 35.0 152.0 Neotoma albigula Rodent
246 5403 2 23 1982 3 NL M 35.0 211.0 Neotoma albigula Rodent
247 5458 2 23 1982 5 NL M 34.0 235.0 Neotoma albigula Rodent
255 5632 3 30 1982 1 NL F 31.0 109.0 Neotoma albigula Rodent
257 5740 4 28 1982 17 NL F 32.0 146.0 Neotoma albigula Rodent
261 5797 4 29 1982 15 NL M 31.0 129.0 Neotoma albigula Rodent
269 5920 5 22 1982 15 NL M 33.0 164.0 Neotoma albigula Rodent
275 5995 6 28 1982 10 NL M NaN 237.0 Neotoma albigula Rodent
276 5999 6 28 1982 15 NL M NaN 183.0 Neotoma albigula Rodent
279 6022 6 28 1982 1 NL F 33.0 143.0 Neotoma albigula Rodent
282 6044 6 28 1982 15 NL F NaN 112.0 Neotoma albigula Rodent
286 6110 6 29 1982 16 NL F NaN 99.0 Neotoma albigula Rodent
291 6187 7 25 1982 1 NL F NaN 145.0 Neotoma albigula Rodent
292 6202 7 25 1982 9 NL M NaN NaN Neotoma albigula Rodent
294 6225 7 26 1982 13 NL M NaN 60.0 Neotoma albigula Rodent
303 6291 7 27 1982 16 NL F NaN 109.0 Neotoma albigula Rodent
305 6311 7 27 1982 16 NL F NaN 108.0 Neotoma albigula Rodent
316 6488 8 16 1982 15 NL F 30.0 137.0 Neotoma albigula Rodent
317 6490 8 16 1982 9 NL M 32.0 117.0 Neotoma albigula Rodent
320 6515 9 18 1982 9 NL M 33.0 159.0 Neotoma albigula Rodent
331 6962 11 21 1982 13 NL M 34.0 160.0 Neotoma albigula Rodent
351 7902 5 15 1983 6 NL F 31.0 116.0 Neotoma albigula Rodent
354 7955 6 17 1983 22 NL F 30.0 88.0 Neotoma albigula Rodent
360 8033 6 18 1983 15 NL F 29.0 117.0 Neotoma albigula Rodent
365 8104 7 16 1983 22 NL F 31.0 110.0 Neotoma albigula Rodent
368 8129 7 17 1983 1 NL F 31.0 135.0 Neotoma albigula Rodent
369 8141 7 17 1983 9 NL M 33.0 132.0 Neotoma albigula Rodent
377 8207 8 15 1983 22 NL F 32.0 NaN Neotoma albigula Rodent
381 8233 8 16 1983 6 NL F 33.0 120.0 Neotoma albigula Rodent
388 8285 9 10 1983 16 NL F 32.0 152.0 Neotoma albigula Rodent
390 8288 9 10 1983 19 NL F 31.0 139.0 Neotoma albigula Rodent
398 8357 9 11 1983 4 NL F 32.0 196.0 Neotoma albigula Rodent
400 8376 9 11 1983 6 NL F 32.0 139.0 Neotoma albigula Rodent
411 8470 10 15 1983 21 NL F 31.0 131.0 Neotoma albigula Rodent
413 8484 10 16 1983 5 NL M 33.0 184.0 Neotoma albigula Rodent
418 8516 10 16 1983 6 NL F 32.0 158.0 Neotoma albigula Rodent
432 8739 12 8 1983 21 NL F 32.0 144.0 Neotoma albigula Rodent
437 8783 12 9 1983 6 NL F 34.0 178.0 Neotoma albigula Rodent
460 9265 5 28 1984 1 NL F 32.0 134.0 Neotoma albigula Rodent
474 9398 7 31 1984 23 NL M 31.0 123.0 Neotoma albigula Rodent
524 10668 8 19 1985 22 NL M 31.0 113.0 Neotoma albigula Rodent
571 11567 6 4 1986 21 NL F 32.0 98.0 Neotoma albigula Rodent
632 12691 4 26 1987 9 NL F 34.0 117.0 Neotoma albigula Rodent
648 12873 5 28 1987 9 NL F 31.0 177.0 Neotoma albigula Rodent
649 12876 5 28 1987 22 NL M 30.0 55.0 Neotoma albigula Rodent
661 13094 7 25 1987 22 NL F NaN 84.0 Neotoma albigula Rodent
662 13108 7 26 1987 8 NL M NaN 134.0 Neotoma albigula Rodent
667 13176 7 26 1987 4 NL M NaN 187.0 Neotoma albigula Rodent
671 13233 8 25 1987 22 NL F 29.0 110.0 Neotoma albigula Rodent
675 13259 8 26 1987 8 NL F 32.0 120.0 Neotoma albigula Rodent
677 13270 8 26 1987 9 NL F 31.0 90.0 Neotoma albigula Rodent
686 13362 9 26 1987 6 NL M 34.0 205.0 Neotoma albigula Rodent
691 13404 9 26 1987 22 NL M 34.0 139.0 Neotoma albigula Rodent
698 13451 9 27 1987 7 NL M 32.0 154.0 Neotoma albigula Rodent
699 13461 9 27 1987 7 NL F 33.0 170.0 Neotoma albigula Rodent
711 13548 10 24 1987 22 NL M 30.0 146.0 Neotoma albigula Rodent
720 13639 10 25 1987 7 NL F 35.0 NaN Neotoma albigula Rodent
730 13782 11 22 1987 7 NL F NaN 169.0 Neotoma albigula Rodent
756 14327 3 21 1988 5 NL M 32.0 176.0 Neotoma albigula Rodent
759 14384 4 18 1988 8 NL F 30.0 60.0 Neotoma albigula Rodent
761 14404 4 18 1988 7 NL M 34.0 235.0 Neotoma albigula Rodent
763 14442 4 19 1988 5 NL M 33.0 131.0 Neotoma albigula Rodent
767 14495 5 15 1988 9 NL F 31.0 112.0 Neotoma albigula Rodent
769 14524 5 15 1988 8 NL F 33.0 178.0 Neotoma albigula Rodent
770 14532 5 15 1988 7 NL M 34.0 NaN Neotoma albigula Rodent
771 14539 5 15 1988 7 NL M 32.0 133.0 Neotoma albigula Rodent
777 14595 6 12 1988 8 NL F 34.0 172.0 Neotoma albigula Rodent
780 14634 6 12 1988 8 NL F 31.0 100.0 Neotoma albigula Rodent
792 14721 7 15 1988 4 NL M 33.0 126.0 Neotoma albigula Rodent
808 14870 9 11 1988 6 NL M 34.0 215.0 Neotoma albigula Rodent
809 14874 9 11 1988 23 NL M 34.0 112.0 Neotoma albigula Rodent
822 14999 10 9 1988 7 NL F 34.0 161.0 Neotoma albigula Rodent
823 15028 10 9 1988 10 NL M 33.0 143.0 Neotoma albigula Rodent
829 15127 11 6 1988 7 NL M 33.0 180.0 Neotoma albigula Rodent
831 15159 11 6 1988 7 NL F 33.0 156.0 Neotoma albigula Rodent
861 15981 5 9 1989 21 NL M 33.0 100.0 Neotoma albigula Rodent
872 16120 6 3 1989 21 NL M 32.0 121.0 Neotoma albigula Rodent
879 16232 7 3 1989 21 NL M NaN 145.0 Neotoma albigula Rodent
887 16350 7 29 1989 21 NL M 32.0 77.0 Neotoma albigula Rodent
892 16490 10 7 1989 21 NL M 33.0 167.0 Neotoma albigula Rodent
952 18823 7 12 1991 19 NL M 32.0 99.0 Neotoma albigula Rodent
1261 16 7 16 1977 4 DM F 36.0 NaN Dipodomys merriami Rodent
1269 32 7 17 1977 10 DM F 35.0 NaN Dipodomys merriami Rodent
1276 41 7 18 1977 23 DM F 34.0 NaN Dipodomys merriami Rodent
1280 46 7 18 1977 19 DM M 35.0 NaN Dipodomys merriami Rodent
1288 55 7 18 1977 23 DM M 36.0 NaN Dipodomys merriami Rodent
1291 59 7 18 1977 19 DM M 33.0 NaN Dipodomys merriami Rodent
1292 61 7 18 1977 23 DM M 35.0 NaN Dipodomys merriami Rodent
1293 62 7 18 1977 19 DM M 35.0 NaN Dipodomys merriami Rodent
1296 65 8 19 1977 4 DM F 34.0 29.0 Dipodomys merriami Rodent
1297 66 8 19 1977 4 DM F 35.0 46.0 Dipodomys merriami Rodent
1304 84 8 20 1977 19 DM F 35.0 43.0 Dipodomys merriami Rodent
1305 85 8 20 1977 23 DM F 35.0 41.0 Dipodomys merriami Rodent
1312 95 8 20 1977 23 DM M 38.0 46.0 Dipodomys merriami Rodent
1318 102 8 20 1977 23 DM F 35.0 40.0 Dipodomys merriami Rodent
1320 105 8 20 1977 19 DM M 35.0 39.0 Dipodomys merriami Rodent
1338 130 8 21 1977 16 DM F 34.0 NaN Dipodomys merriami Rodent
1346 141 8 21 1977 16 DM M 36.0 NaN Dipodomys merriami Rodent
1375 186 9 11 1977 19 DM F 34.0 35.0 Dipodomys merriami Rodent
1381 198 9 12 1977 4 DM M 35.0 39.0 Dipodomys merriami Rodent
1435 302 10 17 1977 19 DM F 36.0 44.0 Dipodomys merriami Rodent
1439 306 10 17 1977 16 DM M 33.0 23.0 Dipodomys merriami Rodent
1454 342 10 18 1977 16 DM M 34.0 25.0 Dipodomys merriami Rodent
1584 649 2 20 1978 16 DM M 35.0 48.0 Dipodomys merriami Rodent
1617 737 4 8 1978 9 DM F 36.0 NaN Dipodomys merriami Rodent
1637 778 4 9 1978 8 DM M 36.0 36.0 Dipodomys merriami Rodent
1674 880 5 18 1978 16 DM M NaN 48.0 Dipodomys merriami Rodent
1948 1634 2 24 1979 8 DM M 36.0 42.0 Dipodomys merriami Rodent
1975 1715 3 31 1979 8 DM M 37.0 45.0 Dipodomys merriami Rodent
2086 2207 11 18 1979 10 DM F 34.0 36.0 Dipodomys merriami Rodent
2154 2366 1 16 1980 23 DM F 36.0 44.0 Dipodomys merriami Rodent
2199 2477 2 25 1980 10 DM F 34.0 40.0 Dipodomys merriami Rodent
2200 2478 2 25 1980 10 DM M 36.0 49.0 Dipodomys merriami Rodent
2491 3422 11 8 1980 10 DM M 35.0 40.0 Dipodomys merriami Rodent
2557 3581 12 15 1980 10 DM M 36.0 41.0 Dipodomys merriami Rodent
2588 3638 12 16 1980 23 DM F 35.0 56.0 Dipodomys merriami Rodent
13820 12675 4 26 1987 10 PE F 18.0 30.0 Peromyscus eremicus Rodent
13919 13770 11 22 1987 10 PE F 20.0 18.0 Peromyscus eremicus Rodent
In [ ]:
# why didn't it group by sex like I thought it would? 
In [278]:
plot_sex.shape
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-278-8cd8c3f9cbc9> in <module>
----> 1 plot_sex.shape

~/opt/anaconda3/lib/python3.8/site-packages/pandas/core/groupby/groupby.py in __getattr__(self, attr)
    578             return self[attr]
    579 
--> 580         raise AttributeError(
    581             f"'{type(self).__name__}' object has no attribute '{attr}'"
    582         )

AttributeError: 'DataFrameGroupBy' object has no attribute 'shape'
In [ ]:
# why doesn't it allow us to look at the shape of this dataframe? 
In [279]:
sex_counts = plot_sex["taxa"].count()
In [280]:
sex_counts
Out[280]:
plot_id  sex
1        F       848
         M      1095
2        F       970
         M      1144
3        F       893
         M       840
4        F       872
         M      1030
5        F       527
         M       586
6        F       737
         M       764
7        F       335
         M       307
8        F       840
         M       983
9        F       853
         M      1004
10       F       141
         M       142
11       F       822
         M      1029
12       F      1060
         M      1200
13       F       628
         M       783
14       F       709
         M      1070
15       F       481
         M       409
16       F       222
         M       273
17       F       892
         M      1053
18       F       755
         M       626
19       F       522
         M       581
20       F       650
         M       613
21       F       611
         M       451
22       F       671
         M       673
23       F       165
         M       207
24       F       486
         M       485
Name: taxa, dtype: int64
In [281]:
sex_counts_to_plot = sex_counts.unstack()
In [283]:
sex_counts_to_plot.plot(kind = "bar", stacked = True, title = "Number of taxa in each plot")
Out[283]:
<matplotlib.axes._subplots.AxesSubplot at 0x11a0249a0>
In [ ]:
# Challenge - Diversity Index 

# In the data folder, there is a plots.csv file that contains information about the type associated with each plot. 
# Use that data to summarize the number of plots by plot type.
In [208]:
plots = pd.read_csv("data/plots.csv")
In [209]:
plots
Out[209]:
plot_id plot_type
0 1 Spectab exclosure
1 2 Control
2 3 Long-term Krat Exclosure
3 4 Control
4 5 Rodent Exclosure
5 6 Short-term Krat Exclosure
6 7 Rodent Exclosure
7 8 Control
8 9 Spectab exclosure
9 10 Rodent Exclosure
10 11 Control
11 12 Control
12 13 Short-term Krat Exclosure
13 14 Control
14 15 Long-term Krat Exclosure
15 16 Rodent Exclosure
16 17 Control
17 18 Short-term Krat Exclosure
18 19 Long-term Krat Exclosure
19 20 Short-term Krat Exclosure
20 21 Long-term Krat Exclosure
21 22 Control
22 23 Rodent Exclosure
23 24 Rodent Exclosure
In [218]:
types = plots.groupby(["plot_type"])
In [219]:
types.head()
Out[219]:
plot_id plot_type
0 1 Spectab exclosure
1 2 Control
2 3 Long-term Krat Exclosure
3 4 Control
4 5 Rodent Exclosure
5 6 Short-term Krat Exclosure
6 7 Rodent Exclosure
7 8 Control
8 9 Spectab exclosure
9 10 Rodent Exclosure
10 11 Control
11 12 Control
12 13 Short-term Krat Exclosure
14 15 Long-term Krat Exclosure
15 16 Rodent Exclosure
17 18 Short-term Krat Exclosure
18 19 Long-term Krat Exclosure
19 20 Short-term Krat Exclosure
20 21 Long-term Krat Exclosure
22 23 Rodent Exclosure
In [221]:
types_counts = types["plot_type"].count()
In [222]:
types_counts
Out[222]:
plot_type
Control                      8
Long-term Krat Exclosure     4
Rodent Exclosure             6
Short-term Krat Exclosure    4
Spectab exclosure            2
Name: plot_type, dtype: int64
In [ ]:
# Calculate a diversity index of your choice for control vs. rodent exclosure plots 
# The index should consider both species abundance and number of species. 
# You might choose to use the simple biodiversity index described as: 
# number of species in the plot / total number of individuals in the plot 
In [ ]:
# OK first let's figure out which plots are control plots: 
# I just looked at the dataframe, but there's probably a quicker way to see... 
# plot_id 2, 4, 8, 11, 12, 14, 17, 22
# let's make a subset of the original data with just thost plots
In [223]:
controls = surveys_df[surveys_df['plot_id'].isin(["2","4","8","11","12","14","17","22"])]
In [224]:
controls
Out[224]:
record_id month day year plot_id species_id sex hindfoot_length weight
0 1 7 16 1977 2 NL M 32.0 NaN
2 3 7 16 1977 2 DM F 37.0 NaN
6 7 7 16 1977 2 PE F NaN NaN
13 14 7 16 1977 8 DM NaN NaN NaN
15 16 7 16 1977 4 DM F 36.0 NaN
... ... ... ... ... ... ... ... ... ...
35532 35533 12 31 2002 14 DM F 36.0 48.0
35533 35534 12 31 2002 14 DM M 37.0 56.0
35534 35535 12 31 2002 14 DM M 37.0 53.0
35535 35536 12 31 2002 14 DM F 35.0 42.0
35536 35537 12 31 2002 14 DM F 36.0 46.0

15660 rows × 9 columns

In [225]:
by_plots = controls.groupby(["plot_id"])
In [230]:
by_plots.head()
Out[230]:
record_id month day year plot_id species_id sex hindfoot_length weight
0 1 7 16 1977 2 NL M 32.0 NaN
2 3 7 16 1977 2 DM F 37.0 NaN
6 7 7 16 1977 2 PE F NaN NaN
13 14 7 16 1977 8 DM NaN NaN NaN
15 16 7 16 1977 4 DM F 36.0 NaN
17 18 7 16 1977 2 PP M 22.0 NaN
18 19 7 16 1977 4 PF NaN NaN NaN
19 20 7 17 1977 11 DS F 48.0 NaN
20 21 7 17 1977 14 DM F 34.0 NaN
27 28 7 17 1977 11 DM M 38.0 NaN
28 29 7 17 1977 11 PP M NaN NaN
32 33 7 17 1977 11 DM F 36.0 NaN
33 34 7 17 1977 17 DM NaN NaN NaN
36 37 7 17 1977 11 DM F 35.0 NaN
37 38 7 17 1977 17 NL M 33.0 NaN
44 45 7 18 1977 22 DM M 36.0 NaN
48 49 7 18 1977 12 PP F 17.0 NaN
51 52 7 18 1977 12 DM F 26.0 NaN
52 53 7 18 1977 22 DM M 36.0 NaN
56 57 7 18 1977 22 DM NaN NaN NaN
57 58 7 18 1977 12 DS M 45.0 NaN
64 65 8 19 1977 4 DM F 34.0 29.0
65 66 8 19 1977 4 DM F 35.0 46.0
67 68 8 19 1977 8 DO F 32.0 52.0
68 69 8 19 1977 2 PF M 15.0 8.0
73 74 8 19 1977 8 PF M 12.0 7.0
74 75 8 19 1977 8 DM F 32.0 22.0
76 77 8 19 1977 4 SS NaN NaN NaN
88 89 8 20 1977 12 PP F 20.0 15.0
95 96 8 20 1977 12 DM M 36.0 41.0
117 118 8 21 1977 17 DM M 33.0 NaN
121 122 8 21 1977 22 DM F 33.0 NaN
122 123 8 21 1977 17 DM M 37.0 NaN
123 124 8 21 1977 14 DM M 36.0 NaN
124 125 8 21 1977 22 DM M 35.0 NaN
126 127 8 21 1977 14 DM M 33.0 NaN
128 129 8 21 1977 14 DM F 34.0 NaN
133 134 8 21 1977 17 DS F 45.0 NaN
138 139 8 21 1977 14 PF M 15.0 NaN
222 223 9 13 1977 8 DS F 50.0 NaN
In [ ]:
# why didn't this group like I thought it would? 
In [227]:
by_plots['species_id'].value_counts()
Out[227]:
plot_id  species_id
2        DM            578
         DO            313
         NL            201
         OT            194
         PP            187
                      ... 
22       CQ              1
         DX              1
         SF              1
         SU              1
         UP              1
Name: species_id, Length: 192, dtype: int64
In [232]:
spp_counts = by_plots['species_id'].value_counts()

# this output object is not a dataframe, it's a "series"
In [236]:
# OPTION FOR DISPLAYING ENTIRE DATA FRAME:
pd.set_option("display.max_rows", None, "display.max_columns", None)
In [237]:
print(spp_counts)
plot_id  species_id
2        DM             578
         DO             313
         NL             201
         OT             194
         PP             187
         PB             171
         PE             146
         DS             137
         RM              71
         OL              68
         PM              36
         PF              21
         SH              15
         AB              14
         SO               8
         AH               7
         SS               7
         DX               4
         SA               3
         OX               2
         BA               1
         CM               1
         CQ               1
         RF               1
         RO               1
         UL               1
         UP               1
         UR               1
4        DM            1075
         DS             277
         PF             136
         PP             115
         PB              97
         OT              76
         DO              62
         OL              57
         SS              22
         RM              19
         NL               6
         PM               4
         AB               3
         DX               3
         AH               2
         SH               2
         CB               1
         PC               1
         PE               1
         PI               1
8        DM             569
         DO             488
         PP             217
         DS             212
         PB             104
         OT              97
         PF              56
         OL              55
         SS              15
         RM              13
         AH              12
         PE              11
         NL               8
         PM               7
         AB               6
         DX               3
         CB               2
         CM               1
         PC               1
         PI               1
         SA               1
         UL               1
         UR               1
11       DM             912
         DO             209
         PP             156
         DS             130
         OT             119
         RM              71
         PB              54
         OL              52
         PE              51
         NL              43
         PF              32
         PM              17
         SS              14
         SH              11
         AH               7
         SF               6
         DX               5
         SA               5
         PC               4
         UR               3
         AB               2
         US               2
         CB               1
         CS               1
         PG               1
         PH               1
         RF               1
         SO               1
         SU               1
12       DM            1020
         DO             272
         PP             201
         OT             185
         NL             166
         DS             120
         RM              94
         OL              77
         PE              50
         PB              36
         AB              28
         PF              24
         AH              22
         PM              13
         SF              12
         RF               9
         SA               8
         SS               8
         SH               7
         DX               3
         PI               3
         OX               2
         AS               1
         PG               1
         UP               1
14       DM            1215
         OL              98
         DS              90
         OT              83
         PP              78
         NL              62
         DO              61
         AH              37
         RM              36
         PF              33
         PE              22
         PM              17
         SS              16
         AB               8
         CB               6
         DX               4
         PB               4
         SA               4
         PC               2
         OX               1
         PG               1
         PH               1
         PL               1
17       DM             869
         DO             261
         OT             169
         DS             121
         PP             111
         NL             106
         RM              82
         PB              67
         PF              66
         OL              43
         PE              39
         AH              34
         PM              22
         SS              16
         SH              15
         SA               6
         DX               4
         AB               3
         CB               1
         OX               1
         UL               1
22       DM             522
         DS             302
         PB             256
         OT              81
         PF              56
         PP              43
         PE              23
         OL              21
         NL              18
         RM              17
         AB              12
         PH               9
         DO               5
         SS               5
         AH               4
         PM               4
         CB               2
         SA               2
         CQ               1
         DX               1
         SF               1
         SU               1
         UP               1
Name: species_id, dtype: int64
In [243]:
spp_counts.dtype
Out[243]:
dtype('int64')
In [239]:
# let's subset just plot 2 
plot2 = controls[controls.plot_id == 2]
In [241]:
plot2.head()
Out[241]:
record_id month day year plot_id species_id sex hindfoot_length weight
0 1 7 16 1977 2 NL M 32.0 NaN
2 3 7 16 1977 2 DM F 37.0 NaN
6 7 7 16 1977 2 PE F NaN NaN
17 18 7 16 1977 2 PP M 22.0 NaN
68 69 8 19 1977 2 PF M 15.0 8.0
In [254]:
plot2['species_id'].value_counts()
Out[254]:
DM    578
DO    313
NL    201
OT    194
PP    187
PB    171
PE    146
DS    137
RM     71
OL     68
PM     36
PF     21
SH     15
AB     14
SO      8
AH      7
SS      7
DX      4
SA      3
OX      2
UP      1
RF      1
BA      1
UR      1
CM      1
CQ      1
UL      1
RO      1
Name: species_id, dtype: int64
In [255]:
type(plot2)
Out[255]:
pandas.core.frame.DataFrame
In [256]:
plot2.dtypes
Out[256]:
record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
dtype: object
In [258]:
counts = plot2['species_id'].value_counts()
In [259]:
counts.shape
Out[259]:
(28,)
In [261]:
spp_number = counts.shape[0]
In [262]:
spp_number
Out[262]:
28
In [263]:
# now we need to add up all those counts to get total number of individuals 
ind_number = counts.sum()
In [264]:
ind_number
Out[264]:
2191
In [265]:
# biodiversity index for Plot 2: 
biodiversity = spp_number / ind_number
In [266]:
biodiversity
Out[266]:
0.012779552715654952
In [ ]: