import pandas as pd

%pwd

'/Users/alisonfowler'

# downloaded survey data; now need to change working directory to retrieve it
%cd /Users/alisonfowler/Docs/Jupityr-notebooks/

/Users/alisonfowler/Docs/Jupityr-notebooks

# Note that pd.read_csv is used because we imported pandas as pd
pd.read_csv("data/surveys.csv")

surveys_df = pd.read_csv("data/surveys.csv")

surveys_df

surveys_df.head()

type(surveys_df)

pandas.core.frame.DataFrame

surveys_df.dtypes # the formats of different types of data

record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
dtype: object

# CHALLENGE : DATAFRAMES 
# 1. surveys_df.columns
# 2. surveys_df.shape Take note of the output of shape - what format does it return the shape of the DataFrame in?
# HINT: More on tuples, here.
# 3. surveys_df.head() Also, what does surveys_df.head(15) do?
# 4. surveys_df.tail()

surveys_df.columns # returns column names

Index(['record_id', 'month', 'day', 'year', 'plot_id', 'species_id', 'sex',
       'hindfoot_length', 'weight'],
      dtype='object')

surveys_df.shape # dimensions - returns the shape as a vector

(35549, 9)

surveys_df.head() # displays first five rows of the dataframe

surveys_df.head(15) # displays first fifteen rows of the dataframe

surveys_df.tail() # displays last five rows

pd.unique(surveys_df['species_id']) # list all the unique species

array(['NL', 'DM', 'PF', 'PE', 'DS', 'PP', 'SH', 'OT', 'DO', 'OX', 'SS',
       'OL', 'RM', nan, 'SA', 'PM', 'AH', 'DX', 'AB', 'CB', 'CM', 'CQ',
       'RF', 'PC', 'PG', 'PH', 'PU', 'CV', 'UR', 'UP', 'ZL', 'UL', 'CS',
       'SC', 'BA', 'SF', 'RO', 'AS', 'SO', 'PI', 'ST', 'CU', 'SU', 'RX',
       'PB', 'PL', 'PX', 'CT', 'US'], dtype=object)

# CHALLENGE : STATISTICS 
# 1. Create a list of unique site ID’s (“plot_id”) found in the surveys data. Call it site_names. 
# How many unique sites are there in the data? How many unique species are in the data?

# 2. What is the difference between len(site_names) and surveys_df['plot_id'].nunique()?

site_names=pd.unique(surveys_df['plot_id']) # create a new object with site names

site_names # look at new object

array([ 2,  3,  7,  1,  6,  5,  8,  4, 11, 14, 15, 13,  9, 10, 17, 16, 20,
       23, 18, 21, 22, 19, 12, 24])

site_names.shape # how many sites are there

(24,)

species_names=pd.unique(surveys_df['species_id']) # create a new object with species names

species_names # look

array(['NL', 'DM', 'PF', 'PE', 'DS', 'PP', 'SH', 'OT', 'DO', 'OX', 'SS',
       'OL', 'RM', nan, 'SA', 'PM', 'AH', 'DX', 'AB', 'CB', 'CM', 'CQ',
       'RF', 'PC', 'PG', 'PH', 'PU', 'CV', 'UR', 'UP', 'ZL', 'UL', 'CS',
       'SC', 'BA', 'SF', 'RO', 'AS', 'SO', 'PI', 'ST', 'CU', 'SU', 'RX',
       'PB', 'PL', 'PX', 'CT', 'US'], dtype=object)

species_names.shape # how many species are there

(49,)

len(species_names)

49

len(site_names) # length of vector

24

surveys_df['plot_id'].nunique() # number of unique values in the vector
# different from previous line b/c the vector could have repeating values. 
# in this case, it returns the same number since all values are unique

24

surveys_df['weight'].describe() # get basic stats for a given column

count    32283.000000
mean        42.672428
std         36.631259
min          4.000000
25%         20.000000
50%         37.000000
75%         48.000000
max        280.000000
Name: weight, dtype: float64

surveys_df['weight'].min() # extract a stat

4.0

# Groups 

# Group data by sex
grouped_data = surveys_df.groupby('sex')

# Summary statistics for all numeric columns by sex
grouped_data.describe()

# Provide the mean for each numeric column by sex
grouped_data.mean()

# CHALLENGE : SUMMARY DATA 
# 1. How many recorded individuals are female F and how many male M?
# 2. What happens when you group by two columns using the following syntax and then calculate mean values?
#   grouped_data2 = surveys_df.groupby(['plot_id', 'sex'])
#   grouped_data2.mean()
# 3. Summarize weight values for each site in your data. 
# HINT: you can use the following syntax to only create summary statistics for one column in your data. 
#   by_site['weight'].describe()

grouped_data.count() # number of recorded individuals
# females: 15,690
# males: 17,348 
# apparently some missing data for hindfoot length and weights?

grouped_data2 = surveys_df.groupby(['plot_id', 'sex']) # create a new object grouped by two variables

grouped_data2.mean() # displays mean for each variable grouped by sexes within each plot/site

grouped_data2['weight'].describe() # gives information about weights within groups

# group by site only
grouped_data3 = surveys_df.groupby('plot_id')

grouped_data3.mean()

grouped_data3['weight'].describe() # gives information about weights within sites
# but this output looks slightly different from the one on the tutorial site, which has the columns & rows switched

# Quickly creating summary counts 

# Count the number of samples by species
species_counts = surveys_df.groupby('species_id')['record_id'].count()
print(species_counts)

species_id
AB      303
AH      437
AS        2
BA       46
CB       50
CM       13
CQ       16
CS        1
CT        1
CU        1
CV        1
DM    10596
DO     3027
DS     2504
DX       40
NL     1252
OL     1006
OT     2249
OX       12
PB     2891
PC       39
PE     1299
PF     1597
PG        8
PH       32
PI        9
PL       36
PM      899
PP     3123
PU        5
PX        6
RF       75
RM     2609
RO        8
RX        2
SA       75
SC        1
SF       43
SH      147
SO       43
SS      248
ST        1
SU        5
UL        4
UP        8
UR       10
US        4
ZL        2
Name: record_id, dtype: int64

surveys_df.groupby('species_id')['record_id'].count()['DO'] # sample count for one particular species

3027

# CHALLENGE : MAKE A LIST
# What’s another way to create a list of species and associated count of the records in the data? 

# Hint: you can perform count, min, etc. functions on groupby DataFrames in the same way you can perform them 
# on regular DataFrames.

surveys_df.head()

grouped_data4 = surveys_df.groupby('species_id')

grouped_data4.count() # this works?

# Basic Math functions

# Multiply all weight values by 2
surveys_df['weight']*2

0          NaN
1          NaN
2          NaN
3          NaN
4          NaN
         ...  
35544      NaN
35545      NaN
35546     28.0
35547    102.0
35548      NaN
Name: weight, Length: 35549, dtype: float64

# Plotting 

# Make sure figures appear inline in Ipython Notebook
%matplotlib inline

# Create a quick bar chart
species_counts.plot(kind='bar'); # what does the semi-colon do?

total_count = surveys_df.groupby('plot_id')['record_id'].nunique()

# Let's plot that too
total_count.plot(kind='bar');

# CHALLENGE: PLOTS 
# 1. Create a plot of average weight across all species per site.
# 2. Create a plot of total males versus total females for the entire dataset.

site_weights = surveys_df.groupby('plot_id')['weight'] # group weights by site

site_weights.head()

0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
        ...  
1962    124.0
1969     41.0
2048     49.0
2054    119.0
2058     35.0
Name: weight, Length: 120, dtype: float64

site_mean_weights = site_weights.mean() # extract means

site_mean_weights.head()

plot_id
1    51.822911
2    52.251688
3    32.654386
4    47.928189
5    40.947802
Name: weight, dtype: float64

site_mean_weights.plot(kind = 'bar'); # plot mean weights of sites

sex_counts = surveys_df.groupby('sex')['record_id'].count()

print(sex_counts)

sex
F    15690
M    17348
Name: record_id, dtype: int64

sex_counts.plot(kind = 'bar'); # plot number of records of each sex of entire data set

# SUMMARY PLOTTING CHALLENGE
# Create a stacked bar plot, with weight on the Y axis, and the stacked variable being sex. 
# The plot should show total weight by sex for each site.

# Example data for making the plot 
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}

pd.DataFrame(d)

# Plot stacked data so columns 'one' and 'two' are stacked
my_df = pd.DataFrame(d)
my_df.plot(kind='bar', stacked=True, title="Example graph")

<matplotlib.axes._subplots.AxesSubplot at 0x123e94fd0>

# let's try to do this with weights by sites and sex 
site_sex = surveys_df.groupby(['plot_id', 'sex'])

site_sex_count = site_sex['weight'].sum()

print(site_sex_count)

plot_id  sex
1        F      38253.0
         M      59979.0
2        F      50144.0
         M      57250.0
3        F      27251.0
         M      28253.0
4        F      39796.0
         M      49377.0
5        F      21143.0
         M      23326.0
6        F      26210.0
         M      27245.0
7        F       6522.0
         M       6422.0
8        F      37274.0
         M      47755.0
9        F      44128.0
         M      48727.0
10       F       2359.0
         M       2776.0
11       F      34638.0
         M      43106.0
12       F      51825.0
         M      57420.0
13       F      24720.0
         M      30354.0
14       F      32770.0
         M      46469.0
15       F      12455.0
         M      11037.0
16       F       5446.0
         M       6310.0
17       F      42106.0
         M      48082.0
18       F      27353.0
         M      26433.0
19       F      11297.0
         M      11514.0
20       F      33206.0
         M      25988.0
21       F      15481.0
         M       9815.0
22       F      34656.0
         M      35363.0
23       F       3352.0
         M       3883.0
24       F      22951.0
         M      18835.0
Name: weight, dtype: float64

site_sex_count.unstack() 
# transforms grouped data into columns

site_sex_count.plot(kind='bar', stacked=True, title="Total weight by site and sex")
# so if we don't unstack first, it doesn't allow us to stack two variables within a site

<matplotlib.axes._subplots.AxesSubplot at 0x124028280>

to_plot = site_sex_count.unstack()

to_plot.plot(kind='bar', stacked=True, title="Total weight by site and sex") # yay

<matplotlib.axes._subplots.AxesSubplot at 0x13050d370>

	record_id	month	day	year	plot_id	hindfoot_length	weight
sex
F	18036.412046	6.583047	16.007138	1990.644997	11.440854	28.836780	42.170555
M	17754.835601	6.392668	16.184286	1990.480401	11.098282	29.709578	42.995379

		record_id	month	day	year	hindfoot_length	weight
plot_id	sex
1	F	18390.384434	6.597877	15.338443	1990.933962	31.733911	46.311138
1	M	17197.740639	6.121461	15.905936	1990.091324	34.302770	55.950560
2	F	17714.753608	6.426804	16.288660	1990.449485	30.161220	52.561845
2	M	18085.458042	6.340035	15.440559	1990.756119	30.353760	51.391382
3	F	19888.783875	6.604703	16.161254	1992.013438	23.774044	31.215349
3	M	20226.767857	6.271429	16.450000	1992.275000	23.833744	34.163241
4	F	17489.205275	6.442661	15.746560	1990.235092	33.249102	46.818824
4	M	18493.841748	6.430097	16.507767	1991.000971	34.097959	48.888119
5	F	12280.793169	6.142315	15.722960	1986.485769	28.921844	40.974806
5	M	12798.426621	6.194539	15.703072	1986.817406	29.694794	40.708551
6	F	19406.503392	6.628223	16.313433	1991.579376	26.981322	36.352288
6	M	17849.574607	6.035340	16.091623	1990.556283	27.425591	36.867388
7	F	19069.668657	6.385075	15.313433	1991.441791	19.779553	20.006135
7	M	19188.729642	6.719870	15.778502	1991.462541	20.536667	21.194719
8	F	18920.276190	6.632143	15.836905	1991.267857	32.187578	45.623011
8	M	19452.109868	6.571719	15.854527	1991.686673	33.751059	49.641372
9	F	16217.497069	6.499414	15.555686	1989.303634	35.126092	53.618469
9	M	18000.710159	6.361554	15.209163	1990.632470	34.175732	49.519309
10	F	16001.496454	5.588652	16.964539	1989.248227	18.641791	17.094203
10	M	15708.704225	5.718310	16.739437	1989.007042	19.567164	19.971223
11	F	16994.962287	6.759124	16.283455	1989.836983	32.029299	43.515075
11	M	16933.909621	6.374150	15.974733	1989.856171	32.078014	43.366197
12	F	17457.966981	6.509434	16.305660	1990.266981	30.975124	49.831731
12	M	17592.327500	6.304167	16.367500	1990.400833	31.762489	48.909710
13	F	18033.100318	6.802548	16.229299	1990.619427	27.201014	40.524590
13	M	16969.044700	6.480204	16.005109	1989.911877	27.893793	40.097754
14	F	17097.145275	6.510578	16.681241	1989.974612	32.973373	47.355491
14	M	17891.948598	6.660748	16.504673	1990.587850	32.961802	45.159378
15	F	20602.449064	6.569647	16.162162	1992.523909	21.949891	26.670236
15	M	18104.019560	6.185819	17.413203	1990.770171	21.803109	27.523691
16	F	19002.445946	6.360360	16.819820	1991.351351	23.144928	25.810427
16	M	18434.714286	6.201465	16.622711	1990.926740	23.480916	23.811321
17	F	18234.322870	6.650224	15.892377	1990.785874	30.918536	48.176201
17	M	18857.651472	6.569801	16.183286	1991.331434	32.227634	47.558853
18	F	17940.875497	6.698013	15.960265	1990.536424	26.690341	36.963514
18	M	15106.718850	6.610224	16.797125	1988.551118	27.703072	43.546952
19	F	21848.216475	6.701149	15.226054	1993.417625	21.257937	21.978599
19	M	19470.779690	6.533563	16.647160	1991.740103	21.071685	20.306878
20	F	17510.769231	6.743077	16.026154	1990.253846	27.069193	52.624406
20	M	16076.192496	6.489396	16.375204	1989.243067	27.908451	44.197279
21	F	22452.636661	6.860884	16.307692	1993.878887	22.366554	25.974832
21	M	20120.399113	6.671840	16.203991	1992.199557	21.736721	22.772622
22	F	18499.695976	6.651267	15.521610	1990.973174	34.108320	53.647059
22	M	18015.365527	6.381872	16.682021	1990.650817	33.359746	54.572531
23	F	15863.193939	6.860606	16.036364	1989.024242	20.051948	20.564417
23	M	17091.338164	6.391304	16.077295	1989.961353	19.850000	18.941463
24	F	13702.224280	6.596708	16.393004	1987.485597	26.993377	47.914405
24	M	15208.136082	6.360825	16.971134	1988.641237	25.786996	39.321503

		count	mean	std	min	25%	50%	75%	max
plot_id	sex
1	F	826.0	46.311138	33.240958	5.0	26.00	40.0	50.00	196.0
1	M	1072.0	55.950560	41.035686	4.0	37.00	46.0	54.00	231.0
2	F	954.0	52.561845	45.547697	5.0	25.00	40.0	51.00	274.0
2	M	1114.0	51.391382	46.690887	5.0	24.00	42.0	50.00	278.0
3	F	873.0	31.215349	30.687451	4.0	15.00	23.0	34.00	199.0
3	M	827.0	34.163241	40.260426	5.0	13.00	23.0	39.00	250.0
4	F	850.0	46.818824	33.560664	5.0	28.00	40.0	47.00	200.0
4	M	1010.0	48.888119	32.254168	4.0	32.00	44.5	50.00	187.0
5	F	516.0	40.974806	36.396966	5.0	21.00	35.0	45.00	248.0
5	M	573.0	40.708551	31.250967	6.0	21.00	40.0	49.00	240.0
6	F	721.0	36.352288	29.513333	5.0	19.00	29.0	41.00	188.0
6	M	739.0	36.867388	30.867779	6.0	18.00	31.0	46.00	241.0
7	F	326.0	20.006135	17.895937	6.0	12.00	17.0	23.00	170.0
7	M	303.0	21.194719	23.971252	4.0	11.00	17.0	23.00	235.0
8	F	817.0	45.623011	31.045426	5.0	25.00	42.0	50.00	178.0
8	M	962.0	49.641372	34.820355	5.0	29.00	45.0	52.00	173.0
9	F	823.0	53.618469	35.572793	6.0	35.00	43.0	54.00	177.0
9	M	984.0	49.519309	31.888023	6.0	37.00	46.0	50.00	275.0
10	F	138.0	17.094203	14.074820	7.0	10.00	13.0	20.00	130.0
10	M	139.0	19.971223	25.061068	4.0	10.00	12.0	22.00	237.0
11	F	796.0	43.515075	29.627049	5.0	27.00	40.0	46.00	208.0
11	M	994.0	43.366197	28.425105	6.0	25.00	43.0	49.00	212.0
12	F	1040.0	49.831731	43.790247	6.0	26.00	41.0	48.25	264.0
12	M	1174.0	48.909710	39.301038	7.0	25.25	43.0	50.00	280.0
13	F	610.0	40.524590	36.109806	5.0	21.00	31.0	42.00	192.0
13	M	757.0	40.097754	31.753448	6.0	20.00	34.0	47.00	241.0
14	F	692.0	47.355491	29.563455	5.0	37.00	43.0	48.00	211.0
14	M	1029.0	45.159378	25.272173	5.0	35.00	44.0	50.00	222.0
15	F	467.0	26.670236	31.983137	4.0	12.50	18.0	26.00	198.0
15	M	401.0	27.523691	38.631271	4.0	10.00	18.0	25.00	259.0
16	F	211.0	25.810427	20.902314	4.0	13.00	21.0	31.00	158.0
16	M	265.0	23.811321	14.663726	5.0	11.00	20.0	35.00	61.0
17	F	874.0	48.176201	37.485528	6.0	27.00	41.0	49.00	192.0
17	M	1011.0	47.558853	34.082010	4.0	27.00	45.0	51.00	216.0
18	F	740.0	36.963514	35.184417	5.0	17.00	28.5	40.00	212.0
18	M	607.0	43.546952	41.864279	7.0	18.00	33.0	48.00	256.0
19	F	514.0	21.978599	14.008822	6.0	12.00	20.0	29.00	139.0
19	M	567.0	20.306878	12.553954	4.0	10.00	19.0	25.00	100.0
20	F	631.0	52.624406	55.257665	5.0	17.00	30.0	48.00	220.0
20	M	588.0	44.197279	43.361503	5.0	17.00	34.0	47.00	223.0
21	F	596.0	25.974832	22.619863	4.0	11.00	24.0	31.00	188.0
21	M	431.0	22.772622	18.984554	4.0	9.00	19.0	32.00	190.0
22	F	646.0	53.647059	38.588538	5.0	29.00	39.0	54.00	161.0
22	M	648.0	54.572531	38.841066	6.0	31.00	44.0	53.00	212.0
23	F	163.0	20.564417	18.933945	8.0	12.00	16.0	23.00	199.0
23	M	205.0	18.941463	17.979740	4.0	10.00	12.0	22.00	131.0
24	F	479.0	47.914405	49.112574	6.0	21.00	33.0	44.00	251.0
24	M	479.0	39.321503	42.003947	4.0	17.00	24.0	45.00	230.0

	record_id	month	day	year	hindfoot_length	weight
plot_id
1	17674.516792	6.307769	15.646617	1990.428070	33.186760	51.822911
2	17669.809480	6.378760	15.760711	1990.429353	30.259519	52.251688
3	19768.905361	6.470460	16.266958	1991.931072	23.803327	32.654386
4	17998.813611	6.424073	16.165058	1990.625190	33.697636	47.928189
5	13133.017588	6.201005	15.813233	1987.082077	29.322306	40.947802
6	18579.403287	6.307206	16.252212	1991.035398	27.201129	36.738893
7	18080.537990	6.580882	15.511029	1990.680147	20.167203	20.663009
8	19161.210999	6.572713	15.878371	1991.467478	33.025143	47.758001
9	17295.930785	6.423554	15.409607	1990.107438	34.594318	51.432358
10	16180.501066	6.093817	16.377399	1989.330490	19.088561	18.541219
11	17002.651199	6.541710	16.132951	1989.872784	32.054679	43.451757
12	17383.576744	6.369979	16.253700	1990.229598	31.406425	49.496169
13	17414.790637	6.604031	16.135891	1990.201560	27.570887	40.445660
14	17670.336340	6.583554	16.563395	1990.410610	32.969430	46.277199
15	19172.228251	6.458372	16.738073	1991.507016	21.882840	27.042578
16	17776.507740	6.311146	16.910217	1990.462848	23.281184	24.585417
17	18572.008337	6.599313	15.989210	1991.078470	31.621505	47.889593
18	16615.919723	6.634602	16.281661	1989.605536	27.157242	40.005922
19	20042.891505	6.617325	16.070648	1992.127839	21.164474	21.105166
20	16796.862590	6.581295	16.184892	1989.754676	27.460034	48.665303
21	20764.940324	6.747656	16.262575	1992.654731	22.103314	24.627794
22	18185.315225	6.498928	16.135811	1990.763402	33.734854	54.146379
23	15881.472855	6.359019	16.353765	1989.110333	19.969101	19.634146
24	14718.620229	6.447519	16.643130	1988.257634	26.384444	43.679167

	count	mean	std	min	25%	50%	75%	max
plot_id
1	1903.0	51.822911	38.176670	4.0	30.0	44.0	53.0	231.0
2	2074.0	52.251688	46.503602	5.0	24.0	41.0	50.0	278.0
3	1710.0	32.654386	35.641630	4.0	14.0	23.0	36.0	250.0
4	1866.0	47.928189	32.886598	4.0	30.0	43.0	50.0	200.0
5	1092.0	40.947802	34.086616	5.0	21.0	37.0	48.0	248.0
6	1463.0	36.738893	30.648310	5.0	18.0	30.0	45.0	243.0
7	638.0	20.663009	21.315325	4.0	11.0	17.0	23.0	235.0
8	1781.0	47.758001	33.192194	5.0	26.0	44.0	51.0	178.0
9	1811.0	51.432358	33.724726	6.0	36.0	45.0	50.0	275.0
10	279.0	18.541219	20.290806	4.0	10.0	12.0	21.0	237.0
11	1793.0	43.451757	28.975514	5.0	26.0	42.0	48.0	212.0
12	2219.0	49.496169	41.630035	6.0	26.0	42.0	50.0	280.0
13	1371.0	40.445660	34.042767	5.0	20.5	33.0	45.0	241.0
14	1728.0	46.277199	27.570389	5.0	36.0	44.0	49.0	222.0
15	869.0	27.042578	35.178142	4.0	11.0	18.0	26.0	259.0
16	480.0	24.585417	17.682334	4.0	12.0	20.0	34.0	158.0
17	1893.0	47.889593	35.802399	4.0	27.0	42.0	50.0	216.0
18	1351.0	40.005922	38.480856	5.0	17.5	30.0	44.0	256.0
19	1084.0	21.105166	13.269840	4.0	11.0	19.0	27.0	139.0
20	1222.0	48.665303	50.111539	5.0	17.0	31.0	47.0	223.0
21	1029.0	24.627794	21.199819	4.0	10.0	22.0	31.0	190.0
22	1298.0	54.146379	38.743967	5.0	29.0	42.0	54.0	212.0
23	369.0	19.634146	18.382678	4.0	10.0	14.0	23.0	199.0
24	960.0	43.679167	45.936588	4.0	19.0	27.5	45.0	251.0

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
0	1	7	16	1977	2	NL	M	32.0	NaN
1	2	7	16	1977	3	NL	M	33.0	NaN
2	3	7	16	1977	2	DM	F	37.0	NaN
3	4	7	16	1977	7	DM	M	36.0	NaN
4	5	7	16	1977	3	DM	M	35.0	NaN
...	...	...	...	...	...	...	...	...	...
35544	35545	12	31	2002	15	AH	NaN	NaN	NaN
35545	35546	12	31	2002	15	AH	NaN	NaN	NaN
35546	35547	12	31	2002	10	RM	F	15.0	14.0
35547	35548	12	31	2002	7	DO	M	36.0	51.0
35548	35549	12	31	2002	5	NaN	NaN	NaN	NaN

	record_id	month	day	year	plot_id	species_id	hindfoot_length	weight
sex
F	15690	15690	15690	15690	15690	15690	14894	15303
M	17348	17348	17348	17348	17348	17348	16476	16879

	record_id	month	day	year	plot_id	sex	hindfoot_length	weight
species_id
AB	303	303	303	303	303	0	0	0
AH	437	437	437	437	437	1	2	0
AS	2	2	2	2	2	0	0	0
BA	46	46	46	46	46	45	45	45
CB	50	50	50	50	50	0	0	0
CM	13	13	13	13	13	0	0	0
CQ	16	16	16	16	16	0	0	0
CS	1	1	1	1	1	0	0	0
CT	1	1	1	1	1	0	0	0
CU	1	1	1	1	1	0	0	0
CV	1	1	1	1	1	0	0	0
DM	10596	10596	10596	10596	10596	10523	9972	10262
DO	3027	3027	3027	3027	3027	3015	2887	2904
DS	2504	2504	2504	2504	2504	2458	2132	2344
DX	40	40	40	40	40	0	0	0
NL	1252	1252	1252	1252	1252	1177	1074	1152
OL	1006	1006	1006	1006	1006	987	920	970
OT	2249	2249	2249	2249	2249	2213	2139	2160
OX	12	12	12	12	12	9	8	6
PB	2891	2891	2891	2891	2891	2862	2864	2810
PC	39	39	39	39	39	0	0	0
PE	1299	1299	1299	1299	1299	1277	1212	1260
PF	1597	1597	1597	1597	1597	1573	1493	1548
PG	8	8	8	8	8	0	0	0
PH	32	32	32	32	32	32	31	31
PI	9	9	9	9	9	8	9	8
PL	36	36	36	36	36	35	36	36
PM	899	899	899	899	899	883	847	876
PP	3123	3123	3123	3123	3123	3076	3027	3024
PU	5	5	5	5	5	0	0	0
PX	6	6	6	6	6	2	2	2
RF	75	75	75	75	75	75	73	75
RM	2609	2609	2609	2609	2609	2548	2442	2535
RO	8	8	8	8	8	8	8	8
RX	2	2	2	2	2	2	2	2
SA	75	75	75	75	75	0	0	0
SC	1	1	1	1	1	0	0	0
SF	43	43	43	43	43	40	41	41
SH	147	147	147	147	147	143	131	141
SO	43	43	43	43	43	43	41	41
SS	248	248	248	248	248	3	0	2
ST	1	1	1	1	1	0	0	0
SU	5	5	5	5	5	0	0	0
UL	4	4	4	4	4	0	0	0
UP	8	8	8	8	8	0	0	0
UR	10	10	10	10	10	0	0	0
US	4	4	4	4	4	0	0	0
ZL	2	2	2	2	2	0	0	0

sex	F	M
plot_id
1	38253.0	59979.0
2	50144.0	57250.0
3	27251.0	28253.0
4	39796.0	49377.0
5	21143.0	23326.0
6	26210.0	27245.0
7	6522.0	6422.0
8	37274.0	47755.0
9	44128.0	48727.0
10	2359.0	2776.0
11	34638.0	43106.0
12	51825.0	57420.0
13	24720.0	30354.0
14	32770.0	46469.0
15	12455.0	11037.0
16	5446.0	6310.0
17	42106.0	48082.0
18	27353.0	26433.0
19	11297.0	11514.0
20	33206.0	25988.0
21	15481.0	9815.0
22	34656.0	35363.0
23	3352.0	3883.0
24	22951.0	18835.0

	one	two
a	1.0	1.0
b	2.0	2.0
c	3.0	3.0
d	NaN	4.0