%cd /Users/alisonfowler/Docs/Jupityr-notebooks/
import pandas as pd
# The collection that the example loops below iterate over.
animals = ['lion', 'tiger', 'crocodile', 'vulture', 'hippo']
print(animals)

# Print every entry on its own line. The body has to be indented so that
# Python treats it as part of the loop.
for creature in animals:
    print(creature)
# In this example, creature is the loop variable that takes the value of the next entry in animals every time the loop goes
# around. We can call the loop variable anything we like. After the loop finishes, the loop variable will still exist and
# will have the value of the last entry in the collection:
# `pass` is a do-nothing placeholder body: the loop still visits every entry
# and rebinds `creature` on each pass.
for creature in animals:
    pass

# This print is outside the loop, so it runs once and shows the last entry.
print('The loop variable is now: ' + creature)
# We are not asking Python to print the value of the loop variable anymore, but the for loop still runs and
# the value of creature changes on each pass through the loop. The statement pass in the body of the loop means “do nothing”.
# Challenge - What happens if we don't include the 'pass' statement?
# With the print indented it becomes the loop body, so the message is shown
# once per entry. A `for` statement needs at least one indented statement:
# leaving the body out entirely (no `pass`, nothing indented) raises an
# IndentationError.
for creature in animals:
    print('The loop variable is now: ' + creature)
# Rewrite the loop so that the animals are separated by commas, not new lines
# (Hint: You can concatenate strings using a plus sign.
# For example, print(string1 + string2) outputs ‘string1string2’).
# print() normally ends its output with a newline; passing end='' suppresses
# it, so every "name," lands on the same line.
for creature in animals:
    print(creature + ",", end='')
# Finish the line once the loop is done.
print()
# Alternative without the trailing comma: print(','.join(animals))
# Automating data processing using For Loops
import os

# Make a new directory inside the folder data to store all these files using
# the module "os". makedirs(..., exist_ok=True) lets this step be re-run
# without raising FileExistsError once the directory exists (it also creates
# "data" itself if that is missing).
os.makedirs("data/yearly_files", exist_ok=True)
# Bare expression: a notebook displays the listing, a plain script discards it.
os.listdir('data')
# Read the complete survey table from disk.
surveys_df = pd.read_csv("data/surveys.csv")
# Keep only the rows whose year column equals 2002.
surveys2002 = surveys_df.loc[surveys_df['year'] == 2002]
# Save that subset as its own CSV file.
surveys2002.to_csv('data/yearly_files/surveys2002.csv')
# let's write a loop that does this for all the years!
# Bare expression: shows the whole year column when run in a notebook.
surveys_df['year']
# we want only unique years
surveys_df['year'].unique()
# Build the output path for each distinct year and, for now, only print it.
# Both statements are indented so they run once per year.
for year in surveys_df['year'].unique():
    filename = 'data/yearly_files/surveys' + str(year) + '.csv'
    print(filename)
# we can now add the rest of the steps we need to create separate text files
# load the data into a DataFrame
surveys_df = pd.read_csv('data/surveys.csv')

# Write one CSV file per distinct year. Everything indented below runs once
# for each value of `year`.
for year in surveys_df['year'].unique():
    # select data for the year
    surveys_year = surveys_df[surveys_df.year == year]
    # write the new DataFrame to a csv file
    filename = 'data/yearly_files/surveys' + str(year) + '.csv'
    surveys_year.to_csv(filename)
# Writing unique file names
# Same path expression as used inside the loop; here `year` still holds the
# value left over from the loop's final iteration.
filename = 'data/yearly_files/surveys' + str(year) + '.csv'
# Let’s break down the parts of this name:
# The first part is some text that specifies the directory to store our data file in (data/yearly_files/) and the
# first part of the file name (surveys): 'data/yearly_files/surveys'
# We can concatenate this with the value of a variable, in this case year by using the plus + sign and the variable we
# want to add to the file name: + str(year)
# Then we add the file extension as another text string: + '.csv'
# Notice that we use single quotes to add text strings.
# The variable is not surrounded by quotes. This code produces the string data/yearly_files/surveys2002.csv which contains
# the path to the new file AND the file name itself.
# Challenge - Modifying loops
# some of the surveys you saved are missing data (they have null values that show
# up as NaN - Not a number - in the dataframes and do not show up in the text files).
# Modify the for loop so that the entries with null values are not included in the yearly files.
# Write one CSV file per distinct year, leaving out any row that has a
# missing (NaN) value. Everything indented below runs once per year.
for year in surveys_df['year'].unique():
    # select data for the year
    surveys_year = surveys_df[surveys_df.year == year]
    # remove NaNs: axis=0 drops rows (not columns) containing a null value
    surveys_year = surveys_year.dropna(axis=0)
    # write the new DataFrame to a csv file
    filename = 'data/yearly_files/surveys' + str(year) + '.csv'
    surveys_year.to_csv(filename)
# Let's say you only want to look at data from a given multiple of years.
# How would you modify your loop in order to generate a data file for only every
# 5th year, starting from 1977?
# Instead of splitting out the data by years, a colleague wants to do analyses for
# each species separately. How would you write a unique CSV file for each species?