#Create the four variables


#Show the type of the TreeID variable

int


#Show the type of the Species variable

str


#Show the type of the Height variable

float


#Show the type of the Planted variable

bool


#Create a list describing Tree "A" (same field order as the four variables above)
tree_A = [
    104,    # TreeID
    "Elm",  # Species
    12.1,   # Height
    True,   # Planted
]


#Create a list describing Tree "B" -- using the variables created above


#Extract the height from the list for Tree "A"


#Extract the height from the list for Tree "B"


#Evaluate whether Tree "A" is taller than Tree "B"

False


#Import the numpy package, calling it "np"
import numpy as np


#Import the pandas package, calling it "pd"

/opt/conda/lib/python3.9/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.3' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED


#Create a 1-D NumPy array (vector) of tree height values, in meters
height_meters = np.array(
    [2.1, 3.2, 5.6, 2.2, 3.1],
)


#Compute the mean height in meters (the array's own method; same result as np.mean)
height_meters.mean()

3.2400000000000007


#Convert the values to cm


#Compute the median height in cm

310.0


#Convert the height values from cm to inches


#Compute the median of the heights in inches

122.047


#Read the processed NTL-LTER Peter/Paul lake chemistry-physics CSV into a dataframe object
NTL_LTER = pd.read_csv(
    filepath_or_buffer="./data/Processed_KEY/NTL-LTER_Lake_ChemistryPhysics_PeterPaul_Processed.csv"
)


#View the first 5 records


#Display the data type of our NTL_LTER object

pandas.core.frame.DataFrame


#Reveal the column names

Index(['lakeid', 'lakename', 'year4', 'daynum', 'month', 'sampledate', 'depth',
       'temperature_C', 'dissolvedOxygen', 'irradianceWater', 'irradianceDeck',
       'comments'],
      dtype='object')


#Reveal the structure of our dataframe

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21613 entries, 0 to 21612
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   lakeid           21613 non-null  object 
 1   lakename         21613 non-null  object 
 2   year4            21613 non-null  int64  
 3   daynum           21613 non-null  int64  
 4   month            21613 non-null  int64  
 5   sampledate       21613 non-null  object 
 6   depth            21613 non-null  float64
 7   temperature_C    19442 non-null  float64
 8   dissolvedOxygen  19342 non-null  float64
 9   irradianceWater  15451 non-null  float64
 10  irradianceDeck   14626 non-null  float64
 11  comments         244 non-null    object 
dtypes: float64(5), int64(3), object(4)
memory usage: 2.0+ MB


#Reveal the dimensions of our dataframe

(21613, 12)


#Use the `value_counts()` function to reveal how many records correspond to unique values in the `lakename` column.

lakename
Peter Lake    11288
Paul Lake     10325
Name: count, dtype: int64


#Reveal datatype of the sampledate column

dtype('O')


#Change it to a proper datetime object


#Reveal datatype of the datetime column

dtype('<M8[ns]')


#Install plotnine (install if needed)
try: 
    from plotnine import *
except:
    !pip install plotnine
    from plotnine import *


#Create a bar plot of temperature by lakename


#Create a histogram of temperature values using 15 bins

/opt/conda/lib/python3.9/site-packages/plotnine/layer.py:284: PlotnineWarning: stat_bin : Removed 2171 rows containing non-finite values.


#Create one more plot of your choosing

/opt/conda/lib/python3.9/site-packages/plotnine/layer.py:364: PlotnineWarning: geom_point : Removed 6163 rows containing missing values.
/opt/conda/lib/python3.9/site-packages/plotnine/layer.py:364: PlotnineWarning: geom_smooth : Removed 46 rows containing missing values.

	lakeid	lakename	year4	daynum	month	sampledate	depth	temperature_C	dissolvedOxygen	irradianceWater	irradianceDeck	comments
0	L	Paul Lake	1984	148	5	1984-05-27	0.00	14.5	9.5	1750.0	1620.0	NaN
1	L	Paul Lake	1984	148	5	1984-05-27	0.25	NaN	NaN	1550.0	1620.0	NaN
2	L	Paul Lake	1984	148	5	1984-05-27	0.50	NaN	NaN	1150.0	1620.0	NaN
3	L	Paul Lake	1984	148	5	1984-05-27	0.75	NaN	NaN	975.0	1620.0	NaN
4	L	Paul Lake	1984	148	5	1984-05-27	1.00	14.5	8.8	870.0	1620.0	NaN

Getting Started with Python

Instructions

1. Working with variables and values

2. Working with data using NumPy and Pandas

2a. Importing the packages

2b. Creating and using Numpy arrays

2c. Working with dataframes in Pandas

3. Plotting with Plotnine/ggplot

Variable	Value
TreeID	101
Species	Oak
Height	15.5
Planted	False