# pandas Foundations - Chapter 2

# Browse more Python3 Examples
# =============================================================================
# Visual exploratory data analysis
# =============================================================================

''' pandas line plots '''

# Assumes "df" is a DataFrame of stock prices with a "Month" column
# and one price column per ticker, e.g.
# df.head()
'''
  Month        AAPL        GOOG         IBM
0   Jan  117.160004  534.522445  153.309998
1   Feb  128.460007  558.402511  161.940002
2   Mar  124.430000  548.002468  160.500000
3   Apr  125.150002  537.340027  171.289993
4   May  130.279999  532.109985  169.649994
'''

# Tickers to draw on the y-axis: y_columns
y_columns = ["AAPL", "IBM"]

# One line per selected ticker, plotted against the "Month" column
df.plot.line(x="Month", y=y_columns)

# Render the figure
plt.show()

''' pandas scatter plots '''

# Scatter 'mpg' against 'hp'; "sizes" (defined outside this snippet)
# is passed through as the marker sizes
df.plot.scatter(x="hp", y="mpg", s=sizes)

''' pandas box plots '''

# Columns to summarize with box plots: cols
cols = ["weight", "mpg"]

# subplots=True draws each selected column on its own chart
# instead of sharing a single set of axes
df[cols].plot.box(subplots=True)

''' pandas hist, pdf and cdf '''

# Two stacked axes in a single figure: PDF on the first row, CDF on the second
fig, axes = plt.subplots(nrows=2, ncols=1)

# Plot the PDF.
# density=True normalizes the histogram; it replaces the "normed" keyword,
# which newer Matplotlib releases no longer accept.
df["fraction"].plot(ax=axes[0], kind='hist', bins=30, density=True, range=(0, .3))

# Plot the CDF: same normalized histogram, accumulated bin by bin
df["fraction"].plot(ax=axes[1], kind='hist', bins=30, density=True, cumulative=True, range=(0, .3))

# Show the figure once, after BOTH axes have been drawn. Calling plt.show()
# between the two plots would display the figure with an empty CDF panel.
plt.show()

# =============================================================================
# Statistical exploratory data analysis
# =============================================================================

''' 圖示平均值 '''
# (section title above, in English: "plotting the mean")

# Row-wise mean across the columns of df: mean
# NOTE(review): presumably each row is one year of percentages, making this
# the mean percentage per year; confirm against the dataset.
mean = df.mean(axis=1)

# Line plot of the per-row averages
mean.plot.line()

# Render the figure
plt.show()

''' box plot '''

# Summary statistics of the fare column via .describe(); for a numeric
# column this reports count, mean, std, min, 25%, 50% (median), 75%, max
print(df["fare"].describe())

# Box plot of the same fare column
df["fare"].plot.box()

# Render the figure
plt.show()

''' Quantiles '''

# Report the 5th and 95th percentiles of df
print(df.quantile(q=[0.05, 0.95]))

# =============================================================================
# Separating populations
# =============================================================================

''' Filtering and counting '''

# Keep only the rows whose 'origin' equals 'Asia', then count the
# non-null entries in each column
print(df.loc[df["origin"] == "Asia"].count())
'''
mpg       79
...
name      79
'''

''' Separate and plot '''

# One figure with 3 rows and 1 column of axes, one row per passenger class
fig, axes = plt.subplots(nrows=3, ncols=1)

# For first, second and third class in turn: select that class's rows
# and draw a box plot of their fares on the matching row of axes
for row, passenger_class in enumerate((1, 2, 3)):
    titanic.loc[titanic["pclass"] == passenger_class].plot(ax=axes[row], y="fare", kind="box")

# Render the figure
plt.show()
# pandas Foundations - Chapter 2

# Follow

# Newsletter