Please note, this is a STATIC archive of website www.tutorialspoint.com from 11 May 2019, cach3.com does not collect or store any user information, there is no "phishing" involved.
# =============================================================================
# Visual exploratory data analysis
# =============================================================================
''' pandas line plots '''
# Assumes a dataframe "df" of stock prices already exists.
# Its first rows (df.head()) look like this:
'''
Month AAPL GOOG IBM
0 Jan 117.160004 534.522445 153.309998
1 Feb 128.460007 558.402511 161.940002
2 Mar 124.430000 548.002468 160.500000
3 Apr 125.150002 537.340027 171.289993
4 May 130.279999 532.109985 169.649994
'''
# Columns to draw on the y-axis, one line per column: y_columns
y_columns = ["AAPL", "IBM"]
# Line chart of the selected columns against the "Month" column
df.plot(kind="line", y=y_columns, x="Month")
# Render the figure
plt.show()
''' pandas scatter plots '''
# Scatter of column 'hp' (x) against column 'mpg' (y);
# the marker sizes are taken from "sizes"
df.plot.scatter(x="hp", y="mpg", s=sizes)
''' pandas box plots '''
# Names of the columns to draw as box plots: cols
cols = ["weight", "mpg"]
# One box plot per selected column;
# subplots=True draws each column on its own separate chart
df[cols].plot.box(subplots=True)
''' pandas hist, pdf and cdf '''
# Two axes stacked on separate rows: PDF on top, CDF underneath
fig, axes = plt.subplots(nrows=2, ncols=1)
# Plot the PDF: density=True normalises the histogram so its area is 1.
# (`density` is the supported keyword; the older `normed` keyword was removed
# from matplotlib and is rejected by current versions.)
df["fraction"].plot(ax=axes[0], kind="hist", bins=30, density=True, range=(0, .3))
# Plot the CDF: cumulative=True accumulates the normalised bins
df["fraction"].plot(ax=axes[1], kind="hist", bins=30, density=True, cumulative=True, range=(0, .3))
# Show the figure once, after both panels are drawn. Calling plt.show() between
# the two plots would display (and, in a script, close) the figure before the
# CDF panel has been filled in.
plt.show()
# =============================================================================
# Statistical exploratory data analysis
# =============================================================================
''' Plot the mean '''
# Average across the columns of each row (mean percentage per year): mean
mean = df.mean(axis="columns")
# Draw the per-year averages as a line chart
mean.plot.line()
# Render the figure
plt.show()
''' box plot '''
# Summary statistics of the fare column via .describe():
# count, mean, std, min, 25%, median (50%), 75%, max
print(df["fare"].describe())
# Box plot of the same fare column
df["fare"].plot.box()
# Render the figure
plt.show()
''' Quantiles '''
# Print the 5th and 95th percentiles of every numeric column.
# numeric_only=True is passed explicitly: pandas >= 2.0 defaults it to False,
# which raises on non-numeric columns (e.g. text labels) instead of skipping them.
print(df.quantile([0.05, 0.95], numeric_only=True))
# =============================================================================
# Separating populations
# =============================================================================
''' Filtering and counting '''
# Keep only the rows whose 'origin' is 'Asia', then count the
# non-missing values in each column
print(df.loc[df["origin"].eq("Asia")].count())
# Sample output:
'''
mpg 79
...
name 79
'''
''' Separate and plot '''
# Three axes stacked on separate rows, one per passenger class
fig, axes = plt.subplots(nrows=3, ncols=1)
# Box plot of the fare prices for the first, second and third passenger class:
# axes[0] gets pclass 1, axes[1] gets pclass 2, axes[2] gets pclass 3
for pclass, ax in enumerate(axes, start=1):
    titanic.loc[titanic["pclass"] == pclass].plot(ax=ax, y="fare", kind="box")
# Render the figure
plt.show()
Advertisements
We use cookies to provide and improve our services. By using our site, you consent to our Cookies Policy.
AcceptLearn more