# Please note: this is a STATIC archive of the website www.tutorialspoint.com from 11 May 2019. cach3.com does not collect or store any user information; there is no "phishing" involved.
# Preview the first three rows (head() returns five rows when no count is given).
df.head(n=3)
# Preview the last three rows (tail() likewise defaults to five rows).
df.tail(n=3)
# Print the DataFrame's basic facts; the expected report is recorded below.
df.info()
'''
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13374 entries, 0 to 13373
Data columns (total 5 columns):
CountryName 13374 non-null object
CountryCode 13374 non-null object
Year 13374 non-null int64
Total Population 9914 non-null float64
Urban population (% of total) 13374 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 522.5+ KB
'''
''' NumPy and pandas working together '''
# Demonstrates pulling a DataFrame's contents out as a NumPy array.
# Import numpy
import numpy as np
# Display the DataFrame; the string below records the expected printout.
print(df)
'''
Total Population
Year
1960 3.034971e+09
...
2010 6.924283e+09
'''
# Create array of DataFrame values: np_vals
np_vals = df.values # the df.values attribute yields a NumPy array
# The string below records the contents of np_vals (it is not printed here).
'''
[[3.03497056e+09]
...
[6.92428294e+09]]
'''
# =============================================================================
# Building DataFrames from scratch
# =============================================================================
''' Zip lists to build a DataFrame '''
# Inputs: the column labels in one list, the per-column values in another.
print(list_keys)  # ['Country', 'Total']
print(list_values)  # [['United States', 'Soviet Union', 'United Kingdom'], [1118, 473, 273]]
# Pair each label with its column of values, as a list of tuples: zipped
zipped = [*zip(list_keys, list_values)]  # [('Country', ['United States', 'Soviet Union', 'United Kingdom']), ('Total', [1118, 473, 273])]
# Turn the (label, values) pairs into a mapping: data
data = dict(zipped)  # {'Country': ['United States', 'Soviet Union', 'United Kingdom'], 'Total': [1118, 473, 273]}
# Construct the DataFrame from that mapping and display it: df
df = pd.DataFrame(data=data)
print(df)
'''
Country Total
0 United States 1118
1 Soviet Union 473
2 United Kingdom 273
'''
# Rename the columns by assigning a new list of labels
# ("國家" = country, "總數" = total).
df.columns = ["國家", "總數"]
''' Broadcasting '''
# Broadcasting = giving one column the same value for every record.
print(cities)  # cities in Pennsylvania: ['Manheim', ..., 'Great bend']
# The single value shared by all rows: state
state = "PA"
# Map each column name to its contents (a scalar and a list): data
data = {
    'state': state,
    'city': cities,
}
# Build the DataFrame from the mapping: df
df = pd.DataFrame(data=data)
# Display the result; every row carries the same state value.
print(df)
'''
state city
0 PA Manheim
1 PA Preston park
...
14 PA Great bend
'''
# =============================================================================
# Importing & exporting data
# =============================================================================
''' Reading a flat file '''
# Assumes the path of a CSV file is supplied in "data_file".
# Replacement column labels: new_labels
new_labels = ['year', 'population']
# Read the file, swapping its own header for the new labels: df
#   header=0 -> the file's first line is its existing header row, so it is
#               dropped rather than read as data
#   names    -> the column names to use instead
df = pd.read_csv(data_file, names=new_labels, header=0)
''' Delimiters, headers, and extensions '''
# Assumes "file_messy" is a flat file with several header lines, comment
# records (rows) interleaved among the data rows, and spaces rather than
# commas between fields.
# First pass: load the file with default settings to expose the problem: df1
df1 = pd.read_csv(file_messy)
# Show the top of df1; everything lands in a single, unparsed column.
print(df1.head())
'''
The following stock data was collect on 2016-AUG-25 from an unknown source
These kind of comments are not very useful are they?
Probably should just throw this line away too but not the next since those are column labels
name Jan Feb Mar Apr May Jun Jul Aug Sep Oct No... NaN
# So that line you just read has all the column... NaN
IBM 156.08 160.01 159.81 165.22 172.25 167.15 1... NaN
'''
# Second pass with options that match the file's layout: df2
#   sep=" "     -> fields are separated by single spaces
#   comment="#" -> text from a "#" onward is not parsed
#   header=3    -> the column labels sit on the fourth line, after three
#                  lines of free text
df2 = pd.read_csv(file_messy, sep=" ", comment="#", header=3)
# Show the top of df2; the columns are now split correctly.
print(df2.head())
'''
name Jan Feb Mar Apr ... Aug Sep Oct Nov Dec
0 IBM 156.08 160.01 159.81 165.22 ... 152.77 145.36 146.11 137.21 137.96
1 MSFT 45.51 43.08 42.13 43.47 ... 45.51 43.56 48.70 53.88 55.40
2 GOOGLE 512.42 537.99 559.72 540.50 ... 636.84 617.93 663.59 735.39 755.35
3 APPLE 110.64 125.43 125.97 127.29 ... 113.39 112.80 113.36 118.16 111.73
'''
# Write the cleaned DataFrame to the CSV path held in file_clean, omitting the index.
df2.to_csv(file_clean, index=False)
# Write the same data to an Excel workbook, omitting the index.
# NOTE: the target here is the literal name 'file_clean.xlsx', not the file_clean variable.
df2.to_excel('file_clean.xlsx', index=False)
# =============================================================================
# Plotting with pandas
# =============================================================================
# Advertisements
# We use cookies to provide and improve our services. By using our site, you consent to our Cookies Policy.
# Accept | Learn more