# pandas Foundations-Chapter 1

# Browse more Python3 Examples
# Quick inspection of an existing DataFrame `df`
# (`df` is not created in this section; it is assumed to be loaded earlier).
df.head(3)   # view the first 3 rows (default is 5 rows)
df.tail(3)   # view the last 3 rows (default is 5 rows)

df.info()   # basic information about the DataFrame
# Sample output of df.info(): 13374 rows and 5 columns; 'Total Population'
# has only 9914 non-null values, i.e. that column contains missing data.
'''
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13374 entries, 0 to 13373
Data columns (total 5 columns):
CountryName                      13374 non-null object
CountryCode                      13374 non-null object
Year                             13374 non-null int64
Total Population                 9914 non-null float64
Urban population (% of total)    13374 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 522.5+ KB
'''

''' NumPy and pandas working together '''

# Import numpy
import numpy as np

# Per the sample output below, `df` here has a single 'Total Population'
# column and is indexed by 'Year'.
print(df)
'''
      Total Population
Year                  
1960      3.034971e+09
...
2010      6.924283e+09
'''

# Create array of DataFrame values: np_vals
np_vals = df.values    # the df.values attribute yields a NumPy array
# Sample contents of np_vals: a 2-D array with one value per row.
'''
[[3.03497056e+09]
 ...
 [6.92428294e+09]]
'''

# =============================================================================
# Building DataFrames from scratch
# =============================================================================

''' Zip lists to build a DataFrame '''

# `list_keys` holds the column labels and `list_values` holds one list of
# values per column (both are provided outside this section).
print(list_keys)    # ['Country', 'Total']
print(list_values)  # [['United States', 'Soviet Union', 'United Kingdom'], [1118, 473, 273]]

# Zip the 2 lists together into one list of (key,value) tuples: zipped
zipped = list(zip(list_keys, list_values))  # [('Country', ['United States', 'Soviet Union', 'United Kingdom']), ('Total', [1118, 473, 273])]

# Build a dictionary with the zipped list: data
data = dict(zipped) # {'Country': ['United States', 'Soviet Union', 'United Kingdom'], 'Total': [1118, 473, 273]}

# Build and inspect a DataFrame from the dictionary: df
# Each dictionary key becomes a column label; each list becomes that column's values.
df = pd.DataFrame(data)
print(df)
'''
          Country  Total
0   United States   1118
1    Soviet Union    473
2  United Kingdom    273
'''

# Rename the columns by assigning a new list of labels
# (the new labels are the Chinese words for "Country" and "Total").
df.columns = ["國家", "總數"]

''' Broadcasting '''

# Broadcasting = setting one column to the same value for every row

print(cities)   # cities in Pennsylvania ['Manheim', ..., 'Great bend']

# Make a string with the value 'PA': state
state = "PA"

# Construct a dictionary: data
# 'state' maps to a single string while 'city' maps to a list.
data = {'state':state, 'city':cities}

# Construct a DataFrame from dictionary data: df
# The single 'PA' value is repeated on every row, as the output below shows.
df = pd.DataFrame(data)

# Print the DataFrame
print(df)
'''
   state             city
0     PA          Manheim
1     PA     Preston park
...
14    PA       Great bend
'''

# =============================================================================
# Importing & exporting data
# =============================================================================

''' Reading a flat file '''

# given a csv file "data_file"

# Create a list of the new column labels: new_labels
new_labels = ['year', 'population']

# Read in the file, specifying the header and names parameters: df
# header=0: the first line of the file is the existing header row; passing it
#           together with `names` replaces that row instead of reading it as data
# names: the column labels to use
df = pd.read_csv(data_file, header=0, names=new_labels)

''' Delimiters, headers, and extensions '''

# given a flat file "file_messy", which has multiple header lines, comment records (rows) interleaved throughout the data rows, and space delimiters instead of commas

# Read the raw file as-is: df1
# With default settings the file is split on commas and its first line is
# used as the header, which produces the unusable result shown below.
df1 = pd.read_csv(file_messy)

# Print the output of df1.head()
print(df1.head())
'''
                                                   The following stock data was collect on 2016-AUG-25 from an unknown source
These kind of comments are not very useful                                                  are they?                        
Probably should just throw this line away too          but not the next since those are column labels                        
name Jan Feb Mar Apr May Jun Jul Aug Sep Oct No...                                                NaN                        
# So that line you just read has all the column...                                                NaN                        
IBM 156.08 160.01 159.81 165.22 172.25 167.15 1...                                                NaN                        
'''

# Read in the file with the correct parameters: df2
# delimiter=" ": fields are separated by spaces rather than commas
# header=3:      the line at index 3 (the fourth line) holds the column labels
# comment="#":   text starting with '#' is treated as a comment and skipped
df2 = pd.read_csv(file_messy, delimiter=" ", header=3, comment="#")

# Print the output of df2.head()
print(df2.head())
'''
     name     Jan     Feb     Mar     Apr  ...     Aug     Sep     Oct     Nov     Dec
0     IBM  156.08  160.01  159.81  165.22  ...  152.77  145.36  146.11  137.21  137.96
1    MSFT   45.51   43.08   42.13   43.47  ...   45.51   43.56   48.70   53.88   55.40
2  GOOGLE  512.42  537.99  559.72  540.50  ...  636.84  617.93  663.59  735.39  755.35
3   APPLE  110.64  125.43  125.97  127.29  ...  113.39  112.80  113.36  118.16  111.73
'''

# Save the cleaned up DataFrame to a CSV file without the index
# (`file_clean` is a variable defined outside this section — presumably the output path; confirm).
df2.to_csv(file_clean, index=False)

# Save the cleaned up DataFrame to an excel file without the index
# NOTE: the target here is the literal filename 'file_clean.xlsx', not the `file_clean` variable.
df2.to_excel('file_clean.xlsx', index=False)

# =============================================================================
# Plotting with pandas
# =============================================================================
# pandas Foundations-Chapter 1

# Follow

# Newsletter