Please note, this is a STATIC archive of website www.tutorialspoint.com from 11 May 2019, cach3.com does not collect or store any user information, there is no "phishing" involved.
Tutorialspoint

Foundations of predictive analytics in python-Chapter 4

''' Retrieving information from the predictor insight table '''

# Show the complete predictor insight graph table for Country
print(pig_table)

'''
  Country   Size  Incidence
0   India  49849       0.05
1      UK  10057       0.05
2     USA  40094       0.05
'''

# Number of UK donors: the "Size" value of the row whose Country is "UK"
print(pig_table.loc[pig_table["Country"] == "UK", "Size"])

# Target incidence of the USA donors, then of the India donors
print(pig_table.loc[pig_table["Country"] == "USA", "Incidence"])
print(pig_table.loc[pig_table["Country"] == "India", "Incidence"])

# =============================================================================
# Discretization of continuous variables
# =============================================================================

''' Discretization of a certain variable '''

# Show the basetable before any discretized column is added
print(basetable)

'''
       Unnamed: 0  target  time_since_last_donation
0               0       1                       808
1               1       1                       977
2               2       1                       641
...           ...     ...                       ...
99997       99997       0                       703
99998       99998       0                       682
99999       99999       0                       666
'''

# Split time_since_last_donation into 10 quantile-based bins and store the
# resulting interval labels in a new column
basetable["bins_recency"] = pd.qcut(
    x=basetable["time_since_last_donation"],
    q=10,
)

print(basetable["bins_recency"])

'''
0          (738, 833]
1         (933, 1050]
2          (574, 657]
             ...     
99997      (657, 738]
99998      (657, 738]
99999      (657, 738]

Categories (10, object): [[32, 319] < (319, 462] < (462, 574] < (574, 657] ... (833, 933] <
                          (933, 1050] < (1050, 1209] < (1209, 2518]]
'''

# Count the rows that fall into each bin of the discretized variable
print(
    basetable.groupby(by="bins_recency").size()
)

'''
[32, 319]       10058
(319, 462]       9953
(462, 574]       9999
             ...     
(933, 1050]     10009
(1050, 1209]    10004
(1209, 2518]     9949
'''

''' Discretizing all variables '''

# Candidate variables: every column of the basetable except "target"
variables = list(basetable.columns)
variables.remove("target")

# Discretize each numeric variable that has more than 5 distinct values into
# 10 quantile bins, stored in a new "disc_<name>" column.
# Non-numeric columns (strings, or interval bins produced by an earlier
# pd.qcut call such as "bins_recency") are skipped: pd.qcut cannot compute
# quantiles for them and would raise.
for variable in variables:
    column = basetable[variable]
    if pd.api.types.is_numeric_dtype(column) and column.nunique() > 5:
        new_variable = "disc_" + variable
        basetable[new_variable] = pd.qcut(column, 10)
       
''' Making clean cuts '''

# Cut number_gift at the fixed boundaries 0, 5, 10 and 20 so the bins have
# clean, readable edges
basetable["disc_number_gift"] = pd.cut(
    basetable["number_gift"],
    bins=[0, 5, 10, 20],
)

# Number of observations in each of the resulting groups
print(
    basetable.groupby(by="disc_number_gift").size()
)

'''
    (0, 5]      55063
    (5, 10]     41120
    (10, 20]     3817
'''

''' Calculating average incidences '''

# Select the income and target columns
basetable_income = basetable[["target", "income"]]

print(basetable_income)
'''
       target   income
0           1     high
1           1  average
2           1     high
...       ...      ...
99997       0  average
99998       0      low
99999       0      low
'''

# Group basetable_income by income
groups = basetable_income.groupby("income")

# Target incidence per income group = mean of the target column in the group.
# Named aggregation (Incidence="mean") is used instead of a renaming dict:
# passing {"new_name": func} to a grouped column's agg() raises
# SpecificationError in pandas >= 1.0.
incidence = groups["target"].agg(Incidence="mean").reset_index()
print(incidence)
'''
    income  Incidence
0  average   0.049166
1     high   0.061543
2      low   0.043118
'''

# =============================================================================
# Preparing the predictor insight graph table
# =============================================================================

''' Constructing the predictor insight graph table '''

# Function that creates predictor insight graph table
def create_pig_table(basetable, target, variable):
    """Build the predictor insight graph (PIG) table for one variable.

    Args:
        basetable: DataFrame that contains the ``target`` and ``variable``
            columns.
        target: Name of the target column.
        variable: Name of the column whose values define the groups.

    Returns:
        DataFrame with one row per group of ``variable`` and the columns
        ``variable``, "Size" (number of rows in the group) and "Incidence"
        (mean of the target column in the group).
    """
    # Create groups for each value of the variable
    groups = basetable[[target, variable]].groupby(variable)

    # Aggregate the column named by `target` (not a hard-coded "target").
    # Named aggregation replaces the renaming-dict form of agg(), which
    # raises SpecificationError in pandas >= 1.0.
    pig_table = groups[target].agg(Size="size", Incidence="mean").reset_index()

    # Return the predictor insight graph table
    return pig_table

# Build the predictor insight graph table for gender against the target
pig_table_gender = create_pig_table(
    basetable,
    target="target",
    variable="gender",
)

# Show the resulting table
print(pig_table_gender)
'''
  gender   Size  Incidence
0      F  50033   0.053844
1      M  49967   0.045970
'''

''' Grouping all predictor insight graph tables '''

# Variables for which a predictor insight graph table is built
variables = ["income","gender","disc_mean_gift","disc_time_since_last_gift"]

# Dictionary that maps each variable name to its predictor insight graph table
pig_tables = {}

# Loop through the variables
for variable in variables:
  
    # Create a predictor insight graph table for this variable
    # (after the loop, pig_table stays bound to the table of the last variable)
    pig_table = create_pig_table(basetable, "target", variable)
    
    # Store the table in the dictionary under the variable's name
    pig_tables[variable] = pig_table

# Print the whole dictionary, i.e. the predictor insight graph tables of all variables
print(pig_tables)
'''
{'disc_time_since_last_gift':
  disc_time_since_last_gift   Size  Incidence
0              (1050, 2518]  19953   0.023255
1                (462, 657]  20069   0.061986
2                (657, 833]  19996   0.050810
3               (833, 1050]  19971   0.033799
4                 [32, 462]  20011   0.079556,

 'disc_mean_gift':
     disc_mean_gift   Size  Incidence
0        (103, 197]  19551   0.103524
1  (78.111, 86.889]  19997   0.029554
2  (86.889, 94.167]  20034   0.040831
3     (94.167, 103]  20405   0.063563
4       [2, 78.111]  20013   0.013042,

 'gender':
  gender   Size  Incidence
0      F  50033   0.053844
1      M  49967   0.045970,

 'income':
    income   Size  Incidence
0  average  62950   0.049166
1     high  16200   0.061543
2      low  20850   0.043118}
'''

# =============================================================================
# Plotting the predictor insight graph
# =============================================================================

''' Plotting the incidences '''

import matplotlib.pyplot as plt
import numpy as np

# The function to plot a predictor insight graph.
def plot_incidence(pig_table, variable):
    """Plot the incidence of each group of a predictor insight graph table.

    Args:
        pig_table: Table with an "Incidence" column and a column named
            ``variable``.
        variable: Name of the column whose values label the x-axis ticks;
            also used as the x-axis label.
    """
    # Plot the incidence line. The column is "Incidence" (capital I), the
    # same column read for the y-axis limit below; the lowercase spelling
    # raised a KeyError.
    pig_table["Incidence"].plot()

    # One tick per row, labelled with the group value
    plt.xticks(np.arange(len(pig_table)), pig_table[variable])
    # Half a unit of padding on both sides of the first and last tick
    plt.xlim([-0.5, len(pig_table) - 0.5])
    # y-axis runs from 0 to twice the highest incidence
    plt.ylim([0, max(pig_table["Incidence"] * 2)])
    plt.ylabel("Incidence", rotation=0, rotation_mode="anchor", ha="right")
    plt.xlabel(variable)

    # Show the graph
    plt.show()

# Draw the incidence plot using the "country" column for the x-axis labels.
# NOTE(review): the table printed at the top of this file labels that column
# "Country" (capital C) - confirm the casing against the actual pig_table.
plot_incidence(pig_table, variable="country")

''' Plotting the group sizes '''

# The function to plot a predictor insight graph
def plot_pig(pig_table, variable):
    """Draw a predictor insight graph: group sizes as bars plus incidence line.

    Args:
        pig_table: Table with "Size" and "Incidence" columns and a column
            named ``variable``.
        variable: Name of the column whose values label the x-axis ticks.
    """
    n_groups = len(pig_table)
    # Both axis labels are drawn unrotated and anchored; only the horizontal
    # alignment differs between them
    label_style = {"rotation": 0, "rotation_mode": "anchor"}

    # Label for the size axis
    plt.ylabel("Size", ha="right", **label_style)

    # Group sizes as light gray bars
    pig_table["Size"].plot(
        kind="bar",
        width=0.5,
        color="lightgray",
        edgecolor="none",
    )

    # Incidence as a line on a secondary y-axis
    pig_table["Incidence"].plot(secondary_y=True)

    # One tick per row labelled with the group value, with half a unit of
    # padding on both sides
    tick_positions = np.arange(n_groups)
    plt.xticks(tick_positions, pig_table[variable])
    plt.xlim([-0.5, n_groups - 0.5])
    plt.ylabel("Incidence", ha="left", **label_style)

    # Show the graph
    plt.show()
    
# Draw the predictor insight graph using the "country" column for the labels
plot_pig(pig_table, variable="country")

''' Putting it all together '''

# Variables for which a predictor insight graph is drawn
variables = [
    "income",
    "gender",
    "disc_mean_gift",
    "disc_time_since_last_gift",
]

# For every variable: build its predictor insight graph table, then plot it
for variable in variables:
    pig_table = create_pig_table(basetable, target="target", variable=variable)
    plot_pig(pig_table, variable=variable)

Advertisements
Loading...

We use cookies to provide and improve our services. By using our site, you consent to our Cookies Policy.