Please note, this is a STATIC archive of website www.tutorialspoint.com from 11 May 2019, cach3.com does not collect or store any user information, there is no "phishing" involved.
Tutorialspoint

Execute Python Online

import os
import time
import numpy as np
import rank_metrics as rank
from collections import defaultdict


def load_data_from_file(dataset, folder):
    #print(os.path.join(folder, dataset+"_admat_dgc.txt"))
    with open(os.path.join(folder, dataset+"_admat_dgc.txt"), "r") as inf:
        inf.next()
        int_array = [line.strip("\n").split()[1:] for line in inf]

    with open(os.path.join(folder, dataset+"_simmat_dc.txt"), "r") as inf:  # the drug similarity file
        inf.next()
        drug_sim = [line.strip("\n").split()[1:] for line in inf]

    with open(os.path.join(folder, dataset+"_simmat_dg.txt"), "r") as inf:  # the target similarity file
        inf.next()
        target_sim = [line.strip("\n").split()[1:] for line in inf]

    intMat = np.array(int_array, dtype=np.float64).T    # drug-target interaction matrix
    drugMat = np.array(drug_sim, dtype=np.float64)      # drug similarity matrix
    targetMat = np.array(target_sim, dtype=np.float64)  # target similarity matrix

    if dataset == "metz":
        intMat[intMat == -1] = 0
    
    return intMat, drugMat, targetMat


def get_drugs_targets_names(dataset, folder):
    with open(os.path.join(folder, dataset+"_admat_dgc.txt"), "r") as inf:
        drugs = inf.next().strip("\n").split()
        targets = [line.strip("\n").split()[0] for line in inf]
    return drugs, targets


def cross_validation(intMat, seeds, cv=0, invert=0, num=10):
    cv_data = defaultdict(list)
    for seed in seeds:
        num_drugs, num_targets = intMat.shape
        prng = np.random.RandomState(seed)
        if cv == 0:
            index = prng.permutation(num_drugs)
        if cv == 1:
            index = prng.permutation(intMat.size)
        step = index.size/num
        for i in xrange(num):
            if i < num-1:
                ii = index[i*step:(i+1)*step]
            else:
                ii = index[i*step:]
            if cv == 0:
                test_data = np.array([[k, j] for k in ii for j in xrange(num_targets)], dtype=np.int32)
            elif cv == 1:
                test_data = np.array([[k/num_targets, k % num_targets] for k in ii], dtype=np.int32)
            x, y = test_data[:, 0], test_data[:, 1]
            test_label = intMat[x, y]
            W = np.ones(intMat.shape)
            W[x, y] = 0
            
            #print test_data
            #print  np.column_stack((y,x))
            #print type(test_data), type(np.column_stack((y,x)))
            
            if invert:
                W_T = W.T
                test_data_T = np.column_stack((y,x))
                cv_data[seed].append((W_T, test_data_T, test_label))               
            else:    
                cv_data[seed].append((W, test_data, test_label))
    return cv_data


def train(model, cv_data, intMat, drugMat, targetMat):
    aupr, auc, ndcg, ndcg_inv, results = [], [], [], [], []
    for seed in cv_data.keys():
        for W, test_data, test_label in cv_data[seed]:
            t = time.clock()
            model.fix_model(W, intMat, drugMat, targetMat, seed)
            aupr_val, auc_val, ndcg_val, ndcg_inv_val = model.evaluation(test_data, test_label)
            results = results + [("","","","")] + zip(test_data[:,0],test_data[:,1],test_label,model.scores)
            
            print(aupr_val, auc_val, ndcg_val, ndcg_inv_val , time.clock()-t)
            aupr.append(aupr_val)
            auc.append(auc_val)
            ndcg.append(ndcg_val)
            ndcg_inv.append(ndcg_inv_val)
    return np.array(aupr, dtype=np.float64), np.array(auc, dtype=np.float64), np.array(ndcg, dtype=np.float64), np.array(ndcg_inv, dtype=np.float64), results


def svd_init(M, num_factors):
    from scipy.linalg import svd
    U, s, V = svd(M, full_matrices=False)
    ii = np.argsort(s)[::-1][:num_factors]
    s1 = np.sqrt(np.diag(s[ii]))
    U0, V0 = U[:, ii].dot(s1), s1.dot(V[ii, :])
    return U0, V0.T


def mean_confidence_interval(data, confidence=0.95):
    import scipy as sp
    import scipy.stats
    a = 1.0*np.array(data)
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * sp.stats.t._ppf((1+confidence)/2., n-1)
    return m, h


def write_metric_vector_to_file(auc_vec, file_name):
    np.savetxt(file_name, auc_vec, fmt='%.6f')


def load_metric_vector(file_name):
    return np.loadtxt(file_name, dtype=np.float64)


def plot_aupr(self, prec, rec, thr, name):
    import matplotlib.pyplot as plt
    plt.clf()
    plt.ioff()
    plt.plot(rec, prec, label='Precision-Recall')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall ')
    plt.legend(loc="lower left")
    fig = plt.figure()
    fig.savefig(name+'.png', bbox_inches='tight')
    fig.savefig(name+'.pdf', bbox_inches='tight')
       
def normalized_discounted_cummulative_gain(test_data,test_label, scores):
    unique_users = np.unique(test_data[:,0])
    user_array = test_data[:,0]
    ndcg = []
                
    for u in unique_users:
        indices_u =  np.in1d(user_array, [u])
        labels_u = test_label[indices_u].astype(float)
        scores_u = scores[indices_u].astype(float)
        #ndcg is calculated only for the users with some positive examples
        if not all(i <= 0.001 for i in labels_u):                        
            tmp = np.c_[labels_u,scores_u]
            tmp = tmp[tmp[:,1].argsort()[::-1],:]
            ordered_labels = tmp[:,0]
            ndcg_u = rank.ndcg_at_k(ordered_labels,ordered_labels.shape[0],1)
            ndcg.append(ndcg_u)            
    return np.mean(ndcg)
        

        

Advertisements
Loading...

We use cookies to provide and improve our services. By using our site, you consent to our Cookies Policy.