Please note, this is a STATIC archive of website www.tutorialspoint.com from 11 May 2019, cach3.com does not collect or store any user information, there is no "phishing" involved.
Tutorialspoint

Execute Python Online

'''
This CMF implementation is based on the one from the PyDTI project (https://github.com/stephenliu0423/PyDTI); changes were made to the evaluation procedure.


[1] X. Zheng, H. Ding, H. Mamitsuka, and S. Zhu, "Collaborative matrix factorization with multiple similarities for predicting drug-target interaction", KDD, 2013.

'''
import numpy as np
from sklearn.metrics import precision_recall_curve, roc_curve
from sklearn.metrics import auc
from functions import normalized_discounted_cummulative_gain

class CMF:
    """Collaborative matrix factorization for drug-target interaction data.

    The model learns latent factors ``U`` (one row per drug) and ``V`` (one
    row per target) by alternating least squares on the objective implemented
    in :meth:`compute_loss`; see Zheng et al., KDD 2013.
    """

    def __init__(self, K=10, lambda_l=0.01, lambda_d=0.01, lambda_t=0.01, max_iter=100):
        """Store the hyper-parameters.

        K: number of latent dimensions (columns of ``U`` and ``V``).
        lambda_l: weight of the squared Frobenius norms of ``U`` and ``V``.
        lambda_d: weight of the ``drugMat - U U^T`` term.
        lambda_t: weight of the ``targetMat - V V^T`` term.
        max_iter: maximum number of alternating update sweeps.
        """
        self.K = K
        self.lambda_l = lambda_l
        self.lambda_d = lambda_d
        self.lambda_t = lambda_t
        self.max_iter = max_iter

    def fix_model(self, W, intMat, drugMat, targetMat, seed):
        """Fit ``self.U`` and ``self.V`` to the masked interaction matrix.

        W: array with the same shape as ``intMat``; entries > 0 mark the
            (drug, target) pairs used for training.
        intMat: interaction matrix of shape (num_drugs, num_targets).
        drugMat: drug similarity matrix, compared against ``U U^T``.
        targetMat: target similarity matrix, compared against ``V V^T``.
        seed: seed for a private ``RandomState``; ``None`` draws from the
            global ``np.random`` state instead.
        """
        self.num_drugs, self.num_targets = intMat.shape
        self.drugMat, self.targetMat = drugMat, targetMat
        x, y = np.where(W > 0)
        self.train_drugs = set(x.tolist())
        self.train_targets = set(y.tolist())

        # The global np.random module and a RandomState expose the same
        # ``normal`` API, so one code path serves both cases.
        prng = np.random if seed is None else np.random.RandomState(seed)
        scale = np.sqrt(1 / float(self.K))
        self.U = scale * prng.normal(size=(self.num_drugs, self.K))
        self.V = scale * prng.normal(size=(self.num_targets, self.K))
        self.ones = np.identity(self.K)

        last_loss = self.compute_loss(W, intMat, drugMat, targetMat)
        WR = W * intMat
        # ``range`` (not the Python-2-only ``xrange``) keeps this runnable on
        # both Python 2 and Python 3.
        for _ in range(self.max_iter):
            self.U = self.als_update(self.U, self.V, W, WR, drugMat, self.lambda_l, self.lambda_d)
            self.V = self.als_update(self.V, self.U, W.T, WR.T, targetMat, self.lambda_l, self.lambda_t)
            curr_loss = self.compute_loss(W, intMat, drugMat, targetMat)
            # Relative-change test |curr - last| / |last| < 1e-6, written as a
            # multiplication so a zero loss cannot trigger a division by zero.
            if abs(curr_loss - last_loss) < 1e-6 * abs(last_loss):
                break
            last_loss = curr_loss

    def als_update(self, U, V, W, R, S, lambda_l, lambda_d):
        """Return updated factors for one side while the other side is fixed.

        U: current factors of the side being updated, shape (m, K).
        V: factors of the fixed side, shape (n, K).
        W: (m, n) mask; entries > 0 mark observed pairs.
        R: (m, n) masked interactions (``W * intMat`` in :meth:`fix_model`).
        S: (m, m) similarity matrix of the side being updated.
        lambda_l: weight of the norm regularizer.
        lambda_d: weight of the similarity term for this side.

        Requires ``self.ones`` (the K x K identity), set by :meth:`fix_model`.
        Raises ``numpy.linalg.LinAlgError`` if a row's system is singular.
        """
        X = R.dot(V) + 2 * lambda_d * S.dot(U)
        Y = 2 * lambda_d * np.dot(U.T, U)
        Z = lambda_d * (np.diag(S) - np.sum(np.square(U), axis=1))
        U0 = np.zeros(U.shape)
        D = np.dot(V.T, V)  # Gram matrix reused for fully observed rows
        m, n = W.shape
        for i in range(m):
            # For a 0/1 mask, V[ii].T.dot(V[ii]) equals
            # V.T.dot(diag(W[i])).dot(V) without building the dense diagonal.
            ii = np.where(W[i, :] > 0)[0]
            if ii.size == 0:
                B = Y + (lambda_l + Z[i]) * self.ones
            elif ii.size == n:
                B = D + Y + (lambda_l + Z[i]) * self.ones
            else:
                A = np.dot(V[ii, :].T, V[ii, :])
                B = A + Y + (lambda_l + Z[i]) * self.ones
            # B is symmetric, so X[i].dot(inv(B)) == solve(B, X[i]); solving
            # avoids forming the explicit inverse.
            U0[i, :] = np.linalg.solve(B, X[i, :])
        return U0

    def compute_loss(self, W, intMat, drugMat, targetMat):
        """Return half the sum of the masked reconstruction error and penalties.

        The terms are the squared Frobenius norms of
        ``W * (intMat - U V^T)``, of ``U`` and ``V`` (weighted by
        ``lambda_l``), of ``drugMat - U U^T`` (``lambda_d``) and of
        ``targetMat - V V^T`` (``lambda_t``).
        """
        loss = np.linalg.norm(W * (intMat - np.dot(self.U, self.V.T)), "fro") ** 2
        loss += self.lambda_l * (
            np.linalg.norm(self.U, "fro") ** 2 + np.linalg.norm(self.V, "fro") ** 2
        )
        loss += self.lambda_d * np.linalg.norm(drugMat - self.U.dot(self.U.T), "fro") ** 2
        loss += self.lambda_t * np.linalg.norm(targetMat - self.V.dot(self.V.T), "fro") ** 2
        return 0.5 * loss

    def evaluation(self, test_data, test_label):
        """Score the test pairs and return ``(aupr, auc, ndcg, ndcg_inv)``.

        test_data: integer (drug index, target index) pairs, one per row.
        test_label: ground-truth labels aligned with ``test_data``.

        The scores are also stored on ``self.scores``. ``ndcg`` is computed
        by ``normalized_discounted_cummulative_gain`` on the pairs as given,
        ``ndcg_inv`` on the same pairs with the two columns swapped.
        """
        # Accept lists as well as arrays, matching predict_scores.
        test_data = np.asarray(test_data)
        ii, jj = test_data[:, 0], test_data[:, 1]
        scores = np.sum(self.U[ii, :] * self.V[jj, :], axis=1)
        self.scores = scores
        test_data_T = np.column_stack((jj, ii))

        ndcg = normalized_discounted_cummulative_gain(test_data, test_label, np.array(scores))
        ndcg_inv = normalized_discounted_cummulative_gain(test_data_T, test_label, np.array(scores))
        prec, rec, _ = precision_recall_curve(test_label, scores)
        aupr_val = auc(rec, prec)
        fpr, tpr, _ = roc_curve(test_label, scores)
        auc_val = auc(fpr, tpr)

        # NOTE: callers should distinguish the nDCG of inverted methods from
        # that of non-inverted methods when interpreting ndcg / ndcg_inv.
        return aupr_val, auc_val, ndcg, ndcg_inv

    def predict_scores(self, test_data, N):
        """Return the score ``U[d] . V[t]`` for every (d, t) row of ``test_data``.

        ``N`` is accepted for interface compatibility and is not used.
        """
        inx = np.array(test_data)
        return np.sum(self.U[inx[:, 0], :] * self.V[inx[:, 1], :], axis=1)

    def __str__(self):
        """Return a one-line summary of the model's hyper-parameters."""
        return "Model: CMF, K:%s, lambda_l:%s, lambda_d:%s, lambda_t:%s, max_iter:%s" % (self.K, self.lambda_l, self.lambda_d, self.lambda_t, self.max_iter)

Advertisements
Loading...

We use cookies to provide and improve our services. By using our site, you consent to our Cookies Policy.