Clean code #3

Open
wants to merge 7 commits into base: main
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@

This repository contains a Pytorch Geometric implementation of the [Gravity-Inspired Graph Autoencoders for Directed Link Prediction](https://doi.org/10.1145/3357384.3358023) paper.

With resepect to the [original implementation](https://github.com/deezer/gravity_graph_autoencoders), it has the following limitations:
With respect to the [original implementation](https://github.com/deezer/gravity_graph_autoencoders), it has the following limitations:

- Only the Cora dataset has currently been tested (but support for the other datasets should be already there);
- Only the Gravity-GAE, Gravity-VGAE, SourceTarget-GAE and SourceTarget-VGAE are implemented.
18 changes: 9 additions & 9 deletions gravity_gae/Convolution.py
@@ -2,13 +2,17 @@
import torch_sparse
from torch_geometric.nn import MessagePassing


# This conv expects self loops to have already been added, and that the rows of the adjm are NOT normalized by the inverse out-degree +1. Such normalization will be done using the "mean" aggregation inherited from MessagePassing The adjm in question should be the transpose of the usual adjm.
class Conv(MessagePassing):
"""
This convolutional layer expects self-loops to have already been added, and
that the rows of the adjacency matrix are NOT normalized by the inverse out-degree +1.
Such normalization will be done using the "mean" aggregation inherited from MessagePassing.
The adjacency matrix in question should be the transpose of the usual adjacency matrix.
"""

def __init__(self, in_channels, out_channels):
super().__init__(aggr = "mean")
self.W = Linear(in_channels, out_channels, bias = False)
super().__init__(aggr="mean")
self.W = Linear(in_channels, out_channels, bias=False)

def message_and_aggregate(self, adj_t, x):
return torch_sparse.matmul(adj_t, x, reduce=self.aggr)
@@ -18,13 +22,9 @@ def message(self, x_j):

def forward(self, x, edge_index):
transformed = self.W(x)

transformed_aggregated_normalized = self.propagate(edge_index, x = transformed)

transformed_aggregated_normalized = self.propagate(edge_index, x=transformed)
return transformed_aggregated_normalized

def reset_parameters(self):
super().reset_parameters()
self.W.reset_parameters()
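
Illustrative sketch (not part of the diff): one way the Conv layer above could be fed, following the docstring's conventions, assuming Conv is importable and torch/torch_sparse are installed; the node count, feature sizes, edge list, and self-loop weight below are made-up values.

import torch
import torch_sparse
from torch_sparse import SparseTensor

num_nodes, in_channels, out_channels = 4, 8, 16
x = torch.randn(num_nodes, in_channels)

# Directed edges of the original graph (row = source, col = target).
edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]])

# Build the *transposed* adjacency and add self-loops explicitly; no degree
# normalization is applied here, since Conv's "mean" aggregation performs it.
adj = SparseTensor(row=edge_index[0], col=edge_index[1],
                   sparse_sizes=(num_nodes, num_nodes))
adj_t = torch_sparse.fill_diag(adj.t(), 1.0)  # self-loop weight is illustrative

conv = Conv(in_channels, out_channels)
out = conv(x, adj_t)   # shape: (num_nodes, out_channels)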


79 changes: 31 additions & 48 deletions gravity_gae/GNN.py
@@ -7,7 +7,9 @@


class LayerWrapper(Module):
def __init__(self, layer, normalization_before_activation = None, activation = None, normalization_after_activation =None, dropout_p = None, _add_remaining_self_loops = False, uses_sparse_representation = False ):
def __init__(self, layer, normalization_before_activation=None, activation=None,
normalization_after_activation=None, dropout_p=None,
_add_remaining_self_loops=False, uses_sparse_representation=False):
super().__init__()
self.activation = activation
self.normalization_before_activation = normalization_before_activation
@@ -18,82 +20,71 @@ def __init__(self, layer, normalization_before_activation = None, activation = N
self.uses_sparse_representation = uses_sparse_representation

def forward(self, batch):


new_batch = copy.copy(batch)
if self._add_remaining_self_loops and not self.uses_sparse_representation:
new_batch.edge_index, _ = add_remaining_self_loops(new_batch.edge_index)
new_batch.edge_index, _ = add_remaining_self_loops(new_batch.edge_index)
elif self._add_remaining_self_loops and self.uses_sparse_representation:
new_batch.adj_t = torch_sparse.fill_diag(new_batch.adj_t, 2)

if not self.uses_sparse_representation:
new_batch.x = self.layer(x = new_batch.x, edge_index = new_batch.edge_index)
new_batch.x = self.layer(x=new_batch.x, edge_index=new_batch.edge_index)
else:

new_batch.x = self.layer(x = new_batch.x, edge_index = new_batch.adj_t)
new_batch.x = self.layer(x=new_batch.x, edge_index=new_batch.adj_t)

if self.normalization_before_activation is not None:
new_batch.x = self.normalization_before_activation(new_batch.x)
if self.activation is not None:
new_batch.x = self.activation(new_batch.x)
if self.normalization_after_activation is not None:
new_batch.x = self.normalization_after_activation(new_batch.x)
new_batch.x = self.normalization_after_activation(new_batch.x)
if self.dropout_p is not None:
new_batch.x = dropout(new_batch.x, p=self.dropout_p, training=self.training)

new_batch.x = dropout(new_batch.x, p=self.dropout_p, training=self.training)

return new_batch





class GNN_FB(Module):
def __init__(self, gnn_layers, preprocessing_layers = [], postprocessing_layers = []):
def __init__(self, gnn_layers, preprocessing_layers=[], postprocessing_layers=[]):
super().__init__()
self.net = torch.nn.Sequential(*preprocessing_layers, *gnn_layers, *postprocessing_layers)

self.net = torch.nn.Sequential(*preprocessing_layers, *gnn_layers, *postprocessing_layers )


def forward(self, batch):
return self.net(batch)


return self.net(batch)
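
Illustrative sketch (not part of the diff): GNN_FB chains the wrapped layers through torch.nn.Sequential, so an encoder could be assembled roughly as below; the channel sizes, activation, and dropout value are assumptions, and the Conv, LayerWrapper, and GNN_FB classes from this PR are assumed to be in scope.

import torch

encoder = GNN_FB(gnn_layers=[
    LayerWrapper(Conv(32, 64), activation=torch.relu,
                 dropout_p=0.5, _add_remaining_self_loops=True),
    LayerWrapper(Conv(64, 16)),
])
# `batch` is expected to be a torch_geometric Data object exposing .x and
# .edge_index (or .adj_t when uses_sparse_representation=True).
# out_batch = encoder(batch)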


class LinkPropertyPredictorGravity(Module):
def __init__(self, l, EPS = 1e-2, CLAMP = None, train_l = True):
def __init__(self, l, EPS=1e-2, CLAMP=None, train_l=True):
super().__init__()
self.l_initialization = l
self.l = Parameter(torch.tensor([l]), requires_grad = train_l )
self.l = Parameter(torch.tensor([l]), requires_grad=train_l)
self.EPS = EPS
self.CLAMP = CLAMP
def forward(self, batch):

new_batch = copy.copy(batch)
def forward(self, batch):
new_batch = copy.copy(batch)

if batch.edge_label_index in ["general", "biased", "bidirectional"]:
m_i = new_batch.x[:,-1].reshape(-1,1).expand((-1,new_batch.x.size(0))).t()
r = new_batch.x[:,:-1]
m_i = new_batch.x[:, -1].reshape(-1, 1).expand((-1, new_batch.x.size(0))).t()
r = new_batch.x[:, :-1]

norm = (r * r).sum(dim = 1, keepdim = True)
norm = (r * r).sum(dim=1, keepdim=True)
r1r2 = torch.matmul(r, r.t())

r2 = norm - 2*r1r2 + norm.t()
r2 = norm - 2 * r1r2 + norm.t()

logr2 = torch.log(r2 + self.EPS)

if self.CLAMP is not None:
logr2 = logr2.clamp(min = -self.CLAMP, max = self.CLAMP)
logr2 = logr2.clamp(min=-self.CLAMP, max=self.CLAMP)

new_batch.x = (m_i - self.l * logr2).reshape(-1)
new_batch.x = (m_i - self.l * logr2).reshape(-1)

else:
m_j = new_batch.x[new_batch.edge_label_index[1, :], -1]

m_j = new_batch.x[new_batch.edge_label_index[1,:],-1]
diff = new_batch.x[new_batch.edge_label_index[0, :], :-1] - new_batch.x[new_batch.edge_label_index[1, :], :-1]

diff = new_batch.x[new_batch.edge_label_index[0,:], :-1] - new_batch.x[new_batch.edge_label_index[1,:], :-1]

r2 = (diff * diff).sum(dim = 1)
r2 = (diff * diff).sum(dim=1)
new_batch.x = m_j - self.l * torch.log(r2 + self.EPS)

return new_batch
@@ -102,33 +93,28 @@ def reset_parameters(self):
self.l.data = torch.tensor([self.l_initialization]).to(self.l.data.device)
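
Side note (illustrative, not from the diff): in the dense branch above, norm - 2*r1r2 + norm.t() is the usual expansion of pairwise squared distances, ||r_i - r_j||^2 = ||r_i||^2 - 2 r_i.r_j + ||r_j||^2, so each ordered pair (i, j) ends up scored as m_j - l * log(||r_i - r_j||^2 + EPS), consistent with the per-edge branch. A tiny self-contained check of the expansion (shapes made up):

import torch

r = torch.randn(5, 3)
norm = (r * r).sum(dim=1, keepdim=True)
r2 = norm - 2 * torch.matmul(r, r.t()) + norm.t()   # same expression as above
assert torch.allclose(r2, torch.cdist(r, r).pow(2), atol=1e-5)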




class LinkPropertyPredictorSourceTarget(Module):
def __init__(self):
super().__init__()
def forward(self, batch):

new_batch = copy.copy(batch)
def forward(self, batch):
new_batch = copy.copy(batch)

hidden_dimension = batch.x.size(1)
half_dimension = int(hidden_dimension/2)
half_dimension = int(hidden_dimension / 2)

if batch.edge_label_index in ["general", "biased", "bidirectional"] and self.training:

source = batch.x[:, :half_dimension]
target = batch.x[:, half_dimension:]

new_batch.x = torch.matmul(source, target.t()).reshape(-1)

else:
new_batch.x = (new_batch.x[new_batch.edge_label_index[0, :], :half_dimension] *
new_batch.x[new_batch.edge_label_index[1, :], half_dimension:]).sum(dim=1).reshape(-1)

new_batch.x = (new_batch.x[new_batch.edge_label_index[0,:], :half_dimension] * new_batch.x[new_batch.edge_label_index[1,:], half_dimension:]).sum(dim = 1).reshape(-1)

return new_batch
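
Side note (illustrative, not from the diff): this decoder splits each embedding into a source half and a target half and scores a candidate edge i -> j with the dot product source_i . target_j; a tiny sketch with made-up sizes showing that the dense and per-edge branches agree:

import torch

z = torch.randn(6, 8)                       # 6 nodes, 8-dim embeddings
src, tgt = z[:, :4], z[:, 4:]
dense_scores = torch.matmul(src, tgt.t())   # all ordered pairs (dense branch)
score_0_to_3 = (src[0] * tgt[3]).sum()      # one pair (per-edge branch)
assert torch.isclose(dense_scores[0, 3], score_0_to_3)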





class ParallelLayerWrapper(Module):
def __init__(self, layerwrappers):
@@ -137,6 +123,3 @@ def __init__(self, layerwrappers):

def forward(self, batch):
return [layerwrapper(batch) for layerwrapper in self.layerwrappers]



11 changes: 3 additions & 8 deletions gravity_gae/VGAE.py
@@ -1,29 +1,24 @@

import torch
import copy
from torch.nn import Module

class VGAE_Reparametrization(Module):
def __init__(self, MAX_LOGSTD = None, num_noise_samples = 100):
def __init__(self, MAX_LOGSTD=None, num_noise_samples=100):
super().__init__()

self.MAX_LOGSTD = MAX_LOGSTD
self.num_noise_samples = num_noise_samples

def forward(self, mu_logstd):
new_batch = copy.copy(mu_logstd[0])

mu_batch, logstd_batch = mu_logstd


mu, logstd = mu_batch.x, logstd_batch.x

if self.MAX_LOGSTD is not None:
logstd = logstd.clamp(max = self.MAX_LOGSTD)

logstd = logstd.clamp(max=self.MAX_LOGSTD)

if self.training:
new_batch.x = mu + torch.randn_like(logstd) * torch.exp(logstd)
new_batch.x = mu + torch.randn_like(logstd) * torch.exp(logstd)
else:
new_batch.x = mu

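Side note (illustrative, not from the diff): this layer applies the standard VGAE reparametrization trick, sampling z = mu + eps * exp(logstd) with eps ~ N(0, I) during training and returning mu at evaluation time; a minimal sketch with made-up shapes:

import torch

mu = torch.zeros(5, 3)
logstd = torch.full((5, 3), -1.0)
z_train = mu + torch.randn_like(logstd) * torch.exp(logstd)   # stochastic sample
z_eval = mu                                                    # deterministic mean
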
52 changes: 21 additions & 31 deletions gravity_gae/custom_losses.py
@@ -5,72 +5,62 @@
import copy
from sklearn.metrics import average_precision_score, roc_auc_score


def recon_loss(logits, ground_truths, EPS = 1e-10):

pos_mask = (ground_truths == 1.)

pos_loss = -torch.log( sigmoid(logits[pos_mask]) + EPS ).mean()

neg_loss = -torch.log(1. - sigmoid(logits[~pos_mask]) + EPS).mean()
def recon_loss(logits, ground_truths, EPS=1e-10):
pos_mask = (ground_truths == 1.0)
pos_loss = -torch.log(sigmoid(logits[pos_mask]) + EPS).mean()
neg_loss = -torch.log(1.0 - sigmoid(logits[~pos_mask]) + EPS).mean()

return pos_loss + neg_loss



def hitsk(model, test_data_split, k):
test_data_split_pos = copy.copy(test_data_split)
test_data_split_pos.edge_label_index = test_data_split.pos_edge_label_index

test_data_split_neg = copy.copy(test_data_split)
test_data_split_neg.edge_label_index = test_data_split.neg_edge_label_index

return compute_hitsk(model(test_data_split_pos).x, model(test_data_split_neg).x, k )
return compute_hitsk(model(test_data_split_pos).x, model(test_data_split_neg).x, k)

def compute_hitsk(y_pred_pos, y_pred_neg, k):
tot = (y_pred_pos > torch.sort(y_pred_neg, descending = True)[0][k]).sum()
tot = (y_pred_pos > torch.sort(y_pred_neg, descending=True)[0][k]).sum()
return tot / y_pred_pos.size(0)



def average_precision(model, test_data):
return average_precision_score(test_data.edge_label.cpu().detach().numpy(), sigmoid(model(test_data).x).cpu().detach().numpy()) #avp.compute()

preds = sigmoid(model(test_data).x).cpu().detach().numpy()
labels = test_data.edge_label.cpu().detach().numpy()
return average_precision_score(labels, preds)

def auc_loss(logits, ground_truths):
return 1. - roc_auc_score(ground_truths.cpu(), logits.x.cpu())
return 1.0 - roc_auc_score(ground_truths.cpu(), logits.x.cpu())

def ap_loss(logits, ground_truths):
return 1. - average_precision_score(ground_truths.cpu().detach().numpy(), sigmoid(logits.x).cpu().detach().numpy())
preds = sigmoid(logits.x).cpu().detach().numpy()
labels = ground_truths.cpu().detach().numpy()
return 1.0 - average_precision_score(labels, preds)

def losses_sum_closure(losses):

return lambda logits, ground_truths: np.sum([loss(logits, ground_truths) for loss in losses])
return lambda logits, ground_truths: np.sum([loss(logits, ground_truths) for loss in losses])

class StandardLossWrapper(Module):
def __init__(self, norm, loss):
def __init__(self, norm, loss):
super().__init__()
self.loss = loss
self.norm = norm

def forward(self, batch, ground_truth):
return self.norm * self.loss(batch.x, ground_truth)


def kl_loss(mu , logstd):
return -0.5 * torch.mean(torch.sum(1 + 2 * logstd - mu**2 - logstd.exp()**2, dim=1))

def kl_loss(mu, logstd):
kld = 1 + 2 * logstd - mu.pow(2) - logstd.exp().pow(2)
return -0.5 * torch.mean(torch.sum(kld, dim=1))

class VGAELossWrapper(Module):
def __init__(self, norm, loss):
super().__init__()
self.loss = loss
self.norm = norm


def forward(self, batch, ground_truth):

# x_nan_idxs = torch.where(torch.isnan(batch.x) == 1)[0].tolist()

return self.norm * self.loss(batch.x, ground_truth) + (0.5 / batch.x.size(0)) * kl_loss(batch.mu, batch.logstd)

loss_val = self.norm * self.loss(batch.x, ground_truth)
kl_val = (0.5 / batch.x.size(0)) * kl_loss(batch.mu, batch.logstd)
return loss_val + kl_val
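
Side note (illustrative, not from the diff): recon_loss is a logit-space binary cross-entropy averaged separately over positive and negative examples, and kl_loss is the Gaussian KL divergence to a standard normal written in terms of mu and log-std. A quick sanity check with made-up numbers, assuming the functions above are importable:

import torch

logits = torch.tensor([4.0, -3.0, 2.0, -5.0])
labels = torch.tensor([1.0, 0.0, 1.0, 0.0])
loss = recon_loss(logits, labels)   # small, since the logits already match the labels

mu = torch.zeros(2, 3)
logstd = torch.zeros(2, 3)
assert torch.isclose(kl_loss(mu, logstd), torch.tensor(0.0))   # N(0, I) vs N(0, I)
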
3 changes: 1 addition & 2 deletions gravity_gae/methods.py
@@ -18,5 +18,4 @@
from preprocessing import *
from input_data import *
from models import *
from data_loaders import *

from data_loaders import *