The adjacency matrix $A \in \mathbb{A}^{n \times n}$ is a square matrix whose elements indicate whether pairs of vertices have a relationship in some appropriate space $\mathbb{A}$.
Assumption:
Theory:
A sequence of $n^2$ random variables $(X_1,\ldots,X_{n^2}) \in \mathbb{A}^{n^2}$ is said to be a graph whenever its distribution has the following property: $$ P(X_1,\ldots,X_{n^2}) = P(\pi \circ (X_1,\ldots,X_{n^2})), $$ for all $\pi \in \Pi$, where $\Pi \subseteq \mathbb{S}_{n^2}$ is a special subset of permutations of the symmetric group $\mathbb{S}_{n^2}$ on $n^2$ elements.
More specifically, $\Pi$ is defined such that there exists a bijection between the $n^2$ elements of the sequence $(X_1,\ldots,X_{n^2})$ and the entries of the adjacency matrix $A_{i,j}$, $i,j \in V$, and the following condition holds: $$ P(A_{1,1},A_{1,2},\ldots,A_{n,n}) = P(A_{\pi \circ 1, \pi \circ 1},A_{\pi \circ 1,\pi \circ 2},\ldots,A_{\pi \circ n,\pi \circ n}), \quad \forall \pi \in \mathbb{S}_n. $$
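To make this concrete, below is a minimal sketch (illustrative code, not part of the lecture material) of how every node permutation $\pi \in \mathbb{S}_n$ induces an element of $\Pi \subseteq \mathbb{S}_{n^2}$ acting on the positions of $\text{vec}(A)$:

# Each node permutation pi in S_n jointly permutes rows and columns of A,
# and therefore induces a permutation of the n^2 positions of vec(A):
# these induced permutations form the subgroup Pi of S_{n^2}.
import itertools
import numpy as np

n = 3
A = np.arange(n * n).reshape(n, n)           # dummy "adjacency" with distinct entries 0..n^2-1

Pi = []
for pi in itertools.permutations(range(n)):  # all n! node permutations
    pi = np.array(pi)
    A_perm = A[pi][:, pi]                    # jointly permute rows and columns
    Pi.append(A_perm.flatten())              # the induced reordering of the n^2 positions

print(len(Pi))                               # n! = 6 elements, a tiny subgroup of S_9 (|S_9| = 362880)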
Q: Consider the graph
Define $\text{vec}(A) = (0,0,0,0,1,\ldots,0,1,1,1)$. Consider a subgroup $\Pi \subseteq \mathbb{S}_{n^2}$ (a subgroup is a subset of a group that also forms a group) defined by some unknown bijection between the elements in the sequence and the edges in the adjacency matrix $A$. All we know about $\Pi$ is that for some $\pi \in \Pi$ we obtain $\pi \circ \text{vec}(A) = (0,0,0,0,0,\ldots,1,1,1,1)$.
Mark the only permutation performed by $\pi' \in \mathbb{S}_{n^2} \backslash \Pi$.
Assume we have a function $f: \mathbb{A}^{n\times n} \times V \to \mathbb{R}^d$ that produces a representation of node $v \in V$ in graph ${\bf A}$.
Let's consider the Arctic food web graph below.
Definition (Structural node representations): Structural node representations of an $n\times n$ adjacency matrix ${\bf A}$ (or $n\times n \times k$, $k \geq 2$, adjacency tensor ${\bf A}$) are obtained from a function $f : \mathbb{R}^{n\times n} \to \mathbb{R}^{n \times d}$ where $$ u \simeq v \implies (f({\bf A}))_{v,\cdot} = (f({\bf A}))_{u,\cdot},$$ where $(f({\bf A}))_{u,\cdot}$ denotes the row of $f({\bf A})$ corresponding to vertex $u$.
Note that structural node representations are equivariant to permutations of the adjacency matrix $\bf A$: $$\forall \pi \in \mathbb{S}_n \quad \quad f(\pi \circ {\bf A}) = \pi \circ f({\bf A}),$$ where $\pi$ applied to ${\bf A}$ jointly permutes rows and columns, while applied to $f({\bf A})$ it only permutes the rows.
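As a quick illustration (a sketch with a hand-picked $f$, not the lecture code), the node degrees form a structural representation, and we can check the equivariance property numerically:

# Check f(pi o A) = pi o f(A) for a simple structural representation: node degrees.
import numpy as np

rng = np.random.default_rng(0)
n = 6
A = rng.integers(0, 2, size=(n, n))
A = np.triu(A, 1)
A = A + A.T                                   # random simple undirected graph

def f(A):                                     # structural node representation: degrees
    return A.sum(axis=1, keepdims=True)       # shape (n, 1), one row per node

pi = rng.permutation(n)
A_perm = A[pi][:, pi]                         # pi o A: jointly permutes rows and columns
print(np.array_equal(f(A_perm), f(A)[pi]))    # True: pi o f(A) permutes only the rows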
Definition (Most Expressive Structural node representations):
We say the structural representations are most expressive iff:
$$v \simeq u \Leftrightarrow f({\bf A})_{u,\cdot} = f({\bf A})_{v,\cdot},$$
that is, $f$ gives different embeddings to $u,v \in V$ if they are non-isomorphic, and the same embeddings if $u,v$ are isomorphic.
Graph neural networks (GNNs): GNNs output structural node embeddings.
(Murphy et al., 2019b) gave the following (then) surprising result. The work defined Relational Pooling, a neural representation defined as $$ \Gamma(A,i) = \frac{1}{n!} \sum_{\pi \in \mathbb{S}_n} f(\pi \circ A, \pi \circ i), $$ where $f: \mathbb{A}^{n^2} \times V \to \mathbb{R}^d$ is an arbitrary learnable function (e.g., neural network) that outputs a $d$-dimensional node representation of node $i\in V$ in graph $A \in \mathbb{A}^{n^2}$.
Theorem 2.1 (Murphy et al., 2019b) If node and edge attributes come from a finite set and $f: \mathbb{A}^{n^2} \times V \to \mathbb{R}^d$ is a universal function approximator, then $$ \Gamma(A,i) = \frac{1}{n!} \sum_{\pi \in \mathbb{S}_n} f(\pi \circ A, \pi \circ i) $$ is a most-expressive graph representation of node $i\in V$ in graph $A \in\mathbb{A}^{n^2}$.
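For intuition, here is a brute-force sketch of $\Gamma$ on a tiny graph, with a hand-made $f$ standing in for the neural network (illustrative only; in practice the sum over $\mathbb{S}_n$ is approximated, e.g., by sampling permutations):

# Brute-force Relational Pooling on a 3-node path; f is an arbitrary stand-in
# for a learnable function and is NOT permutation invariant by itself.
import itertools
from math import factorial
import numpy as np

A = np.array([[0, 1, 0],
              [1, 0, 1],
              [0, 1, 0]])                     # path graph 0 - 1 - 2
n = A.shape[0]

def f(A, i):
    return A[i] @ np.arange(n) + A[i].sum()   # depends on the labelling of the nodes

def relational_pooling(A, i):
    out = 0.0
    for pi in itertools.permutations(range(n)):
        pi = np.array(pi)
        A_pi = A[pi][:, pi]                   # pi o A
        i_pi = int(np.where(pi == i)[0][0])   # position of node i after the permutation
        out += f(A_pi, i_pi)
    return out / factorial(n)                 # average over all n! permutations

print(relational_pooling(A, 0), relational_pooling(A, 2))   # equal: the two end nodes are isomorphic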
Using $k$-ary representations in Relational Pooling is equivalent to using subgraphs to represent a graph.
Graph $k$-reconstruction conjecture. Kelly (1957) generalizes the graph reconstruction conjecture by considering the multiset of induced subgraphs of size $k$, which we denote $D_k(G) = \{\{I(H): H \in S^{(k)}(G)\}\}$, where $S^{(k)}(G)$ is the set of induced subgraphs of $G$ on the $\binom{n}{k}$ $k$-size subsets of $V$ and $I(H)$ denotes the isomorphism class of $H$. We often call an element of $D_k(G)$ a $k$-card and $D_k(G)$ the $k$-deck. Let $G$ and $H$ be graphs; then $H$ is a $k$-reconstruction of $G$ if $H$ and $G$ have the same $k$-deck, denoted $H \sim_k G$. A graph $G$ is $k$-reconstructible if every $k$-reconstruction of $G$ is isomorphic to $G$, i.e., $H \sim_k G$ implies $H \cong G$.
Results for $k$-reconstruction usually state the least $k$ as a function of $n$ such that all graphs $G$ in $\mathcal{G}$ (or some subset) are $k$-reconstructible (Nydl, 2001). There exist extensive partial results in this direction, mostly describing $k$-reconstructibility (as a function of $n$) for particular families of graphs, such as trees, disconnected graphs, complete multipartite graphs, and paths. More concretely, Nydl (2001) and Spinoza and West (2019) exhibited graphs with $2k$ vertices that are not $k$-reconstructible. In practice, these results imply that for any fixed $k$ there are graphs with not many more vertices than $k$ that are not $k$-reconstructible. Further, Manvel (1974) and Taylor (1990) studied which graph functions, such as the degree sequence and connectedness, can be recovered from the $k$-deck, depending on the size of $k$. Please refer to Cotta et al. (2021) for a more complete discussion.
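As a small illustration of these objects, the following sketch computes $k$-decks with networkx, using WL hashes of the $k$-cards as a heuristic isomorphism label (exact for such tiny subgraphs):

# D_k(G): multiset of (isomorphism classes of) induced k-node subgraphs.
from collections import Counter
from itertools import combinations
import networkx as nx

def k_deck(G, k):
    deck = Counter()
    for S in combinations(G.nodes(), k):                # all (n choose k) k-subsets of V
        H = G.subgraph(S)                               # induced subgraph: a "k-card"
        deck[nx.weisfeiler_lehman_graph_hash(H)] += 1
    return deck

G = nx.path_graph(4)                                    # P4: 4 nodes, 3 edges
H = nx.star_graph(3)                                    # K_{1,3}: 4 nodes, 3 edges
print(k_deck(G, 2) == k_deck(H, 2))                     # True: same 2-deck, yet G and H are non-isomorphic
print(k_deck(G, 3) == k_deck(H, 3))                     # False: the 3-deck tells them apart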
Let's start with a CNN filter:
Note that the CNN filter is applied to a constant number of inputs in the last layer.
Topologically, an image is a lattice and the convolution is a filter applied over a patch of the lattice
Then, we apply this convolution in layers, multiple times. Note that each pixel in the previous layer has a convolution representation in the next layer.
Note that in CNNs:
Today, we are going to explore convolutions over more complex topologies.
Let $\bf A$ represent an arbitrary simple (no loops) undirected social network graph. We now illustrate how a convolution should act over the topological space of Facebook friends:
The graph neural layer applied to Bruno will act on his neighbors
The graph neural layer applied to one of his friends will act on her neighbors
The representation ${\bf h}^{(k)}_v \in \mathbb{R}^{1 \times d}$, $d \in \mathbb{N}$, at layer $k$ of node $v \in V$ can be described through:
The resulting filter recursion is: $$ {\bf h}^{(k)}_v = \sigma\left( {\bf h}^{(k-1)}_v {\bf W}_\text{self}^{(k)} + \text{Pooling}\left(({\bf h}^{(k-1)}_u)_{u \in \mathcal{N}_v}\right) {\bf W}_\text{neigh}^{(k)} + {\bf b}^{(k)} \right), $$ where ${\bf W}_\text{self}^{(k)} \in \mathbb{R}^{d' \times d}$, ${\bf W}_\text{neigh}^{(k)} \in \mathbb{R}^{d' \times d}$, ${\bf b}^{(k)} \in \mathbb{R}^{1 \times d}$, with $d' \in \mathbb{N}$ the dimension of the previous layer's representations (so ${\bf h}^{(k-1)}_v \in \mathbb{R}^{1 \times d'}$), $\mathcal{N}_v$ the set of neighbors of $v$, and $\text{Pooling}$ a function whose output does not depend on the order of its inputs.
Given a sequence (input vector) of arbitrary length $n \geq 1$ $$ {\bf x} = (x_1,\ldots,x_n) $$ a permutation $\pi$ over the integers $\{1, 2,\ldots,n\}$ is such that ${\bf x}_\pi = (x_{\pi_1},\ldots,x_{\pi_n})$ is the correspondingly permuted version of ${\bf x}$.
Pooling is such that $$ \text{Pooling}({\bf x}) = \text{Pooling}(\pi \circ {\bf x}), \quad \forall \pi \in \mathbb{S}_n, $$
See our lecture on set representations for a complete list of possible Pooling functions.
Sum pooling example:
Using sum pooling, the resulting filter recursion is: $$ {\bf h}^{(k)}_v = \sigma\left( {\bf h}^{(k-1)}_v {\bf W}_\text{self}^{(k)} + \sum_{u \in \mathcal{N}_v} {\bf h}^{(k-1)}_u {\bf W}_\text{neigh}^{(k)} + {\bf b}^{(k)} \right), $$ where again $\mathcal{N}_v$ denotes the neighbors of node $v$ in the graph.
This variant is commonly known as GraphConv in the literature, and has been thoroughly studied in Morris et al., 2019.
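A minimal NumPy sketch of this sum-pooling recursion (shapes as assumed below; this is not the PyTorch Geometric implementation we will use later):

# One GraphConv-style layer with sum pooling and a ReLU nonlinearity.
import numpy as np

def graph_conv_layer(A, H, W_self, W_neigh, b):
    """A: (n, n) adjacency, H: (n, d') previous-layer features,
    W_self, W_neigh: (d', d), b: (1, d)."""
    neigh_sum = A @ H                                   # row v holds sum_{u in N_v} h_u
    return np.maximum(0.0, H @ W_self + neigh_sum @ W_neigh + b)   # sigma = ReLU

rng = np.random.default_rng(0)
A = np.array([[0., 1., 1.],
              [1., 0., 0.],
              [1., 0., 0.]])                            # a 3-node star
H0 = np.ones((3, 2))                                    # constant initial features
H1 = graph_conv_layer(A, H0, rng.normal(size=(2, 4)), rng.normal(size=(2, 4)), rng.normal(size=(1, 4)))
print(np.allclose(H1[1], H1[2]))                        # True: the two leaves are isomorphic nodes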
Most existing Graph Neural Networks are a variant of the above recursion.
Graph Attention Networks (GATs) (Veličković et al., 2018) replace ${\bf h}^{(k-1)}_u$ inside the pooling with $\alpha({\bf h}^{(k-1)}_v,{\bf h}^{(k-1)}_u) {\bf h}^{(k-1)}_u$, where $\alpha(\cdot,\cdot) \in (0,1)$ is an attention mechanism (Bahdanau et al., 2015).
More generally, an edge function can also be added before the set representation function, to better represent the connection between $v$ and its neighbors: $$ {\bf h}^{(k)}_v = \sigma\left( {\bf h}^{(k-1)}_v {\bf W}_\text{self}^{(k)} + \text{Pooling}\!\left(\left(\text{edgefunc}\left({\bf h}^{(k-1)}_v,{\bf h}^{(k-1)}_u\right)\right)_{u \in \mathcal{N}_v}\right) {\bf W}_\text{neigh}^{(k)} + {\bf b}^{(k)} \right), $$ where GAT is just one such example.
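A compact sketch of this edge-function variant, with a simplified attention-style edgefunc (the actual GAT uses a LeakyReLU scoring function, a softmax over the neighbourhood, and multiple heads):

# Edge-function message passing:
# h_v <- sigma(h_v W_self + (sum_{u in N_v} edgefunc(h_v, h_u)) W_neigh + b).
# Shapes: H: (n, d'), W_self/W_neigh: (d', d), b: (d,), a: (2 * d',).
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def edge_func(h_v, h_u, a):
    alpha = sigmoid(a @ np.concatenate([h_v, h_u]))     # attention-style weight in (0, 1)
    return alpha * h_u

def gnn_layer(A, H, W_self, W_neigh, b, a):
    out = []
    for v in range(A.shape[0]):
        neigh = np.where(A[v] > 0)[0]
        pooled = sum((edge_func(H[v], H[u], a) for u in neigh), np.zeros(H.shape[1]))   # sum pooling
        out.append(np.maximum(0.0, H[v] @ W_self + pooled @ W_neigh + b))               # sigma = ReLU
    return np.vstack(out)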
Definition (Most Expressive function): A function is most expressive if it gives the same representation to isomorphic graphs and can give different representations to non-isomorphic graphs.
Q: Are GNNs most-expressive representations for graphs?
A: No, there exist pairs of non-isomorphic graphs that cannot be distinguished by standard GNNs (Xu et al., 2019).
Independently of the learning procedure, certain non-isomorphic graphs will get the same representation.
Example 1: Circular Skip Link (CSL) task (Murphy et al., 2019)
We will see that we can quantify the expressive power of GNNs in terms of the WL test, a well-known heuristic used in graph theory to test graph isomorphism.
Recall our desideratum, which is not obtained by standard GNNs: different representations to non-isomorphic graphs.
Obviously, if a network is able to test for graph isomorphism, then it is able to assign a unique representation to each isomorphism class.
The graph isomorphism problem is a candidate NP-intermediate problem (i.e., it is not known to be solvable in polynomial time, nor known to be NP-complete).
The WL test is a combinatorial heuristic to test graph isomorphism.
Figure credit: Expressive power of graph neural networks and the Weisfeiler-Lehman test.
Theorem (Xu et al. 2019, Morris et al. 2019): the expressive power of GNNs is upper bounded by the WL test.
Remark: since there exist pairs of non-isomorphic graphs that are undistinguishable by the WL test, there exist pairs of non-isomorphic graphs that are undistinguishable by GNNs.
GNNs are at most as expressive as the WL test in distinguishing non-isomorphic graphs.
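Before the GNN example, here is a compact sketch of the 1-WL colour-refinement heuristic itself (illustrative; later we will use the networkx implementation):

# 1-WL: iteratively hash each node's colour together with the multiset of its
# neighbours' colours, then compare the colour histograms of the two graphs.
import networkx as nx

def wl_colour_histogram(G, iterations=3):
    colours = {v: 0 for v in G}                          # uniform initial colours
    for _ in range(iterations):
        colours = {
            v: hash((colours[v], tuple(sorted(colours[u] for u in G[v]))))
            for v in G
        }
    return sorted(colours.values())

G = nx.cycle_graph(6)
H = nx.disjoint_union(nx.cycle_graph(3), nx.cycle_graph(3))
print(wl_colour_histogram(G) == wl_colour_histogram(H))  # True: 1-WL cannot tell C6 from two triangles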
Let us see an example of non-isomorphic graphs that are indistinguishable by a GraphConv-based GNN.
In your environment on the scholar cluster, install PyG as follows:
pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
where TORCH is the version of PyTorch you have installed, which you can find with Python as:
import torch
print(torch.__version__)
# @title [RUN] Import modules
import os.path as osp
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import scipy.io as sio
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from scipy.special import comb
from sklearn.metrics import accuracy_score
from torch_geometric.data import InMemoryDataset
from torch_geometric.data.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GraphConv, global_add_pool
from torch_geometric.utils import to_networkx, to_undirected
# @title [RUN] Helper functions for plots and visualisations
def get_color(node_emb, hashmap):
max_val = max(hashmap.values(), default=0) + 1
crt_node_emb = np.round(node_emb, decimals=2)
if tuple(crt_node_emb) not in hashmap:
hashmap[tuple(crt_node_emb)] = max_val
max_val += 1
crt_node_emb = hashmap[tuple(crt_node_emb)]
# Map float number to a color
crt_color = cm.tab10(crt_node_emb, bytes=True)
crt_color = (
crt_color[0] / 255.0,
crt_color[1] / 255.0,
crt_color[2] / 255.0,
crt_color[3] / 255.0,
)
return crt_color
def draw_one_graph(
ax, edges, hash, label=None, node_emb=None, layout=None,
):
graph = nx.Graph()
graph.add_edges_from(zip(edges[0], edges[1]))
node_pos = layout(graph)
# Add colors according to node embeding
if node_emb is not None:
color_map = [
get_color(node_emb[node_id], hash)
for node_id in graph.nodes()
]
nx.draw_networkx_nodes(
graph, node_pos, node_color=color_map, nodelist=graph.nodes(), ax=ax
)
nx.draw_networkx_edges(graph, node_pos, ax=ax)
nx.draw_networkx_labels(graph, node_pos, ax=ax)
else:
nx.draw_networkx(graph, node_pos, ax=ax)
def gallery(
graphs,
hash=None,
labels=None,
node_emb=None,
max_fig_size=(40, 10),
layout=nx.layout.kamada_kawai_layout,
):
if hash is None:
hash = {}
num_graphs = len(graphs)
ff, axes = plt.subplots(
1, num_graphs, figsize=max_fig_size, subplot_kw={"xticks": [], "yticks": []}
)
if num_graphs == 1:
axes = [axes]
if node_emb is None:
node_emb = num_graphs * [None]
if labels is None:
labels = num_graphs * [" "]
for i in range(num_graphs):
draw_one_graph(
axes[i],
graphs[i].edge_index.numpy(),
hash,
labels[i],
node_emb[i],
layout,
)
if labels[i] != " ":
axes[i].set_title(f"Target: {labels[i]}", fontsize=28)
axes[i].set_axis_off()
plt.show()
Let us consider three exemplary graphs chosen from the Graph8c dataset, which contains all the possible connected non-isomorphic simple graphs with 8 nodes.
The goal is to understand whether a GraphConv-based GNN can disambiguate them.
To do so, we are going to assign a different class to each of them, and we are going to place them in our training set. A GNN can disambiguate them if and only if it can learn to correctly predict their targets.
# @title [RUN] `Graph8c` data retrieval
# Let's get three (selected) graphs from the Graph8c dataset from
# “Breaking the Limits of Message Passing Graph Neural Networks”
# (https://arxiv.org/pdf/2106.04319.pdf)
!wget https://raw.githubusercontent.com/balcilar/gnn-matlang/main/dataset/graph8c/raw/graph8c.g6
--2024-02-08 17:52:16--  https://raw.githubusercontent.com/balcilar/gnn-matlang/main/dataset/graph8c/raw/graph8c.g6
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 77819 (76K) [text/plain]
Saving to: ‘graph8c.g6.2’

graph8c.g6.2        100%[===================>]  76.00K  --.-KB/s    in 0.05s

2024-02-08 17:52:16 (1.55 MB/s) - ‘graph8c.g6.2’ saved [77819/77819]
IDS = [379, 4625, 4657]
class Grapg8cDataset(InMemoryDataset):
def __init__(self, root, transform=None, pre_transform=None):
super(Grapg8cDataset, self).__init__(root, transform, pre_transform)
self.data, self.slices = torch.load(self.processed_paths[0])
@property
def raw_file_names(self):
return ["graph8c.g6"]
@property
def processed_file_names(self):
return "data.pt"
def download(self):
# Download to `self.raw_dir`.
pass
def process(self):
# Read data into huge `Data` list.
dataset = nx.read_graph6(self.raw_file_names[0])
data_list = []
for i, datum in enumerate(dataset):
if i not in IDS:
continue
x = torch.ones(datum.number_of_nodes(), 1)
edge_index = to_undirected(
torch.tensor(list(datum.edges())).transpose(1, 0)
)
data_list.append(
Data(edge_index=edge_index, x=x, y=torch.tensor(IDS.index(i)))
)
if self.pre_filter is not None:
data_list = [data for data in data_list if self.pre_filter(data)]
if self.pre_transform is not None:
data_list = [self.pre_transform(data) for data in data_list]
data, slices = self.collate(data_list)
torch.save((data, slices), self.processed_paths[0])
ds = Grapg8cDataset(root="tmp/Graph8c")
G1, G2, G3 = ds[0], ds[1], ds[2]
Let us assign a different color for every different input feature.
gallery(
[G1, G2, G3],
labels=np.array([G1.y.item(), G2.y.item(), G3.y.item()]),
)
Now, it is time to implement our GNN and train it to distinguish these three graphs.
So far so good, it seems an easy task.
class MLP(nn.Module):
"""A simple feed forward neural network"""
def __init__(self, in_dim, emb_dim, num_layers=2):
super(MLP, self).__init__()
layer_list = []
layer_list.append(torch.nn.Linear(in_dim, emb_dim))
for _ in range(num_layers - 1):
layer_list.append(torch.nn.BatchNorm1d(emb_dim))
layer_list.append(torch.nn.ReLU())
layer_list.append(torch.nn.Linear(emb_dim, emb_dim))
self.layers = torch.nn.Sequential(*layer_list)
def forward(self, x):
return self.layers(x)
class GraphConvNet(torch.nn.Module):
def __init__(self, in_dim, emb_dim, out_dim, num_layers, num_final_layers=1):
super(GraphConvNet, self).__init__()
self.convs = torch.nn.ModuleList()
for layer in range(num_layers):
self.convs.append(
GraphConv(
emb_dim if layer != 0 else in_dim,
emb_dim,
)
)
self.pool = global_add_pool
self.final_layers = MLP(emb_dim, out_dim, num_layers=num_final_layers)
def forward(self, data):
h_node = data.x
for conv in self.convs:
h_node = F.relu(conv(h_node, data.edge_index))
h_graph = self.pool(h_node, data.batch)
return self.final_layers(h_graph), h_node.detach()
# @title [RUN] Hyperparameters GNNs
BATCH_SIZE = 128 # @param {type:"integer"}
NUM_EPOCHS = 25 # @param {type:"integer"}
HIDDEN_DIM = 64 # @param {type:"integer"}
NUM_LAYERS = 4 # @param {type:"integer"}
LR = 0.001 # @param {type:"number"}
SEED = 42 # @param {type:"integer"}
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# @title [RUN] Set the seed
torch.manual_seed(SEED)
np.random.seed(SEED)
def train(train_loader, model, optimiser, loss_fn, metric_fn):
"""Train model for one epoch"""
model.train()
total_loss = 0
num_graphs = 0
for data in train_loader:
optimiser.zero_grad()
data = data.to(DEVICE)
y_hat, _ = model(data)
loss = loss_fn(y_hat, data.y)
loss.backward()
optimiser.step()
total_loss += loss.item() * len(data.y)
num_graphs += len(data.y)
return total_loss / num_graphs
def evaluate(loader, model, metric_fn):
"""Evaluate model on dataset"""
y_pred, y_true = [], []
model.eval()
for data in loader:
data = data.to(DEVICE)
y_hat, _ = model(data)
y_pred.append(y_hat.detach().cpu())
y_true.append(data.y.detach().cpu())
y_pred = torch.cat(y_pred, dim=0)
y_true = torch.cat(y_true, dim=0)
return metric_fn(y_true, y_pred)
def run(
model,
train_loader,
loaders,
loss_fn,
metric_fn,
use_scheduler=False,
print_steps=True,
):
"""Train the model for NUM_EPOCHS epochs"""
# Instantiate optimiser and scheduler
optimiser = optim.Adam(model.parameters(), lr=LR)
scheduler = (
optim.lr_scheduler.StepLR(optimiser, step_size=DECAY_STEP, gamma=DECAY_RATE)
if use_scheduler
else None
)
curves = {name: [] for name in loaders.keys()}
for epoch in range(NUM_EPOCHS):
train_loss = train(
train_loader, model, optimiser, loss_fn, metric_fn
)
if scheduler is not None:
scheduler.step()
for name, loader in loaders.items():
curves[name].append(evaluate(loader, model, metric_fn))
if print_steps:
print(
f"[Epoch {epoch}]",
f"train loss: {train_loss:.6f}",
end=" ",
)
for name, metric in curves.items():
print(f"{name} metric: {metric[-1]:.3f}", end=" ")
print("\n")
return curves
# Instantiate our models
graphconv_model = GraphConvNet(
in_dim=ds.num_features,
emb_dim=HIDDEN_DIM,
out_dim=ds.num_classes,
num_layers=NUM_LAYERS,
).to(DEVICE)
# Create the Dataloader
train_loader = DataLoader(ds, BATCH_SIZE, shuffle=True)
# Train our GNN model
_ = run(
graphconv_model,
train_loader,
{"train": train_loader},
loss_fn=F.cross_entropy,
metric_fn=lambda x, y: accuracy_score(x, y.argmax(-1)),
)
[Epoch 0] train loss: 7.958117 train metric: 0.333
[Epoch 1] train loss: 3.211746 train metric: 0.333
[Epoch 2] train loss: 2.457222 train metric: 0.333
[Epoch 3] train loss: 1.130873 train metric: 0.333
[Epoch 4] train loss: 1.671703 train metric: 0.333
[Epoch 5] train loss: 1.296378 train metric: 0.667
[Epoch 6] train loss: 0.790557 train metric: 0.667
[Epoch 7] train loss: 0.870086 train metric: 0.333
[Epoch 8] train loss: 0.936117 train metric: 0.333
[Epoch 9] train loss: 0.965901 train metric: 0.333
[Epoch 10] train loss: 0.866404 train metric: 0.667
[Epoch 11] train loss: 0.778652 train metric: 0.667
[Epoch 12] train loss: 0.634940 train metric: 0.667
[Epoch 13] train loss: 0.609730 train metric: 0.667
[Epoch 14] train loss: 0.646420 train metric: 0.667
[Epoch 15] train loss: 0.644886 train metric: 0.667
[Epoch 16] train loss: 0.573604 train metric: 0.667
[Epoch 17] train loss: 0.552052 train metric: 0.667
[Epoch 18] train loss: 0.556559 train metric: 0.667
[Epoch 19] train loss: 0.563795 train metric: 0.667
[Epoch 20] train loss: 0.552862 train metric: 0.667
[Epoch 21] train loss: 0.539810 train metric: 0.667
[Epoch 22] train loss: 0.519177 train metric: 0.667
[Epoch 23] train loss: 0.504517 train metric: 0.667
[Epoch 24] train loss: 0.505564 train metric: 0.667
Since the training accuracy plateaus at 2/3, the GNN model correctly classifies only two of the three graphs: it cannot tell the remaining pair apart.
Let us visualise our three graphs, and use a different color for every different node representation learnt by our GNN.
_, G1_h_nodes = graphconv_model.to("cpu")(G1)
_, G2_h_nodes = graphconv_model.to("cpu")(G2)
_, G3_h_nodes = graphconv_model.to("cpu")(G3)
stateful_hash = {}
gallery(
[G1, G2, G3],
hash=stateful_hash,
labels=np.array([G1.y.item(), G2.y.item(), G3.y.item()]),
node_emb=[G1_h_nodes.numpy(), G2_h_nodes.numpy(), G3_h_nodes.numpy()],
)
Our GNN model is unable to distinguish the second and the third graph!
Indeed, you can see that their nodes have the same colors; more precisely, the two graphs have the same multiset of node colors.
Thus any graph representation obtained from these node representations will be the same.
We have seen that any standard GNN is at most as powerful as the WL test.
Does this mean that the WL test is also unable to distinguish these two non-isomorphic graphs?
hash2 = nx.weisfeiler_lehman_graph_hash(to_networkx(G2), iterations=6)
hash3 = nx.weisfeiler_lehman_graph_hash(to_networkx(G3), iterations=6)
hash2 == hash3
True
Indeed, the WL test also fails to tell them apart!
Actually, GraphConv is as powerful as the WL test (Morris et al., 2019).
But note that this is not true for all GNNs, as some are even less powerful.
Q: Is there a way to go beyond this expressivity limit?
There are many existing works that try to increase the expressive power of GNNs, trading off expressive power, complexity and generalisation capabilities.
Many of these works are inspired by, or connected to, higher-order versions of the WL test (namely, the $k$-WL test).
While the WL test updates the colors of individual nodes, the $k$-WL test updates the colors of $k$-tuples of nodes.
Morris et al., 2019 directly construct neural counterparts of the $k$-WL test.
Maron et al., 2019a,b,c characterize invariant and equivariant layers for learning on graphs.
We will focus on the latter, providing a characterization of the collection of permutation invariant and equivariant linear layers acting on graph data.
Consider our adjacency matrix ${\bf A} \in \mathbb{A}^{n\times n}$.
Given a matrix ${\bf X} \in \mathbb{R}^{a\times b}$, denote by $\mathrm{vec}({\bf X}) \in \mathbb{R}^{ab \times 1}$ its column stack.
In the following we show how to obtain equivariant linear layers.
Let ${\bf W} \in \mathbb{R}^{n^2\times n^2}$ denote our neuron weights, so that the linear layer maps $\mathrm{vec}({\bf A})$ to another vectorized $n \times n$ array.
Goal: ${\bf W}$ should be permutation equivariant.
Let $\pi \in \mathbb{S}_n$ be an element of the symmetric group on $n$ elements (a permutation) and let $\rho: \mathbb{S}_n \to \mathbb{R}^{n^2 \times n^2}$ be a representation of $\pi$ that acts on the (vectorized) adjacency matrix to permute it, i.e., $\rho(\pi)\text{vec}({\bf A})$ is a (vectorized) permutation of ${\bf A}$.
Then, from our G-invariance and set lectures, we know that G-equivariance implies $$\begin{equation}\tag{8} \label{eq:eq} \forall \pi \in \mathbb{S}_n, \forall {\bf A} \in \mathbb{A}^{n \times n}:\quad \rho(\pi){\bf W}\text{vec}({\bf A}) = {\bf W}\rho(\pi)\mathrm{vec}({\bf A}) \end{equation}$$ where $\mathrm{vec}$ flattens the matrix into a vector.
Since Equation $(\ref{eq:eq})$ is true for all ${\bf A}$, we can impose the following: $$ \rho(\pi){\bf W} = {\bf W}\rho(\pi). $$
We can now use the Kronecker product property we saw in our G-equivariance lecture and in our set lecture to write $$ \rho(\pi){\bf W} = {\bf W}\rho(\pi) \implies \left(\rho(\pi)\otimes \rho(\pi^{-1})^T\right)\mathrm{vec}({\bf W}) = \mathrm{vec}({\bf W}). $$ For any permutation $\pi \in \mathbb{S}_n$ the permutation matrix satisfies $\rho(\pi^{-1}) = \rho(\pi)^T$. Then, $$ \rho(\pi){\bf W} = {\bf W}\rho(\pi) \implies \left(\rho(\pi)\otimes \rho(\pi)\right)\mathrm{vec}({\bf W}) = \mathrm{vec}({\bf W}). $$
Finally, we need to compute the Reynolds operator $$\bar{P} = \frac{1}{n!}\sum_{\pi \in \mathbb{S}_n} \rho(\pi)\otimes \rho(\pi).$$ And we know from our G-invariance lecture that the Reynolds operator guarantees $\bar{P}\left(\rho(\pi)\otimes \rho(\pi)\right) = \bar{P}$, $\forall \pi \in \mathbb{S}_n$.
We now define the subspace of neuron parameters that will make our (linear) neural layer equivariant.
As seen in the G-invariant lecture, weights $$\mathrm{vec}({\bf W}) \in \text{Left-1-Eig}(\bar{P}) = \{\mathrm{vec}({\bf W}') \in \mathbb{R}^{n^4} : \mathrm{vec}({\bf W}')^T \bar{P} = \mathrm{vec}({\bf W}')^T \}$$ define an equivariant subspace.
Moreover, we know $\text{Left-1-Eig}(\bar{P})$ is spanned by the left eigenvectors of $\bar{P}$ with eigenvalue 1; therefore our neuron weights are defined as $$ \mathrm{vec}({\bf W}) = \sum_{i=1}^k \alpha_i {\bf v}_i, $$ where ${\bf v}_1^T,\ldots,{\bf v}_k^T$ are the $k$ left eigenvectors of $\bar{P}$ with eigenvalue 1 and $k$ is the rank of $\bar{P}$.
For example, the 1-left eigenvector basis for ${\bf A} \in \mathbb{A}^{n \times n}$ for $n = 5$ can be visualized as
where the number of basis elements is $\text{bell}(k + l) = \text{bell}(2 + 2) = 15$, with $k = l = 2$ the orders of the input and output tensors, and $\text{bell}(m)$ the $m$-th Bell number.
Figure credit (Maron et al. 2019a).
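A numerical sanity check of this count (illustrative; here we take $n = 4$, the smallest $n$ for which the fixed subspace reaches the full dimension 15):

# Build P_bar = (1/n!) sum_pi rho(pi) (x) rho(pi), with rho(pi) = P_pi (x) P_pi acting on vec(A),
# and verify that its rank (the dimension of its eigenvalue-1 eigenspace) is bell(4) = 15.
import itertools
import numpy as np

n = 4
rhos = []
for pi in itertools.permutations(range(n)):
    P = np.eye(n)[list(pi)]                              # n x n permutation matrix
    rhos.append(np.kron(P, P))                           # rho(pi): n^2 x n^2, permutes vec(A)

P_bar = sum(np.kron(R, R) for R in rhos) / len(rhos)     # Reynolds operator, n^4 x n^4

print(np.linalg.matrix_rank(P_bar))                      # 15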
Note that this result can be extended analogously to find the 1-left-eigenvector basis for higher-order adjacency tensors ${\bf A} \in \mathbb{A}^{n^k}$.
We have seen how to obtain invariant and equivariant linear layers, which can be used to construct Higher-order GNNs, our first attempt at more expressive GNNs.
The message-passing paradigm of GNNs is limited by its anonymity (no sender info).
Thus, one can address the problem by augmenting the node features with unique identifiers.
Several methods fall in this category. We will expand on two.
Identifiers as node features + average (Murphy et al. 2019)
Run a standard GNN model $f$ on the original graph augmented with randomly assigned one-hot node ids as additional features. This gives a positional node representation by breaking symmetries. Then, use symmetrization to get a structural node representation from the positional ones:
$$ {\bf h}_v = \frac{1}{n!} \sum_{\pi \in \mathbb{S}_n} f({\bf A} \Vert (\pi \circ I_n), v),$$
where ${\bf A} \Vert (\pi \circ I_n)$ concatenates given features with the additional permuted node ids (thus, concatenates on the diagonal of ${\bf A}$).
Pros:
Cons:
Note that in practice this approach is simply applying the Reynolds operator to the permutation group $\mathbb{S}_n$, as we have seen in the G-invariant lecture and in the set lecture.
Random node features (Abboud et al. 2021, Sato et al., 2021)
Run a standard GNN model $f$ on the original graph augmented with additional randomized initial node features $R$, one for every node $v$, $R_v \sim \mathcal{D}$, $R_v \in \mathbb{R}^d$:
$${\bf h}_v = f({\bf A} \Vert R, v),$$
where ${\bf A} \Vert R$ concatenates given features with the additional random ones (thus, concatenates on the diagonal of ${\bf A}$).
Random values can act as ids of the nodes, and can "always" make nodes distinguishable. But it also makes the node embeddings positional. As we will see in our positional node embedding lecture, averaging over the distribution of random features (and node permutations) would allow us to recover structural node embeddings.
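A quick sketch of this idea on our running example (reusing GraphConvNet, G2, G3, ds, HIDDEN_DIM and NUM_LAYERS defined above; the random-feature dimension 8 is an arbitrary choice):

# Append i.i.d. random features R_v to every node before running the GNN.
import torch
from torch_geometric.data import Batch, Data

def add_random_features(data, d=8):
    r = torch.randn(data.num_nodes, d)                   # R_v ~ N(0, I_d)
    return Data(x=torch.cat([data.x, r], dim=-1), edge_index=data.edge_index, y=data.y)

rni_model = GraphConvNet(in_dim=1 + 8, emb_dim=HIDDEN_DIM, out_dim=ds.num_classes, num_layers=NUM_LAYERS)
rni_model.eval()
h2, _ = rni_model(Batch.from_data_list([add_random_features(G2)]))
h3, _ = rni_model(Batch.from_data_list([add_random_features(G3)]))
print(torch.allclose(h2, h3))    # almost surely False, even without training: the random ids break
                                 # the symmetry, at the price of positional (noisy) representations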
Pros:
Cons:
The inability of GNNs to distinguish non-isomorphic graphs is directly tied to the inability to count substructures.
Theorem (Chen et al. 2020): GNNs cannot perform induced-subgraph-count of substructures consisting of 3 or more nodes.
Thus:
Count the number of substructures each node belongs to as a pre-processing step.
Use the counts to augment node features (Bouritsas et al. 2022) or to guide the message passing (Bodnar et al. 2021).
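A sketch of such a pre-processing step, using triangle counts as a simple instance of the substructure counts of Bouritsas et al. 2022 (reusing G2 from the example above):

# Count, for every node, the number of triangles it belongs to, and append the
# count to the node features.
import networkx as nx
import torch
from torch_geometric.utils import to_networkx

def add_triangle_counts(data):
    G = to_networkx(data, to_undirected=True)
    tri = nx.triangles(G)                                # node -> number of triangles through it
    counts = torch.tensor([tri[v] for v in range(data.num_nodes)], dtype=torch.float).view(-1, 1)
    data.x = torch.cat([data.x, counts], dim=-1)
    return data

G2_aug = add_triangle_counts(G2.clone())
print(G2_aug.x[:, -1])                                   # per-node triangle counts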
Figure credit: Bouritsas et al. 2022.
View each graph as a bag of subgraphs, obtained through a deterministic function (which is called subgraph selection policy)
Employ an architecture (or encoder) returning a representation that is invariant to permutations of node ids and invariant to permutations of the elements in the bag.
For example, a valid architecture respecting those symmetries consists of a GNN applied to each subgraph independently, followed by a pooling function that aggregates the results.
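A minimal sketch of such an architecture with the node-deletion selection policy (in the spirit of Cotta et al., 2021), reusing GraphConvNet and graphconv_model from above; real subgraph GNNs batch the subgraphs and are trained end to end:

# Run the GNN on every node-deleted subgraph and sum-pool the resulting graph representations.
import torch
from torch_geometric.data import Batch, Data
from torch_geometric.utils import subgraph

def node_deleted_bag(data):
    """Return the bag of node-deleted subgraphs of `data`."""
    bag = []
    for v in range(data.num_nodes):
        keep = torch.tensor([u for u in range(data.num_nodes) if u != v])
        edge_index, _ = subgraph(keep, data.edge_index, relabel_nodes=True, num_nodes=data.num_nodes)
        bag.append(Data(x=data.x[keep], edge_index=edge_index))
    return bag

def subgraph_gnn(model, data):
    # Sum over the bag (invariant to the order of its elements); the GNN itself
    # is invariant to permutations of node ids within each subgraph.
    return sum(model(Batch.from_data_list([sub]))[0] for sub in node_deleted_bag(data))

print(subgraph_gnn(graphconv_model, G2).shape)           # a graph-level representation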
Several variants exist (Cotta et al., 2021, Zhang and Li, 2021, You et al., 2021, Bevilacqua et al., 2022, Zhao et al., 2022, Papp et al., 2021, Frasca et al., 2022, Qian et al., 2022).
Different from structural node representations, positional node representations are not generally invariant to permutations.
For instance, the Singular Value Decomposition of $A$ gives positional node representations, which are markedly different from the structural ones:
Consider an undirected graph $G$ (then ${\bf U} = {\bf V} = {\bf Z}$).
Definition (Positional node representations [Srinivasan & Ribeiro, 2020]): Positional node representations of a graph $G$ are defined as joint samples of random variables $(\mathbf{Z}_i)_{i\in V}|{\bf A} \sim p(\cdot |{\bf A})$, $\mathbf{Z}_i \in \mathbb{R}^{d}$, $d \geq 1$, where $p(\cdot|{\bf A})$ is a $\mathbb{S}_n$-equivariant probability distribution on ${\bf A}$. That is, $$(Z_1,\ldots,Z_n)|{\bf A} \stackrel{d}{=} (Z_{\pi(1)},\ldots,Z_{\pi(n)})|(\pi \circ {\bf A}), \quad \pi \in \mathbb{S}_n,$$ where $\stackrel{d}{=}$ means equivalent in distribution.
Positional representations are often obtained via matrix factorization (MF), which decomposes the adjacency matrix ${\bf A}$ into a matrix of $k$-dimensional node factors $\Phi \in \mathbb{R}^{k \times n}$ (one column per node).
Q: What is the impact of a permutation ${\bf B} = \pi \circ {\bf A}$ on a matrix factorization?
Now consider the following example:
Assume our graph is our Arctic food web, with adjacency matrix ${\bf A}$.
Consider the node representations of the food web using the left singular vectors obtained by applying SVD to ${\bf A}$.
Since ${\bf A}$ is symmetric, these are just the eigenvectors $\Psi$ of ${\bf A}$.
In the figure, the color of node $v \in V$ is an RGB characterization of its eigenvector $\Psi_{\cdot,v}$.
Now consider the permutation $\pi \in \mathbb{S}_n$ such that ${\bf B} = \pi\circ {\bf A} = {\bf A}$, i.e., $\pi$ is an automorphism of the food web that swaps the land and sea components.
If we run the same SVD algorithm on ${\bf B}$ (recall ${\bf B} = {\bf A}$), we obtain exactly the same factor matrix; however, since the rows now refer to the relabeled nodes, each land node receives the embedding previously assigned to its sea counterpart, and vice versa.
Since permuting ${\bf A}$ results in different positional embeddings of the nodes, then positional embeddings can be interpreted as samples from a joint distribution.
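A tiny concrete analogue of this phenomenon (a 3-node path instead of the food web; illustrative code, not the lecture material):

# Path 0 - 1 - 2: the two end nodes are isomorphic, yet the eigenvector
# (positional) embeddings assign them different vectors, and the automorphism
# swapping them exchanges which node receives which embedding.
import numpy as np

A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])

_, Psi = np.linalg.eigh(A)                   # columns: eigenvectors; row v = embedding of node v
print(np.allclose(Psi[0], Psi[2]))           # False: isomorphic nodes, different positional embeddings

pi = [2, 1, 0]                               # automorphism swapping the two end nodes
B = A[pi][:, pi]
print(np.allclose(B, A))                     # True: B = pi o A = A
_, Psi_b = np.linalg.eigh(B)
print(np.allclose(Psi_b, Psi))               # True: same matrix, same factorization, but node 0 now
                                             # plays node 2's role, so its embedding changed relative
                                             # to the original labelling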
Let us consider the marginals of this joint distribution. The parameters of the marginal distribution are related to structural representations of the nodes.
Definition: Given a graph $G = (V, E)$ we say that two nodes $u, v \in V$ are isomorphic (denoted as $u \simeq v$) if they have symmetric positions in the graph.
In the figure (Zhang et al., 2021) $ v_1 \simeq v_4 $, $ v_2 \simeq v_3 $
Consider a bivariate normal distribution $(X_1, X_2) \sim \mathcal{N}\left(\boldsymbol{0}, \begin{bmatrix} 1 & \sigma \\ \sigma & 1\end{bmatrix}\right)$, where $\sigma \in \mathbb{R}$ is a scalar.
Note that from the marginal distributions we cannot estimate the parameter $\sigma$, which requires joint samples $(x_1, x_2)$ for it to be estimated.
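A quick numerical illustration of this point (the sample size and $\sigma$ values below are arbitrary choices):

# Two bivariate normals with different sigma have (numerically) identical marginals;
# only a joint statistic, the empirical correlation, reveals sigma.
import numpy as np

rng = np.random.default_rng(0)
n_samples = 200_000

def sample(sigma):
    cov = np.array([[1.0, sigma], [sigma, 1.0]])
    return rng.multivariate_normal(np.zeros(2), cov, size=n_samples)

x_a = sample(0.0)                                        # independent coordinates
x_b = sample(0.9)                                        # strongly correlated coordinates

print(x_a.mean(0).round(2), x_a.var(0).round(2))         # marginals: mean ~ 0, variance ~ 1
print(x_b.mean(0).round(2), x_b.var(0).round(2))         # marginals: mean ~ 0, variance ~ 1 (the same)
print(np.corrcoef(x_a.T)[0, 1].round(2), np.corrcoef(x_b.T)[0, 1].round(2))   # 0.0 vs 0.9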
A link prediction task is a joint task between two nodes via link function $\rho(Z_u, Z_v)$, where $Z_u, Z_v$ denote embeddings of nodes $u, v$ sampled jointly.
As we have seen in the bivariate case, marginal distributions are insufficient to solve a joint task. Therefore, GNNs may fail in solving link prediction tasks.
$\text{GNN}(G)_{\text{Lynx}} = \text{GNN}(G)_{\text{Orca}}$ and therefore $\rho(\text{GNN}(G)_{\text{Hare}}, \text{GNN}(G)_{\text{Lynx}}) = \rho(\text{GNN}(G)_{\text{Hare}}, \text{GNN}(G)_{\text{Orca}})$.
However, the permutation sensitivity in matrix factorization algorithms makes it difficult to practically use them with a final link function for link prediction tasks (unless we can ensure invariance to permutations in the link function).
So how can we predict links without being sensitive to permutations?
Definition (Joint structural representations): Joint structural node set representations of an $n\times n$ adjacency matrix ${\bf A}$ (or $n\times n \times k$, $k \geq 2$, adjacency tensor ${\bf A}$) are obtained from a function $f : \mathbb{R}^{n\times n} \to \mathbb{R}^{|P^{\star}(V)| \times d}$, with $P^{\star}(V)$ the power set excluding the empty set, where isomorphic node sets (denoted by $\simeq$) obtain identical representations, i.e., $$ s_1 \simeq s_2 \implies (f({\bf A}))_{s_1,\cdot} = (f({\bf A}))_{s_2,\cdot},$$ where $s_1, s_2 \in P^{\star}(V)$ and $(f({\bf A}))_{s_2,\cdot}$ denotes the row of $f({\bf A})$ corresponding to $s_2$.
Definition (Most Expressive Joint Structural Representations): We say that the joint structural representations of a node set are most expressive iff $$ s_1 \simeq s_2 \Leftrightarrow (f({\bf A}))_{s_1,\cdot} = (f({\bf A}))_{s_2,\cdot}.$$
Theorem (Srinivasan & Ribeiro, 2020) (informal): For a graph $G$, given a sequence of node sets $\vec{S}=(s_1,\ldots,s_n),\;s_i \in P^{\star}(V)$ and their associated random variables as a sequence $\vec{Y}=(y_1,\ldots,y_n)$, if for all pairs of isomorphic node sets $s_i, s_j$ in $\vec{S}$, their corresponding random variables $y_i, y_j$ are equal in distribution (i.e., $y_i \stackrel{d}{=} y_j$), then there exists a function $g$ and a most expressive joint structural representation $f$ s.t. $$g((f({\bf A}))_{s_i,\cdot},\epsilon) \stackrel{\text{a.s.}}{=} y_i,$$ where $\epsilon$ is a pure source of random noise.
Corollary (Srinivasan & Ribeiro, 2020) (informal): While most expressive structural node representations independently are not sufficient to perform link prediction, the most expressive joint structural representations (of a pair of nodes) are sufficient.
Thanks to the corollary, then we have that most expressive joint structural representations can be used to predict links.
Essentially, from the above results we can conclude that:
Importantly, we can use joint structural representations for link prediction.
We have presented GNNs as standard architectures applied to graph data, which guarantee invariance to permutations of node ids.
We have expanded on their limited expressive power, which results, for example, in
We have introduced several lines of work trying to break this expressivity limit.
We have discussed the limitations of existing more expressive methods, and shown that the expressivity problem is not solved, yet.
We have shown that structural representations are all you need. The challenge is to define the number of nodes in your structural representation before solving the task.
Srinivasan and Ribeiro, 2020. Srinivasan, B., and Ribeiro, B. On the Equivalence between Positional Node Embeddings and Structural Graph Representations. ICLR 2020.
Morris et al., 2019. Morris, C., Ritzert, M., Fey, M., Hamilton, W. L., Lenssen, J. E., Rattan, G., and Grohe, M. Weisfeiler and leman go neural: Higher-order graph neural networks. AAAI 2019
Veličković et al., 2018. Veličković, P., Cucurull, G., Casanova, A., Romero, A., Liò, P., and Bengio, Y. Graph attention Networks. ICLR 2018
Bahdanau et al., 2015. Bahdanau, D., Cho, K. H., and Bengio, Y. Neural machine translation by jointly learning to align and translate. ICLR 2015
Xu et al., 2019. Xu, K., Hu, W., Leskovec, J., and Jegelka, S. How powerful are graph neural networks? ICLR 2019
Murphy et al., 2019. Murphy, R., Srinivasan, B., Rao., V. and Ribeiro, B. Relational Pooling for Graph Representations. ICML 2019
Maron et al., 2019a. Maron, H., Ben-Hamu, H., Shamir, N., and Lipman, Y. Invariant and Equivariant Graph Networks. ICLR 2019
Maron et al., 2019b. Maron, H., Ben-Hamu, H., Segol, N., and Lipman, Y. On the Universality of Invariant Networks. ICML 2019
Maron et al., 2019c. Maron, H., Ben-Hamu, H., Serviansky, H., and Lipman, Y. Provably powerful graph networks. NeurIPS 2019
Geerts, 2020. Geerts, F. The expressive power of kth-order invariant graph networks. ArXiv Preprint
Abboud et al., 2021. Abboud, R., Ceylan, I. I., Grohe, M., and Lukasiewicz, T. The surprising power of graph neural networks with random node initialization. IJCAI-21
Sato et al., 2021. Sato, R., Yamada, M., and Kashima, H. Random features strengthen graph neural networks. SDM 2021
Chen et al., 2020. Chen, Z., Chen, L., Villar, S., Bruna, J. Can Graph Neural Networks Count Substructures? NeurIPS 2020
Bouritsas et al., 2022. Bouritsas, G., Frasca, F., Zafeiriou, S., and Bronstein, M. M. Improving graph neural network expressivity via subgraph isomorphism counting. IEEE Transactions on Pattern Analysis and Machine Intelligence 2022
Bodnar et al., 2021. Bodnar, C., Frasca, F., Otter, N., Wang, Y. G., Liò, P., Montúfar, G., and Bronstein, M. M. Weisfeiler and lehman go cellular: CW networks. NeurIPS 2021
Cotta et al., 2021. Cotta, L., Morris, C., and Ribeiro, B. Reconstruction for powerful graph representations. NeurIPS 2021
Zhang and Li, 2021. Zhang, M. and Li, P. Nested graph neural networks. NeurIPS 2021
You et al., 2021. You, J., Gomes-Selman, J., Ying, R., and Leskovec, J. Identity-aware graph neural networks. AAAI 2021.
Bevilacqua et al., 2022. Bevilacqua, B., Frasca, F., Lim, D., Srinivasan, D., Cai, C., Balamurugan, G., Bronstein, M. M., Maron, H. Equivariant Subgraph Aggregation Networks. ICLR 2022
Zhao et al., 2022. Zhao, L., Jin, W., Akoglu, L., and Shah, N. From stars to subgraphs: Uplifting any GNN with local structure awareness. ICLR 2022.
Papp et al., 2021. Papp, P. A., Martinkus, K., Faber, L., and Wattenhofer, R. Dropgnn: Random dropouts increase the expressiveness of graph neural networks. NeurIPS 2021
Frasca et al., 2022. Frasca, F., Bevilacqua, B., Bronstein, M. M., Maron, H. Understanding and Extending Subgraph GNNs by Rethinking their Symmetries. NeurIPS 2022
Qian et al., 2022. Qian, C., Rattan, G., Geerts, F., Morris, C., Niepert, M. Ordered Subgraph Aggregation Networks. NeurIPS 2022