
Commit 43ce413

Merge branch 'develop'
2 parents: c1d8878 + 613f7a5

34 files changed: +1018 -480 lines

docs/autogen.py (+2)
@@ -48,7 +48,9 @@
     "functions": [],
    "methods": [],
    "classes": [
+        layers.SRCPool,
        layers.DiffPool,
+        layers.LaPool,
        layers.MinCutPool,
        layers.SAGPool,
        layers.TopKPool,

docs/templates/creating-layer.md (+8 -6)
@@ -53,28 +53,30 @@ def call(self, inputs):
 ```
 
 Then, we implement the `message` function.
-The `get_i` and `get_j` built-in methods can be used to automatically access either side of the edges \(i \leftarrow j\). For instance, we can use `get_j` to access the node features `x[j]` of all neighbors `j`.
+The `get_sources` and `get_targets` built-in methods can be used to automatically retrieve the node attributes of nodes that are sending (sources) or receiving (targets) a message.
+For instance, we can use `get_sources` to access the node features `x[j]` of all neighbors `j`.
 
-If you need direct access to the edge indices, you can use the `index_i` and `index_j` attributes.
+If you need direct access to the edge indices, you can use the `index_sources` and `index_targets` attributes.
 
-In this case, we only need to get the neighbors' features and return them:
+In this case, we only need to get the neighbors' features and return them:
 
 ```py
 def message(self, x):
     # Get the node features of all neighbors
-    return self.get_j(x)
+    return self.get_sources(x)
 ```
 
 Then, we define an aggregation function for the messages. We can use a simple average of the nodes:
 
 ```py
 from spektral.layers.ops import scatter_mean
 
+
 def aggregate(self, messages):
-    return scatter_mean(messages, self.index_i, self.n_nodes)
+    return scatter_mean(messages, self.index_targets, self.n_nodes)
 ```
 
-**Note**: `n_nodes` is computed dynamically at the start of propagation, exactly like `index_i`.
+**Note**: `n_nodes` is computed dynamically at the start of propagation, exactly like `index_targets`.
 
 Since there are a few common aggregation functions that are often used in the literature, you can also skip the implementation of this method and simply pass a special keyword to the `__init__()` method of the superclass:
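
Put together, the renamed API reads as follows. This is a minimal sketch assuming the `spektral.layers.MessagePassing` base class and the `scatter_mean` op shown above; the layer name is hypothetical:

```py
from spektral.layers import MessagePassing
from spektral.layers.ops import scatter_mean


class MeanNeighbors(MessagePassing):
    """Hypothetical layer that averages the features of each node's neighbors."""

    def message(self, x):
        # Node features of the sending nodes (the neighbors j of each edge)
        return self.get_sources(x)

    def aggregate(self, messages):
        # Average the incoming messages at each receiving node i
        return scatter_mean(messages, self.index_targets, self.n_nodes)
```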

examples/node_prediction/citation_gcn.py (+1 -1)
@@ -40,7 +40,7 @@ def mask_to_weights(mask):
     for mask in (dataset.mask_tr, dataset.mask_va, dataset.mask_te)
 )
 
-model = GCN(n_labels=dataset.n_labels, n_input_channels=dataset.n_node_features)
+model = GCN(n_labels=dataset.n_labels)
 model.compile(
     optimizer=Adam(learning_rate),
     loss=CategoricalCrossentropy(reduction="sum"),
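
This and the following examples drop `n_input_channels`. A usage sketch, assuming the model now infers the number of input channels from the data the first time it is called (the usual lazy-build behavior of Keras models):

```py
from spektral.models.gcn import GCN

# No n_input_channels: the weights are created on the first forward
# pass, once the shape of the node features is known.
model = GCN(n_labels=dataset.n_labels)
```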

examples/node_prediction/citation_gcn_custom.py (+1 -1)
@@ -22,7 +22,7 @@
 x, a, y = graph.x, graph.a, graph.y
 mask_tr, mask_va, mask_te = dataset.mask_tr, dataset.mask_va, dataset.mask_te
 
-model = GCN(n_labels=dataset.n_labels, n_input_channels=dataset.n_node_features)
+model = GCN(n_labels=dataset.n_labels)
 optimizer = Adam(lr=1e-2)
 loss_fn = CategoricalCrossentropy()

examples/other/explain_node_predictions.py (+1 -1)
@@ -37,7 +37,7 @@ def mask_to_weights(mask):
     for mask in (dataset.mask_tr, dataset.mask_va, dataset.mask_te)
 )
 
-model = GCN(n_labels=dataset.n_labels, n_input_channels=dataset.n_node_features)
+model = GCN(n_labels=dataset.n_labels)
 model.compile(
     optimizer=Adam(learning_rate),
     loss=CategoricalCrossentropy(reduction="sum"),

examples/other/node_clustering_mincut.py (+1 -1)
@@ -57,7 +57,7 @@ def train_step(inputs):
 a_in = Input(shape=(None,), name="A_in", sparse=True)
 
 x_1 = GCSConv(16, activation="elu")([x_in, a_in])
-x_1, a_1, s_1 = MinCutPool(n_clusters, return_mask=True)([x_1, a_in])
+x_1, a_1, s_1 = MinCutPool(n_clusters, return_selection=True)([x_1, a_in])
 
 model = Model([x_in, a_in], [x_1, s_1])
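
The `return_mask` argument is now `return_selection`. A usage sketch of the renamed flag, assuming the outputs are otherwise unchanged (pooled features, pooled adjacency, and the cluster-selection matrix):

```py
from spektral.layers import MinCutPool

# return_selection=True also returns the matrix S that soft-assigns
# each node to one of n_clusters clusters.
x_pool, a_pool, s = MinCutPool(n_clusters, return_selection=True)([x, a])
```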

setup.py (+4 -4)
@@ -5,18 +5,18 @@
 
 setup(
     name="spektral",
-    version="1.0.8",
+    version="1.0.9",
     packages=find_packages(),
     install_requires=[
         "joblib",
         "lxml",
         "networkx",
-        "numpy<1.20",
+        "numpy",
         "pandas",
         "requests",
         "scikit-learn",
         "scipy",
-        "tensorflow>=2.1.0",
+        "tensorflow>=2.2.0",
         "tqdm",
     ],
     url="https://github.com/danielegrattarola/spektral",
@@ -27,8 +27,8 @@
     long_description=long_description,
     long_description_content_type="text/markdown",
     classifiers=[
-        "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
     ],
 )

spektral/data/loaders.py (+79 -64)
@@ -3,6 +3,7 @@
 
 from spektral.data.utils import (
     batch_generator,
+    collate_labels_batch,
     collate_labels_disjoint,
     get_spec,
     prepend_none,
@@ -78,10 +79,10 @@ def train_step(inputs, target):
     **Arguments**
 
     - `dataset`: a `spektral.data.Dataset` object;
-    - `batch_size`: size of the mini-batches;
-    - `epochs`: number of epochs to iterate over the dataset. By default (`None`)
+    - `batch_size`: int, size of the mini-batches;
+    - `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
     iterates indefinitely;
-    - `shuffle`: whether to shuffle the dataset at the start of each epoch.
+    - `shuffle`: bool, whether to shuffle the dataset at the start of each epoch.
     """
 
     def __init__(self, dataset, batch_size=1, epochs=None, shuffle=True):
@@ -178,11 +179,10 @@ class SingleLoader(Loader):
     **Arguments**
 
     - `dataset`: a `spektral.data.Dataset` object with only one graph;
-    - `epochs`: number of epochs to iterate over the dataset. By default (`None`)
+    - `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
     iterates indefinitely;
-    - `shuffle`: whether to shuffle the data at the start of each epoch;
-    - `sample_weights`: if given, these will be appended to the output
-    automatically.
+    - `shuffle`: bool, whether to shuffle the data at the start of each epoch;
+    - `sample_weights`: Numpy array, will be appended to the output automatically.
 
     **Output**
 
@@ -197,9 +197,8 @@ class SingleLoader(Loader):
     - `e`: same as `dataset[0].e`;
 
     `labels` is the same as `dataset[0].y`.
-    `sample_weights` is the same object passed to the constructor.
-
 
+    `sample_weights` is the same array passed when creating the loader.
     """
 
     def __init__(self, dataset, epochs=None, sample_weights=None):
@@ -262,6 +261,8 @@ class DisjointLoader(Loader):
     **Arguments**
 
     - `dataset`: a graph Dataset;
+    - `node_level`: bool, if `True` stack the labels vertically for node-level
+    prediction;
     - `batch_size`: size of the mini-batches;
     - `epochs`: number of epochs to iterate over the dataset. By default (`None`)
     iterates indefinitely;
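
A sketch of the newly documented flag, assuming a dataset whose graphs carry one label per node. In disjoint mode the graphs of a batch are merged into one big graph, so node labels are stacked vertically:

```py
from spektral.data import DisjointLoader

# With node_level=True, y is stacked along the node dimension:
# shape [n_nodes_in_batch, n_labels] instead of [batch, n_labels].
loader = DisjointLoader(dataset, node_level=True, batch_size=8)
```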
@@ -321,7 +322,7 @@ def tf_signature(self):
         Adjacency matrix has shape [n_nodes, n_nodes]
         Node features have shape [n_nodes, n_node_features]
         Edge features have shape [n_edges, n_edge_features]
-        Targets have shape [..., n_labels]
+        Targets have shape [*, n_labels]
         """
         signature = self.dataset.signature
         if "y" in signature:
@@ -347,33 +348,40 @@ class BatchLoader(Loader):
     If `n_max` is the number of nodes of the biggest graph in the batch, then
     the padding consists of adding zeros to the node features, adjacency matrix,
     and edge attributes of each graph so that they have shapes
-    `(n_max, n_node_features)`, `(n_max, n_max)`, and
-    `(n_max, n_max, n_edge_features)` respectively.
+    `[n_max, n_node_features]`, `[n_max, n_max]`, and
+    `[n_max, n_max, n_edge_features]` respectively.
 
     The zero-padding is done batch-wise, which saves memory at the cost of
     more computation. If latency is an issue but memory isn't, or if the
     dataset has graphs with a similar number of nodes, you can use
-    the `PackedBatchLoader` that first zero-pads all the dataset and then
+    the `PackedBatchLoader` that zero-pads all the dataset once and then
     iterates over it.
 
     Note that the adjacency matrix and edge attributes are returned as dense
-    arrays (mostly due to the lack of support for sparse tensor operations for
-    rank >2).
+    arrays.
 
-    Only graph-level labels are supported with this loader (i.e., labels are not
-    zero-padded because they are assumed to have no "node" dimensions).
+    If `mask=True`, node attributes will be extended with a binary mask that indicates
+    valid nodes (the last feature of each node will be 1 if the node was originally in
+    the graph and 0 if it is a fake node added by zero-padding).
+
+    Use this flag in conjunction with layers.base.GraphMasking to start the propagation
+    of masks in a model (necessary for node-level prediction and models that use a
+    dense pooling layer like DiffPool or MinCutPool).
+
+    If `node_level=False`, the labels are interpreted as graph-level labels and
+    are returned as an array of shape `[batch, n_labels]`.
+    If `node_level=True`, then the labels are padded along the node dimension and are
+    returned as an array of shape `[batch, n_max, n_labels]`.
 
     **Arguments**
 
     - `dataset`: a graph Dataset;
-    - `mask`: if True, node attributes will be extended with a binary mask that
-    indicates valid nodes (the last feature of each node will be 1 if the node is valid
-    and 0 otherwise). Use this flag in conjunction with layers.base.GraphMasking to
-    start the propagation of masks in a model.
-    - `batch_size`: size of the mini-batches;
-    - `epochs`: number of epochs to iterate over the dataset. By default (`None`)
+    - `mask`: bool, whether to add a mask to the node features;
+    - `batch_size`: int, size of the mini-batches;
+    - `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
     iterates indefinitely;
-    - `shuffle`: whether to shuffle the data at the start of each epoch.
+    - `shuffle`: bool, whether to shuffle the data at the start of each epoch;
+    - `node_level`: bool, if `True` pad the labels along the node dimension;
 
     **Output**
 
385393
- `a`: adjacency matrices of shape `[batch, n_max, n_max]`;
386394
- `e`: edge attributes of shape `[batch, n_max, n_max, n_edge_features]`.
387395
388-
`labels` have shape `[batch, n_labels]`.
396+
`labels` have shape `[batch, n_labels]` if `node_level=False` or
397+
`[batch, n_max, n_labels]` otherwise.
389398
"""
390399

391-
def __init__(self, dataset, mask=False, batch_size=1, epochs=None, shuffle=True):
400+
def __init__(
401+
self,
402+
dataset,
403+
mask=False,
404+
batch_size=1,
405+
epochs=None,
406+
shuffle=True,
407+
node_level=False,
408+
):
392409
self.mask = mask
410+
self.node_level = node_level
411+
self.signature = dataset.signature
393412
super().__init__(dataset, batch_size=batch_size, epochs=epochs, shuffle=shuffle)
394413

395414
def collate(self, batch):
396415
packed = self.pack(batch)
397416

398417
y = packed.pop("y_list", None)
399418
if y is not None:
400-
y = np.array(y)
419+
y = collate_labels_batch(y, node_level=self.node_level)
401420

402421
output = to_batch(**packed, mask=self.mask)
403422
output = sp_matrices_to_sp_tensors(output)
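
The label collation that `np.array(y)` used to do is now delegated to `collate_labels_batch`. The following is an illustrative sketch of the documented semantics, not the library's implementation: graph-level labels are stacked as-is, node-level labels are zero-padded to the largest graph in the batch:

```py
import numpy as np


def collate_labels_batch_sketch(y_list, node_level=False):
    if not node_level:
        # Graph-level labels: one row per graph -> [batch, n_labels]
        return np.array(y_list)
    # Node-level labels: pad each [n_nodes, n_labels] array to n_max rows
    n_max = max(y.shape[0] for y in y_list)
    return np.array([np.pad(y, ((0, n_max - y.shape[0]), (0, 0))) for y in y_list])
```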
@@ -415,12 +434,13 @@ def tf_signature(self):
         Adjacency matrix has shape [batch, n_nodes, n_nodes]
         Node features have shape [batch, n_nodes, n_node_features]
         Edge features have shape [batch, n_nodes, n_nodes, n_edge_features]
-        Targets have shape [batch, ..., n_labels]
+        Labels have shape [batch, n_labels]
         """
-        signature = self.dataset.signature
+        signature = self.signature
         for k in signature:
             signature[k]["shape"] = prepend_none(signature[k]["shape"])
-        if "x" in signature:
+        if "x" in signature and self.mask:
+            # In case we have a mask, the mask is concatenated to the features
             signature["x"]["shape"] = signature["x"]["shape"][:-1] + (
                 signature["x"]["shape"][-1] + 1,
             )
@@ -430,6 +450,9 @@ def tf_signature(self):
         if "e" in signature:
             # Edge attributes have an extra None dimension in batch mode
             signature["e"]["shape"] = prepend_none(signature["e"]["shape"])
+        if "y" in signature and self.node_level:
+            # Node labels have an extra None dimension
+            signature["y"]["shape"] = prepend_none(signature["y"]["shape"])
 
         return to_tf_signature(signature)
 
@@ -454,10 +477,12 @@ class PackedBatchLoader(BatchLoader):
     **Arguments**
 
     - `dataset`: a graph Dataset;
-    - `batch_size`: size of the mini-batches;
-    - `epochs`: number of epochs to iterate over the dataset. By default (`None`)
+    - `mask`: bool, whether to add a mask to the node features;
+    - `batch_size`: int, size of the mini-batches;
+    - `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
     iterates indefinitely;
-    - `shuffle`: whether to shuffle the data at the start of each epoch.
+    - `shuffle`: bool, whether to shuffle the data at the start of each epoch;
+    - `node_level`: bool, if `True` pad the labels along the node dimension;
 
     **Output**
 
@@ -469,22 +494,35 @@ class PackedBatchLoader(BatchLoader):
     - `a`: adjacency matrices of shape `[batch, n_max, n_max]`;
     - `e`: edge attributes of shape `[batch, n_max, n_max, n_edge_features]`.
 
-    `labels` have shape `[batch, ..., n_labels]`.
+    `labels` have shape `[batch, n_labels]` if `node_level=False` or
+    `[batch, n_max, n_labels]` otherwise.
     """
 
-    def __init__(self, dataset, mask=False, batch_size=1, epochs=None, shuffle=True):
+    def __init__(
+        self,
+        dataset,
+        mask=False,
+        batch_size=1,
+        epochs=None,
+        shuffle=True,
+        node_level=False,
+    ):
         super().__init__(
-            dataset, mask=mask, batch_size=batch_size, epochs=epochs, shuffle=shuffle
+            dataset,
+            mask=mask,
+            batch_size=batch_size,
+            epochs=epochs,
+            shuffle=shuffle,
+            node_level=node_level,
         )
 
         # Drop the Dataset container and work on packed tensors directly
         packed = self.pack(self.dataset)
 
         y = packed.pop("y_list", None)
         if y is not None:
-            y = np.array(y)
+            y = collate_labels_batch(y, node_level=self.node_level)
 
-        self.signature = dataset.signature
         self.dataset = to_batch(**packed, mask=mask)
         if y is not None:
             self.dataset += (y,)
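
`PackedBatchLoader` now simply forwards the new arguments to `BatchLoader`, which is also why its own `tf_signature` override can be deleted in the next hunk. A usage sketch, assuming memory is not a constraint:

```py
from spektral.data import PackedBatchLoader

# Pads the whole dataset once up front: faster iteration than
# BatchLoader, at the cost of keeping the padded tensors in memory.
loader = PackedBatchLoader(dataset, batch_size=32, mask=True, node_level=True)
```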
@@ -501,29 +539,6 @@ def collate(self, batch):
         else:
             return batch[:-1], batch[-1]
 
-    def tf_signature(self):
-        """
-        Adjacency matrix has shape [batch, n_nodes, n_nodes]
-        Node features have shape [batch, n_nodes, n_node_features]
-        Edge features have shape [batch, n_nodes, n_nodes, n_edge_features]
-        Targets have shape [batch, ..., n_labels]
-        """
-        signature = self.signature
-        for k in signature:
-            signature[k]["shape"] = prepend_none(signature[k]["shape"])
-        if "x" in signature:
-            signature["x"]["shape"] = signature["x"]["shape"][:-1] + (
-                signature["x"]["shape"][-1] + 1,
-            )
-        if "a" in signature:
-            # Adjacency matrix in batch mode is dense
-            signature["a"]["spec"] = tf.TensorSpec
-        if "e" in signature:
-            # Edge attributes have an extra None dimension in batch mode
-            signature["e"]["shape"] = prepend_none(signature["e"]["shape"])
-
-        return to_tf_signature(signature)
-
     @property
     def steps_per_epoch(self):
         if len(self.dataset) > 0:
@@ -544,10 +559,10 @@ class MixedLoader(Loader):
     **Arguments**
 
     - `dataset`: a graph Dataset;
-    - `batch_size`: size of the mini-batches;
-    - `epochs`: number of epochs to iterate over the dataset. By default (`None`)
+    - `batch_size`: int, size of the mini-batches;
+    - `epochs`: int, number of epochs to iterate over the dataset. By default (`None`)
     iterates indefinitely;
-    - `shuffle`: whether to shuffle the data at the start of each epoch.
+    - `shuffle`: bool, whether to shuffle the data at the start of each epoch.
 
     **Output**