diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 00000000..80a88a16 --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,28 @@ +name: Run Unit Test via Pytest + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 + - name: Test with pytest + run: | + coverage run -m pytest -v -s -p no:warnings + - name: Generate Coverage Report + run: | + coverage report -m diff --git a/benchmarking/gcn/seastar/train.py b/benchmarking/gcn/seastar/train.py index b426d747..0e5e9cb2 100644 --- a/benchmarking/gcn/seastar/train.py +++ b/benchmarking/gcn/seastar/train.py @@ -6,22 +6,22 @@ import torch.nn as nn import torch.nn.functional as F from stgraph.graph.static.StaticGraph import StaticGraph -from stgraph.dataset.CoraDataLoader import CoraDataLoader -from utils import to_default_device, accuracy +from stgraph.dataset import CoraDataLoader +from utils import to_default_device, accuracy, generate_test_mask, generate_train_mask from model import GCN -def main(args): +def main(args): cora = CoraDataLoader(verbose=True) # To account for the initial CUDA Context object for pynvml - tmp = StaticGraph([(0,0)], [1], 1) - + tmp = StaticGraph([(0, 0)], [1], 1) + features = torch.FloatTensor(cora.get_all_features()) labels = torch.LongTensor(cora.get_all_targets()) - - train_mask = cora.get_train_mask() - test_mask = cora.get_test_mask() + + train_mask = generate_train_mask(len(features), 0.6) + test_mask = generate_test_mask(len(features), 0.6) train_mask = torch.BoolTensor(train_mask) test_mask = torch.BoolTensor(test_mask) @@ -47,7 +47,9 @@ def main(args): # A simple sanity check print("Measuerd Graph Size (pynvml): ", graph_mem, " B", flush=True) - print("Measuerd Graph Size (pynvml): ", (graph_mem)/(1024**2), " MB", flush=True) + print( + "Measuerd Graph Size (pynvml): ", (graph_mem) / (1024**2), " MB", flush=True + ) # normalization degs = torch.from_numpy(g.weighted_in_degrees()).type(torch.int32) @@ -58,23 +60,18 @@ def main(args): num_feats = features.shape[1] n_classes = int(max(labels) - min(labels) + 1) - print("Num Classes: ",n_classes) - - model = GCN(g, - num_feats, - args.num_hidden, - n_classes, - args.num_layers, - F.relu) - + print("Num Classes: ", n_classes) + + model = GCN(g, num_feats, args.num_hidden, n_classes, args.num_layers, F.relu) + if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer - optimizer = torch.optim.Adam(model.parameters(), - lr=args.lr, - weight_decay=args.weight_decay) + optimizer = torch.optim.Adam( + model.parameters(), lr=args.lr, weight_decay=args.weight_decay + ) # initialize graph dur = [] @@ -106,40 +103,45 @@ def main(args): dur.append(run_time_this_epoch) train_acc = accuracy(logits[train_mask], labels[train_mask]) - print('Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb '.format( - epoch, run_time_this_epoch, train_acc, (now_mem * 1.0 / (1024**2)) - )) + print( + "Epoch {:05d} | Time(s) {:.4f} | train_acc {:.6f} | Used_Memory {:.6f} mb ".format( + epoch, run_time_this_epoch, train_acc, (now_mem * 1.0 / (1024**2)) + ) + ) - Used_memory /= (1024**3) - print('^^^{:6f}^^^{:6f}'.format(Used_memory, np.mean(dur))) + Used_memory /= 1024**3 + print("^^^{:6f}^^^{:6f}".format(Used_memory, np.mean(dur))) -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='GCN') +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="GCN") # COMMENT IF SNOOP IS TO BE ENABLED snoop.install(enabled=False) - parser.add_argument("--dropout", type=float, default=0.5, - help="dropout probability") - parser.add_argument("--dataset", type=str, - help="Datset to train your model") - parser.add_argument("--gpu", type=int, default=0, - help="gpu") - parser.add_argument("--lr", type=float, default=1e-2, - help="learning rate") - parser.add_argument("--num_epochs", type=int, default=200, - help="number of training epochs") - parser.add_argument("--num_hidden", type=int, default=16, - help="number of hidden gcn units") - parser.add_argument("--num_layers", type=int, default=1, - help="number of hidden gcn layers") - parser.add_argument("--weight-decay", type=float, default=5e-4, - help="Weight for L2 loss") - parser.add_argument("--self-loop", action='store_true', - help="graph self-loop (default=False)") + parser.add_argument( + "--dropout", type=float, default=0.5, help="dropout probability" + ) + parser.add_argument("--dataset", type=str, help="Datset to train your model") + parser.add_argument("--gpu", type=int, default=0, help="gpu") + parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") + parser.add_argument( + "--num_epochs", type=int, default=200, help="number of training epochs" + ) + parser.add_argument( + "--num_hidden", type=int, default=16, help="number of hidden gcn units" + ) + parser.add_argument( + "--num_layers", type=int, default=1, help="number of hidden gcn layers" + ) + parser.add_argument( + "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" + ) + parser.add_argument( + "--self-loop", action="store_true", help="graph self-loop (default=False)" + ) parser.set_defaults(self_loop=False) args = parser.parse_args() print(args) - main(args) \ No newline at end of file + main(args) diff --git a/benchmarking/gcn/seastar/utils.py b/benchmarking/gcn/seastar/utils.py index dfae36be..0f2d52c9 100644 --- a/benchmarking/gcn/seastar/utils.py +++ b/benchmarking/gcn/seastar/utils.py @@ -1,21 +1,32 @@ import torch + def accuracy(logits, labels): _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) + # GPU | CPU def get_default_device(): - if torch.cuda.is_available(): - return torch.device('cuda:0') + return torch.device("cuda:0") else: - return torch.device('cpu') + return torch.device("cpu") + def to_default_device(data): - - if isinstance(data,(list,tuple)): - return [to_default_device(x,get_default_device()) for x in data] - - return data.to(get_default_device(),non_blocking = True) \ No newline at end of file + if isinstance(data, (list, tuple)): + return [to_default_device(x, get_default_device()) for x in data] + + return data.to(get_default_device(), non_blocking=True) + + +def generate_train_mask(size: int, train_test_split: int) -> list: + cutoff = size * train_test_split + return [1 if i < cutoff else 0 for i in range(size)] + + +def generate_test_mask(size: int, train_test_split: int) -> list: + cutoff = size * train_test_split + return [0 if i < cutoff else 1 for i in range(size)] diff --git a/pyproject.toml b/pyproject.toml index d91a7af9..eb772f94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,8 @@ dependencies = [ [project.optional-dependencies] dev = [ "black", - "pytest", + "pytest >= 7.4.3", + "pytest-cov >= 4.1.0", "tqdm >= 4.64.1", "build >= 0.10.0", "gdown >= 4.6.6", diff --git a/stgraph/dataset/dynamic/england_covid_dataloader.py b/stgraph/dataset/dynamic/england_covid_dataloader.py index 13636fa3..d142cf78 100644 --- a/stgraph/dataset/dynamic/england_covid_dataloader.py +++ b/stgraph/dataset/dynamic/england_covid_dataloader.py @@ -53,6 +53,7 @@ class EnglandCovidDataLoader(STGraphDynamicDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( @@ -65,6 +66,16 @@ def __init__( """COVID-19 cases in England's NUTS3 regions.""" super().__init__() + if not isinstance(lags, int): + raise TypeError("lags must be of type int") + if lags < 0: + raise ValueError("lags must be a positive integer") + + if cutoff_time is not None and not isinstance(cutoff_time, int): + raise TypeError("cutoff_time must be of type int") + if cutoff_time is not None and cutoff_time < 0: + raise ValueError("cutoff_time must be a positive integer") + self.name = "England_COVID" self._url = "https://raw.githubusercontent.com/benedekrozemberczki/pytorch_geometric_temporal/master/dataset/england_covid.json" self._verbose = verbose diff --git a/stgraph/dataset/static/cora_dataloader.py b/stgraph/dataset/static/cora_dataloader.py index 5e484f71..7f85b48c 100644 --- a/stgraph/dataset/static/cora_dataloader.py +++ b/stgraph/dataset/static/cora_dataloader.py @@ -61,6 +61,7 @@ class CoraDataLoader(STGraphStaticDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( diff --git a/stgraph/dataset/stgraph_dataset.py b/stgraph/dataset/stgraph_dataset.py index 8e958e46..bef40db3 100644 --- a/stgraph/dataset/stgraph_dataset.py +++ b/stgraph/dataset/stgraph_dataset.py @@ -67,6 +67,7 @@ def __init__(self: STGraphDataset) -> None: _load_dataset() Loads the dataset from cache + """ self.name = "" self.gdata = {} @@ -106,6 +107,7 @@ def _has_dataset_cache(self: STGraphDataset) -> bool: # The dataset is cached, continue cached operations else: # The dataset is not cached, continue load and save operations + """ user_home_dir = os.path.expanduser("~") stgraph_dir = user_home_dir + "/.stgraph" @@ -128,6 +130,7 @@ def _get_cache_file_path(self: STGraphDataset) -> str: ------- str The absolute path of the cached dataset file + """ user_home_dir = os.path.expanduser("~") stgraph_dir = user_home_dir + "/.stgraph" diff --git a/stgraph/dataset/temporal/hungarycp_dataloader.py b/stgraph/dataset/temporal/hungarycp_dataloader.py index e94e356f..8bbf5ada 100644 --- a/stgraph/dataset/temporal/hungarycp_dataloader.py +++ b/stgraph/dataset/temporal/hungarycp_dataloader.py @@ -58,6 +58,7 @@ class HungaryCPDataLoader(STGraphTemporalDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( diff --git a/stgraph/dataset/temporal/metrla_dataloader.py b/stgraph/dataset/temporal/metrla_dataloader.py index a454784d..fef0bc14 100644 --- a/stgraph/dataset/temporal/metrla_dataloader.py +++ b/stgraph/dataset/temporal/metrla_dataloader.py @@ -67,6 +67,7 @@ class METRLADataLoader(STGraphTemporalDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( diff --git a/stgraph/dataset/temporal/montevideobus_dataloader.py b/stgraph/dataset/temporal/montevideobus_dataloader.py index 5ad608d8..e51a0a8c 100644 --- a/stgraph/dataset/temporal/montevideobus_dataloader.py +++ b/stgraph/dataset/temporal/montevideobus_dataloader.py @@ -66,6 +66,7 @@ class MontevideoBusDataLoader(STGraphTemporalDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( diff --git a/stgraph/dataset/temporal/pedalme_dataloader.py b/stgraph/dataset/temporal/pedalme_dataloader.py index dbb3edf1..d3371f43 100644 --- a/stgraph/dataset/temporal/pedalme_dataloader.py +++ b/stgraph/dataset/temporal/pedalme_dataloader.py @@ -58,6 +58,7 @@ class PedalMeDataLoader(STGraphTemporalDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( diff --git a/stgraph/dataset/temporal/wikimath_dataloader.py b/stgraph/dataset/temporal/wikimath_dataloader.py index 35eabf0f..3776b454 100644 --- a/stgraph/dataset/temporal/wikimath_dataloader.py +++ b/stgraph/dataset/temporal/wikimath_dataloader.py @@ -65,6 +65,7 @@ class WikiMathDataLoader(STGraphTemporalDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( diff --git a/stgraph/dataset/temporal/windmilloutput_dataloader.py b/stgraph/dataset/temporal/windmilloutput_dataloader.py index a6382f4b..5221f306 100644 --- a/stgraph/dataset/temporal/windmilloutput_dataloader.py +++ b/stgraph/dataset/temporal/windmilloutput_dataloader.py @@ -82,6 +82,7 @@ class WindmillOutputDataLoader(STGraphTemporalDataset): The name of the dataset. gdata : dict Graph meta data. + """ def __init__( diff --git a/tests/dataset/dynamic/test_EnglandCovidDataLoader.py b/tests/dataset/dynamic/test_EnglandCovidDataLoader.py index b683edc6..b9ce7865 100644 --- a/tests/dataset/dynamic/test_EnglandCovidDataLoader.py +++ b/tests/dataset/dynamic/test_EnglandCovidDataLoader.py @@ -1,5 +1,4 @@ -import numpy as np -import urllib.request +import pytest from stgraph.dataset import EnglandCovidDataLoader @@ -51,14 +50,26 @@ def EnglandCovidDataCheck(eng_covid: EnglandCovidDataLoader): def test_EnglandCovidDataLoader(): eng_covid = EnglandCovidDataLoader(verbose=True) eng_covid_1 = EnglandCovidDataLoader(cutoff_time=30) - eng_covid_2 = EnglandCovidDataLoader( - url="https://raw.githubusercontent.com/benedekrozemberczki/pytorch_geometric_temporal/master/dataset/england_covid.json" - ) eng_covid_3 = EnglandCovidDataLoader(lags=12) - # eng_covid_4 = EnglandCovidDataLoader(redownload=True) + eng_covid_4 = EnglandCovidDataLoader(redownload=True) EnglandCovidDataCheck(eng_covid) EnglandCovidDataCheck(eng_covid_1) - # EnglandCovidDataCheck(eng_covid_2) EnglandCovidDataCheck(eng_covid_3) - # EnglandCovidDataCheck(eng_covid_4) + EnglandCovidDataCheck(eng_covid_4) + + with pytest.raises(TypeError) as exec: + EnglandCovidDataLoader(lags="lags") + assert str(exec.value) == "lags must be of type int" + + with pytest.raises(ValueError) as exec: + EnglandCovidDataLoader(lags=-1) + assert str(exec.value) == "lags must be a positive integer" + + with pytest.raises(TypeError) as exec: + EnglandCovidDataLoader(cutoff_time="time") + assert str(exec.value) == "cutoff_time must be of type int" + + with pytest.raises(ValueError) as exec: + EnglandCovidDataLoader(cutoff_time=-1) + assert str(exec.value) == "cutoff_time must be a positive integer" diff --git a/tests/dataset/static/test_CoraDataLoader.py b/tests/dataset/static/test_CoraDataLoader.py index ae4a27c5..97c644bf 100644 --- a/tests/dataset/static/test_CoraDataLoader.py +++ b/tests/dataset/static/test_CoraDataLoader.py @@ -19,11 +19,8 @@ def CoraDataCheck(cora: CoraDataLoader): def test_CoraDataLoader(): - cora = CoraDataLoader() - - cora_1 = CoraDataLoader( - url="https://raw.githubusercontent.com/bfGraph/STGraph-Datasets/main/cora.json", - ) + cora = CoraDataLoader(verbose=True) + cora_1 = CoraDataLoader(redownload=True) CoraDataCheck(cora) CoraDataCheck(cora_1) diff --git a/tests/dataset/temporal/test_HungaryCPDataLoader.py b/tests/dataset/temporal/test_HungaryCPDataLoader.py index 5fc11ad8..5dfd7bf7 100644 --- a/tests/dataset/temporal/test_HungaryCPDataLoader.py +++ b/tests/dataset/temporal/test_HungaryCPDataLoader.py @@ -28,14 +28,12 @@ def test_HungaryCPDataLoader(): hungary_1 = HungaryCPDataLoader(verbose=True) hungary_2 = HungaryCPDataLoader(lags=6) hungary_3 = HungaryCPDataLoader(cutoff_time=100) - hungary_4 = HungaryCPDataLoader( - url="https://raw.githubusercontent.com/bfGraph/STGraph-Datasets/main/HungaryCP.json" - ) + hungary_4 = HungaryCPDataLoader(redownload=True) HungaryCPDataChecker(hungary_1) HungaryCPDataChecker(hungary_2) HungaryCPDataChecker(hungary_3) - # HungaryCPDataChecker(hungary_4) + HungaryCPDataChecker(hungary_4) with pytest.raises(TypeError) as exec: HungaryCPDataLoader(lags="lags") diff --git a/tests/dataset/temporal/test_METRLADataLoader.py b/tests/dataset/temporal/test_METRLADataLoader.py index de07ad3f..00963745 100644 --- a/tests/dataset/temporal/test_METRLADataLoader.py +++ b/tests/dataset/temporal/test_METRLADataLoader.py @@ -25,12 +25,9 @@ def METRLADataCheck(metrla: METRLADataLoader): def test_METRLADataLoader(): metrla_1 = METRLADataLoader(verbose=True) - metrla_2 = METRLADataLoader( - url="https://raw.githubusercontent.com/bfGraph/STGraph-Datasets/main/METRLA.json" - ) + metrla_2 = METRLADataLoader(redownload=True) metrla_3 = METRLADataLoader(num_timesteps_in=8, num_timesteps_out=8) metrla_4 = METRLADataLoader(cutoff_time=50) - # metrla_5 = METRLADataLoader(redownload=True) METRLADataCheck(metrla_1) METRLADataCheck(metrla_2) diff --git a/tests/dataset/temporal/test_MontevideoBusDataLoader.py b/tests/dataset/temporal/test_MontevideoBusDataLoader.py index 052b94f0..3562b69e 100644 --- a/tests/dataset/temporal/test_MontevideoBusDataLoader.py +++ b/tests/dataset/temporal/test_MontevideoBusDataLoader.py @@ -36,12 +36,9 @@ def MontevideoBusDataCheck(monte: MontevideoBusDataLoader): def test_MontevideoBusDataLoader(): monte_1 = MontevideoBusDataLoader(verbose=True) - monte_2 = MontevideoBusDataLoader( - url="https://raw.githubusercontent.com/bfGraph/STGraph-Datasets/main/montevideobus.json" - ) + monte_2 = MontevideoBusDataLoader(redownload=True) monte_3 = MontevideoBusDataLoader(lags=6) monte_4 = MontevideoBusDataLoader(cutoff_time=50) - # monte_5 = MontevideoBusDataLoader(redownload=True) MontevideoBusDataCheck(monte_1) MontevideoBusDataCheck(monte_2) diff --git a/tests/dataset/temporal/test_PedalMeDataLoader.py b/tests/dataset/temporal/test_PedalMeDataLoader.py index dee56646..930060b7 100644 --- a/tests/dataset/temporal/test_PedalMeDataLoader.py +++ b/tests/dataset/temporal/test_PedalMeDataLoader.py @@ -29,18 +29,14 @@ def PedalMeDataCheck(pedal: PedalMeDataLoader): def test_PedalMeDataLoader(): pedal_1 = PedalMeDataLoader(verbose=True) - pedal_2 = PedalMeDataLoader( - url="https://raw.githubusercontent.com/bfGraph/STGraph-Datasets/main/pedalme.json" - ) + pedal_2 = PedalMeDataLoader(redownload=True) pedal_3 = PedalMeDataLoader(lags=6) pedal_4 = PedalMeDataLoader(cutoff_time=20) - # pedal_5 = PedalMeDataLoader(redownload=True) PedalMeDataCheck(pedal_1) PedalMeDataCheck(pedal_2) PedalMeDataCheck(pedal_3) PedalMeDataCheck(pedal_4) - # PedalMeDataCheck(pedal_5) with pytest.raises(TypeError) as exec: PedalMeDataLoader(lags="lags") diff --git a/tests/dataset/temporal/test_WikiMathDataLoader.py b/tests/dataset/temporal/test_WikiMathDataLoader.py index 4e1f5f30..ef002837 100644 --- a/tests/dataset/temporal/test_WikiMathDataLoader.py +++ b/tests/dataset/temporal/test_WikiMathDataLoader.py @@ -26,18 +26,14 @@ def WikiMathDataCheck(wiki: WikiMathDataLoader): def test_WikiMathDataLoader(): wiki_1 = WikiMathDataLoader(verbose=True) - wiki_2 = WikiMathDataLoader( - url="https://raw.githubusercontent.com/bfGraph/STGraph-Datasets/main/wikivital_mathematics.json" - ) + wiki_2 = WikiMathDataLoader(redownload=True) wiki_3 = WikiMathDataLoader(lags=4) wiki_4 = WikiMathDataLoader(cutoff_time=500) - # wiki_5 = WikiMathDataLoader(redownload=True) WikiMathDataCheck(wiki_1) WikiMathDataCheck(wiki_2) WikiMathDataCheck(wiki_3) WikiMathDataCheck(wiki_4) - # WikiMathDataCheck(wiki_5) with pytest.raises(TypeError) as exec: WikiMathDataLoader(lags="lags") diff --git a/tests/dataset/temporal/test_WindmillOutputDataLoader.py b/tests/dataset/temporal/test_WindmillOutputDataLoader.py index 693486e5..368935fc 100644 --- a/tests/dataset/temporal/test_WindmillOutputDataLoader.py +++ b/tests/dataset/temporal/test_WindmillOutputDataLoader.py @@ -38,24 +38,16 @@ def WindmillOutputDataCheck(wind: WindmillOutputDataLoader): def test_WindmillOutputDataLoader(): - urls = { - "large": "https://graphmining.ai/temporal_datasets/windmill_output.json", - "medium": "https://graphmining.ai/temporal_datasets/windmill_output_medium.json", - "small": "https://graphmining.ai/temporal_datasets/windmill_output_small.json", - } - for size in ["large", "medium", "small"]: wind_1 = WindmillOutputDataLoader(verbose=True, size=size) - wind_2 = WindmillOutputDataLoader(url=urls[size], size=size) + wind_2 = WindmillOutputDataLoader(redownload=True, size=size) wind_3 = WindmillOutputDataLoader(lags=4, size=size) wind_4 = WindmillOutputDataLoader(cutoff_time=100, size=size) - # wind_5 = WindmillOutputDataLoader(redownload=True, size=size) WindmillOutputDataCheck(wind_1) WindmillOutputDataCheck(wind_2) WindmillOutputDataCheck(wind_3) WindmillOutputDataCheck(wind_4) - # WindmillOutputDataCheck(wind_5) with pytest.raises(TypeError) as exec: WindmillOutputDataLoader(lags="lags", size=size) @@ -72,3 +64,14 @@ def test_WindmillOutputDataLoader(): with pytest.raises(ValueError) as exec: WindmillOutputDataLoader(cutoff_time=-1, size=size) assert str(exec.value) == "cutoff_time must be a positive integer" + + with pytest.raises(TypeError) as exec: + WindmillOutputDataLoader(size=1) + assert str(exec.value) == "size must be of type string" + + with pytest.raises(ValueError) as exec: + WindmillOutputDataLoader(size="big") + assert ( + str(exec.value) == "size must take either of the following values : " + "large, medium or small" + )