diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml
index d12b4f50..41ab56d7 100644
--- a/.github/workflows/ruff.yaml
+++ b/.github/workflows/ruff.yaml
@@ -26,6 +26,9 @@ jobs:
         cd stgraph/graph
         ruff check .
         cd ../../
-        cd stgraph/benchmark_tools
+        cd stgraph/utils
         ruff check .
-        cd ../../
\ No newline at end of file
+        cd ../../
+        cd stgraph/nn/pytorch/static
+        ruff check gcn_conv.py
+        cd ../../../../
\ No newline at end of file
diff --git a/README.md b/README.md
index a93ff1e0..8373a0e6 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@
 
 [![Documentation Status](https://readthedocs.org/projects/stgraph/badge/?version=latest)](https://stgraph.readthedocs.io/en/latest/?badge=latest)
 [![TGL Workshop - @ NeurIPS'23](https://img.shields.io/badge/TGL_Workshop-%40_NeurIPS'23-6d4a8f)](https://neurips.cc/virtual/2023/76335)
+[![GrAPL - @IPDPS'24](https://img.shields.io/badge/GrAPL-%40IPDPS'24-282792)](https://hpc.pnl.gov/grapl/index.html)
 [![PyPI - 1.0.0](https://img.shields.io/static/v1?label=PyPI&message=1.0.0&color=%23ffdf76&logo=Python)](https://pypi.org/project/stgraph/)
 
 <div align="center">
diff --git a/benchmarking/dynamic-temporal-tgcn/pygt/train.py b/benchmarking/dynamic-temporal-tgcn/pygt/train.py
index 4a67e104..8cc093a6 100644
--- a/benchmarking/dynamic-temporal-tgcn/pygt/train.py
+++ b/benchmarking/dynamic-temporal-tgcn/pygt/train.py
@@ -9,7 +9,7 @@
 import os
 from model import PyGT_TGCN
 from stgraph.dataset.LinkPredDataLoader import LinkPredDataLoader
-from stgraph.benchmark_tools.table import BenchmarkTable
+from stgraph.utils import DataTable
 from utils import to_default_device, get_default_device
 
 def main(args):
@@ -71,7 +71,7 @@ def main(args):
     # metrics
     dur = []
     max_gpu = []
-    table = BenchmarkTable(f"(PyGT Dynamic-Temporal) TGCN on {dataloader.name} dataset", ["Epoch", "Time(s)", "MSE", "Used GPU Memory (Max MB)"])
+    table = DataTable(f"(PyGT Dynamic-Temporal) TGCN on {dataloader.name} dataset", ["Epoch", "Time(s)", "MSE", "Used GPU Memory (Max MB)"])
 
     try:
         # train
diff --git a/benchmarking/dynamic-temporal-tgcn/seastar/train.py b/benchmarking/dynamic-temporal-tgcn/seastar/train.py
index c79fcc2e..de46f81a 100644
--- a/benchmarking/dynamic-temporal-tgcn/seastar/train.py
+++ b/benchmarking/dynamic-temporal-tgcn/seastar/train.py
@@ -8,7 +8,8 @@
 import sys
 import os
 from stgraph.dataset.LinkPredDataLoader import LinkPredDataLoader
-from stgraph.benchmark_tools.table import BenchmarkTable
+from stgraph.benchmark_tools.table import DataTable
+from stgraph.utils import DataTable
 from stgraph.graph.dynamic.gpma.gpma_graph import GPMAGraph
 from stgraph.graph.dynamic.pcsr.pcsr_graph import PCSRGraph
 from stgraph.graph.dynamic.naive.naive_graph import NaiveGraph
@@ -159,7 +160,7 @@ def main(args):
     # metrics
     dur = []
     max_gpu = []
-    table = BenchmarkTable(
+    table = DataTable(
         f"(STGraph Dynamic-Temporal) TGCN on {dataloader.name} dataset",
         [
             "Epoch",
diff --git a/benchmarking/gat/seastar/model.py b/benchmarking/gat/seastar/model.py
index 75826088..3434d316 100644
--- a/benchmarking/gat/seastar/model.py
+++ b/benchmarking/gat/seastar/model.py
@@ -1,5 +1,5 @@
 import torch.nn as nn
-from stgraph.nn.pytorch.gat_conv import GATConv
+from stgraph.nn.pytorch.static.gat_conv import GATConv
 
 class GAT(nn.Module):
     def __init__(self,
diff --git a/benchmarking/gcn/seastar/model.py b/benchmarking/gcn/seastar/model.py
index f9554d9d..231e78c8 100644
--- a/benchmarking/gcn/seastar/model.py
+++ b/benchmarking/gcn/seastar/model.py
@@ -1,5 +1,5 @@
 import torch.nn as nn
-from stgraph.nn.pytorch.graph_conv import GraphConv
+from stgraph.nn.pytorch.static.gcn_conv import GCNConv
 
 class GCN(nn.Module):
     def __init__(self,
@@ -13,12 +13,12 @@ def __init__(self,
         self.g = g
         self.layers = nn.ModuleList()
         # input layer
-        self.layers.append(GraphConv(in_feats, n_hidden, activation))
+        self.layers.append(GCNConv(in_feats, n_hidden, activation))
         # hidden layers
         for i in range(n_layers - 1):
-            self.layers.append(GraphConv(n_hidden, n_hidden, activation))
+            self.layers.append(GCNConv(n_hidden, n_hidden, activation))
         # output layer
-        self.layers.append(GraphConv(n_hidden, n_classes, None))
+        self.layers.append(GCNConv(n_hidden, n_classes, None))
 
     def forward(self, g, features):
         h = features
diff --git a/benchmarking/results/result_generator_static.py b/benchmarking/results/result_generator_static.py
index fed35888..c6fcb4e1 100644
--- a/benchmarking/results/result_generator_static.py
+++ b/benchmarking/results/result_generator_static.py
@@ -1,7 +1,7 @@
 import csv
 from rich import inspect
 
-from stgraph.benchmark_tools.table import BenchmarkTable
+from stgraph.utils import DataTable
 
 all_results = []
 
@@ -43,7 +43,7 @@ def get_dataset_name(parameters):
     
             
     # forming the Table 1: Time measurements for varying feature sizes 
-    table_1 = BenchmarkTable(f"Time measurements (s) for varying feature sizes - {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
+    table_1 = DataTable(f"Time measurements (s) for varying feature sizes - {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
     pygt_results_table_1 = {}
     seastar_results_table_1 = {}
 
@@ -72,7 +72,7 @@ def get_dataset_name(parameters):
         table_1.display()
 
     # forming the Table 2: Memory measurements for varying feature sizes 
-    table_2 = BenchmarkTable(f"Memory taken (MB) for varying feature sizes - {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
+    table_2 = DataTable(f"Memory taken (MB) for varying feature sizes - {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
     pygt_results_table_2 = {}
     seastar_results_table_2 = {}
 
@@ -103,7 +103,7 @@ def get_dataset_name(parameters):
 for dataset in dataset_names:
         
     # forming the Table 1: Time measurements for varying sequence lengths
-    table_1 = BenchmarkTable(f"Time measurements (s) for varying sequence lengths - {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
+    table_1 = DataTable(f"Time measurements (s) for varying sequence lengths - {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
     pygt_results_table_1 = {}
     seastar_results_table_1 = {}
 
@@ -132,7 +132,7 @@ def get_dataset_name(parameters):
         table_1.display()
 
     # forming the Table 2: Memory measurements for varying sequence lengths
-    table_2 = BenchmarkTable(f"Memory taken (MB) for varying sequence lengths- {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
+    table_2 = DataTable(f"Memory taken (MB) for varying sequence lengths- {dataset}", ["Hidden Dimension", "PyG-T", "STGraph"])
     pygt_results_table_2 = {}
     seastar_results_table_2 = {}
 
diff --git a/benchmarking/static-temporal-tgcn/pygt/train.py b/benchmarking/static-temporal-tgcn/pygt/train.py
index 16f35de7..b268fb5e 100644
--- a/benchmarking/static-temporal-tgcn/pygt/train.py
+++ b/benchmarking/static-temporal-tgcn/pygt/train.py
@@ -15,7 +15,7 @@
 from stgraph.dataset.METRLADataLoader import METRLADataLoader
 from stgraph.dataset.MontevideoBusDataLoader import MontevideoBusDataLoader
 
-from stgraph.benchmark_tools.table import BenchmarkTable
+from stgraph.utils import DataTable
 from utils import to_default_device, get_default_device
 
 def main(args):
@@ -74,7 +74,7 @@ def main(args):
     # metrics
     dur = []
     max_gpu = []
-    table = BenchmarkTable(f"(PyGT Static-Temporal) TGCN on {dataloader.name} dataset", ["Epoch", "Time(s)", "MSE", "Used GPU Memory (Max MB)"])
+    table = DataTable(f"(PyGT Static-Temporal) TGCN on {dataloader.name} dataset", ["Epoch", "Time(s)", "MSE", "Used GPU Memory (Max MB)"])
 
     try:
         # train
diff --git a/benchmarking/static-temporal-tgcn/seastar/train.py b/benchmarking/static-temporal-tgcn/seastar/train.py
index 1b7d1950..f991b486 100644
--- a/benchmarking/static-temporal-tgcn/seastar/train.py
+++ b/benchmarking/static-temporal-tgcn/seastar/train.py
@@ -18,7 +18,7 @@
 from stgraph.dataset.METRLADataLoader import METRLADataLoader
 from stgraph.dataset.MontevideoBusDataLoader import MontevideoBusDataLoader
 
-from stgraph.benchmark_tools.table import BenchmarkTable
+from stgraph.utils import DataTable
 from utils import to_default_device, get_default_device
 
 from rich import inspect
@@ -135,7 +135,7 @@ def main(args):
     # metrics
     dur = []
     max_gpu = []
-    table = BenchmarkTable(
+    table = DataTable(
         f"(STGraph Static-Temporal) TGCN on {dataloader.name} dataset",
         ["Epoch", "Time(s)", "MSE", "Used GPU Memory (Max MB)"],
     )
diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css
new file mode 100644
index 00000000..26b8da26
--- /dev/null
+++ b/docs/source/_static/custom.css
@@ -0,0 +1,4 @@
+.wy-table-responsive table td {
+    word-wrap: break-word;
+    white-space: normal;
+}
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 567a49ff..5c2f10c8 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -59,3 +59,7 @@
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
+
+html_css_files = [
+    'custom.css',
+]
\ No newline at end of file
diff --git a/docs/source/generated/stgraph.benchmark_tools.BenchmarkTable.rst b/docs/source/generated/stgraph.benchmark_tools.BenchmarkTable.rst
new file mode 100644
index 00000000..5cbd8dfa
--- /dev/null
+++ b/docs/source/generated/stgraph.benchmark_tools.BenchmarkTable.rst
@@ -0,0 +1,11 @@
+﻿.. role:: hidden
+    :class: hidden-section
+.. currentmodule:: stgraph.benchmark_tools
+
+
+BenchmarkTable
+==============
+
+.. autoclass:: BenchmarkTable
+    :show-inheritance:
+    :members:
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 4b9eb52a..6c3f86ec 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -32,7 +32,7 @@ Explore the STGraph documentation and tutorials to get started with writing and
    :caption: Tutorials
    :glob:
 
-   tutorials/gnn
+   tutorials/gcn_cora
 
 .. toctree::
    :maxdepth: 1
@@ -42,7 +42,8 @@ Explore the STGraph documentation and tutorials to get started with writing and
    package_reference/stgraph.dataset
    package_reference/stgraph.compiler
    package_reference/stgraph.graph
-   package_reference/stgraph.benchmark_tools
+   package_reference/stgraph.utils
+   package_reference/stgraph.nn
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs/source/package_reference/index.rst b/docs/source/package_reference/index.rst
index a4633d11..dc709ddd 100644
--- a/docs/source/package_reference/index.rst
+++ b/docs/source/package_reference/index.rst
@@ -7,4 +7,4 @@ Package Reference
     stgraph.dataset
     stgraph.compiler
     stgraph.graph
-    stgraph.benchmark_tools
\ No newline at end of file
+    stgraph.utils
\ No newline at end of file
diff --git a/docs/source/package_reference/stgraph.benchmark_tools.rst b/docs/source/package_reference/stgraph.benchmark_tools.rst
deleted file mode 100644
index 24af61e7..00000000
--- a/docs/source/package_reference/stgraph.benchmark_tools.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-stgraph.benchmark_tools
-#######################
-
-.. currentmodule:: stgraph.benchmark_tools
-.. automodule:: stgraph.benchmark_tools
-
-.. autosummary::
-    :toctree: ../generated/
-    :nosignatures:
-    :template: class.rst
-
-    BenchmarkTable
\ No newline at end of file
diff --git a/docs/source/package_reference/stgraph.nn.rst b/docs/source/package_reference/stgraph.nn.rst
new file mode 100644
index 00000000..19b356d4
--- /dev/null
+++ b/docs/source/package_reference/stgraph.nn.rst
@@ -0,0 +1,17 @@
+stgraph.nn
+##########
+
+.. currentmodule:: stgraph.nn
+.. automodule:: stgraph.nn
+
+PyTorch
+=======
+
+GNN layer implementations for the PyTorch backend.
+
+.. autosummary::
+    :toctree: ../generated/
+    :nosignatures:
+    :template: class.rst
+
+    GCNConv
\ No newline at end of file
diff --git a/docs/source/package_reference/stgraph.utils.rst b/docs/source/package_reference/stgraph.utils.rst
new file mode 100644
index 00000000..2351478f
--- /dev/null
+++ b/docs/source/package_reference/stgraph.utils.rst
@@ -0,0 +1,22 @@
+stgraph.utils
+#############
+
+.. currentmodule:: stgraph.utils
+.. automodule:: stgraph.utils
+
+.. autosummary::
+    :toctree: ../generated/
+    :nosignatures:
+    :template: class.rst
+
+    DataTable
+
+Constants
+---------
+
+.. autosummary::
+    :toctree: ../generated/
+    :nosignatures:
+    :template: class.rst
+
+    SizeConstants
\ No newline at end of file
diff --git a/docs/source/tutorials/gcn_cora.rst b/docs/source/tutorials/gcn_cora.rst
new file mode 100644
index 00000000..80365265
--- /dev/null
+++ b/docs/source/tutorials/gcn_cora.rst
@@ -0,0 +1,484 @@
+Cora Publication Prediction using Graph Convolutional Networks (GCN)
+====================================================================
+
+Graph Neural Networks (GNNs) are specially designed to understand and learn from data organized in graphs, 
+making them incredibly versatile and powerful. The Graph Convolutional Network (GCN) is a widely adopted
+model which makes use of both node features and local connections.
+
+In this introductory tutorial, you will be able to 
+
+1. Build a GCN model using STGraph's neural network layers.
+2. Load the Cora dataset provided by STGraph.
+3. Train and evaluate the GCN model for node classification task on the GPU.
+
+You can find the entire source code for this tutorial under the ``tutorials`` directory in our GitHub `repo <https://github.com/bfGraph/STGraph/tree/main/tutorials>`_
+
+The Task At Hand
+----------------
+
+The Cora dataset is a widely used citation network for benchmarking graph-based machine learning algorithms.
+It captures the relationships between 2,708 scientific publications, each classified into one of seven classes,
+where nodes represent individual papers, and edges denote citation links between them. The network consists of
+5,429 connections. Each publication in the dataset is characterized by a binary word vector (0 or 1), 
+signifying the non-existence or existence of the respective word from a dictionary of 1,433 unique words.
+
+Our task is to train a GCN model on the Cora dataset and predict the topic of a publication (node) by considering 
+the neighboring node information and the overall graph structure. Or in other words, Node Classification.
+
+.. figure:: ../_static/Images/tutorials/CoraBalloons.png
+   :alt: CoraBalloons
+   :align: center
+   :width: 400
+
+   Cora Dataset Visualized [1]
+
+.. note::
+
+    This tutorial does not cover the detailed mechanics of how or why a GCN layer works. We
+    will only focus on using the GCN layer provided by STGraph to create a trainable multi-layer GCN model for node classification
+    on the Cora dataset. To learn more about GCN layers, refer to the following resources:
+
+    1. `Semi-Supervised Classification with Graph Convolutional Networks <https://arxiv.org/abs/1609.02907>`_
+    2. `Graph Convolutional Networks (GCNs) made simple <https://youtu.be/2KRAOZIULzw?si=Ryc74igSJ-zVMhjf>`_
+
+
+Code File Structure
+-------------------
+
+We will structure our tutorial with the following 4 files:
+
+.. code-block:: text
+
+    ├── main.py
+    ├── model.py
+    ├── train.py
+    └── utils.py
+
+
+Writing the GCN model
+---------------------
+
+Let's start by building our GCN model within a file named ``model.py``. First, import all the required modules. We will use PyTorch as our backend framework,
+along with the :class:`GCNConv <stgraph.nn.pytorch.static.gcn_conv.GCNConv>` layer from STGraph, which is designed for the PyTorch backend.
+
+.. code-block:: python
+
+    # model.py
+
+    import torch.nn as nn
+    import torch.nn.functional as F
+
+    from stgraph.nn.pytorch.static.gcn_conv import GCNConv
+
+Our main component is the GCN class, which represents the Graph Convolutional Network we will train. Here’s the code to initialize the GCN object
+
+.. code-block:: python
+
+    # model.py
+
+    class GCN(nn.Module):
+        def __init__(
+            self,
+            graph,
+            in_feats: int,
+            n_hidden: int,
+            n_classes: int,
+            n_hidden_layers: int,
+        ) -> None:
+            super(GCN, self).__init__()
+
+            self._graph = graph
+            self._layers = nn.ModuleList()
+
+            # input layer
+            self._layers.append(GCNConv(in_feats, n_hidden, F.relu, bias=True))
+
+            # hidden layers
+            for i in range(n_hidden_layers):
+                self._layers.append(GCNConv(n_hidden, n_hidden, F.relu, bias=True))
+
+            # output layer
+            self._layers.append(GCNConv(n_hidden, n_classes, None, bias=True))
+
+
+First, let's review all the arguments passed to the initialization method
+
+1. **graph**: This should be an STGraph graph object representing our graph dataset. For our tutorial, the Cora dataset will be of type :class:`StaticGraph <stgraph.graph.static.static_graph.StaticGraph>`.
+2. **in_feats**: The size of node features, which would equal the number of neurons in the input layer of our GCN architecture.
+3. **n_hidden**: The number of neurons in each hidden layer. We assume all hidden layers have the same number of neurons.
+4. **n_classes**: The number of classes each node in the Cora dataset can be classified into. It also corresponds to the number of neurons in the output layer of our GCN architecture.
+5. **n_hidden_layers**: The number of hidden layers present in the GCN architecture.
+
+We will initialize a list to hold all the layers of our GCN model. Using ``nn.ModuleList()`` allows for easier management of these layers. To this list,
+we will append ``GCNConv`` layers for the input layer, all the hidden layers, and then the output layer. The in_channel for the input layer is equal to the
+size of a single node feature list and the out_channel for the output layer is equal to the number of classes we are trying to classify the nodes into.
+Note that we use an element-wise ReLU activation function only for the input and hidden layers.
+
+By setting the bias argument to true, we are associating a learnable bias parameter with the input, hidden and output layers.
+
+Next up we can add the ``forward`` method inside the GCN class. When given the node features as input to the network, it returns the corresponding output activations
+by following the feedforward mechanism described for a GCN layer.
+
+.. code-block:: python
+
+    # model.py
+
+    def forward(self, features):
+        h = features
+        for layer in self._layers:
+            h = layer.forward(self._graph, h)
+        return h
+
+Preparing the Training Script
+-----------------------------
+
+Now that we have defined our GCN model, we can prepare the training script to train our model on the Cora dataset. You can go ahead and import all the
+necessary modules first.
+
+.. code-block:: python
+
+    # train.py
+
+    import traceback
+
+    import torch
+    import torch.nn.functional as F
+
+    from stgraph.utils import DataTable
+    from stgraph.dataset import CoraDataLoader
+    from stgraph.graph.static.static_graph import StaticGraph
+    from model import GCN
+    from utils import (
+        accuracy,
+        generate_test_mask,
+        generate_train_mask,
+        row_normalize_feature,
+        get_node_norms,
+    )
+
+You will notice that we haven't defined any of the imported methods from ``utils``. We will write down the logic for each one of them as we progress through writing the training script.
+
+Loading the Cora Graph Data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Let's define our ``train`` method first
+
+.. code-block:: python
+
+    # train.py
+
+    def train(lr, num_epochs, num_hidden, num_hidden_layers, weight_decay):
+        if not torch.cuda.is_available():
+            print("CUDA is not available")
+            exit(1)
+
+We are passing the following hyperparameters as arguments to ``train``
+
+1. **lr**: The learning rate for the model.
+2. **num_epochs**: Number of epochs to train the model for.
+3. **num_hidden**: Number of neurons in each hidden layer.
+4. **num_hidden_layers**: Count of hidden layers.
+5. **weight_decay**: Weight decay value for L2 regularization to avoid overfitting
+
+As soon as we enter the ``train`` function, we are checking whether CUDA is available on the system. If it is not available, then we exit from the program.
+STGraph requires CUDA to be present for it to train any model.
+
+Next up we load our Cora dataset and all the necessary features, labels and weights. Once loaded on the CPU, the feature, label and mask tensors are moved to the GPU using the ``.cuda()`` method.
+
+.. code-block:: python
+
+    # train.py
+
+        cora = CoraDataLoader()
+
+        node_features = row_normalize_feature(
+            torch.FloatTensor(cora.get_all_features())
+        )
+        node_labels = torch.LongTensor(cora.get_all_targets())
+        edge_weights = [1 for _ in range(cora.gdata["num_edges"])]
+
+        train_mask = torch.BoolTensor(
+            generate_train_mask(cora.gdata["num_nodes"], 0.7)
+        )
+        test_mask = torch.BoolTensor(
+            generate_test_mask(cora.gdata["num_nodes"], 0.7)
+        )
+
+        torch.cuda.set_device(0)
+        node_features = node_features.cuda()
+        node_labels = node_labels.cuda()
+        train_mask = train_mask.cuda()
+        test_mask = test_mask.cuda()
+
+The node features are row-normalised as shown below
+
+.. code-block:: python
+
+    # utils.py
+
+    def row_normalize_feature(features):
+        row_sum = features.sum(dim=1, keepdim=True)
+        r_inv = torch.where(row_sum != 0, 1.0 / row_sum, torch.zeros_like(row_sum))
+        norm_features = features * r_inv
+
+        return norm_features
+
+We assume an edge weight of 1 for all edges. The ``train_mask`` and ``test_mask`` can be generated using the following two helper functions. We use a train-test
+split of 0.7, i.e. the first 70% of the nodes are used for training and the remaining 30% for testing, but you can experiment with different values.
+
+.. code-block:: python
+
+    # utils.py
+
+    def generate_train_mask(size, train_test_split):
+        cutoff = size * train_test_split
+        return [1 if i < cutoff else 0 for i in range(size)]
+
+
+    def generate_test_mask(size, train_test_split):
+        cutoff = size * train_test_split
+        return [0 if i < cutoff else 1 for i in range(size)]
+
+Creating STGraph Graph Object and GCN Model
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We need to create a :class:`StaticGraph <stgraph.graph.static.static_graph.StaticGraph>` object representing our Cora dataset, which can then be passed to our GCN model.
+
+.. code-block:: python
+
+    # train.py
+
+    cora_graph = StaticGraph(
+        edge_list=cora.get_edges(),
+        edge_weights=edge_weights,
+        num_nodes=cora.gdata["num_nodes"]
+    )
+
+    cora_graph.set_ndata("norm", get_node_norms(cora_graph))
+
+The node-wise normalization ``norm`` is set as node meta-data. This is internally used by the :class:`GCNConv <stgraph.nn.pytorch.static.gcn_conv.GCNConv>` layer while aggregating the
+features of a node's neighbours. We calculate the node-wise normalization as follows
+
+.. code-block:: python
+
+    # utils.py
+
+    def get_node_norms(graph: StaticGraph):
+        degrees = torch.from_numpy(graph.weighted_in_degrees()).type(torch.int32)
+        norm = torch.pow(degrees, -0.5)
+        norm[torch.isinf(norm)] = 0
+        return to_default_device(norm).unsqueeze(1)
+
+We can go ahead and now load up the GCN model we created earlier into the GPU using ``.cuda()``. Follow it up by using Cross Entropy Loss and Adam as the loss function and optimizer respectively.
+
+.. code-block:: python
+
+    # train.py
+
+        model = GCN(
+            graph=cora_graph,
+            in_feats=cora.gdata["num_feats"],
+            n_hidden=num_hidden,
+            n_classes=cora.gdata["num_classes"],
+            n_hidden_layers=num_hidden_layers
+        ).cuda()
+
+        loss_function = F.cross_entropy
+        optimizer = torch.optim.Adam(
+            model.parameters(), lr=lr, weight_decay=weight_decay
+        )
+
+Training the GCN Model
+^^^^^^^^^^^^^^^^^^^^^^
+
+To help visualize various metrics such as accuracy, loss, etc. during training, we can use the :class:`DataTable <stgraph.utils.DataTable>` present in the STGraph utility package.
+
+.. code-block:: python
+
+    # train.py
+
+    table = DataTable(
+        f"STGraph GCN on CORA dataset",
+        ["Epoch", "Train Accuracy %", "Loss"],
+    )
+
+Here is the entire training block
+
+.. code-block:: python
+
+    # train.py
+
+        try:
+            print("Started Training")
+            for epoch in range(num_epochs):
+                model.train()
+                torch.cuda.synchronize()
+
+                logits = model.forward(node_features)
+                loss = loss_function(logits[train_mask], node_labels[train_mask])
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+
+                torch.cuda.synchronize()
+
+                train_acc = accuracy(logits[train_mask], node_labels[train_mask])
+
+                table.add_row(
+                    [epoch, float(f"{train_acc * 100:.2f}"), float(f"{loss.item():.5f}")]
+                )
+            print("Training Ended")
+            table.display()
+
+            print("Evaluating trained GCN model on the Test Set")
+
+            model.eval()
+            logits_test = model(node_features)
+            loss_test = loss_function(logits_test[train_mask], node_labels[train_mask])
+            test_acc = accuracy(logits_test[test_mask], node_labels[test_mask])
+
+            print(f"Loss for Test: {loss_test}")
+            print(f"Accuracy for Test: {float(test_acc) * 100} %")
+
+        except Exception as e:
+            print("------------- Error -------------")
+            print(e)
+            traceback.print_exc()
+
+For each epoch, we are doing the following
+
+1. Run a single forward pass with ``node_features`` as input and ``logits`` as output.
+2. Calculate the loss using the Cross Entropy Loss function.
+3. Reset the gradients of all the parameters that the optimizer is managing using ``optimizer.zero_grad()``.
+4. Perform backpropagation using ``loss.backward()``.
+5. Update the parameters with ``optimizer.step()``.
+6. Calculate the training accuracy.
+7. Add necessary information to be displayed in the table.
+
+Training accuracy is calculated as follows
+
+.. code-block:: python
+
+    # utils.py
+
+    def accuracy(logits, labels):
+        _, indices = torch.max(logits, dim=1)
+        correct = torch.sum(indices == labels)
+        return correct.item() * 1.0 / len(labels)
+
+Finally we evaluate the model on the test set and report the accuracy and loss.
+
+The main.py File
+^^^^^^^^^^^^^^^^
+
+Let's prepare a ``main.py`` which accepts the hyperparameters as command-line arguments and invokes the ``train`` method.
+
+.. code-block:: python
+
+    # main.py
+
+    import argparse
+
+    from train import train
+
+
+    def main(args) -> None:
+        train(
+            lr=args.learning_rate,
+            num_epochs=args.epochs,
+            num_hidden=args.num_hidden,
+            num_hidden_layers=args.num_hidden_layers,
+            weight_decay=args.weight_decay,
+        )
+
+
+    if __name__ == "__main__":
+        parser = argparse.ArgumentParser(description="Training GCN on CORA Dataset")
+
+        parser.add_argument(
+            "-lr",
+            "--learning-rate",
+            type=float,
+            default=0.01,
+            help="Learning Rate for the GCN Model",
+        )
+
+        parser.add_argument(
+            "-e",
+            "--epochs",
+            type=int,
+            default=200,
+            help="Number of Epochs to Train the GCN Model",
+        )
+
+        parser.add_argument(
+            "-n",
+            "--num-hidden",
+            type=int,
+            default=16,
+            help="Number of Neurons in Hidden Layers",
+        )
+
+        parser.add_argument(
+            "-l", "--num-hidden-layers", type=int, default=1, help="Number of Hidden Layers"
+        )
+
+        parser.add_argument(
+            "-w", "--weight-decay", type=float, default=5e-4, help="Weight Decay"
+        )
+
+        args = parser.parse_args()
+        main(args=args)
+
+Let's go ahead and train our GCN model! Run this command to train a GCN model with our default hyperparameters
+
+1. Learning rate set to 0.01
+2. 200 Epochs
+3. 16 neurons in the hidden layers
+4. 1 hidden layer
+5. Weight decay of 0.0005
+
+.. code-block:: bash
+
+    $ python3 main.py
+
+Here is a truncated output
+
+.. code-block:: bash
+
+    Started Training
+    Training Ended
+
+        STGraph GCN on CORA dataset
+
+     Epoch ┃ Train Accuracy % ┃ Loss
+    ━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━
+     0     │ 14.98            │ 1.94579
+     1     │ 27.74            │ 1.93584
+     2     │ 27.74            │ 1.92458
+     3     │ 27.74            │ 1.91228
+     4     │ 27.74            │ 1.89956
+     5     │ 27.74            │ 1.88697
+     .
+     .
+     .
+     195   │ 76.27            │ 0.6078
+     196   │ 76.16            │ 0.60734
+     197   │ 76.37            │ 0.60676
+     198   │ 76.16            │ 0.60579
+     199   │ 76.32            │ 0.60465
+
+    Evaluating trained GCN model on the Test Set
+    Loss for Test: 0.6035217642784119
+    Accuracy for Test: 75.1231527093596 %
+
+We are achieving a training accuracy of around 76% and testing accuracy of 75%. This is pretty good for our first attempt.
+
+Exercises
+---------
+
+STGraph users need not stop here and can try out the following exercises to try to make the model learn better
+
+1. In the tutorial we are splitting the dataset only into a training set and testing set. Try creating a validation set as well to tune and optimize the hyperparameters.
+2. Try changing the number of hidden layers and number of hidden layer neurons. Maybe use no hidden layer at all. Do you notice any form of improvement? Or does it make the model worse?
+3. We did not use any activation function in the output layer. Try finding some common activation functions that can be used in the output layer for classification tasks and modify the GCN model.
\ No newline at end of file
diff --git a/docs/source/tutorials/gnn.rst b/docs/source/tutorials/gnn.rst
deleted file mode 100644
index cfff54b1..00000000
--- a/docs/source/tutorials/gnn.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-Training a GCN on the Cora dataset
-==================================
-
-Graph Neural Networks (GNNs) are specially designed to understand and learn from data organized in graphs, 
-making them incredibly versatile and powerful. Graph Convolutional Networks (GCNs) is a widely adopted
-model which makes use of both node features and local connections.
-
-In this introductory tutorial, you will be able to 
-
-1. Build a GCN model using STGraph's neural network layers.
-2. Load the Cora dataset provided by STGraph.
-3. Train and evaluate the GCN model for node classification task on the GPU.
-
-The task at hand
-----------------
-
-The Cora dataset is a widely used citation network for benchmarking graph-based machine learning algorithms.
-It comprises and captures the relationship between 2708 scientific publications classified into one of seven classes, 
-where nodes represent individual papers, and edges denote citation links between them. The network comprises of 
-5,429 connections. Each publication in the dataset is characterized by a binary word vector (0 or 1), 
-signifying the non-existence or existence of the respective word from a dictionary of 1,433 unique words.
-
-Our task is to train a GCN model on the Cora dataset and predict the topic of a publication (node) by considering 
-the neighboring node information and the overall graph structure. Or in other words, Node Classification.
-
-.. figure:: ../_static/Images/tutorials/CoraBalloons.png
-   :alt: CoraBalloons
-   :align: center
-   :width: 400
-
-   Cora Dataset Visualized [1]
-
-Writing the GCN model
----------------------
-
-Let's begin by creating our GCN model inside a file named ``model.py``.
-
-.. code-block:: python
-    :linenos:
-
-    import torch.nn as nn
-    from stgraph.nn.pytorch.graph_conv import GraphConv
-
-    class GCN(nn.Module):
-        def __init__(self,
-                    graph,
-                    in_feats,
-                    n_hidden,
-                    n_classes,
-                    n_layers,
-                    activation):
-
-            super(GCN, self).__init__()
-
-            self.graph = graph
-            self.layers = nn.ModuleList()
-            self.layers.append(GraphConv(in_feats, n_hidden, activation))
-            
-            for i in range(n_layers - 1):
-                self.layers.append(GraphConv(n_hidden, n_hidden, activation))
-            
-            self.layers.append(GraphConv(n_hidden, n_classes, None))
-
-        def forward(self, g, features):
-            h = features
-            for layer in self.layers:
-                h = layer(g, h)
-            return h
\ No newline at end of file
diff --git a/examples/README.md b/examples/README.md
index 04765eb5..6ed94ca5 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -9,9 +9,10 @@ In this beginner friendly tutorial, you will be writing your first GNN and TGNN
 Open up your favourite text editor or Python IDE and create a file named `model.py` with the following code which defines a GCN layer with PyTorch as the backend.
 
 **model.py**
+
 ```python
 import torch.nn as nn
-from stgraph.nn.pytorch.graph_conv import GraphConv
+from stgraph.nn.pytorch.static.gcn_conv import GCNConv
 
 
 class GCN(nn.Module):
@@ -20,12 +21,12 @@ class GCN(nn.Module):
 
         self.g = g
         self.layers = nn.ModuleList()
-        self.layers.append(GraphConv(in_feats, n_hidden, activation))
+        self.layers.append(GCNConv(in_feats, n_hidden, activation))
 
         for i in range(n_layers - 1):
-            self.layers.append(GraphConv(n_hidden, n_hidden, activation))
-        
-        self.layers.append(GraphConv(n_hidden, n_classes, None))
+            self.layers.append(GCNConv(n_hidden, n_hidden, activation))
+
+        self.layers.append(GCNConv(n_hidden, n_classes, None))
 
     def forward(self, g, features):
         h = features
diff --git a/ruff.toml b/ruff.toml
index 58ba992b..d23779d3 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -22,6 +22,7 @@ ignore = [
     "D212",
     "D213",
     "PIE790",
+    "E501",
 ]
 
 [lint.per-file-ignores]
diff --git a/stgraph/benchmark_tools/__init__.py b/stgraph/benchmark_tools/__init__.py
deleted file mode 100644
index 74de16aa..00000000
--- a/stgraph/benchmark_tools/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Benchmarking Tools provided by STGraph."""
-
-from stgraph.benchmark_tools.table import BenchmarkTable
diff --git a/stgraph/nn/__init__.py b/stgraph/nn/__init__.py
index 0514cbd6..98f055d6 100644
--- a/stgraph/nn/__init__.py
+++ b/stgraph/nn/__init__.py
@@ -1 +1,3 @@
-'''State of the art Graph Neural Networks written using STGraph'''
\ No newline at end of file
+"""Vertex-centric implementation for state-of-the-art Graph Neural Networks."""
+
+from stgraph.nn.pytorch.static.gcn_conv import GCNConv
diff --git a/stgraph/nn/pytorch/__init__.py b/stgraph/nn/pytorch/__init__.py
index e69de29b..2b0c5a28 100644
--- a/stgraph/nn/pytorch/__init__.py
+++ b/stgraph/nn/pytorch/__init__.py
@@ -0,0 +1 @@
+"""Vertex-centric implementation for state-of-the-art Graph Neural Networks for PyTorch specific backend."""
\ No newline at end of file
diff --git a/stgraph/nn/pytorch/graph_conv.py b/stgraph/nn/pytorch/graph_conv.py
deleted file mode 100644
index c62f4ead..00000000
--- a/stgraph/nn/pytorch/graph_conv.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import torch
-import torch.nn as nn
-from stgraph.compiler import STGraph
-from stgraph.compiler.backend.pytorch.torch_callback import STGraphBackendTorch
-
-class GraphConv(nn.Module):
-    def __init__(self,
-                 in_feats,
-                 out_feats,
-                 activation,
-                 bias=True):
-        super(GraphConv, self).__init__()
-        self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
-        if bias:
-            self.bias = nn.Parameter(torch.Tensor(out_feats))
-        else:
-            self.bias = None
-        self.activation = activation
-        self.stgraph = STGraph(STGraphBackendTorch())
-        self.reset_parameters()
-
-    def reset_parameters(self):
-        nn.init.xavier_uniform_(self.weight)
-        if self.bias is not None:
-            nn.init.zeros_(self.bias)
-
-    def forward(self, g, h, edge_weight=None):
-        h = torch.mm(h, self.weight)
-        
-        if edge_weight is None:
-            @self.stgraph.compile(gnn_module=self)
-            def nb_compute(v):
-                h = sum([nb.h*nb.norm for nb in v.innbs])
-                h = h * v.norm
-                return h
-            h = nb_compute(g=g, n_feats={'norm': g.get_ndata("norm"), 'h' : h})
-        else:
-            @self.stgraph.compile(gnn_module=self)
-            def nb_compute(v):
-                h = sum([nb_edge.src.norm * nb_edge.src.h * nb_edge.edge_weight for nb_edge in v.inedges])
-                h = h * v.norm
-                return h
-            h = nb_compute(g=g, n_feats={'norm': g.get_ndata("norm"), 'h' : h}, e_feats={'edge_weight':edge_weight})
-
-        # bias
-        if self.bias is not None:
-            h = h + self.bias
-        if self.activation:
-            h = self.activation(h)
-        return h
diff --git a/stgraph/nn/pytorch/static/__init__.py b/stgraph/nn/pytorch/static/__init__.py
new file mode 100644
index 00000000..38c8aed9
--- /dev/null
+++ b/stgraph/nn/pytorch/static/__init__.py
@@ -0,0 +1 @@
+"""State-of-the-art Static Graph Neural Networks written for PyTorch backend."""
diff --git a/stgraph/nn/pytorch/gat_conv.py b/stgraph/nn/pytorch/static/gat_conv.py
similarity index 100%
rename from stgraph/nn/pytorch/gat_conv.py
rename to stgraph/nn/pytorch/static/gat_conv.py
diff --git a/stgraph/nn/pytorch/static/gcn_conv.py b/stgraph/nn/pytorch/static/gcn_conv.py
new file mode 100644
index 00000000..25a004df
--- /dev/null
+++ b/stgraph/nn/pytorch/static/gcn_conv.py
@@ -0,0 +1,189 @@
+"""Graph Convolutional Network Layer."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Callable
+
+if TYPE_CHECKING:
+    from stgraph.compiler.node import CentralNode
+    from stgraph.graph import StaticGraph
+
+import torch
+from torch import Tensor, nn
+
+from stgraph.compiler import STGraph
+from stgraph.compiler.backend.pytorch.torch_callback import STGraphBackendTorch
+from stgraph.utils.constants import SizeConstants
+
+
+class GCNConv(nn.Module):
+    r"""Graph Convolutional Network Layer.
+
+    Vertex-centric implementation for Graph Convolutional Network (GCN)
+    layer as described in `Semi-supervised Classification with Graph
+    Convolutional Networks <https://arxiv.org/abs/1609.02907>`_.
+
+    A multi-layer GCN model has the following layer-wise propagation rule
+
+    .. math::
+
+        H^{(l+1)} = \sigma \left( \tilde{D}^{-1/2} \tilde{A} \tilde{D}^{-1/2} H^{(l)} W^{(l)} \right)
+
+    - :math:`H^{(l)}`: Matrix of activations in the :math:`l`-th layer; :math:`H^{(0)} = X` is the input feature matrix.
+    - :math:`\sigma`: Activation function (e.g., ReLU).
+    - :math:`\tilde{A} = A + I_N`: Adjacency matrix of the graph with added self-connections.
+    - :math:`I_N`: Identity matrix.
+    - :math:`\tilde{D}_{ii} = \sum_j \tilde{A}_{ij}`: Degree matrix of :math:`\tilde{A}`.
+    - :math:`W^{(l)}`: Trainable weight matrix for the :math:`l`-th layer.
+
+    **Vertex-Centric Formula**
+
+    The vertex-centric implementation can be achieved by aggregating all the
+    features of the neighbouring nodes of the central node
+
+    .. math::
+
+        h^{(l+1)} = \left( \sum_{\text{nb} \in \text{innbs}(v)} \text{nb}_{h^{(l)}} \cdot \text{nb}_{\text{norm}} \cdot \text{weight}_{\text{nb,v}} \right) \cdot v_{\text{norm}}
+
+    - :math:`h^{(l)}`: Activations of central-node in the :math:`l`-th layer.
+    - :math:`\text{innbs}(v)`: In-neighbours of central-node :math:`v`.
+    - :math:`\text{weight}_{\text{nb,v}}`: Weight of edge from :math:`nb` to :math:`v`. In case no edge weights are present, it is set to 1
+    - :math:`norm`: Node wise normalization factor, :math:`v_{\text{norm}} = \text{in_degrees(v)}^{-0.5}`.
+
+    **Node Data**
+
+    The following node data needs to be set using :class:`StaticGraph.set_ndata <stgraph.graph.static.static_graph.StaticGraph>` before calling
+    the :func:`~stgraph.nn.pytorch.static.gcn_conv.GCNConv.forward` method.
+
+    +---------------+--------------------------------+---------------------------------------------------------------------------------------------------+
+    | Node Property | Description                    | Type                                                                                              |
+    +===============+================================+===================================================================================================+
+    | norm          | Node-wise normalization factor | A PyTorch Tensor of shape (num_nodes, 1), where dim=1 contains the node-wise normalization factor |
+    +---------------+--------------------------------+---------------------------------------------------------------------------------------------------+
+
+
+    Parameters
+    ----------
+    in_channels : int
+        Size of input sample passed into the layer
+    out_channels : int
+        Size of output sample outputted by the layer
+    activation : optional
+        Non-linear activation function provided by `PyTorch <https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity>`_
+    bias : bool, optional
+        If set to *True*, learnable bias parameters are added to the layer
+
+    """
+
+    def __init__(
+        self: GCNConv,
+        in_channels: int,
+        out_channels: int,
+        activation: Callable[..., torch.Tensor] | None = None,
+        bias: bool = True,
+    ) -> None:
+        """Graph Convolutional Network Layer."""
+        super().__init__()
+        self.weight = nn.Parameter(torch.Tensor(in_channels, out_channels))
+        if bias:
+            self.bias = nn.Parameter(torch.Tensor(out_channels))
+        else:
+            self.bias = None
+        self.activation = activation
+        self.stgraph = STGraph(STGraphBackendTorch())
+        self.reset_parameters()
+
+    def reset_parameters(self: GCNConv) -> None:
+        r"""Reset the learnable weight and bias parameters.
+
+        The weight parameter is initialized using a Xavier Uniform distribution.
+        The bias parameter is initialized by setting all values to zero.
+        """
+        nn.init.xavier_uniform_(self.weight)
+        if self.bias is not None:
+            nn.init.zeros_(self.bias)
+
+    def forward(
+        self: GCNConv,
+        graph: StaticGraph,
+        h: Tensor,
+        edge_weight: Tensor | None = None,
+    ) -> Tensor:
+        r"""Execute a single forward pass for the GCN layer.
+
+        Runs a single forward pass using the vertex-centric implementation of the GCN layer.
+
+        Parameters
+        ----------
+        graph : StaticGraph
+            A StaticGraph graph object
+        h : Tensor
+            Input for the GCN forward pass
+        edge_weight : Tensor, optional
+            Edge weights for each edge in the graph
+
+        Returns
+        -------
+        Tensor
+            The output after executing the GCN forward pass
+
+        Raises
+        ------
+        KeyError
+            If ``norm`` n_data is not present for the graph
+        ValueError
+            If ``norm`` n_data passed is not of the shape (num_nodes, 1)
+
+        Example
+        -------
+
+        Example usage::
+
+            # Defining a method to run forward pass with multiple GCN layers
+
+            def forward(input: Tensor, layers: List[GCNConv], graph: StaticGraph):
+                h = input
+                for layer in layers:
+                    h = layer.forward(graph, h)
+                return h
+
+        """
+        if graph.get_ndata("norm") is None:
+            raise KeyError("StaticGraph passed to GCNConv forward pass does not contain 'norm' node data")
+        if (len(graph.get_ndata("norm").shape) != SizeConstants.NODE_NORM_SIZE.value or
+                graph.get_ndata("norm").shape[1] != 1 or
+                graph.get_ndata("norm").shape[0] != graph.get_num_nodes()):
+            raise ValueError("Node data 'norm' passed to GCNConv should be of shape (num_nodes, 1)")
+
+        h = torch.mm(h, self.weight)
+
+        if edge_weight is None:
+
+            @self.stgraph.compile(gnn_module=self)
+            def nb_compute(v: CentralNode) -> Tensor:
+                return sum([nb.h * nb.norm for nb in v.innbs]) * v.norm
+
+            h = nb_compute(g=graph, n_feats={"norm": graph.get_ndata("norm"), "h": h})
+        else:
+
+            @self.stgraph.compile(gnn_module=self)
+            def nb_compute(v: CentralNode) -> Tensor:
+                return sum(
+                    [
+                        nb_edge.src.norm * nb_edge.src.h * nb_edge.edge_weight
+                        for nb_edge in v.inedges
+                    ],
+                ) * v.norm
+
+            h = nb_compute(
+                g=graph,
+                n_feats={"norm": graph.get_ndata("norm"), "h": h},
+                e_feats={"edge_weight": edge_weight},
+            )
+
+        # bias
+        if self.bias is not None:
+            h = h + self.bias
+        if self.activation:
+            h = self.activation(h)
+        return h
diff --git a/stgraph/nn/pytorch/temporal/tgcn.py b/stgraph/nn/pytorch/temporal/tgcn.py
index 0e2d50ee..421bfdb3 100644
--- a/stgraph/nn/pytorch/temporal/tgcn.py
+++ b/stgraph/nn/pytorch/temporal/tgcn.py
@@ -1,16 +1,16 @@
 import torch
-from stgraph.nn.pytorch.graph_conv import GraphConv
+from stgraph.nn.pytorch.static.gcn_conv import GCNConv
 
 class TGCN(torch.nn.Module):
     def __init__(self, in_channels, out_channels):
         super(TGCN, self).__init__()
         self.in_channels = in_channels
         self.out_channels = out_channels
-        self.conv_z = GraphConv(self.in_channels, self.out_channels, activation=None)
+        self.conv_z = GCNConv(self.in_channels, self.out_channels, activation=None)
         self.linear_z = torch.nn.Linear(2 * self.out_channels, self.out_channels)
-        self.conv_r = GraphConv(self.in_channels, self.out_channels, activation=None)
+        self.conv_r = GCNConv(self.in_channels, self.out_channels, activation=None)
         self.linear_r = torch.nn.Linear(2 * self.out_channels, self.out_channels)
-        self.conv_h = GraphConv(self.in_channels, self.out_channels, activation=None)
+        self.conv_h = GCNConv(self.in_channels, self.out_channels, activation=None)
         self.linear_h = torch.nn.Linear(2 * self.out_channels, self.out_channels)
 
     def _set_hidden_state(self, X, H):
diff --git a/stgraph/utils/__init__.py b/stgraph/utils/__init__.py
new file mode 100644
index 00000000..6362328c
--- /dev/null
+++ b/stgraph/utils/__init__.py
@@ -0,0 +1,4 @@
+"""Utility package for STGraph."""
+
+from stgraph.utils.constants import SizeConstants
+from stgraph.utils.data_table import DataTable
diff --git a/stgraph/utils/constants.py b/stgraph/utils/constants.py
new file mode 100644
index 00000000..58ccac73
--- /dev/null
+++ b/stgraph/utils/constants.py
@@ -0,0 +1,17 @@
+r"""Constants to be used across the project."""
+
+from enum import Enum
+
+
+class SizeConstants(Enum):
+    r"""Data Size Related Constants.
+
+    +----------------+-------+----------------------------------------------+
+    | Constant       | Value | Description                                  |
+    +================+=======+==============================================+
+    | NODE_NORM_SIZE | 2     | Number of dimensions of the node norm tensor |
+    +----------------+-------+----------------------------------------------+
+
+    """
+
+    NODE_NORM_SIZE = 2
diff --git a/stgraph/benchmark_tools/table.py b/stgraph/utils/data_table.py
similarity index 83%
rename from stgraph/benchmark_tools/table.py
rename to stgraph/utils/data_table.py
index 180449f2..4bcd452c 100644
--- a/stgraph/benchmark_tools/table.py
+++ b/stgraph/utils/data_table.py
@@ -8,7 +8,7 @@
 from rich.table import Table
 
 
-class BenchmarkTable:
+class DataTable:
     r"""Table that can display benchmarking data and other info.
 
     This class provides functionality to create and display tables for
@@ -19,9 +19,9 @@ class BenchmarkTable:
 
     .. code-block:: python
 
-        from stgraph.benchmark_tools import BenchmarkTable
+        from stgraph.utils import DataTable
 
-        table = BenchmarkTable(
+        table = DataTable(
             title = "GCN Benchmark Data",
             col_name_list = ["Model", "Time", "MSE"]
         )
@@ -47,7 +47,7 @@ class BenchmarkTable:
 
     """
 
-    def __init__(self: BenchmarkTable, title: str, col_name_list: list[str]) -> None:
+    def __init__(self: DataTable, title: str, col_name_list: list[str]) -> None:
         r"""Table that can display benchmarking data and other info."""
         self.title = "\n" + title + "\n"
         self.col_name_list = col_name_list
@@ -57,12 +57,12 @@ def __init__(self: BenchmarkTable, title: str, col_name_list: list[str]) -> None
 
         self._table_add_columns()
 
-    def _table_add_columns(self: BenchmarkTable) -> None:
+    def _table_add_columns(self: DataTable) -> None:
         r"""Prepare the table by adding all the columns."""
         for col_name in self.col_name_list:
             self._table.add_column(col_name, justify="left")
 
-    def add_row(self: BenchmarkTable, values: list) -> None:
+    def add_row(self: DataTable, values: list) -> None:
         r"""Add a row of data to the table.
 
         Parameters
@@ -74,7 +74,7 @@ def add_row(self: BenchmarkTable, values: list) -> None:
         values_str = tuple([str(val) for val in values])
         self._table.add_row(*values_str)
 
-    def display(self: BenchmarkTable, output_file: IO[str] | None = None) -> None:
+    def display(self: DataTable, output_file: IO[str] | None = None) -> None:
         r"""Display entire table with data.
 
         Parameters
diff --git a/tests/scripts/v1_1_0/gcn_dataloaders/gcn/model.py b/tests/scripts/v1_1_0/gcn_dataloaders/gcn/model.py
index f9554d9d..231e78c8 100644
--- a/tests/scripts/v1_1_0/gcn_dataloaders/gcn/model.py
+++ b/tests/scripts/v1_1_0/gcn_dataloaders/gcn/model.py
@@ -1,5 +1,5 @@
 import torch.nn as nn
-from stgraph.nn.pytorch.graph_conv import GraphConv
+from stgraph.nn.pytorch.static.gcn_conv import GCNConv
 
 class GCN(nn.Module):
     def __init__(self,
@@ -13,12 +13,12 @@ def __init__(self,
         self.g = g
         self.layers = nn.ModuleList()
         # input layer
-        self.layers.append(GraphConv(in_feats, n_hidden, activation))
+        self.layers.append(GCNConv(in_feats, n_hidden, activation))
         # hidden layers
         for i in range(n_layers - 1):
-            self.layers.append(GraphConv(n_hidden, n_hidden, activation))
+            self.layers.append(GCNConv(n_hidden, n_hidden, activation))
         # output layer
-        self.layers.append(GraphConv(n_hidden, n_classes, None))
+        self.layers.append(GCNConv(n_hidden, n_classes, None))
 
     def forward(self, g, features):
         h = features
diff --git a/tests/scripts/v1_1_0/gcn_dataloaders/gcn/train.py b/tests/scripts/v1_1_0/gcn_dataloaders/gcn/train.py
index 37f126e6..9424aa12 100644
--- a/tests/scripts/v1_1_0/gcn_dataloaders/gcn/train.py
+++ b/tests/scripts/v1_1_0/gcn_dataloaders/gcn/train.py
@@ -7,7 +7,7 @@
 import torch.nn.functional as F
 from rich.progress import Progress
 
-from stgraph.benchmark_tools.table import BenchmarkTable
+from stgraph.utils import DataTable
 from stgraph.dataset import CoraDataLoader
 from stgraph.graph.static.static_graph import StaticGraph
 from .model import GCN
@@ -82,7 +82,7 @@ def train(
 
         dur = []
         Used_memory = 0
-        table = BenchmarkTable(
+        table = DataTable(
             f"STGraph GCN on {dataloader.name} dataset",
             ["Epoch", "Time(s)", "Train Accuracy", "Used GPU Memory (Max MB)"],
         )
diff --git a/tests/scripts/v1_1_0/gcn_dataloaders/gcn_dataloaders.py b/tests/scripts/v1_1_0/gcn_dataloaders/gcn_dataloaders.py
index 7e9acba1..f19ddb14 100644
--- a/tests/scripts/v1_1_0/gcn_dataloaders/gcn_dataloaders.py
+++ b/tests/scripts/v1_1_0/gcn_dataloaders/gcn_dataloaders.py
@@ -40,7 +40,7 @@ def main(args):
                 dataset=dataset_name,
                 num_hidden=16,
                 lr=0.01,
-                num_epochs=30,
+                num_epochs=200,
                 num_layers=1,
                 weight_decay=5e-4,
                 self_loops=False,
diff --git a/tests/scripts/v1_1_0/temporal_tgcn_dataloaders/tgcn/train.py b/tests/scripts/v1_1_0/temporal_tgcn_dataloaders/tgcn/train.py
index 7737c95c..107e335a 100644
--- a/tests/scripts/v1_1_0/temporal_tgcn_dataloaders/tgcn/train.py
+++ b/tests/scripts/v1_1_0/temporal_tgcn_dataloaders/tgcn/train.py
@@ -6,7 +6,7 @@
 import torch
 from rich.progress import Progress
 
-from stgraph.benchmark_tools.table import BenchmarkTable
+from stgraph.utils import DataTable
 from stgraph.dataset import HungaryCPDataLoader
 from stgraph.dataset import METRLADataLoader
 from stgraph.dataset import MontevideoBusDataLoader
@@ -101,7 +101,7 @@ def train(
         # metrics
         dur = []
         max_gpu = []
-        table = BenchmarkTable(
+        table = DataTable(
             f"(STGraph Static-Temporal) TGCN on {dataloader.name} dataset",
             ["Epoch", "Time(s)", "MSE", "Used GPU Memory (Max MB)"],
         )
diff --git a/tutorials/README.md b/tutorials/README.md
new file mode 100644
index 00000000..82aebb0a
--- /dev/null
+++ b/tutorials/README.md
@@ -0,0 +1,8 @@
+# STGraph Tutorials
+
+Within this directory you can find the source code for all the tutorials. As the project expands and we introduce more Graph Neural Network layers and data loaders, additional tutorials and source code will be added over time.
+
+| Tutorial Name                                                                    | Task                | NN Layer | Dataset | Graph Type | Backend |
+|----------------------------------------------------------------------------------|---------------------|----------|---------|------------|---------|
+| [Cora Publication Prediction using Graph Convolutional Networks (GCN)](gcn/cora) | Node Classification | GCNConv  | Cora    | Static     | PyTorch |
+
diff --git a/tutorials/gcn/cora/README.md b/tutorials/gcn/cora/README.md
new file mode 100644
index 00000000..41ab0f52
--- /dev/null
+++ b/tutorials/gcn/cora/README.md
@@ -0,0 +1,3 @@
+# Cora Publication Prediction using Graph Convolutional Networks (GCN)
+
+You can find the detailed tutorial on the documentation page.
\ No newline at end of file
diff --git a/tutorials/gcn/cora/__init__.py b/tutorials/gcn/cora/__init__.py
new file mode 100644
index 00000000..6dfcfdc9
--- /dev/null
+++ b/tutorials/gcn/cora/__init__.py
@@ -0,0 +1 @@
+"""Cora Publication Prediction using Graph Convolutional Networks (GCN)"""
diff --git a/tutorials/gcn/cora/main.py b/tutorials/gcn/cora/main.py
new file mode 100644
index 00000000..d8f7c4b6
--- /dev/null
+++ b/tutorials/gcn/cora/main.py
@@ -0,0 +1,52 @@
+import argparse
+
+from train import train
+
+
+def main(args) -> None:
+    train(
+        lr=args.learning_rate,
+        num_epochs=args.epochs,
+        num_hidden=args.num_hidden,
+        num_hidden_layers=args.num_hidden_layers,
+        weight_decay=args.weight_decay,
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Training GCN on CORA Dataset")
+
+    parser.add_argument(
+        "-lr",
+        "--learning-rate",
+        type=float,
+        default=0.01,
+        help="Learning Rate for the GCN Model",
+    )
+
+    parser.add_argument(
+        "-e",
+        "--epochs",
+        type=int,
+        default=200,
+        help="Number of Epochs to Train the GCN Model",
+    )
+
+    parser.add_argument(
+        "-n",
+        "--num-hidden",
+        type=int,
+        default=16,
+        help="Number of Neurons in Hidden Layers",
+    )
+
+    parser.add_argument(
+        "-l", "--num-hidden-layers", type=int, default=1, help="Number of Hidden Layers"
+    )
+
+    parser.add_argument(
+        "-w", "--weight-decay", type=float, default=5e-4, help="Weight Decay"
+    )
+
+    args = parser.parse_args()
+    main(args=args)
diff --git a/tutorials/gcn/cora/model.py b/tutorials/gcn/cora/model.py
new file mode 100644
index 00000000..d5a73140
--- /dev/null
+++ b/tutorials/gcn/cora/model.py
@@ -0,0 +1,77 @@
+"""Graph Convolutional Network Model."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from torch import Tensor, nn
+from torch.nn.functional import relu
+
+from stgraph.nn.pytorch.static.gcn_conv import GCNConv
+
+if TYPE_CHECKING:
+    from stgraph.graph import StaticGraph
+
+
+class GCN(nn.Module):
+    r"""Graph Convolutional Network Model.
+
+    Stack of GCNConv layers for node classification: one input layer, ``n_hidden_layers`` hidden layers and one output layer.
+
+    Parameters
+    ----------
+    graph : StaticGraph
+        The input static graph the GCN model operates on.
+    in_feats : int
+        Number of input features.
+    n_hidden : int
+        Number of hidden units in a hidden layer.
+    n_classes : int
+        Number of output classes.
+    n_hidden_layers : int
+        Number of hidden-to-hidden GCNConv layers placed between the input and output layers.
+
+    """
+
+    def __init__(
+        self: GCN,
+        graph: StaticGraph,
+        in_feats: int,
+        n_hidden: int,
+        n_classes: int,
+        n_hidden_layers: int,
+    ) -> None:
+        r"""Build the input, hidden and output GCNConv layers."""
+        super().__init__()
+
+        self._graph = graph
+        self._layers = nn.ModuleList()
+
+        # input layer: in_feats -> n_hidden, ReLU activation
+        self._layers.append(GCNConv(in_feats, n_hidden, relu, bias=True))
+
+        # hidden layers: n_hidden -> n_hidden, ReLU activation, repeated n_hidden_layers times
+        for _ in range(n_hidden_layers):
+            self._layers.append(GCNConv(n_hidden, n_hidden, relu, bias=True))
+
+        # output layer: n_hidden -> n_classes, no activation (raw per-class scores)
+        self._layers.append(GCNConv(n_hidden, n_classes, None, bias=True))
+
+    def forward(self: GCN, features: Tensor) -> Tensor:
+        r"""Forward pass of the GCN model.
+
+        Parameters
+        ----------
+        features : Tensor
+            Input features for each node in the graph.
+
+        Returns
+        -------
+        Tensor :
+            The output of the last GCNConv layer after applying every layer in order.
+
+        """
+        h = features
+        for layer in self._layers:
+            h = layer.forward(self._graph, h)
+        return h
diff --git a/tutorials/gcn/cora/train.py b/tutorials/gcn/cora/train.py
new file mode 100644
index 00000000..2724ac31
--- /dev/null
+++ b/tutorials/gcn/cora/train.py
@@ -0,0 +1,131 @@
+r"""Script to train GCN on Cora dataset."""
+
+import sys
+import traceback
+
+import torch
+from model import GCN
+from torch.nn.functional import cross_entropy
+from utils import (
+    accuracy,
+    generate_test_mask,
+    generate_train_mask,
+    get_node_norms,
+    row_normalize_feature,
+)
+
+from stgraph.utils import DataTable
+from stgraph.dataset import CoraDataLoader
+from stgraph.graph.static.static_graph import StaticGraph
+
+
+def train(
+    lr: float,
+    num_epochs: int,
+    num_hidden: int,
+    num_hidden_layers: int,
+    weight_decay: float,
+) -> None:
+    r"""Train a GCN on the Cora dataset and report metrics on the test mask.
+
+    Parameters
+    ----------
+    lr : float
+        Learning Rate.
+    num_epochs : int
+        Number of Epochs.
+    num_hidden : int
+        Number of hidden units in hidden layer.
+    num_hidden_layers : int
+        Number of hidden layers.
+    weight_decay : float
+        Weight decay value for L2 regularization.
+
+    """
+    if not torch.cuda.is_available():
+        print("CUDA is not available")
+        sys.exit(1)
+
+    cora = CoraDataLoader()
+
+    node_features = row_normalize_feature(
+        torch.FloatTensor(cora.get_all_features()),
+    )
+    node_labels = torch.LongTensor(cora.get_all_targets())
+    edge_weights = [1 for _ in range(cora.gdata["num_edges"])]
+
+    train_mask = torch.BoolTensor(
+        generate_train_mask(cora.gdata["num_nodes"], 0.7),
+    )
+    test_mask = torch.BoolTensor(
+        generate_test_mask(cora.gdata["num_nodes"], 0.7),
+    )
+
+    torch.cuda.set_device(0)
+    node_features = node_features.cuda()
+    node_labels = node_labels.cuda()
+    train_mask = train_mask.cuda()
+    test_mask = test_mask.cuda()
+
+    cora_graph = StaticGraph(
+        edge_list=cora.get_edges(),
+        edge_weights=edge_weights,
+        num_nodes=cora.gdata["num_nodes"],
+    )
+
+    cora_graph.set_ndata("norm", get_node_norms(cora_graph))
+
+    model = GCN(
+        graph=cora_graph,
+        in_feats=cora.gdata["num_feats"],
+        n_hidden=num_hidden,
+        n_classes=cora.gdata["num_classes"],
+        n_hidden_layers=num_hidden_layers,
+    ).cuda()
+
+    loss_function = cross_entropy
+    optimizer = torch.optim.Adam(
+        model.parameters(), lr=lr, weight_decay=weight_decay,
+    )
+
+    table = DataTable(
+        "STGraph GCN on CORA dataset",
+        ["Epoch", "Train Accuracy %", "Loss"],
+    )
+
+    try:
+        print("Started Training")
+        for epoch in range(num_epochs):
+            model.train()
+            torch.cuda.synchronize()
+
+            logits = model(node_features)
+            loss = loss_function(logits[train_mask], node_labels[train_mask])
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            torch.cuda.synchronize()
+
+            train_acc = accuracy(logits[train_mask], node_labels[train_mask])
+
+            table.add_row(
+                [epoch, float(f"{train_acc * 100:.2f}"), float(f"{loss.item():.5f}")],
+            )
+        print("Training Ended")
+        table.display()
+
+        print("Evaluating trained GCN model on the Test Set")
+
+        model.eval()
+        logits_test = model(node_features)
+        loss_test = loss_function(logits_test[test_mask], node_labels[test_mask])  # test nodes, not train nodes
+        test_acc = accuracy(logits_test[test_mask], node_labels[test_mask])
+
+        print(f"Loss for Test: {loss_test}")
+        print(f"Accuracy for Test: {float(test_acc) * 100} %")
+
+    except Exception as e:
+        print("------------- Error -------------")
+        print(e)
+        traceback.print_exc()
diff --git a/tutorials/gcn/cora/utils.py b/tutorials/gcn/cora/utils.py
new file mode 100644
index 00000000..736e898f
--- /dev/null
+++ b/tutorials/gcn/cora/utils.py
@@ -0,0 +1,170 @@
+"""Utility methods for GCN training on Cora dataset."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import torch
+from torch import Tensor
+
+if TYPE_CHECKING:
+    from stgraph.graph import StaticGraph
+
+
+def accuracy(logits: Tensor, labels: Tensor) -> float:
+    r"""Compute the fraction of samples whose top-scoring class is correct.
+
+    Parameters
+    ----------
+    logits : Tensor
+        The predicted output from the model, of shape (num_samples, num_classes).
+    labels : Tensor
+        The ground truth labels, of shape (num_samples,).
+
+    Returns
+    -------
+    float :
+        Proportion of correct predictions in [0, 1]; 0.0 when ``labels`` is empty.
+
+    """
+    if len(labels) == 0:  # nothing to score; avoid ZeroDivisionError
+        return 0.0
+    _, indices = torch.max(logits, dim=1)
+    return torch.sum(indices == labels).item() / len(labels)
+
+
+# Device selection: use the GPU when one is available, otherwise the CPU
+def get_default_device() -> torch.device:
+    r"""Pick the device on which tensors should live by default.
+
+    The first CUDA GPU is preferred whenever CUDA is usable on this machine;
+    if it is not, computation falls back to the CPU.
+
+    Returns
+    -------
+    torch.device :
+        ``torch.device("cuda:0")`` when CUDA is available, else ``torch.device("cpu")``.
+
+    """
+    # A single conditional picks the device string; the device is built once.
+    device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+    return torch.device(device_name)
+
+
+def to_default_device(data: Tensor | list | tuple) -> Tensor | list | tuple:
+    r"""Transfer a tensor, or a nested sequence of tensors, to the default device.
+
+    Lists and tuples are walked recursively and always come back as a ``list``.
+    Anything else is moved with a non-blocking ``.to()`` call.
+
+    Parameters
+    ----------
+    data : Tensor | list | tuple
+        A tensor, or a (possibly nested) list/tuple of tensors, to transfer.
+
+    Returns
+    -------
+    Tensor | list | tuple :
+        The transferred tensor, or a list mirroring the input's nesting.
+
+    """
+    if not isinstance(data, (list, tuple)):
+        return data.to(get_default_device(), non_blocking=True)
+
+    return [to_default_device(item) for item in data]
+
+
+def generate_train_mask(size: int, train_test_split: float) -> list:
+    r"""Build a 0/1 mask that selects the leading samples for training.
+
+    Index ``i`` is marked 1 when ``i < size * train_test_split`` and 0 otherwise, so the
+    training samples are always the first ones in order.
+
+    Parameters
+    ----------
+    size : int
+        Length of the mask, i.e. the total number of samples.
+    train_test_split : float
+        Fraction of samples used for training.
+
+    Returns
+    -------
+    list :
+        A list of ``size`` ints, 1 for training samples and 0 for the rest.
+
+    """
+    boundary = size * train_test_split
+    return [int(idx < boundary) for idx in range(size)]
+
+
+def generate_test_mask(size: int, train_test_split: float) -> list:
+    r"""Build a 0/1 mask that selects the trailing samples for testing.
+
+    Index ``i`` is marked 0 when ``i < size * train_test_split`` and 1 otherwise, so the
+    testing samples are always the ones after the training portion.
+
+    Parameters
+    ----------
+    size : int
+        Length of the mask, i.e. the total number of samples.
+    train_test_split : float
+        Fraction of samples used for training.
+
+    Returns
+    -------
+    list :
+        A list of ``size`` ints, 1 for testing samples and 0 for the rest.
+
+    """
+    boundary = size * train_test_split
+    return [1 - int(idx < boundary) for idx in range(size)]
+
+
+def row_normalize_feature(features: Tensor) -> Tensor:
+    """Scale every row of the feature matrix so that it sums to 1.0.
+
+    Each row is multiplied by the reciprocal of its sum. Rows whose sum is zero
+    get a scale factor of zero instead, which avoids a division by zero.
+
+    Parameters
+    ----------
+    features : Tensor
+        The node feature tensor of shape (num_nodes, feat_size).
+
+    Returns
+    -------
+    Tensor :
+        The row-normalized node features.
+
+    """
+    # Per-row totals, kept as a column so they broadcast against ``features``
+    totals = torch.sum(features, dim=1, keepdim=True)
+
+    # Reciprocal of each total; rows that sum to zero are given a factor of 0
+    scale = torch.where(totals == 0, torch.zeros_like(totals), 1.0 / totals)
+
+    return features * scale
+
+
+def get_node_norms(graph: StaticGraph) -> Tensor:
+    r"""Compute the per-node ``degree ** -0.5`` normalization factors of a graph.
+
+    Degrees come from ``graph.weighted_in_degrees()`` cast to int32; infinite factors become 0.
+
+    Parameters
+    ----------
+    graph : StaticGraph
+        The static graph object.
+
+    Returns
+    -------
+    Tensor :
+        A tensor of shape (num_nodes, 1) on the default device holding the factors.
+
+    """
+    in_degrees = torch.from_numpy(graph.weighted_in_degrees())
+    factors = in_degrees.type(torch.int32).pow(-0.5)
+    # A zero degree yields inf above; such nodes get a factor of 0 instead
+    factors[torch.isinf(factors)] = 0
+    return to_default_device(factors).unsqueeze(1)