diff --git a/examples/kge-transe.ipynb b/examples/kge-transe.ipynb new file mode 100644 index 000000000..33ca8a3b7 --- /dev/null +++ b/examples/kge-transe.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "b68543d5e71ceeb2", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from graphdatascience import GraphDataScience" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e685a47b61f968ef", + "metadata": {}, + "outputs": [], + "source": [ + "NEO4J_URI = \"bolt://localhost:7687\"\n", + "NEO4J_DB = \"neo4j\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": {}, + "outputs": [], + "source": [ + "if os.environ.get(\"NEO4J_USER\") and os.environ.get(\"NEO4J_PASSWORD\"):\n", + " NEO4J_AUTH = (\n", + " os.environ.get(\"NEO4J_USER\"),\n", + " os.environ.get(\"NEO4J_PASSWORD\"),\n", + " )\n", + "gds = GraphDataScience(NEO4J_URI, auth=NEO4J_AUTH, database=NEO4J_DB)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a14f06aebe1ed34c", + "metadata": {}, + "outputs": [], + "source": [ + "_ = gds.run_cypher(\n", + " \"\"\"\n", + " CREATE\n", + " (dan:Person {name: 'Dan'}),\n", + " (annie:Person {name: 'Annie'}),\n", + " (matt:Person {name: 'Matt'}),\n", + " (jeff:Person {name: 'Jeff'}),\n", + " (brie:Person {name: 'Brie'}),\n", + " (elsa:Person {name: 'Elsa'}),\n", + "\n", + " (cookies:Product {name: 'Cookies'}),\n", + " (tomatoes:Product {name: 'Tomatoes'}),\n", + " (cucumber:Product {name: 'Cucumber'}),\n", + " (celery:Product {name: 'Celery'}),\n", + " (kale:Product {name: 'Kale'}),\n", + " (milk:Product {name: 'Milk'}),\n", + " (chocolate:Product {name: 'Chocolate'}),\n", + "\n", + " (dan)-[:BUYS {amount: 1.2}]->(cookies),\n", + " (dan)-[:BUYS {amount: 3.2}]->(milk),\n", + " (dan)-[:BUYS {amount: 2.2}]->(chocolate),\n", + "\n", + " (annie)-[:BUYS {amount: 1.2}]->(cucumber),\n", + " (annie)-[:BUYS {amount: 
3.2}]->(milk),\n", + " (annie)-[:BUYS {amount: 3.2}]->(tomatoes),\n", + "\n", + " (matt)-[:BUYS {amount: 3}]->(tomatoes),\n", + " (matt)-[:BUYS {amount: 2}]->(kale),\n", + " (matt)-[:BUYS {amount: 1}]->(cucumber),\n", + "\n", + " (jeff)-[:BUYS {amount: 3}]->(cookies),\n", + " (jeff)-[:BUYS {amount: 2}]->(milk),\n", + "\n", + " (brie)-[:BUYS {amount: 1}]->(tomatoes),\n", + " (brie)-[:BUYS {amount: 2}]->(milk),\n", + " (brie)-[:BUYS {amount: 2}]->(kale),\n", + " (brie)-[:BUYS {amount: 3}]->(cucumber),\n", + " (brie)-[:BUYS {amount: 0.3}]->(celery),\n", + "\n", + " (elsa)-[:BUYS {amount: 3}]->(chocolate),\n", + " (elsa)-[:BUYS {amount: 3}]->(milk)\n", + " \"\"\"\n", + ")\n", + "node_projection = [\"Person\", \"Product\"]\n", + "relationship_projection = {\"BUYS\": {\"orientation\": \"UNDIRECTED\", \"properties\": \"amount\"}}\n", + "G, result = gds.graph.project(\"purchases222\", node_projection, relationship_projection)\n", + "print(f\"The projection took {result['projectMillis']} ms\")\n", + "print(f\"Graph '{G.name()}' node count: {G.node_count()}\")\n", + "print(f\"Graph '{G.name()}' node labels: {G.node_labels()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e049480efa34e8ca", + "metadata": {}, + "outputs": [], + "source": [ + "gds.model.transe.train(\n", + " G,\n", + " proportions=[0.8, 0.1, 0.1],\n", + " embedding_dimension=50,\n", + " batch_size=512,\n", + " epochs=100,\n", + " optimizer=\"Adam\",\n", + " optimizer_kwargs={\"lr\": 0.01, \"weight_decay\": 5e-4},\n", + " # loss\n", + ")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/graphdatascience/model/model_proc_runner.py b/graphdatascience/model/model_proc_runner.py index 20e5dd6e3..d06782526 100644 --- a/graphdatascience/model/model_proc_runner.py +++ b/graphdatascience/model/model_proc_runner.py @@ -1,3 +1,4 @@ +import json from typing import Any, Dict, List, Optional, Tuple from pandas 
@compatible_with("train", min_inclusive=ServerVersion(2, 5, 0))
@client_only_endpoint("gds.model.transe")
def train(
    self,
    G: Graph,
    proportions: List[float],
    embedding_dimension: int,
    batch_size: int,
    epochs: int,
    optimizer: str,
    optimizer_kwargs: Dict[str, Any],
) -> int:
    """Write a TransE training configuration for ``G`` to a JSON file.

    As written, this method does not start any training itself: it only
    collects its arguments into a configuration dictionary and dumps that
    dictionary to ``/tmp/kge-train-config-dump.json``.

    Args:
        G: Graph object; only its name (``G.name()``) is recorded.
        proportions: Stored under ``"proportions"``. Presumably the
            train/validation/test split fractions -- confirm with the
            consumer of the dumped file.
        embedding_dimension: Stored under ``"embedding_dimension"``.
        batch_size: Stored under ``"batch_size"``.
        epochs: Stored under ``"num_epochs"`` (note the different key).
        optimizer: Stored under ``"optimizer"``.
        optimizer_kwargs: Stored under ``"optimizer_kwargs"``.

    Returns:
        Always ``0``.

    Raises:
        OSError: If the configuration file cannot be opened for writing.
        TypeError: If any argument is not JSON-serialisable.
    """
    # TODO: a ``loss`` option is not exposed yet; add it to the signature
    # and to ``config`` once it is supported.
    config = {
        "scoring_function": "TransE",
        "proportions": proportions,
        "embedding_dimension": embedding_dimension,
        "num_epochs": epochs,
        "graph_name": G.name(),
        "batch_size": batch_size,
        "optimizer": optimizer,
        "optimizer_kwargs": optimizer_kwargs,
    }
    config_path = "/tmp/kge-train-config-dump.json"

    # The context manager guarantees the file is flushed and closed even if
    # serialisation fails part-way through.
    with open(config_path, "w", encoding="utf-8") as config_file:
        json.dump(config, config_file)

    # Report success only after the dump has actually been written.
    print(f"Dumped to {config_path}")
    return 0