Skip to content

add aml example notebook #2186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
399 changes: 399 additions & 0 deletions aml.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,399 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "42794c9f-3ad7-43ce-87f7-fc1e0b1ba270",
"metadata": {},
"outputs": [],
"source": [
"OPENAI_API_KEY=\"\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "10eca9e8-4b06-43d8-bef5-0f0535ddf180",
"metadata": {},
"outputs": [],
"source": [
"from pydantic import BaseModel, Field\n",
"from typing import Literal\n",
"from openai import OpenAI\n",
"import instructor\n",
"from raphtory import PersistentGraph"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "44704177-8344-4dde-a402-4994837a854b",
"metadata": {},
"outputs": [],
"source": [
"def expand_from_subgraph(nodes, limit=10):\n",
" return nodes + [neighbour.name for node in nodes for neighbour in list(g.node(node).neighbours)[0:limit]]\n",
"\n",
"g = PersistentGraph.load_from_file(\"/tmp/aml/graphs/master_graph\")\n",
" \n",
"reports = [node.name for node in g.nodes if node.properties.get(\"type\") == \"Report\"]\n",
"reports_and_related = expand_from_subgraph(reports)\n",
"\n",
"suspicious = [\"Polux Management Lp\", \"Lcm Alliance Llp\", \"Riverlane Llp\", \"Larkstone Limited\"]\n",
"suspicious_and_related = expand_from_subgraph(suspicious)\n",
"\n",
"g = g.subgraph(reports_and_related + suspicious_and_related).materialize()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "73fbf7e8-d7c3-4b82-96c2-eb19eb1f026b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Hilux Services Lp sent $2000000 to Geys Sirajli\n",
"\n"
]
}
],
"source": [
"def get_embeddings(strings, model=\"text-embedding-3-small\"):\n",
" client = OpenAI(api_key=OPENAI_API_KEY)\n",
" return [client.embeddings.create(input=text, model=model).data[0].embedding for text in strings]\n",
"\n",
"node_document = \"\"\"\n",
"{% if properties.type == \"Company\" %}\n",
"{{ name }} is a company with the following details:\n",
"Employee count: {{ properties.employeeCount}}\n",
"Account: {{ properties.account}}\n",
"Location: {{ properties.location}}\n",
"Jurisdiction: {{ properties.jurisdiction}}\n",
"Partnerships: {{ properties.partnerships}}\n",
"{% endif %}\n",
"{% if properties.type == \"Person\" %}\n",
"{{ name }} is a director with the following details:\n",
"Age: {{ properties.age }}\n",
"Mobile: {{ properties.mobile }}\n",
"Home address: {{ properties.homeAddress }}\n",
"Email: {{ properties.email }}\n",
"{% endif %}\n",
"{% if properties.type == \"Report\" %}\n",
"{{name}} is a suspicious activity report with the following content:\n",
"{{ properties.document }}\n",
"{% endif %}\n",
"\"\"\"\n",
"\n",
"edge_document = \"\"\"\n",
"{% if layers[0] == \"report\" %}\n",
"{{ src.name }} was raised against {{ dst.name}}\n",
"{% elif layers[0] == \"director\" %}\n",
"{{ dst.name }} is a director of {{ src.name }}\n",
"{% else %}\n",
"{{ src.name }} transferred ${{ properties.amount_usd }} to {{ dst.name }}\n",
"{% endif %}\n",
"\"\"\"\n",
"\n",
"v = g.vectorise(get_embeddings, nodes=node_document, edges=edge_document, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "a60372cc-7a76-4e64-88fe-e0e52c418bcb",
"metadata": {},
"outputs": [],
"source": [
"client = instructor.from_openai(OpenAI(api_key=OPENAI_API_KEY))\n",
"\n",
"def send_query_with_docs(query: str, selection):\n",
" formatted_docs = \"\\n\".join(doc.content for doc in selection.get_documents())\n",
" instructions = f\"You are a helpful assistant. Answer the user question using the following context:\\n{formatted_docs}\"\n",
" response = client.responses.create(\n",
" model=\"o4-mini\",\n",
" instructions=instructions,\n",
" input=query,\n",
" )\n",
" print(response.output_text)\n",
"\n",
"def default_pipeline(query: str):\n",
" print(\">>> Using the DEFAULT pipeline\")\n",
" \n",
" # We just use regular similarity search plus expansion by similarity in the default pipeline\n",
" s = v.entities_by_similarity(query, limit=10)\n",
" s.expand_entities_by_similarity(query, limit=10)\n",
" \n",
" send_query_with_docs(query, s)\n",
"\n",
"def aml_pipeline(query: str):\n",
" print(\">>> Using the AML pipeline\")\n",
" \n",
" # We start by including all the suspicious activity reports in this case\n",
" reports = [node for node in g.nodes if node.properties.get(\"type\") == \"Report\"]\n",
" s = v.empty_selection()\n",
" s.add_nodes(reports)\n",
" s.expand_entities_by_similarity(query, limit=10)\n",
"\n",
" # We make sure to include the largest money transfers in this subset of the network\n",
" additional_edges = [edge for node in s.nodes() for edge in node.edges]\n",
" largest_transfers = sorted(additional_edges, key=lambda edge: edge.properties.get(\"amount_usd\"))[-10:]\n",
" transfer_parties = [node for edge in largest_transfers for node in [edge.src, edge.dst]]\n",
" s.add_edges([(edge.src, edge.dst) for edge in largest_transfers])\n",
" s.add_nodes(transfer_parties)\n",
" \n",
" send_query_with_docs(query, s)\n",
"\n",
"def kyc_pipeline(query: str):\n",
" print(\">>> Using the KYC pipeline\")\n",
" \n",
" # In this case we know we are targeting a node, that might be a person or a company:\n",
" s = v.nodes_by_similarity(query, limit=3)\n",
" \n",
" # then instead of expanding by similarity we include all the context\n",
" s.expand(hops=2)\n",
" \n",
" send_query_with_docs(query, s)\n",
"\n",
"class ClassificationResponse(BaseModel):\n",
" \"\"\"\n",
" A few-shot example of agent classification:\n",
"\n",
" Examples:\n",
" - \"Write a KYC report for Metastar Invest Llp\": KYC\n",
" - \"Can you find any activities suspicious of money laundering for Esmira Jamalkhanova?\": AML\n",
" - \"Tell me what you know about Larkstone Limited\": DEFAULT\n",
" \"\"\"\n",
" agent: Literal[\"KYC\", \"AML\", \"DEFAULT\"] = Field(\n",
" ...,\n",
" description=\"The agent to use to answer the question. KYC when the user asks for some KYC report. AML when the user wants to identify money laundering patterns or suspicious activity. Otherwise, use the agent DEFAULT\",\n",
" )\n",
"\n",
"def pipeline(query: str):\n",
" response = client.chat.completions.create(\n",
" model=\"o4-mini\",\n",
" response_model=ClassificationResponse,\n",
" messages=[{\"role\": \"user\", \"content\": query}]\n",
" )\n",
" if response.agent == \"AML\":\n",
" aml_pipeline(query)\n",
" elif response.agent == \"KYC\":\n",
" kyc_pipeline(query)\n",
" else:\n",
" default_pipeline(query)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "cee423bb-cc21-44bf-b9ef-9d60a402e988",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
">>> Using the AML pipeline\n",
"5\n",
"6\n",
"Based on the information and SARs you’ve provided, the following stand out as suspicious:\n",
"\n",
"1. Hilux Services Lp → Geys Sirajli \n",
" – Amount: USD 2,000,000 \n",
" – Hilux Services Lp is a UK-jurisdiction LLP with no employees or partnerships (i.e. likely a shell). \n",
" – Geys Sirajli (director at Stellar Innovations) is already the subject of SAR 1 for “unexplained wealth…potential Russian Laundromat links.” \n",
" – A single, large inbound payment like this is inconsistent with his known business activities and flagged business rationale.\n",
"\n",
"2. Metastar Invest Lp → Esmira Jamalkhanova \n",
" – Amount: EUR 248,884 \n",
" – Metastar Invest Llp is also a UK LLP with no employees or partnerships (another potential shell). \n",
" – Esmira Jamalkhanova (director at Nexus Electronics) is subject of SAR 2 for “transactions involving sanctioned entities…links to organized crime.” \n",
" – A sudden, sizeable credit from a shell-type entity fits the pattern of her flagged behavior.\n",
"\n",
"No comparable transaction data were provided for Olivia Patel or Carlos Ramirez, though both are separately flagged in SAR 0 and SAR 3 for unusually large or complex transactions. \n",
"\n",
"Next steps/recommendations: \n",
"• Conduct enhanced due diligence on Hilux Services Lp and Metastar Invest Llp (ownership, beneficial owners, business purpose). \n",
"• Verify source of funds and economic rationale for each transaction. \n",
"• Screen Sirajli and Jamalkhanova, and the sending entities, against sanctions and PEP lists. \n",
"• Consider filing follow-up SARs or initiating formal investigations.\n"
]
}
],
"source": [
"# AML example\n",
"pipeline(\"Can you find any suspicious activity\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "5c0bf404-9167-45b5-aceb-d4b42b509a87",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
">>> Using the DEFAULT pipeline\n",
"Hilux Services Lp sent Geys Sirajli 2,000,000 USD.\n"
]
}
],
"source": [
"pipeline(\"How much money has Hilux Services Lp sent to Geys Sirajli\")"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "7d65eec1-b283-4cc6-aa7e-ec2b0b923179",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
">>> Using the KYC pipeline\n",
"KYC REPORT: HILUX SERVICES LP \n",
"Date: 2025-07-11 \n",
"\n",
"1. Company Identity \n",
"• Legal Name: Hilux Services Lp \n",
"• Jurisdiction: United Kingdom \n",
"• Bank Account: EE12 3300 3335 1615 0001 \n",
"• Employee Count: None declared \n",
"• Partnerships/Affiliations: None declared \n",
"\n",
"2. Ownership & Management \n",
"• No directors or ultimate beneficial owners (UBOs) are on record. \n",
"• No employees or local footprint to support declared transactional volumes—ownership and governance information is incomplete. \n",
"\n",
"3. Business Profile & Purpose \n",
"• No stated business activity, address or operating premises. \n",
"• No trade or service description has been provided to justify large cross-border flows. \n",
"\n",
"4. Transaction Overview (Jan–Jun 2025) \n",
"Total Outflows: ~USD 5.7 million + EUR 1.9 million + CHF 53,000+ \n",
"Total Inflows: ~USD 1.4 million + EUR 170,000+ \n",
"\n",
"Key counterparty flows: \n",
"• Polux Management Lp (UK): Outflows USD 673,000; multiple EUR/USD/CHF transfers; round-trip: Hilux→Polux→Riverlane→Hilux \n",
"• KG Commerce Llp (UK): USD 2,000,000; Polux→KG Commerce USD 650,000 \n",
"• Geys Sirajli (Director, London): USD 2,000,000 \n",
"• China Intl Industry Trade Ltd (China): USD 38,400; Polux→China USD 18,000 \n",
"• N.r.g. Research And Advising Fze (UAE): EUR 539,540 \n",
"• Kronospan Bulgaria Eood (Bulgaria): EUR 52,490; Polux→Kronospan EUR 27,870 \n",
"• La Prairie Group Ag (Switzerland): Polux→CHF 69,931; Hilux→CHF 37,561 \n",
"• Grohe Ag, Hansgrohe Ag (Germany): EUR 54,632; EUR 53,857; Polux→Grohe EUR 44,254; Polux→Hansgrohe EUR 95,189 \n",
"• Eduard Lintner, Umit Uslu (both individual directors): mixed small payments \n",
"• Faberlex Lp (UK): Hilux→USD 899,700; Faberlex→Hilux small USD 6,000 \n",
"…plus numerous sub-€100 transfers. \n",
"\n",
"5. AML/CTF Risk Assessment & Red Flags \n",
"• Lack of declared business activity vs. very high-value, frequent cross-border transactions. \n",
"• Round-trip flows involving related parties (Polux, Riverlane) suggest possible layering or circular money movement. \n",
"• Transactions span high-risk jurisdictions (China, UAE, Turkey, Bulgaria). \n",
"• Payments to individuals with no clear service agreements (e.g., Geys Sirajli, Eduard Lintner, Umit Uslu). \n",
"• No transparency on UBOs, directors or source of funds. \n",
"• No prior Suspicious Activity Reports (SARs) on Hilux, but similar patterns flagged in SAR 3 on an unrelated individual. \n",
"\n",
"6. Recommendations \n",
"a) Enhanced Due Diligence: \n",
" – Obtain certified copies of corporate incorporation documents, shareholder registry, board minutes. \n",
" – Identify and verify all UBOs and directors; collect proof of address and source of wealth. \n",
" – Request a detailed business plan, contracts or invoices supporting major transactions. \n",
"\n",
"b) Transaction Monitoring: \n",
" – Implement real-time alerts for transactions above USD/EUR 50,000 and for round-trip flows. \n",
" – Conduct periodic reviews of counterparties’ risk profiles, esp. those in higher-risk jurisdictions. \n",
"\n",
"c) Suspicious Activity Considerations: \n",
" – File an SAR if the business purpose and source of funds cannot be satisfactorily explained. \n",
" – Consider account restrictions or enhanced review pending resolution of ownership and activity questions. \n",
"\n",
"Prepared by: [Name], AML/KYC Analyst \n",
"Financial Institution: [Your Bank] \n",
"Date: 2025-07-11\n"
]
}
],
"source": [
"pipeline(\"Please write a KYC report about Hilux Services Lp\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "c002405b-68c0-4401-b339-82418fee3099",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
">>> Using the DEFAULT pipeline\n",
"Here’s a consolidated profile of Metastar Invest LLP based on the information provided:\n",
"\n",
"1. Basic Company Details \n",
" • Legal form: Limited Liability Partnership (LLP) \n",
" • Jurisdiction: United Kingdom \n",
" • Number of employees: None recorded \n",
" • Registered location: Not specified \n",
" • Known partnerships: None recorded \n",
" • Bank account (IBAN): EE77 3300 3334 8704 0004 \n",
"\n",
"2. Recorded Fund Transfers \n",
" A. Outgoing (Metastar Invest LLP → …) \n",
" – 1,200,000 USD → Metastar Invest LLP (internal/self-transfer) \n",
" – 63 EUR → Larkstone Limited \n",
" – 100,300 USD → LCM Alliance LLP \n",
" – 248,884 EUR → Esmira Jamalkhanova \n",
" – 29,700 USD → KG Commerce LLP \n",
" – 52,627 EUR → Eduard Lintner \n",
"\n",
" B. Incoming (… → Metastar Invest LLP) \n",
" – 634,372 USD ← Larkstone Limited \n",
" – 56,090 USD ← KG Commerce LLP \n",
" – 177,332 USD ← LCM Alliance LLP \n",
"\n",
"3. Summary of Transaction Volumes \n",
" • Total USD outflow: 1,330,000 USD (including the 1.2 M internal transfer) \n",
" • Total EUR outflow: 301,574 EUR \n",
" • Total USD inflow: 867,794 USD \n",
" • Total EUR inflow: 0 EUR \n",
"\n",
"No other assets, locations, employees or partnerships are on record.\n"
]
}
],
"source": [
"pipeline(\"tell me everything you know about Metastar Invest Llp\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pometry-ui",
"language": "python",
"name": "pometry-ui"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading