From c7fac572eb3806fe7b648461fc032c8fbecc9b98 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Thu, 19 Sep 2024 14:29:48 +0200 Subject: [PATCH] fix fix sparql query function --- notebooks/test_expasy_chat.ipynb | 1027 ++---------------------------- src/sparql_llm/api.py | 29 +- 2 files changed, 70 insertions(+), 986 deletions(-) diff --git a/notebooks/test_expasy_chat.ipynb b/notebooks/test_expasy_chat.ipynb index 173bbd7..1910921 100644 --- a/notebooks/test_expasy_chat.ipynb +++ b/notebooks/test_expasy_chat.ipynb @@ -17,954 +17,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "๐Ÿงช Testing 13 queries\n", - "\n", - "๐Ÿง  Testing model: gpt-4o-mini\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โ›”๏ธ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โ›”๏ธ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โŒ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โŒ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 2\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 0, Different results: 0, No results: 2, Error: 0\n", - "โœ… 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 4\n", - "๐ŸŽฏ RAG without validation - Success: 2, Different results: 0, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 2, Error: 0\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โœ… 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "โœ… 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "โœ… 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "โœ… 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 4\n", - "๐ŸŽฏ RAG without validation - Success: 4, Different results: 0, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 0, No results: 2, Error: 0\n", - "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n", - "โ›”๏ธ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "\n", - "Results mismatch. Ref: 710 != gen: 413\n", - "\n", - "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n", - "โ›”๏ธ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "\n", - "Results mismatch. Ref: 710 != gen: 413\n", - "\n", - "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n", - "โŒ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "\n", - "Results mismatch. Ref: 710 != gen: 413\n", - "\n", - "Missing from reference: {'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n", - "โŒ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "\n", - "Results mismatch. Ref: 710 != gen: 413\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 6\n", - "๐ŸŽฏ RAG without validation - Success: 4, Different results: 2, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 2, No results: 2, Error: 0\n", - "Missing from reference: {'hgncSymbol': {'type': 'literal', 'value': 'AKT3'}, 'diseaseLabel': {'type': 'literal', 'value': 'AKT3 is a key modulator of several tumors like melanoma, glioma and ovarian cancer. Active AKT3 increases progressively during melanoma tumor progression with highest levels present in advanced-stage metastatic melanomas. Promotes melanoma tumorigenesis by decreasing apoptosis. Plays a key role in the genesis of ovarian cancers through modulation of G2/M phase transition. With AKT2, plays a pivotal role in the biology of glioblastoma.'}}\n", - "โ›”๏ธ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 6308\n", - "\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โ›”๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โŒ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โŒ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 4, Different results: 3, No results: 3, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 2, No results: 4, Error: 0\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โ›”๏ธ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "\n", - "Results mismatch. Ref: 10 != gen: 0\n", - "\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โ›”๏ธ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "\n", - "Results mismatch. Ref: 10 != gen: 0\n", - "\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โŒ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "\n", - "Results mismatch. Ref: 10 != gen: 0\n", - "\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โŒ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "\n", - "Results mismatch. Ref: 10 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 10\n", - "๐ŸŽฏ RAG without validation - Success: 4, Different results: 3, No results: 5, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 2, No results: 6, Error: 0\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โŒ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โŒ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 12\n", - "๐ŸŽฏ RAG without validation - Success: 4, Different results: 3, No results: 7, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 2, No results: 8, Error: 0\n", - "โœ… 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n", - "Missing from reference: {'orthologCluster': {'type': 'literal', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n", - "โ›”๏ธ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "โœ… 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n", - "โœ… 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 14\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 7, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 6, Different results: 2, No results: 8, Error: 0\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โ›”๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โ›”๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โŒ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โŒ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 16\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 9, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 6, Different results: 2, No results: 10, Error: 0\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โ›”๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โ›”๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โŒ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โŒ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 17\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 11, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 6, Different results: 2, No results: 12, Error: 0\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โ›”๏ธ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โ›”๏ธ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โŒ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โŒ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 19\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 13, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 6, Different results: 2, No results: 14, Error: 0\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n", - "โ›”๏ธ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n", - "โ›”๏ธ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n", - "โŒ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n", - "โŒ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 21\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 5, No results: 14, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 6, Different results: 3, No results: 15, Error: 0\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "โœ… 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 5, Error: 21\n", - "๐ŸŽฏ RAG without validation - Success: 7, Different results: 5, No results: 14, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 8, Different results: 3, No results: 15, Error: 0\n", - "\n", - "๐Ÿง  Testing model: Llama3.1 8B\n", - "\n", - "โ›”๏ธ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0A%0ASELECT+DISTINCT+%3Fprotein%0AWHERE+%7B%0A++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++up%3Aorganism+taxon%3A9606+%3B%0A+++++++++++up%3AencodedBy+%3Fgene+.%0A++%3Fgene+skos%3AprefLabel+%22LCT%22+.%0A%7D\n", - "โ›”๏ธ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0A%0ASELECT+DISTINCT+%3Fprotein%0AWHERE+%7B%0A++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++up%3Aorganism+taxon%3A9606+%3B%0A+++++++++++up%3AencodedBy+%3Fgene+.%0A++%3Fgene+skos%3AprefLabel+%22LCT%22+.%0A%7D\n", - "โœ… 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n", - "โœ… 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 2\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 0, Error: 2\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 0, Error: 0\n", - "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n", - "โ›”๏ธ 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n", - "โ›”๏ธ 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n", - "โŒ 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'sequence': {'type': 'literal', 'value': 'GPQQENMMEE'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n", - "โŒ 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 4\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 2, Error: 2\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 2, Error: 0\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โ›”๏ธ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 0\n", - "\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โ›”๏ธ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 0\n", - "\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โŒ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 0\n", - "\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โŒ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 6\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 4, Error: 2\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 4, Error: 0\n", - "โ›”๏ธ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "list index out of range\n", - "โ›”๏ธ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "list index out of range\n", - "โŒ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "list index out of range\n", - "โŒ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "list index out of range\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 4, Error: 4\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 4, Error: 2\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โ›”๏ธ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โ›”๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โŒ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โŒ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 0, Error: 10\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 6, Error: 4\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 6, Error: 2\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โ›”๏ธ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "400 Client Error: Bad Request for url: https://www.bgee.org/sparql/?query=%23+https%3A%2F%2Fwww.bgee.org%2Fsparql%2F%0APREFIX+genex%3A+%3Chttp%3A%2F%2Fpurl.org%2Fgenex%23%3E%0APREFIX+obo%3A+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0ASELECT+DISTINCT+%3Fgene+%3Fconfidence+%3Ffdr%0AWHERE+%7B%0A++%3Fgene+a+genex%3AExpression+%3B%0A++++genex%3AhasConfidenceLevel+%3Fconfidence+%3B%0A++++genex%3AhasFDRpvalue+%3Ffdr+.%0A++FILTER%28lang%28%3Fconfidence%29+%3D+%22en%22%29%0A++FILTER%28lang%28%3Ffdr%29+%3D+%22en%22%29%0A++LIMIT+10%0A%7D\n", - "โ›”๏ธ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "400 Client Error: Bad Request for url: https://www.bgee.org/sparql/?query=%23+https%3A%2F%2Fwww.bgee.org%2Fsparql%2F%0APREFIX+genex%3A+%3Chttp%3A%2F%2Fpurl.org%2Fgenex%23%3E%0APREFIX+obo%3A+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0ASELECT+DISTINCT+%3Fgene+%3Fconfidence+%3Ffdr%0AWHERE+%7B%0A++%3Fgene+a+genex%3AExpression+%3B%0A++++genex%3AhasConfidenceLevel+%3Fconfidence+%3B%0A++++genex%3AhasFDRpvalue+%3Ffdr+.%0A++FILTER%28lang%28%3Fconfidence%29+%3D+%22en%22%29%0A++FILTER%28lang%28%3Ffdr%29+%3D+%22en%22%29%0A++LIMIT+10%0A%7D\n", - "โŒ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "400 Client Error: Bad Request for url: https://www.bgee.org/sparql/?query=%23+https%3A%2F%2Fwww.bgee.org%2Fsparql%2F%0APREFIX+genex%3A+%3Chttp%3A%2F%2Fpurl.org%2Fgenex%23%3E%0APREFIX+obo%3A+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+DISTINCT+%3Fgene+%3Fconfidence+%3Ffdr%0AWHERE+%0A%7B%0A++FILTER%28lang%28%3Fconfidence%29+%3D+%22en%22%29%0A++FILTER%28lang%28%3Ffdr%29+%3D+%22en%22%29%0A++%7B+%3Fgene+a+genex%3AExpression+%3B%0A++++++genex%3AhasConfidenceLevel+%3Fconfidence+%3B%0A++++++genex%3AhasFDRpvalue+%3Ffdr+.%0A++%7D+LIMIT+10%0A%7D\n", - "โŒ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "HTTPSConnectionPool(host='www.bgee.org', port=443): Read timed out. (read timeout=200)\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 10\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 6, Error: 6\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 6, Error: 4\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โŒ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โŒ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 12\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 6\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 8, Error: 4\n", - "โ›”๏ธ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "400 Client Error: Bad Request for url: https://sparql.omabrowser.org/sparql/?query=%23+https%3A%2F%2Fsparql.omabrowser.org%2Fsparql%2F%0APREFIX+orth%3A+%3Chttp%3A%2F%2Fpurl.org%2Fnet%2Forth%23%3E%0ASELECT+%3ForthologCluster+%3FdatasetURI%0AWHERE%0A%7B%0A++%3ForthologCluster+a+orth%3AOrthologsCluster+%3B%0A++++++++++++++++++++orth%3AinDataset+%3Fdataset+.%0A++%3Fdataset+a+orth%3AOrthologyDataset+%3B%0A+++++++++++%3Chttp%3A%2F%2Fpurl.org%2Fpav%2Fversion%3E+%3Fversion+.%0A++FILTER+%28regex%28%3Fversion%2C+%22All.Jul2023%22%29%29%0A++LIMIT+20%0A%7D\n", - "โ›”๏ธ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "400 Client Error: Bad Request for url: https://sparql.omabrowser.org/sparql/?query=%23+https%3A%2F%2Fsparql.omabrowser.org%2Fsparql%2F%0APREFIX+orth%3A+%3Chttp%3A%2F%2Fpurl.org%2Fnet%2Forth%23%3E%0ASELECT+%3ForthologCluster+%3FdatasetURI%0AWHERE%0A%7B%0A++%3ForthologCluster+a+orth%3AOrthologsCluster+%3B%0A++++++++++++++++++++orth%3AinDataset+%3Fdataset+.%0A++%3Fdataset+a+orth%3AOrthologyDataset+%3B%0A+++++++++++%3Chttp%3A%2F%2Fpurl.org%2Fpav%2Fversion%3E+%3Fversion+.%0A++FILTER+%28regex%28%3Fversion%2C+%22All.Jul2023%22%29%29%0A++LIMIT+20%0A%7D\n", - "โŒ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "Invalid URL 'None': No scheme supplied. Perhaps you meant https://None?\n", - "Missing from reference: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'literal', 'value': 'Jul2023'}}\n", - "โŒ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 1048576\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 14\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 8\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 1, No results: 8, Error: 5\n", - "โ›”๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+taxon%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Ftaxonomy%2F%3E%0A%0ASELECT+%3Ftaxon+%3Fstrain+%3Fname%0AWHERE+%7B%0A++%3Ftaxon+a+up%3ATaxon+%3B%0A+++++++++up%3AscientificName+%22Escherichia+coli%22+%3B%0A+++++++++up%3Arank+up%3ASpecies+%3B%0A+++++++++up%3AhasStrain+%3Fstrain+.%0A++%3Fstrain+a+up%3AStrain+%3B%0A++++++++++up%3Aname+%3Fname+.%0A++FILTER+%28+%3Ftaxon+%3D+%3Fstrain+%29%0A++LIMIT+20%0A%7D\n", - "โ›”๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+taxon%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Ftaxonomy%2F%3E%0A%0ASELECT+%3Ftaxon+%3Fstrain+%3Fname%0AWHERE+%7B%0A++%3Ftaxon+a+up%3ATaxon+%3B%0A+++++++++up%3AscientificName+%22Escherichia+coli%22+%3B%0A+++++++++up%3Arank+up%3ASpecies+%3B%0A+++++++++up%3AhasStrain+%3Fstrain+.%0A++%3Fstrain+a+up%3AStrain+%3B%0A++++++++++up%3Aname+%3Fname+.%0A++FILTER+%28+%3Ftaxon+%3D+%3Fstrain+%29%0A++LIMIT+20%0A%7D\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โŒ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โŒ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 16\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 10\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 1, No results: 10, Error: 5\n", - "โ›”๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+upa%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fannotation%2F%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++SERVICE+%3Chttps%3A%2F%2Fsparql.uniprot.org%2Fsparql%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+.%0A++++%3Fprotein+up%3Aannotation%2Fup%3APathway_Annotation+%3Fpathway+.%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++%3Fpathway+skos%3AprefLabel+%3FpathwayLabel+.%0A++++%3Fpathway+upa%3AhasComponent+%3Fcomponent+.%0A++++%3Fcomponent+upa%3AhasComponent+%3Fglycolysis+.%0A++++%3Fglycolysis+rdfs%3Alabel+%22glycolysis%22+.%0A++%7D%0A++FILTER+%28regex%28%3FpathwayLabel%2C+%22glycolysis%22%29%29%0A++LIMIT+20%0A%7D\n", - "โ›”๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "400 Client Error: Bad Request for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+upa%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fannotation%2F%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++SERVICE+%3Chttps%3A%2F%2Fsparql.uniprot.org%2Fsparql%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+.%0A++++%3Fprotein+up%3Aannotation%2Fup%3APathway_Annotation+%3Fpathway+.%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++%3Fpathway+skos%3AprefLabel+%3FpathwayLabel+.%0A++++%3Fpathway+upa%3AhasComponent+%3Fcomponent+.%0A++++%3Fcomponent+upa%3AhasComponent+%3Fglycolysis+.%0A++++%3Fglycolysis+rdfs%3Alabel+%22glycolysis%22+.%0A++%7D%0A++FILTER+%28regex%28%3FpathwayLabel%2C+%22glycolysis%22%29%29%0A++LIMIT+20%0A%7D\n", - "โŒ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++SERVICE+%3Chttps%3A%2F%2Fsparql.uniprot.org%2Fsparql%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+%3B%0A+++++++++++++up%3Aidentifier+%3FproteinURI+.%0A++++%3Fprotein+up%3Aannotation%2Fup%3APathway_Annotation+%3Fpathway+.%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++FILTER+%28regex%28%3FpathwayLabel%2C+%22glycolysis%22%29+%7C%7C+regex%28%3FpathwayLabel%2C+%22Glycolysis%22%29+%7C%7C+regex%28%3FpathwayLabel%2C+%22glycolytic+pathway%22%29+%7C%7C+regex%28%3FpathwayLabel%2C+%22GLYCOLYTIC+PATHWAY%22%29%29%0A++%7D%0A++LIMIT+20%0A%7D\n", - "โŒ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+upa%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fannotation%2F%3E%0A%0ASELECT+DISTINCT+%3FproteinURI+%3FproteinLabel+%3FpathwayLabel%0AWHERE+%7B%0A++%3Chttps%3A%2F%2Fsparql.uniprot.org%2Funiprot%3E+%7B%0A++++%3Fprotein+a+up%3AProtein+%3B%0A+++++++++++++up%3ArecommendedName%2Fup%3AfullName+%3FproteinLabel+.%0A++%7D%0A++%3Chttp%3A%2F%2Fsparql.uniprot.org%2Fpathways%2F%3E+%7B%0A++++%3Fpathway+up%3Asequence+%3Fsequence+.%0A++++%3Fpathway+rdfs%3Alabel+%3FpathwayLabel+.%0A++++%3Fpathway+skos%3AprefLabel+%3FpathwayLabel+.%0A++++%3Fpathway+upa%3AhasComponent+%3Fcomponent+.%0A++++%3Fcomponent+upa%3AhasComponent+%3Fglycolysis+.%0A++++%3Fglycolysis+rdfs%3Alabel+%22glycolysis%22+.%0A++%7D%0A++%3Chttps%3A%2F%2Fglycolysis.org%2Fpathways%2F%3E+%7B%0A++++%3Fpathway+a+%3Fclass+%3B%0A++++%3Fclass+rdfs%3AsubClassOf+%3Chttps%3A%2F%2Fglycolysis.org%2Fpathways%2Fglycolysis%3E+%3B%0A++++%3Fprotein+up%3Aannotation%2Fpathway+%3Fpathway+.%0A++++%3Fprotein+a+%3Fannotation+%3B%0A++++%3Fannotation+rdfs%3AseeAlso+%3Chttps%3A%2F%2Fglycolysis.org%2Fpathways%2Fglycolysis%3E+.%0A++++%3Fprotein+%3Fpdb+up%3Apdb%2Fmolecule+%3FproteinURI+.%0A++%7D++%0A++FILTER+%28regex%28STR%28%3FproteinLabel%29%2C+%22glycolysis%22%29+%26%26+STRstarts%28STR%28%3FproteinURI%29%2C+%22http%3A%2F%2Fpurl.uniprot.org%2Fpdb%2F%22%29%29%0A%7D\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 18\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 8, Error: 12\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 1, No results: 10, Error: 7\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โ›”๏ธ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โ›”๏ธ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โŒ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โŒ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 20\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 10, Error: 12\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 1, No results: 12, Error: 7\n", - "โ›”๏ธ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+%0A++++%3Fprotein+%0A++++%3Fdisease+%0A++++%3Flocation_inside_cell+%0A++++%3Flocation_inside_cell_uri%0AWHERE+%7B%0A++++%3Fprotein+up%3Aannotation+%3FdiseaseAnnotation+.%0A++++%3FdiseaseAnnotation+up%3Adisease%2Fskos%3AprefLabel+%3Fdisease+.%0A++++FILTER+%28regex%28%3Fdisease%2C+%22Alzheimer%22%2C+%22i%22%29%29%0A++++%3Fprotein+up%3Aannotation+%3FsubcellAnnotation+.%0A++++%3FsubcellAnnotation+up%3AlocatedIn%2Fup%3AcellularComponent+%3Fcellcmpt+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell_uri+.%0A++++LIMIT+20%0A%7D\n", - "โ›”๏ธ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "400 Client Error: Bad Request for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+%0A++++%3Fprotein+%0A++++%3Fdisease+%0A++++%3Flocation_inside_cell+%0A++++%3Flocation_inside_cell_uri%0AWHERE+%7B%0A++++%3Fprotein+up%3Aannotation+%3FdiseaseAnnotation+.%0A++++%3FdiseaseAnnotation+up%3Adisease%2Fskos%3AprefLabel+%3Fdisease+.%0A++++FILTER+%28regex%28%3Fdisease%2C+%22Alzheimer%22%2C+%22i%22%29%29%0A++++%3Fprotein+up%3Aannotation+%3FsubcellAnnotation+.%0A++++%3FsubcellAnnotation+up%3AlocatedIn%2Fup%3AcellularComponent+%3Fcellcmpt+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell_uri+.%0A++++LIMIT+20%0A%7D\n", - "Missing from reference: {'location_inside_cell_uri': {'type': 'uri', 'value': 'http://www.uni-mainz.de/FB/Medizin/Anatomie/workshop/EM/EMMVBE.html'}, 'disease': {'type': 'literal', 'value': 'Alzheimer disease 2'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P02649'}, 'location_inside_cell_label': {'type': 'literal', 'value': 'Multivesicular body'}}\n", - "โŒ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 2\n", - "\n", - "โŒ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F%0APREFIX+up%3A+%3Chttp%3A%2F%2Fpurl.uniprot.org%2Fcore%2F%3E%0APREFIX+skos%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2004%2F02%2Fskos%2Fcore%23%3E%0APREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0A%0ASELECT+%0A++++%3Fprotein+%0A++++%3Fdisease+%0A++++%3Flocation_inside_cell+%0A++++%3Flocation_inside_cell_uri%0AWHERE+%7B%0A++++%3Fprotein+up%3Aannotation+%3FdiseaseAnnotation+.%0A++++%3FdiseaseAnnotation+up%3Adisease%2Fskos%3AprefLabel+%3Fdisease+.%0A++++FILTER+%28regex%28%3Fdisease%2C+%22Alzheimer%22%2C+%22i%22%29%29%0A++++%3Fprotein+up%3Aannotation%2Fup%3AcatalyticActivity%2Fup%3AcatalyzedReaction+%3Freaction+.%0A++++%3Fprotein+%3Fannotation+subcellLocation+.%0A++++%3FsubcellLocation+rdfs%3Alabel+%3Flocation_inside_cell+.%0A++++%23+Add+condition+to+identify+location+of+interest%3A%0A++++%3FsubcellLocation+a+up%3ASubcellular_Location_Annotation+.%0A++++%3FsubcellLocation+up%3AlocatedIn%2Fup%3AcellularComponent+%3Fcellcmpt+.%0A++++%3Fcellcmpt+skos%3AprefLabel+%3Flocation_inside_cell+.%0A++++%3Fcellcmpt+up%3Aexact_location_uri+%3Flocation_inside_cell_uri+.%0A++++LIMIT+20%0A%7D\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 22\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 10, Error: 14\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 2, No results: 12, Error: 8\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "โ›”๏ธ 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n", - "\n", - "Results mismatch. Ref: 143 != gen: 0\n", - "\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "โ›”๏ธ 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n", - "\n", - "Results mismatch. Ref: 143 != gen: 0\n", - "\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "โŒ 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n", - "\n", - "Results mismatch. Ref: 143 != gen: 0\n", - "\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "โŒ 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n", - "\n", - "Results mismatch. Ref: 143 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 24\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 12, Error: 14\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 2, No results: 14, Error: 8\n", - "\n", - "๐Ÿง  Testing model: Mixtral 8x22B\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โ›”๏ธ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โ›”๏ธ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โŒ 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โŒ 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs\n", - "\n", - "Results mismatch. Ref: 4 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 1, Error: 1\n", - "๐ŸŽฏ RAG without validation - Success: 0, Different results: 0, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 0, Different results: 0, No results: 2, Error: 0\n", - "โœ… 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "Missing from reference: {'sequence': {'type': 'uri', 'value': 'http://purl.uniprot.org/isoforms/P83854-1'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P83854'}}\n", - "โ›”๏ธ 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "โœ… 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 1, Error: 3\n", - "๐ŸŽฏ RAG without validation - Success: 1, Different results: 1, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 2, Error: 0\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โ›”๏ธ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 4806\n", - "\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โ›”๏ธ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 4806\n", - "\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โŒ 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 4806\n", - "\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โŒ 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI\n", - "\n", - "Results mismatch. Ref: 11650 != gen: 4806\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 4\n", - "๐ŸŽฏ RAG without validation - Success: 1, Different results: 3, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 2, No results: 2, Error: 0\n", - "โœ… 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n", - "โœ… 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n", - "Missing from reference: {'mouseProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/ACES_MOUSE'}, 'cluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0671680_33154#PG_13'}, 'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'humanProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P19835'}}\n", - "โŒ 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "\n", - "Results mismatch. Ref: 710 != gen: 31078\n", - "\n", - "Missing from reference: {'mouseProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/ACES_MOUSE'}, 'cluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0671680_33154#PG_13'}, 'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'chebi': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'humanProtein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P19835'}}\n", - "โŒ 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI\n", - "\n", - "Results mismatch. Ref: 710 != gen: 31078\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 6\n", - "๐ŸŽฏ RAG without validation - Success: 3, Different results: 3, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 4, No results: 2, Error: 0\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โ›”๏ธ 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โ›”๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "โœ… 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โŒ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "\n", - "Results mismatch. Ref: 121 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 3, Error: 7\n", - "๐ŸŽฏ RAG without validation - Success: 3, Different results: 3, No results: 4, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 3, Different results: 4, No results: 3, Error: 0\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โœ… 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n", - "โœ… 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โŒ 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "\n", - "Results mismatch. Ref: 10 != gen: 0\n", - "\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โŒ 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10\n", - "\n", - "Results mismatch. Ref: 10 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 4, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 3, No results: 4, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 3, Different results: 4, No results: 5, Error: 0\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โŒ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 1\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โŒ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 1\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 6, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 3, No results: 6, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 3, Different results: 6, No results: 5, Error: 0\n", - "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n", - "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n", - "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n", - "โ›”๏ธ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from reference: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'literal', 'value': 'Jul2023'}}\n", - "โ›”๏ธ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n", - "โŒ 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from reference: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'literal', 'value': 'Jul2023'}}\n", - "โŒ 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 8, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 7, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 3, Different results: 7, No results: 6, Error: 0\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โ›”๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โ›”๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โŒ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โŒ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 10, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 9, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 3, Different results: 7, No results: 8, Error: 0\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โ›”๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โ›”๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โŒ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โŒ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 12, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 11, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 3, Different results: 7, No results: 10, Error: 0\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โ›”๏ธ 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โ›”๏ธ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "โœ… 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โŒ 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref\n", - "\n", - "Results mismatch. Ref: 3 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 14, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 4, No results: 13, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 7, No results: 11, Error: 0\n", - "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n", - "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/93'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Early endosome membrane'}}\n", - "โ›”๏ธ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n", - "โ›”๏ธ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/134'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Golgi apparatus membrane'}}\n", - "โŒ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/138'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q16643'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Cell cortex'}}\n", - "โŒ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 16, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 5, Different results: 6, No results: 13, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 9, No results: 11, Error: 0\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "โœ… 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 18, Error: 8\n", - "๐ŸŽฏ RAG without validation - Success: 7, Different results: 6, No results: 13, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 6, Different results: 9, No results: 11, Error: 0\n", - "\n", - "๐Ÿง  Testing model: gpt-4o\n", - "\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "Missing from generated: {'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H0Y4E4'}}\n", - "โœ… 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n", - "โœ… 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n", - "โœ… 1/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n", - "โœ… 2/2 What is the accession number in uniprot of the human gene LCT? Return only unique protein URIs = 4\n", - "๐ŸŽฏ No RAG - Success: 0, Different results: 0, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 2, Different results: 0, No results: 0, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 2, Different results: 0, No results: 0, Error: 0\n", - "โœ… 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 1/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "โœ… 2/2 How do I filter for reviewed mouse proteins which carry an N-terminal glycine? Return protein URI and AA sequence = 20\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 2, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 4, Different results: 0, No results: 0, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 4, Different results: 0, No results: 0, Error: 0\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "Missing from generated: {'rhea': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/21744'}}\n", - "โœ… 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "โœ… 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "โœ… 1/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "โœ… 2/2 How could I download a table that only includes the Rhea reactions for which there is experimental evidence? Return only the rhea URI = 11650\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 4, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 6, Different results: 0, No results: 0, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 6, Different results: 0, No results: 0, Error: 0\n", - "Missing from generated: {'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'sterol': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n", - "Missing from generated: {'reaction': {'type': 'uri', 'value': 'http://rdf.rhea-db.org/10100'}, 'sterol': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CHEBI_15889'}, 'protein': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/B3KRG8'}}\n", - "โœ… 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n", - "โœ… 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n", - "โœ… 1/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n", - "โœ… 2/2 Which human proteins are enzymes catalyzing a reaction involving sterols? Return the protein, sterol and reaction URI = 710\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 6, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 8, Different results: 0, No results: 0, Error: 0\n", - "๐ŸŽฏ RAG with validation - Success: 8, Different results: 0, No results: 0, Error: 0\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "Missing from generated: {'hgncSymbol': {'type': 'literal', 'value': 'BRAF'}, 'diseaseLabel': {'type': 'literal', 'value': 'Colorectal cancer'}}\n", - "โœ… 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n", - "โ›”๏ธ 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n", - "โœ… 1/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n", - "โœ… 2/2 Which are the human proteins associated with cancer? Return distinct ?diseaseLabel and ?hgncSymbol = 121\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 8, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 9, Different results: 0, No results: 0, Error: 1\n", - "๐ŸŽฏ RAG with validation - Success: 10, Different results: 0, No results: 0, Error: 0\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "Missing from generated: {'gene': {'type': 'uri', 'value': 'http://omabrowser.org/ontology/oma#GENE_WBGene00001030'}, 'confidence': {'type': 'uri', 'value': 'http://purl.obolibrary.org/obo/CIO_0000029'}, 'fdr': {'type': 'typed-literal', 'datatype': 'http://www.w3.org/2001/XMLSchema#double', 'value': '0.0016728772206653400815'}}\n", - "โœ… 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n", - "โœ… 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n", - "โœ… 1/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n", - "โœ… 2/2 In bgee how can I retrieve the confidence level and false discovery rate of a gene expression? Return distinct ?gene, ?confidence and ?fdr, limit to 10 = 10\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 10, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 11, Different results: 0, No results: 0, Error: 1\n", - "๐ŸŽฏ RAG with validation - Success: 12, Different results: 0, No results: 0, Error: 0\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โ›”๏ธ 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "Missing from generated: {'ensemblURI': {'type': 'uri', 'value': 'http://rdf.ebi.ac.uk/resource/ensembl.protein/ENSBIXP00000024889'}}\n", - "โŒ 1/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI\n", - "\n", - "Results mismatch. Ref: 96 != gen: 0\n", - "\n", - "โœ… 2/2 How can I get the cross-reference to the ensembl protein for the LCT protein in OMA? Return only the distinct ?ensemblURI = 96\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 12, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 11, Different results: 0, No results: 2, Error: 1\n", - "๐ŸŽฏ RAG with validation - Success: 13, Different results: 0, No results: 1, Error: 0\n", - "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n", - "Missing from generated: {'orthologCluster': {'type': 'uri', 'value': 'https://omabrowser.org/oma/hog/resolve/HOG:D0000193_-2035759834'}, 'datasetURI': {'type': 'uri', 'value': 'https://omabrowser.org/oma/current/#DATASET_OMA'}}\n", - "โœ… 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n", - "โœ… 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n", - "โœ… 1/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n", - "โœ… 2/2 How can I get the URI of a dataset to which an ortholog cluster belongs in OMA? Return orthologCluster, datasetURI and limit to 20 = 20\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 14, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 13, Different results: 0, No results: 2, Error: 1\n", - "๐ŸŽฏ RAG with validation - Success: 15, Different results: 0, No results: 1, Error: 0\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โ›”๏ธ 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "โ›”๏ธ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n", - "โœ… 1/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20 = 20\n", - "Missing from generated: {'strain': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734#strain-KTE188'}, 'name': {'type': 'literal', 'value': 'KTE188'}, 'taxon': {'type': 'uri', 'value': 'http://purl.uniprot.org/taxonomy/1181734'}}\n", - "โŒ 2/2 Give me the list of strains associated to the Escherichia coli taxon and their name. Return ?taxon, ?strain, ?name, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 16, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 13, Different results: 0, No results: 3, Error: 2\n", - "๐ŸŽฏ RAG with validation - Success: 16, Different results: 0, No results: 2, Error: 0\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โ›”๏ธ 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n", - "โ›”๏ธ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "400 Client Error: Bad Request for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n", - "โœ… 1/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20 = 20\n", - "Missing from generated: {'proteinLabel': {'type': 'literal', 'value': 'ADP-specific phosphofructokinase'}, 'pathwayLabel': {'type': 'literal', 'value': 'Carbohydrate degradation; glycolysis'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/H1KZ61'}}\n", - "โŒ 2/2 Retrieve all proteins involved in pathways involving glycolysis. Return ?proteinURI, ?proteinLabel, ?pathwayLabel, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 0\n", - "\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 18, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 13, Different results: 0, No results: 3, Error: 4\n", - "๐ŸŽฏ RAG with validation - Success: 17, Different results: 0, No results: 3, Error: 0\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "Missing from generated: {'ratProtein': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/RATNO00407'}, 'ratUniProtXref': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/A0A0G2K4L4'}}\n", - "โœ… 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n", - "โœ… 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n", - "โœ… 1/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n", - "โœ… 2/2 What are the orthologs in rat for protein Q9Y2T1 ? Return ?ratProtein ?ratUniProtXref = 3\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 20, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 15, Different results: 0, No results: 3, Error: 4\n", - "๐ŸŽฏ RAG with validation - Success: 19, Different results: 0, No results: 3, Error: 0\n", - "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n", - "Missing from generated: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/95'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 1'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/P05067'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Endoplasmic reticulum'}}\n", - "โ›”๏ธ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n", - "โ›”๏ธ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "400 Client Error: for url: https://sparql.uniprot.org/sparql/?query=%23+https%3A%2F%2Fsparql.uniprot.org%2Fsparql%2F\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/473'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Phagocytic cup'}}\n", - "โŒ 1/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "Missing from reference: {'locationInsideCellUri': {'type': 'uri', 'value': 'http://purl.uniprot.org/locations/473'}, 'diseaseLabel': {'type': 'literal', 'value': 'Alzheimer disease 9'}, 'proteinURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/Q8IZY2'}, 'locationInsideCellLabel': {'type': 'literal', 'value': 'Phagocytic cup'}}\n", - "โŒ 2/2 Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\n", - "\n", - "Results mismatch. Ref: 20 != gen: 20\n", - "\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 22, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 15, Different results: 0, No results: 3, Error: 6\n", - "๐ŸŽฏ RAG with validation - Success: 19, Different results: 2, No results: 3, Error: 0\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "Missing from generated: {'proteinOMA': {'type': 'uri', 'value': 'https://omabrowser.org/oma/info/CRIGR03030'}, 'speciesLabel': {'type': 'literal', 'value': 'Cricetulus griseus'}, 'mnemonic': {'type': 'literal', 'value': 'P53_CRIGR'}, 'evidenceType': {'type': 'literal', 'value': 'Evidence at transcript level'}, 'uniprotURI': {'type': 'uri', 'value': 'http://purl.uniprot.org/uniprot/O09185'}}\n", - "โœ… 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 1/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "โœ… 2/2 Retrieve all proteins in OMA that are encoded by the TP53 gene and their mnemonics and evidence types from the UniProt database. Return ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI = 143\n", - "๐ŸŽฏ No RAG - Success: 2, Different results: 0, No results: 24, Error: 0\n", - "๐ŸŽฏ RAG without validation - Success: 17, Different results: 0, No results: 3, Error: 6\n", - "๐ŸŽฏ RAG with validation - Success: 21, Different results: 2, No results: 3, Error: 0\n", - " Model RAG Approach Success Different Results \\\n", - "0 gpt-4o-mini No RAG 0 0 \n", - "1 gpt-4o-mini RAG without validation 7 5 \n", - "2 gpt-4o-mini RAG with validation 8 3 \n", - "3 Llama3.1 8B No RAG 0 0 \n", - "4 Llama3.1 8B RAG without validation 0 0 \n", - "5 Llama3.1 8B RAG with validation 2 2 \n", - "6 Mixtral 8x22B No RAG 0 0 \n", - "7 Mixtral 8x22B RAG without validation 7 6 \n", - "8 Mixtral 8x22B RAG with validation 6 9 \n", - "9 gpt-4o No RAG 2 0 \n", - "10 gpt-4o RAG without validation 17 0 \n", - "11 gpt-4o RAG with validation 21 2 \n", - "\n", - " No Results Errors Precision Price \n", - "0 5 21 0.000000 0.00012 \n", - "1 14 0 0.269231 0.00111 \n", - "2 15 0 0.307692 0.00111 \n", - "3 2 24 0.000000 0.00011 \n", - "4 12 14 0.000000 0.00141 \n", - "5 14 8 0.076923 0.00141 \n", - "6 18 8 0.000000 0.00079 \n", - "7 13 0 0.269231 0.01075 \n", - "8 11 0 0.230769 0.01077 \n", - "9 24 0 0.076923 0.00480 \n", - "10 3 6 0.653846 0.03734 \n", - "11 3 0 0.807692 0.03734 \n", - "\n", - "\n", - "\n", - "None\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "from collections import defaultdict\n", + "import time\n", + "from weakref import ref\n", "\n", "import pandas as pd\n", "import requests\n", @@ -1182,17 +242,15 @@ "}\"\"\",\n", " },\n", " {\n", - " \"question\": \"Retrieve all proteins that are associated with Alzheimer diseases and where they are known to be located in the cell. Return ?proteinURI, ?diseaseLabel, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\",\n", + " \"question\": \"\"\"Retrieve all proteins that are associated with Alzheimer disease (http://purl.uniprot.org/diseases/3832) and where they are known to be located in the cell. Return ?proteinURI, ?locationInsideCellLabel, ?locationInsideCellUri, limit to 20\"\"\",\n", " \"endpoint\": \"https://sparql.uniprot.org/sparql/\",\n", " \"query\": \"\"\"PREFIX up: \n", "PREFIX skos: \n", - "SELECT ?proteinURI ?diseaseLabel ?locationInsideCellLabel ?locationInsideCellUri\n", + "SELECT ?proteinURI ?locationInsideCellLabel ?locationInsideCellUri\n", "WHERE {\n", " ?proteinURI a up:Protein ;\n", " up:annotation ?diseaseAnnotation , ?subcellAnnotation .\n", - " ?diseaseAnnotation up:disease ?disease .\n", - " ?disease skos:prefLabel ?diseaseLabel .\n", - " FILTER(CONTAINS(LCASE(?diseaseLabel), \"alzheimer\"))\n", + " ?diseaseAnnotation up:disease .\n", " ?subcellAnnotation up:locatedIn/up:cellularComponent ?locationInsideCellUri .\n", " ?locationInsideCellUri skos:prefLabel ?locationInsideCellLabel .\n", "} LIMIT 20\"\"\",\n", @@ -1205,7 +263,7 @@ "PREFIX orth: \n", "PREFIX obo: \n", "PREFIX lscr: \n", - "SELECT DISTINCT ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n", + "SELECT DISTINCT ?proteinOMA ?speciesLabel ?mnemonic ?evidenceType ?uniprotURI\n", "WHERE {\n", " ?proteinOMA a orth:Protein ;\n", " orth:organism/obo:RO_0002162/up:scientificName ?speciesLabel ;\n", @@ -1379,15 +437,10 @@ "\n", "# QLEVER_UNIPROT = \"https://qlever.cs.uni-freiburg.de/api/uniprot\"\n", "\n", - "# Price per M tokens, open source models based on fireworks.io pricing\n", + "# Price per million tokens, open source models based on fireworks.io pricing\n", "# https://openai.com/api/pricing/\n", "# https://fireworks.ai/pricing\n", "models = {\n", - " \"gpt-4o-mini\": {\n", - " \"id\": \"gpt-4o-mini\",\n", - " \"price_input\": 0.15,\n", - " \"price_output\": 0.6,\n", - " },\n", " \"Llama3.1 8B\": {\n", " \"id\": \"hf:meta-llama/Meta-Llama-3.1-8B-Instruct\",\n", " \"price_input\": 0.2,\n", @@ -1403,6 +456,11 @@ " \"price_input\": 5,\n", " \"price_output\": 15,\n", " },\n", + " \"gpt-4o-mini\": {\n", + " \"id\": \"gpt-4o-mini\",\n", + " \"price_input\": 0.15,\n", + " \"price_output\": 0.6,\n", + " },\n", "}\n", "\n", "\n", @@ -1416,9 +474,7 @@ " ],\n", " stream=False,\n", " temperature=0,\n", - " # response_format={ \"type\": \"json_object\" },\n", " )\n", - " # response.choices[0].message.content\n", " return response.to_dict()\n", "\n", "\n", @@ -1476,10 +532,26 @@ " \"F1\": [],\n", "}\n", "\n", - "number_of_tries = 2\n", + "number_of_tries = 3\n", "\n", "print(f\"๐Ÿงช Testing {len(example_queries)} queries\")\n", "\n", + "# Get results for the reference queries\n", + "ref_results = []\n", + "for i, test_query in enumerate(example_queries):\n", + " res_ref_finally_pass = False\n", + " while not res_ref_finally_pass:\n", + " try:\n", + " query_start_time = time.time()\n", + " res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=300)[\"results\"][\"bindings\"]\n", + " print(f\"Reference query {i} '{test_query['question']}' took {time.time() - query_start_time:.2f} seconds\")\n", + " ref_results.append(res_from_ref)\n", + " res_ref_finally_pass = True\n", + " except Exception as e:\n", + " print(f\"Timeout for reference query {i}: {e}, Trying again because we know if should work.\")\n", + " res_ref_finally_pass = False\n", + " # res_from_ref = query_sparql(test_query[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n", + "\n", "for model_label, model in models.items():\n", " print(f\"\\n๐Ÿง  Testing model: {model_label}\\n\")\n", " res = defaultdict(dict)\n", @@ -1487,10 +559,18 @@ " for approach in list_of_approaches:\n", " res[approach] = defaultdict(int)\n", "\n", - " for _i, test_query in enumerate(example_queries):\n", - " # Execute the reference query\n", - " res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=200)[\"results\"][\"bindings\"]\n", - " # res_from_ref = query_sparql(test_query[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n", + " for query_num, test_query in enumerate(example_queries):\n", + " # # Execute the reference query.\n", + " # res_ref_finally_pass = False\n", + " # while not res_ref_finally_pass:\n", + " # try:\n", + " # res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=300)[\"results\"][\"bindings\"]\n", + " # res_ref_finally_pass = True\n", + " # except Exception as e:\n", + " # print(f\"Error in reference query: {e}, Trying again because we know if should work :)\")\n", + " # res_ref_finally_pass = False\n", + " # # res_from_ref = query_sparql(test_query[\"query\"], test_query[\"endpoint\"], timeout=300)[\"results\"][\"bindings\"]\n", + " # # res_from_ref = query_sparql(test_query[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n", "\n", " for approach, approach_func in list_of_approaches.items():\n", " for t in range(number_of_tries):\n", @@ -1509,17 +589,17 @@ "\n", " # Execute the generated query\n", " res_from_generated = query_sparql(\n", - " generated_sparql[\"query\"], generated_sparql[\"endpoint_url\"], timeout=200\n", + " generated_sparql[\"query\"], generated_sparql[\"endpoint_url\"], timeout=300\n", " )[\"results\"][\"bindings\"]\n", " # res_from_generated = query_sparql(generated_sparql[\"query\"], QLEVER_UNIPROT)[\"results\"][\"bindings\"]\n", "\n", - " if not result_sets_are_same(res_from_generated, res_from_ref):\n", + " if not result_sets_are_same(res_from_generated, ref_results[query_num]):\n", " if len(res_from_generated) == 0:\n", " res[approach][\"no_results\"] += 1\n", " else:\n", " res[approach][\"different_results\"] += 1\n", " raise Exception(\n", - " f\"\\nResults mismatch. Ref: {len(res_from_ref)} != gen: {len(res_from_generated)}\\n\"\n", + " f\"\\nResults mismatch. Ref: {len(ref_results[query_num])} != gen: {len(res_from_generated)}\\n\"\n", " )\n", " else:\n", " print(f\"โœ… {t+1}/{number_of_tries} {test_query['question']} = {len(res_from_generated)}\")\n", @@ -1530,10 +610,10 @@ " if approach != \"No RAG\":\n", " fail_emoji = \"โŒ\" if approach == \"RAG with validation\" else \"โ›”๏ธ\"\n", " print(f\"{fail_emoji} {t+1}/{number_of_tries} {test_query['question']}\\n{e}\")\n", - " # print(generated_sparql[\"query\"])\n", - " # print(\"Correct query:\")\n", - " # print(test_query[\"query\"])\n", - " # print(\"\")\n", + " print(generated_sparql[\"query\"])\n", + " print(\"Correct query:\")\n", + " print(test_query[\"query\"])\n", + " print(\"\")\n", "\n", " for approach in list_of_approaches:\n", " print(\n", @@ -1556,7 +636,10 @@ " results_data[\"Price\"].append(round(mean_price, 5))\n", " # results_data['Precision'].append(precision)\n", " # results_data['Recall'].append(recall)\n", - " results_data[\"F1\"].append(2 * (precision * recall) / (precision + recall))\n", + " if precision + recall == 0:\n", + " results_data[\"F1\"].append(0)\n", + " else:\n", + " results_data[\"F1\"].append(round(2 * (precision * recall) / (precision + recall), 2))\n", "\n", "\n", "df = pd.DataFrame(results_data)\n", @@ -1566,7 +649,7 @@ "\n", "# Output Latex table\n", "latex_str = \"\"\n", - "prev_model = models.keys()[0]\n", + "prev_model = list(models.keys())[0]\n", "for index, row in df.iterrows():\n", " row_str = \" & \".join([str(item) for item in row]) # Join all values in the row with \" & \"\n", " row_str += \" \\\\\\\\\"\n", diff --git a/src/sparql_llm/api.py b/src/sparql_llm/api.py index 53dcfb5..d53f83c 100644 --- a/src/sparql_llm/api.py +++ b/src/sparql_llm/api.py @@ -9,7 +9,7 @@ from fastapi.responses import HTMLResponse, StreamingResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates -from openai import Stream, OpenAI +from openai import OpenAI, Stream from openai.types.chat import ChatCompletion, ChatCompletionChunk from pydantic import BaseModel from qdrant_client.models import FieldCondition, Filter, MatchValue, ScoredPoint @@ -194,10 +194,7 @@ async def chat(request: ChatCompletionRequest): # Use messages from the request to keep memory of previous messages sent by the client # Replace the question asked by the user with the big prompt with all contextual infos request.messages[-1].content = prompt_with_context - all_messages = [ - Message(role="system", content=settings.system_prompt), - *request.messages - ] + all_messages = [Message(role="system", content=settings.system_prompt), *request.messages] # Send the prompt to OpenAI to get a response response = client.chat.completions.create( @@ -214,12 +211,10 @@ async def chat(request: ChatCompletionRequest): stream_openai(response, query_hits + docs_hits, prompt_with_context), media_type="application/x-ndjson" ) - print(response) + # print(response) # print(response.choices[0].message.content) response: ChatCompletion = ( - validate_and_fix_sparql(response, all_messages, client, request.model) - if request.validate_output - else response + validate_and_fix_sparql(response, all_messages, client, request.model) if request.validate_output else response ) # NOTE: the response is similar to OpenAI API, but we add the list of hits and the full prompt used to ask the question response.docs = query_hits + docs_hits @@ -243,10 +238,12 @@ def validate_and_fix_sparql( """Recursive function to validate the SPARQL queries in the chat response and fix them if needed.""" if try_count >= settings.max_try_fix_sparql: - resp.choices[0].message.content = f"{resp.choices[0].message.content}\n\nThe SPARQL query could not be fixed after multiple tries. Please do it yourself!" + resp.choices[ + 0 + ].message.content = f"{resp.choices[0].message.content}\n\nThe SPARQL query could not be fixed after multiple tries. Please do it yourself!" return resp generated_sparqls = extract_sparql_queries(resp.choices[0].message.content) - print("generated_sparqls", generated_sparqls) + # print("generated_sparqls", generated_sparqls) error_detected = False for gen_query in generated_sparqls: try: @@ -261,7 +258,9 @@ def validate_and_fix_sparql( except Exception as e: if "Unknown namespace prefix" in str(e): - md_resp = md_resp.replace(gen_query["query"], add_missing_prefixes(gen_query["query"], prefixes_map)) + resp.choices[0].message.content = resp.choices[0].message.content.replace( + gen_query["query"], add_missing_prefixes(gen_query["query"], prefixes_map) + ) else: # Ask the LLM to try to fix it print(f"Error in SPARQL query try #{try_count}: {e}\n{gen_query['query']}") @@ -279,7 +278,7 @@ def validate_and_fix_sparql( # {md_resp} messages.append({"role": "assistant", "content": fix_prompt}) fixing_resp = client.chat.completions.create( - model=resp.model, + model=llm_model, messages=messages, stream=False, ) @@ -289,7 +288,9 @@ def validate_and_fix_sparql( resp.usage.prompt_tokens += fixing_resp.usage.prompt_tokens resp.usage.completion_tokens += fixing_resp.usage.completion_tokens resp.usage.total_tokens += fixing_resp.usage.total_tokens - resp.choices[0].message.content = resp.choices[0].message.content.replace(gen_query["query"], fixed_query["query"]) + resp.choices[0].message.content = resp.choices[0].message.content.replace( + gen_query["query"], fixed_query["query"] + ) if error_detected: # Check again the fixed query return validate_and_fix_sparql(resp, messages, client, llm_model, try_count)