diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd6a795
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.env
+**/chroma_db_LAB/
+**/chroma_db_Bonus/
\ No newline at end of file
diff --git a/Eisenhorn-Dan-Abnett-3.PDF b/Eisenhorn-Dan-Abnett-3.PDF
new file mode 100644
index 0000000..8543917
Binary files /dev/null and b/Eisenhorn-Dan-Abnett-3.PDF differ
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index e3a225a..be77fb0 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -69,15 +69,15 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {
"id": "6heKZkQUxYZr"
},
"outputs": [],
"source": [
"import os\n",
- "from langchain.document_loaders import PyPDFLoader\n",
- "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n",
+ "from langchain_community.document_loaders import PyPDFLoader\n",
+ "from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n"
]
@@ -96,7 +96,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"id": "cuREtJRixYZt"
},
@@ -104,7 +104,7 @@
"source": [
"# File path for the document\n",
"\n",
- "file_path = \"LAB/ai-for-everyone.pdf\""
+ "file_path = \"../ai-for-everyone.pdf\""
]
},
{
@@ -122,12 +122,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"id": "_b5Z_45UxYZu",
"outputId": "a600d69f-14fe-4492-f236-97261d6ff36c"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "297"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Load and split the document\n",
"loader = PyPDFLoader(file_path)\n",
@@ -168,9 +179,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1096"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"text_splitter = RecursiveCharacterTextSplitter(\n",
" chunk_size=1000,\n",
@@ -285,31 +307,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {
"id": "L0xDxElwxYZw"
},
"outputs": [],
"source": [
- "from langchain.embeddings import OpenAIEmbeddings\n",
+ "from langchain_openai import OpenAIEmbeddings\n",
"from dotenv import load_dotenv"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {
"id": "_WRIo3_0xYZx",
"outputId": "78bfbbf3-9d25-4e31-bdbc-3e932e6bbfec"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"load_dotenv()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {
"id": "MNZfTng5xYZz",
"outputId": "db1a7c85-ef9f-447e-92cd-9d097e959847"
@@ -343,23 +376,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {
"id": "brKe6wUgxYZ0"
},
"outputs": [],
"source": [
- "from langchain.vectorstores import Chroma"
+ "from langchain_chroma import Chroma"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {
"id": "VkjHR-RkxYZ0",
"outputId": "bc11bda9-f283-457a-f584-5a06b95c4dd9"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ChromaDB created with document embeddings.\n"
+ ]
+ }
+ ],
"source": [
"db = Chroma.from_documents(chunks, embeddings, persist_directory=\"./chroma_db_LAB\")\n",
"print(\"ChromaDB created with document embeddings.\")"
@@ -383,24 +424,73 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {
"id": "XiLv-TfrxYZ1"
},
"outputs": [],
"source": [
- "user_question = \"\" # User question\n",
+ "user_question = \"What is the difference between AI, machine learning, and deep learning?\" # User question\n",
"retrieved_docs = db.similarity_search(user_question, k=10) # k is the number of documents to retrieve"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {
"id": "qgWsh50JxYZ1",
"outputId": "c8640c5d-5955-471f-fdd2-37096f5f68c7"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Document 1:\n",
+ " simply put, is ‘methods that help computers learn without \n",
+ "being explicitly programmed’ (Kaplan and Haenlein 2019, 17), and is applied \n",
+ "in order to identify underlying patterns within the big data, and as such is an \n",
+ "essential element of artificial intelligence. A more elaborated definition comes \n",
+ "from Mitchell (1997, 2) stating ‘ A computer program is said to learn from \n",
+ "experience E with respect to some class of tasks T and performance measure P \n",
+ "if its performance at tasks in T, as measured by P, improves with experience E.’ \n",
+ "AI is much broader than machine learning, as it additionally comprises such \n",
+ "abilities as the perception of data (e.g., voice/image recognition, natural lan -\n",
+ "guage processing, etc.) or the control and movement of objects (robotics \n",
+ "or cybernetics).\n",
+ "Artificial intelligence can be classified into three types of systems: analyti -\n",
+ "cal, human-inspired and humanised (Kaplan and Haenlein 2019). Analytical\n",
+ "Document 2:\n",
+ "as machine learning. Machine learning is \n",
+ "often anthropomorphised, but it is at base the use of statistical methods, called \n",
+ "learning algorithms, to find patterns in large datasets. On the basis of these pat-\n",
+ "terns an algorithm called a ‘model’ is produced which may be used to analyse \n",
+ "new data (Alpaydin 2014, 2–3). A model thus represents ‘knowledge’ of the pat-\n",
+ "terns found by the learning algorithm and can be used to make useful analyses \n",
+ "or predictions. Much of the hype around machine learning derives from this \n",
+ "automated production of models from data, which Domingos (2015) calls the \n",
+ "‘inverse of programming’ (6–7). \n",
+ "Machine learning is being applied almost anywhere electronic data is accessi-\n",
+ "ble. Brynjolfsson and McAfee (2017) argue that machine learning is a general-\n",
+ "purpose technology comparable to the combustion engine. While this remains \n",
+ "to be seen, AI has found diverse applications from recommendation engines\n",
+ "Document 3:\n",
+ "umans and Machines Might Have to Coexist 23\n",
+ "definitions exist and experts disagree on how to best characterise artificial \n",
+ "intelligence. By analysing different AI definitions, Russell and Norvig (2016), \n",
+ "e.g., concluded that there are four main approaches for defining AI, i.e., see it \n",
+ "as systems that (1) think like humans, (2) act like humans, (3) think rationally \n",
+ "and (4) act rationally.\n",
+ "Often terms such as big data, machine learning or the Internet-of-Things \n",
+ "(IoT) are incorrectly applied as synonyms for artificial intelligence, yet they \n",
+ "are indeed differing concepts and terms. An AI-driven system needs big data \n",
+ "from which to learn, which essentially are ‘datasets made up by huge quanti -\n",
+ "ties (volume) of frequently updated data (velocity) in various formats, such as \n",
+ "numeric, textual or images/videos (variety)’ (Kaplan and Haenlein 2019, 17). \n",
+ "Again, a variety of different definitions for big data exists: while one group of\n"
+ ]
+ }
+ ],
"source": [
"# Display top results\n",
"for i, doc in enumerate(retrieved_docs[:3]): # Display top 3 results\n",
@@ -418,7 +508,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {
"id": "2iB3lZqHxYZ2"
},
@@ -434,12 +524,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {
"id": "2okzmuADxYZ2",
"outputId": "0aa6cdca-188d-40e0-f5b4-8888d3549ea4"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Context formatted for GPT model.\n"
+ ]
+ }
+ ],
"source": [
"# Generate a formatted context from the retrieved documents\n",
"formatted_context = _get_document_prompt(retrieved_docs)\n",
@@ -464,22 +562,69 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {
"id": "tqxVh9s3xYZ3",
"outputId": "97cca95d-4ab3-44d8-a76c-5713aad387d8"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Prompt constructed.\n"
+ ]
+ }
+ ],
"source": [
"prompt = f\"\"\"\n",
+ "## SYSTEM ROLE\n",
+ "You are an AI expert explaining concepts to people with no technical background.\n",
+ "Your answers must be based exclusively on the content provided from technical books.\n",
+ "\n",
+ "## USER QUESTION\n",
+ "The user has asked:\n",
+ "\"{user_question}\"\n",
+ "\n",
+ "## CONTEXT\n",
+ "Here is the relevant content from the technical books:\n",
+ "'''\n",
+ "{formatted_context}\n",
+ "'''\n",
"\n",
+ "## GUIDELINES\n",
+ "1. **Accuracy**:\n",
+ " - Only use the content in the `CONTEXT` section to answer.\n",
+ " - Start by explaining what AI is.\n",
"\n",
- "\"\"\"\n"
+ "2. **Transparency**:\n",
+ " - Do not speculate or provide opinions.\n",
+ "\n",
+ "3. **Clarity**:\n",
+ " - Use simple, professional, and concise language.\n",
+ " - Make comparisons with everyday events.\n",
+ " - Format your response in Markdown for readability.\n",
+ "\n",
+ "## TASK\n",
+ "1. Answer the user's question **directly** if possible.\n",
+ "2. Point the user to relevant parts of the documentation.\n",
+ "3. Provide the response in the following format:\n",
+ "\n",
+ "## RESPONSE FORMAT\n",
+ "'''\n",
+ "# [Brief Title of the Answer]\n",
+ "[Answer in simple, clear text.]\n",
+ "\n",
+ "**Source**:\n",
+ "• [Book Title], Page(s): [...]\n",
+ "'''\n",
+ "\"\"\"\n",
+ "print(\"Prompt constructed.\")\n"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {
"id": "0mjkQJ_ZxYZ3"
},
@@ -497,7 +642,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {
"id": "ylypRWRlxYZ4"
},
@@ -507,11 +652,11 @@
"client = openai.OpenAI()\n",
"model_params = {\n",
" 'model': 'gpt-4o',\n",
- " 'temperature': , # Increase creativity\n",
- " 'max_tokens': , # Allow for longer responses\n",
- " 'top_p': , # Use nucleus sampling\n",
- " 'frequency_penalty': , # Reduce repetition\n",
- " 'presence_penalty': # Encourage new topics\n",
+ " 'temperature': 0.8, # Increase creativity\n",
+ " 'max_tokens': 3000, # Allow for longer responses\n",
+ " 'top_p': 0.9, # Use nucleus sampling\n",
+ " 'frequency_penalty': 0.8, # Reduce repetition\n",
+ " 'presence_penalty': 0.8 # Encourage new topics\n",
"}"
]
},
@@ -526,7 +671,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {
"id": "4eXZO4pIxYZ4"
},
@@ -538,17 +683,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"metadata": {
"id": "wLPAcchBxYZ5",
"outputId": "976c7800-16ed-41fe-c4cf-58f60d3230d2"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "'''\n",
+ "# Understanding AI, Machine Learning, and Deep Learning\n",
+ "\n",
+ "Artificial Intelligence (AI) is a broad concept that encompasses any computational system designed to mimic intelligent human behaviors. This includes systems that can learn from data, adapt to new information, and perform tasks typically requiring human intelligence, such as perception of data (e.g., voice or image recognition), natural language processing, and robotics.\n",
+ "\n",
+ "Machine Learning (ML) is a subset of AI focusing specifically on the ability of computers to learn from experience without being explicitly programmed. It involves using statistical methods to find patterns in large datasets and creating models that can make predictions or analyze new data based on those patterns. Essentially, machine learning allows programs to automatically improve their performance on particular tasks by learning from vast amounts of data.\n",
+ "\n",
+ "Deep Learning is a specialized form of machine learning that uses neural networks with many layers ('deep' structures) to analyze various factors of data more comprehensively. It's particularly effective for complex pattern recognition tasks like speech or image recognition.\n",
+ "\n",
+ "**Source**:\n",
+ "• Artificial Intelligence: When Humans and Machines Might Have to Coexist; The Language Labyrinth 91\n",
+ "• Various Authors in Provided Context\n",
+ "'''\n"
+ ]
+ }
+ ],
"source": [
"answer = completion.choices[0].message.content\n",
"print(answer)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "# Understanding AI, Machine Learning, and Deep Learning\n",
+ "\n",
+ "Artificial Intelligence (AI) is a broad concept that encompasses any computational system designed to mimic intelligent human behaviors. This includes systems that can learn from data, adapt to new information, and perform tasks typically requiring human intelligence, such as perception of data (e.g., voice or image recognition), natural language processing, and robotics.\n",
+ "\n",
+ "Machine Learning (ML) is a subset of AI focusing specifically on the ability of computers to learn from experience without being explicitly programmed. It involves using statistical methods to find patterns in large datasets and creating models that can make predictions or analyze new data based on those patterns. Essentially, machine learning allows programs to automatically improve their performance on particular tasks by learning from vast amounts of data.\n",
+ "\n",
+ "Deep Learning is a specialized form of machine learning that uses neural networks with many layers ('deep' structures) to analyze various factors of data more comprehensively. It's particularly effective for complex pattern recognition tasks like speech or image recognition.\n",
+ "\n",
+ "**Source**:\n",
+ "• Artificial Intelligence: When Humans and Machines Might Have to Coexist; The Language Labyrinth 91\n",
+ "• Various Authors in Provided Context\n",
+ "\n",
+ ""
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {
@@ -595,7 +781,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {
"id": "nCXL9Cz1xYaV"
},
@@ -615,7 +801,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {
"id": "9y3E0YWExYaV"
},
@@ -636,14 +822,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {
"id": "i7SkWPpnxYaW",
"outputId": "28e82563-edba-4b41-acad-ec27e5ba134f"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Snippet 1:\n",
+ "and exchange data.\n",
+ "Machine \u001b[1m\u001b[32mlearn\u001b[0ming, simply put, is ‘methods that help \u001b[1m\u001b[32mcomputer\u001b[0ms \u001b[1m\u001b[32mlearn\u001b[0m without \n",
+ "being explicitly programmed’ (Kaplan and Haenlein 2019, 17), and is applied \n",
+ "in order to identify underl\n",
+ "--------------------------------------------------------------------------------\n"
+ ]
+ }
+ ],
"source": [
- "query_keywords = [] # add your keywords\n",
+ "query_keywords = [\"human\", \"computer\", \"machine\", \"learn\"] # add your keywords\n",
"for i, doc in enumerate(retrieved_docs[:1]):\n",
" snippet = doc.page_content[:200]\n",
" highlighted = highlight_keywords(snippet, query_keywords)\n",
@@ -680,6 +879,472 @@
"source": [
"**Try loading one of your own PDF books and go through the steps again to explore how the pipeline works with your content**:\n"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# File path for the document\n",
+ "new_file_path = \"../Eisenhorn-Dan-Abnett-3.PDF\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1204"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load and split the document\n",
+ "loader2 = PyPDFLoader(new_file_path)\n",
+ "pages2 = loader2.load_and_split()\n",
+ "len(pages2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2911"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "text_splitter2 = RecursiveCharacterTextSplitter(\n",
+ " chunk_size=1000,\n",
+ " chunk_overlap=200\n",
+ ")\n",
+ "chunks2 = text_splitter2.split_documents(pages2)\n",
+ "\n",
+ "len(chunks2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "New ChromaDB created with document embeddings.\n"
+ ]
+ }
+ ],
+ "source": [
+ "db2 = Chroma.from_documents(chunks2, embeddings, persist_directory=\"./chroma_db_Bonus\")\n",
+ "print(\"New ChromaDB created with document embeddings.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_question = \"Is Gregor Eisenhorn a good inquisitor?\" # new question\n",
+ "retrieved_docs2 = db2.similarity_search(new_question, k=10) # k is the number of documents to retrieve"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Document 1:\n",
+ " la Inquisición. Son la fuerza\n",
+ "justiciera del Emperador y aniquilan la herejía y la corrupción en\n",
+ "todas sus formas. Gregor Eisenhorn es uno de ellos, elegido por su\n",
+ "fe inquebrantable, su voluntad de hierro y su increíble tenacidad. A\n",
+ "pesar de ser un puritano entregado a la causa de la destrucción del\n",
+ "Caos y de todos sus oscuros seguidores, incluso él se ve tentado a\n",
+ "usar el gran poder del Caos. Cuando cruza la frontera, da el primer\n",
+ "paso en el peligroso camino que podría llevarlo a convertirse en\n",
+ "aquello que ha jurado destruir.\n",
+ "Por primera vez en un solo volumen se publican las novelas Xenos,\n",
+ "Malleus y Hereticus, junto con dos relatos inéditos ambientados en\n",
+ "el cruel mundo de la Inquisición.\n",
+ "Document 2:\n",
+ "L DIOS-EMPERADOR DE LA TIERRA\n",
+ "EXPEDIENTES INQUISITORIALES RESERVADOS.\n",
+ "SÓLO PERSONAL AUTORIZADO\n",
+ "EXPEDIENTE: 442:41F:JL3:Kbu\n",
+ "Sírvase introducir su código de autorización *************\n",
+ "Validando…\n",
+ "Gracias, Inquisidor.\n",
+ "Puede continuar.\n",
+ "Para Gregor Eisenhorn. Comunicado transmitido por el\n",
+ "Gremio Astropático (Scarus) mediante onda mnemónica\n",
+ "45~a.639 triple intro.\n",
+ "Descripción de la senda de transmisión:\n",
+ "Origen: Tracian Primaris, Subsector Helicano 81281, con\n",
+ "fecha de emisión 142.386.M41 (estación repetidora: divergente\n",
+ "M-12/Ostall VII).\n",
+ "Destino: Durer, Subsector Ofidiano 52981, con fecha de\n",
+ "recepción 144.386.M41.\n",
+ "La transcripción ha sido enviada y registrada tal como se\n",
+ "indica en los encabezamientos (se ha archivado una copia\n",
+ "redundante en la memoria intermedia, clave 11, 4362).\n",
+ "Autor: Señor Inquisidor Flebas Alessandro Rorken\n",
+ "Maestre de la Ordo Xenos Helicana,\n",
+ "Document 3:\n",
+ "er me perdonó la vida. De hecho, me\n",
+ "pareció que lo hacía con una connivencia ya acordada.\n",
+ "El Inquisidor Eisenhorn es muy alabado, ha sido muy condecorado y se\n",
+ "le considera un ejemplo meritorio de todo lo que es bueno, fuerte y\n",
+ "dogmático en nuestra hermandad. Sin embargo, debido a las\n",
+ "circunstancias anteriores, he empezado a preguntarme, a temerme…\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Display top results\n",
+ "for i, doc in enumerate(retrieved_docs2[:3]): # Display top 3 results\n",
+ " print(f\"Document {i+1}:\\n{doc.page_content[36:1000]}\") # Display content"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "New context formatted for GPT model.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Generate a formatted context from the retrieved documents\n",
+ "formatted_context2 = _get_document_prompt(retrieved_docs2)\n",
+ "print(\"New context formatted for GPT model.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Content:\n",
+ "Todos los mundos del Imperio temen a la Inquisición. Son la fuerza\n",
+ "justiciera del Emperador y aniquilan la herejía y la corrupción en\n",
+ "todas sus formas. Gregor Eisenhorn es uno de ellos, elegido por su\n",
+ "fe inquebrantable, su voluntad de hierro y su increíble tenacidad. A\n",
+ "pesar de ser un puritano entregado a la causa de la destrucción del\n",
+ "Caos y de todos sus oscuros seguidores, incluso él se ve tentado a\n",
+ "usar el gran poder del Caos. Cuando cruza la frontera, da el primer\n",
+ "paso en el peligroso camino que podría llevarlo a convertirse en\n",
+ "aquello que ha jurado destruir.\n",
+ "Por primera vez en un solo volumen se publican las novelas Xenos,\n",
+ "Malleus y Hereticus, junto con dos relatos inéditos ambientados en\n",
+ "el cruel mundo de la Inquisición.\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "POR ORDEN DE SU SANTÍSIMA MAJESTAD\n",
+ "EL DIOS-EMPERADOR DE LA TIERRA\n",
+ "EXPEDIENTES INQUISITORIALES RESERVADOS.\n",
+ "SÓLO PERSONAL AUTORIZADO\n",
+ "EXPEDIENTE: 442:41F:JL3:Kbu\n",
+ "Sírvase introducir su código de autorización *************\n",
+ "Validando…\n",
+ "Gracias, Inquisidor.\n",
+ "Puede continuar.\n",
+ "Para Gregor Eisenhorn. Comunicado transmitido por el\n",
+ "Gremio Astropático (Scarus) mediante onda mnemónica\n",
+ "45~a.639 triple intro.\n",
+ "Descripción de la senda de transmisión:\n",
+ "Origen: Tracian Primaris, Subsector Helicano 81281, con\n",
+ "fecha de emisión 142.386.M41 (estación repetidora: divergente\n",
+ "M-12/Ostall VII).\n",
+ "Destino: Durer, Subsector Ofidiano 52981, con fecha de\n",
+ "recepción 144.386.M41.\n",
+ "La transcripción ha sido enviada y registrada tal como se\n",
+ "indica en los encabezamientos (se ha archivado una copia\n",
+ "redundante en la memoria intermedia, clave 11, 4362).\n",
+ "Autor: Señor Inquisidor Flebas Alessandro Rorken\n",
+ "Maestre de la Ordo Xenos Helicana,\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "todos nosotros, debo añadir, aquel ser me perdonó la vida. De hecho, me\n",
+ "pareció que lo hacía con una connivencia ya acordada.\n",
+ "El Inquisidor Eisenhorn es muy alabado, ha sido muy condecorado y se\n",
+ "le considera un ejemplo meritorio de todo lo que es bueno, fuerte y\n",
+ "dogmático en nuestra hermandad. Sin embargo, debido a las\n",
+ "circunstancias anteriores, he empezado a preguntarme, a temerme…\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "No concibo una forma mejor de servir, ni una forma mejor de ser\n",
+ "inquisidor.\n",
+ "Así queda completo mi retrato. Gregor Eisenhorn, inquisidor, puritano,\n",
+ "amalatiano, cuarenta y dos años estándar de edad, con dieciocho años\n",
+ "como inquisidor. Soy alto y ancho de hombros, fuerte, resuelto. Ya les he\n",
+ "hablado de mi fuerza de voluntad y estoy seguro de que habrán notado mi\n",
+ "habilidad con la espada.\n",
+ "¿Qué más puedo decir? ¿Si llevo barba? ¡No! Además tengo ojos\n",
+ "oscuros y el pelo aún más oscuro y espeso. Estos son detalles sin\n",
+ "importancia.\n",
+ "Déjenme que les cuente ahora cómo maté a Eyclone.\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "inquisidor Eskane Koth, un amalatiano, nacido y criado en Tracian\n",
+ "Primaris, y que en el futuro sería conocido como la Paloma de Avignon. El\n",
+ "inquisidor Laslo Menderef, natural de las tierras bajas de Sancour, y que\n",
+ "más tarde se convertiría en Menderef el Grave, un istvaano con una gran\n",
+ "capacidad de discernimiento en los crímenes heréticos y una escasa\n",
+ "higiene corporal. El inquisidor Poul Rassi, hijo de las estepas de Kilwaddi,\n",
+ "un servidor del orden anciano, justo y fiable. El inquisidor novicio Bastían\n",
+ "Verveuk.\n",
+ "Y yo mismo. Gregor Eisenhorn. Inquisidor y mandatario principal del\n",
+ "auto interrogatorio.\n",
+ "Pridde era el primero de los doscientos sesenta individuos\n",
+ "identificados por la labor de Lord Rorken como posibles herejes que\n",
+ "debían ser juzgados por la Corte Formal de Investigación. Mantenía un\n",
+ "aspecto digno a pesar de parecer nervioso. Jugueteaba con su cuello\n",
+ "abotonado mientras se encaraba con nosotros. Había contratado a un\n",
+ "defensor llamado Fen de Clincy para que hablara en su nombre.\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "—Soy el inquisidor Gregor Eisenhorn, de la Ordo Xenos Helicana.\n",
+ "Estamos aquí en misión oficial. No tenga miedo.\n",
+ "Me miró sin dejar de parpadear y alargó lentamente una mano cubierta\n",
+ "de roña ennegrecida para verla mejor. Dejé que se la quedara. La miró\n",
+ "durante un buen rato en sus manos temblorosas. Luego comenzó a sollozar.\n",
+ "Le indiqué a Fischig y a los demás que se alejaran y me arrodillé a su\n",
+ "lado.\n",
+ "—¿Cómo se llama?\n",
+ "—Dro… Dronicus.\n",
+ "—¿Dronicus?\n",
+ "—Pater Hershel Dronicus, jerarca de la parroquia de Miquol, bendito\n",
+ "sea el Dios-Emperador de la Humanidad.\n",
+ "—El Dios-Emperador nos protege a todos —le respondí—. ¿Puede\n",
+ "decirme cómo ha llegado hasta aquí, padre?\n",
+ "—Siempre he estado aquí —me contestó—. Puede que los soldados se\n",
+ "hayan ido, pero mientras exista una capilla aquí, hay una parroquia, y\n",
+ "mientras haya una parroquia, debe quedar un sacerdote.\n",
+ "Por el Trono Dorado, aquel anciano estaba viviendo sólo allí y\n",
+ "manteniendo la capilla desde hacía treinta años.\n",
+ "—¿Nunca desacralizaron el terreno?\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "Para Gregor Eisenhorn. Comunicado transmitido por el\n",
+ "Gremio Astropático (Scarus) mediante bucle mnemónico.\n",
+ "Repetición 45-3.5611 segura.\n",
+ "Descripción de la senda de transmisión:\n",
+ "Origen: Tracian Primaris, Subsector Helicano 81281, con\n",
+ "fecha de emisión 142.386.M41 (bucle de navigatus 351/eco de la\n",
+ "baliza Gernale).\n",
+ "Destino: Durer, Subsector Ofidiano 52981, con fecha de\n",
+ "recepción 144.386.M41.\n",
+ "La transcripción ha sido enviada y registrada tal como se\n",
+ "indica en los encabezamientos (se ha archivado una copia\n",
+ "redundante en la memoria intermedia, clave 34, 7002).\n",
+ "Autor: Inquisidor Bastían Verveuk, Ordo Xenos,\n",
+ "Oficio del Gran Consejo de la Inquisición, Sector Scarus,\n",
+ "Scarus Mayor.\n",
+ "¡Salutaciones, señor!\n",
+ "En el nombre del Dios-Emperador, bendita sea su eterna\n",
+ "vigilia, y por los Altos Señores de Terra, me encomiendo a vuestra\n",
+ "eminente persona, y espero que al recibir este comunicado se\n",
+ "encuentre bien de salud.\n",
+ "Grande fue mi entusiasmo cuando Lord Rorken me informó de\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "lo han nombrado inquisidor, ¿no?\n",
+ "—Hace ya sesenta años… Eisenhorn, llevas una vida bastante solitaria,\n",
+ "¿verdad?\n",
+ "—Si se refiere a que no ando pendiente de las idas y venidas, de las\n",
+ "elecciones y de los asuntos de otros inquisidores, sí, señor, así es. Me\n",
+ "concentro en mi trabajo, y en las necesidades de mi personal.\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "Me encogí de hombros. ¿Qué podía decir?\n",
+ "—No nos parecemos, Gregor Eisenhorn —prosiguió con voz trémula\n",
+ "—. Nuestro concepto de la Inquisición es muy diferente, pero a pesar de\n",
+ "todo alabo su valentía y su dedicación. Ha demostrado su valía ante mí.\n",
+ "Formas diferentes, medios diferentes ¿no es ésa la auténtica ética de\n",
+ "nuestra orden? Moriré pronto, creo… y en paz… sabiendo que hombres\n",
+ "como usted seguirán luchando.\n",
+ "Me sentí honrado. Pensara lo que pensase de su modus operandi, sabía\n",
+ "que los dos teníamos el mismo fin.\n",
+ "Con un gesto débil indicó a Heldane que se adelantara. El aspecto de su\n",
+ "cabeza llena de cicatrices no había mejorado nada desde la última vez que\n",
+ "lo había visto.\n",
+ "—Quiero encomendarle a Heldane, de todos mis discípulos es el mejor.\n",
+ "Pienso recomendar su promoción al nivel de alto interrogador, desde el\n",
+ "cual se puede optar al de inquisidor. Si yo muero, ocúpese de él por mí. No\n",
+ "tengo la menor duda de que la Inquisición se beneficiará contando con él.\n",
+ "\n",
+ "\n",
+ "Content:\n",
+ "479.M41, aunque numerosos informes posteriores sugieren que sobrevivió\n",
+ "más allá de esa fecha.\n",
+ "El Gran Inquisidor Phlebas Alessandro Rorken se recuperó de su mala\n",
+ "salud y se convirtió en el Gran Maestre de la Ordos Helicana después de\n",
+ "la desaparición de Leonid Osma. Mantuvo el cargo durante trescientos\n",
+ "cincuenta años.\n",
+ "Se cree que el inquisidor Gregor Eisenhorn continuó al servicio de su\n",
+ "Ordos después de los hechos acaecidos en 5213X, aunque los datos de su\n",
+ "vida y de su trabajo registrados a partir de aquella fecha son conjeturas\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(formatted_context2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "New prompt constructed.\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt2 = f\"\"\"\n",
+ "## SYSTEM ROLE\n",
+ "You are a knowledgeable and factual chatbot designed to assist with questions about Warhammer 40000 books.\n",
+ "Your answers must be based exclusively on the content provided from the books.\n",
+ "\n",
+ "## USER QUESTION\n",
+ "The user has asked:\n",
+ "\"{new_question}\"\n",
+ "\n",
+ "## CONTEXT\n",
+ "Here is the relevant content from the books:\n",
+ "'''\n",
+ "{formatted_context2}\n",
+ "'''\n",
+ "\n",
+ "## GUIDELINES\n",
+ "1. **Accuracy**:\n",
+ " - Only use the content in the `CONTEXT` section to answer.\n",
+ " - If the answer cannot be found, explicitly state: \"You DO NOT have permissions to get that information.\"\n",
+ "\n",
+ "2. **Transparency**:\n",
+ " - Reference the book's name and page numbers when providing information.\n",
+ " - Do not speculate or provide opinions.\n",
+ "\n",
+ "3. **Clarity**:\n",
+ " - Use simple and concise language.\n",
+ " - Format your response in Markdown for readability.\n",
+ "\n",
+ "## TASK\n",
+ "1. Answer the user's question **directly** if possible.\n",
+ "2. Point the user to relevant parts of the documentation.\n",
+ "3. Provide the response in the following format:\n",
+ "\n",
+ "## RESPONSE FORMAT\n",
+ "'''\n",
+ "# [Brief Title of the Answer]\n",
+ "[Answer in simple, clear text.]\n",
+ "\n",
+ "**Source**:\n",
+ "• [Book Title], Page(s): [...]\n",
+ "'''\n",
+ "\"\"\"\n",
+ "print(\"New prompt constructed.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "client2 = openai.OpenAI()\n",
+ "model_params2 = {\n",
+ " 'model': 'gpt-4o',\n",
+ " 'temperature': 0.7, # Increase creativity\n",
+ " 'max_tokens': 4000, # Allow for longer responses\n",
+ " 'top_p': 0.9, # Use nucleus sampling\n",
+ " 'frequency_penalty': 0.5, # Reduce repetition\n",
+ " 'presence_penalty': 0.6 # Encourage new topics\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "messages2 = [{'role': 'user', 'content': prompt2}]\n",
+ "completion2 = client2.chat.completions.create(messages=messages2, **model_params2, timeout=120)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "'''\n",
+ "# Gregor Eisenhorn as an Inquisitor\n",
+ "\n",
+ "Gregor Eisenhorn is depicted as a highly regarded inquisitor, known for his unwavering faith, iron will, and tenacity. He is described as a puritan committed to the destruction of Chaos and its followers. Despite this dedication, he faces temptation to use the power of Chaos himself. This complexity suggests that while he is celebrated for his virtues and accomplishments, there are concerns about the potential risks of his actions.\n",
+ "\n",
+ "**Source**:\n",
+ "• [No specific book title or page numbers provided in the content.]\n",
+ "'''\n"
+ ]
+ }
+ ],
+ "source": [
+ "answer2 = completion2.choices[0].message.content\n",
+ "print(answer2)"
+ ]
}
],
"metadata": {
@@ -687,7 +1352,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "llm",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -701,7 +1366,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.10"
+ "version": "3.13.9"
}
},
"nbformat": 4,