diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7d46427 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.env +chroma_db_LAB/ \ No newline at end of file diff --git a/your-code/main.ipynb b/your-code/main.ipynb index e3a225a..dff0edb 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -69,15 +69,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "6heKZkQUxYZr" }, "outputs": [], "source": [ "import os\n", - "from langchain.document_loaders import PyPDFLoader\n", - "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n", + "from langchain_community.document_loaders import PyPDFLoader\n", + "from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter\n", "import warnings\n", "warnings.filterwarnings('ignore')\n" ] @@ -96,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "id": "cuREtJRixYZt" }, @@ -104,7 +104,7 @@ "source": [ "# File path for the document\n", "\n", - "file_path = \"LAB/ai-for-everyone.pdf\"" + "file_path = r\"C:\\Users\\jgest\\Desktop\\IRONHACK\\labs\\week7\\day5\\lab-intro-rag\\ai-for-everyone.pdf\"" ] }, { @@ -122,12 +122,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "_b5Z_45UxYZu", "outputId": "a600d69f-14fe-4492-f236-97261d6ff36c" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "297" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Load and split the document\n", "loader = PyPDFLoader(file_path)\n", @@ -168,9 +179,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1096" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "text_splitter = 
RecursiveCharacterTextSplitter(\n", " chunk_size=1000,\n", @@ -285,31 +307,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "id": "L0xDxElwxYZw" }, "outputs": [], "source": [ - "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain_openai import OpenAIEmbeddings\n", "from dotenv import load_dotenv" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "id": "_WRIo3_0xYZx", "outputId": "78bfbbf3-9d25-4e31-bdbc-3e932e6bbfec" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "load_dotenv()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": { "id": "MNZfTng5xYZz", "outputId": "db1a7c85-ef9f-447e-92cd-9d097e959847" @@ -343,23 +376,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "id": "brKe6wUgxYZ0" }, "outputs": [], "source": [ - "from langchain.vectorstores import Chroma" + "from langchain_chroma import Chroma" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": { "id": "VkjHR-RkxYZ0", "outputId": "bc11bda9-f283-457a-f584-5a06b95c4dd9" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChromaDB created with document embeddings.\n" + ] + } + ], "source": [ "db = Chroma.from_documents(chunks, embeddings, persist_directory=\"./chroma_db_LAB\")\n", "print(\"ChromaDB created with document embeddings.\")" @@ -383,24 +424,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": { "id": "XiLv-TfrxYZ1" }, "outputs": [], "source": [ - "user_question = \"\" # User question\n", + "user_question = \"How does the book explain the relationship between AI, digital capitalism, and existing social inequalities?\" # User question\n", 
"retrieved_docs = db.similarity_search(user_question, k=10) # k is the number of documents to retrieve" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": { "id": "qgWsh50JxYZ1", "outputId": "c8640c5d-5955-471f-fdd2-37096f5f68c7" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "vestigates the normative projections of what \n", + "AI should be and what it should do. This section poses critical questions about \n", + "how AI needs to debunk the myths surrounding it.\n", + "Part 3: AI Power and Inequalities – advances the debate around AI by criti -\n", + "cally examining what ‘ AI for Everyone?’ means. This is dealing with the root of \n", + "the problem: who will benefit from AI is ultimately down to who has the power \n", + "to decide. These contributions look at how AI capitalism is organised, what \n", + "(new) inequalities it might bring about and how we can fight back.\n", + "Why do we need a book on AI for Everyone? and why do we need it now? \n", + "The 2007–2008 financial crisis, and the resulting global economic crisis, has not \n", + "only brought about a decade of austerity in large parts of the Western world; \n", + "it has also been the context in which social media and digital platforms have \n", + "transformed into behemoths. Tech companies are now dominating the top 10\n", + "Document 2:\n", + "tends long-standing debates on modes of \n", + "capitalism that significantly shape the circumstances of working people whilst \n", + "limiting their ability to influence decisions that govern their lives.\n", + "Document 3:\n", + " came along with observations that AI can \n", + "not only supercharge innovation and bring about economic prosperity but also \n", + "lead to inequalities and unfairness. 
\n", + "This book contributes to this debate by critically reflecting on how we \n", + "should think about AI and the relationship between humans and machines. \n", + "It analyses the discourses and myths that exist around AI; what it will enable\n" + ] + } + ], "source": [ "# Display top results\n", "for i, doc in enumerate(retrieved_docs[:3]): # Display top 3 results\n", @@ -418,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": { "id": "2iB3lZqHxYZ2" }, @@ -434,18 +507,184 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": { "id": "2okzmuADxYZ2", "outputId": "0aa6cdca-188d-40e0-f5b4-8888d3549ea4" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Context formatted for GPT model.\n" + ] + } + ], "source": [ "# Generate a formatted context from the retrieved documents\n", "formatted_context = _get_document_prompt(retrieved_docs)\n", "print(\"Context formatted for GPT model.\")" ] }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Content:\n", + "lar and scholarly discussions and investigates the normative projections of what \n", + "AI should be and what it should do. This section poses critical questions about \n", + "how AI needs to debunk the myths surrounding it.\n", + "Part 3: AI Power and Inequalities – advances the debate around AI by criti -\n", + "cally examining what ‘ AI for Everyone?’ means. This is dealing with the root of \n", + "the problem: who will benefit from AI is ultimately down to who has the power \n", + "to decide. These contributions look at how AI capitalism is organised, what \n", + "(new) inequalities it might bring about and how we can fight back.\n", + "Why do we need a book on AI for Everyone? and why do we need it now? 
\n", + "The 2007–2008 financial crisis, and the resulting global economic crisis, has not \n", + "only brought about a decade of austerity in large parts of the Western world; \n", + "it has also been the context in which social media and digital platforms have \n", + "transformed into behemoths. Tech companies are now dominating the top 10\n", + "\n", + "\n", + "Content:\n", + "ment of AI across social life. AI extends long-standing debates on modes of \n", + "capitalism that significantly shape the circumstances of working people whilst \n", + "limiting their ability to influence decisions that govern their lives.\n", + "\n", + "\n", + "Content:\n", + "other. These first critical insights came along with observations that AI can \n", + "not only supercharge innovation and bring about economic prosperity but also \n", + "lead to inequalities and unfairness. \n", + "This book contributes to this debate by critically reflecting on how we \n", + "should think about AI and the relationship between humans and machines. \n", + "It analyses the discourses and myths that exist around AI; what it will enable\n", + "\n", + "\n", + "Content:\n", + "Towards Data Justice Unionism? 271\n", + "class owns the means of organising the means of production. Importantly, this \n", + "does not necessarily make away with the exploitation of labour in value chains. \n", + "As Srnicek (2020) has pointed out, AI systems rely not just on vast amounts of \n", + "data, but on significant computational power and control over labour to drive \n", + "monopolisation. We have a growing economy based on what Gray and Suri \n", + "(2019, ix) refer to as ‘ ghost work’: a new digital assembly line that aggregates \n", + "the collective input of distributed workers, ships pieces of projects rather than \n", + "products, and operates continuously across a host of economic sectors in order \n", + "for AI systems to function. 
\n", + "The implications of AI for labour therefore extend from the workplace to \n", + "the reorganisation of employment through to the operations of capital upon \n", + "which AI depends and advances. The use of AI in automated hiring systems,\n", + "\n", + "\n", + "Content:\n", + "to an already skewed social landscape (Eubanks 2018). There’s more data about \n", + "the poor and marginalised because they are already most surveilled, and they \n", + "are most surveilled because our social systems already categorise them as trou-\n", + "blesome. As a result, any unfairness that algorithms add to the mix will fall \n", + "more heavily on those who are already struggling the most. However, it’s not \n", + "only or even mainly data that shapes the politics of AI.\n", + "Langdon Winner wrote about the way particular technologies appear to \n", + "have an inherent compatibility with particular socio-political systems (Winner \n", + "2020), so it’s fair to ask what feedback loops connect AI and the societies into \n", + "which it has emerged. This attentiveness may help to bring neglected features \n", + "to the fore, to remind us of framings that are so pervasive they are usually \n", + "ignored or to highlight new dynamics that are going to change more than just\n", + "\n", + "\n", + "Content:\n", + "the reorganisation of employment through to the operations of capital upon \n", + "which AI depends and advances. The use of AI in automated hiring systems, \n", + "performance assessment tools, scheduling, and other forms of algorithmic man-\n", + "agement in the workplace (platforms or otherwise) intersect with broader trans-\n", + "formations in the economy and dynamics of capitalism in which developments \n", + "in AI are embedded. 
These different concerns highlight the many complex and \n", + "intricate ways AI impacts on the experiences of working people, the way their \n", + "work is organised and how it is valued, and their ability to influence decisions \n", + "that govern their lives. Y et, as I will go on to outline below, workers’ voices and \n", + "union perspectives have been notably absent from AI governance debates that \n", + "have instead overwhelmingly championed liberal frameworks based on citizen \n", + "and consumer rights. If we are to contend with AI in relation to the advance -\n", + "\n", + "\n", + "Content:\n", + "Introduction: Why We Need Critical Perspectives on AI 3\n", + "of the most valuable companies in the world (Verdegem 2021). Austerity has \n", + "also led to growing inequalities and political polarisation, bringing right-wing \n", + "authoritarian politics into power in a number of countries (Fuchs 2018). A \n", + "world already cracked by economic uncertainty and the looming threat of \n", + "climate change was then shaken in 2020 by a global pandemic. COVID-19 \n", + "has massively impacted the global economy, on a much larger scale than the \n", + "2007–2008 crisis. On top of this, the pandemic has also resulted in an even \n", + "bigger dependence and dominance of tech platforms such as Amazon, Alibaba, \n", + "Google and Tencent. These companies are, not surprisingly, also leading AI \n", + "companies. Only a small number of corporations have the necessary compu -\n", + "tational power to develop AI systems, are financially strong enough to hire the \n", + "brightest AI talent and have access to the gigantic datasets that are needed to\n", + "\n", + "\n", + "Content:\n", + "when talking about AI and intelligent systems. Angela Daly, S. Kate Devitt and \n", + "Monique Mann (Chapter 7) introduce and discuss their Good Data approach in \n", + "order to overcome the limitations of AI ethics and governance. 
James Steinhoff \n", + "(Chapter 8) critically analyses the social reconfiguration of AI and discusses \n", + "the central questions about utility and feasibility. Benedetta Brevini (Chapter 9) \n", + "analyses AI policies in Europe and unpacks some of the myths around AI \n", + "that legitimate capitalism. Alkim Almila Akdag Salah ( Chapter 10 ) reflects \n", + "on how the discourses of artistic computational production have changed and \n", + "how myths about AI need to be uncovered in this context.\n", + "Part 3: AI Power and Inequalities involves five contributions. Carrie O’Connell \n", + "and Chad Van de Wiele ( Chapter 11) revisit Wiener’s cybernetic prediction \n", + "as the theoretical foundation of AI and make a plea how we need to uncover \n", + "the black box of what is behind prediction and simulation. Jernej A. Prodnik\n", + "\n", + "\n", + "Content:\n", + "to pass for social class as well as race; how it seems to always be the poorest \n", + "and most marginalised who bear the brunt of collateral damage from algorith-\n", + "mic systems even when the bureaucrats involved are making sincere efforts \n", + "to be fair (which they often aren’t) (Eubanks 2018). The data demands of AI \n", + "mean that the pattern of having to trade private personal information for ser -\n", + "vices will become even more invasive. The optimisations of AI act as an inverse \n", + "intersectionality, applying additional downward pressure on existing fissures in \n", + "the social fabric. Like Eubanks, we should be asking what specific forms these \n", + "fractures will take, and how to recognise them. One marker will be the emer -\n", + "gence of machinic moralism. The more that AI is seen as a solution to austerity, \n", + "the more its classifications and rankings will be enrolled in the rationing of \n", + "goods and the assigning of sanctions. 
AI will be put in the position of decid -\n", + "\n", + "\n", + "Content:\n", + "Tech-Determinism, Tech-Solutionism and AI\n", + "The technological deterministic argument that technology can and will fix \n", + "capitalism – and its intrinsic power to exacerbate inequalities of economic, \n", + "racial, gender forms – is far from being a recent elaboration (Gilder 1990; \n", + "Negroponte 1998). To use the words of Mosco, ‘one generation after another \n", + "has renewed the belief that, whatever was said about earlier technologies, the \n", + "latest one will fulfil a radical and revolutionary promise’ (Mosco 2004, 21; \n", + "Brevini 2020). Mosco (2004) rightly reminds us of James Carey’s (1992) work \n", + "that discussed how machines have often been framed employing a powerful \n", + "religious ethos: ‘in contemporary popular commentary and even in technical \n", + "discussions of new communications technology, the historic religious under -\n", + "current has never been eliminated from our thought’ (Carey 1992, 18).\n", + "As a result, technology becomes the most powerful weapon purporting to lift\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(formatted_context)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -464,22 +703,74 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { "id": "tqxVh9s3xYZ3", "outputId": "97cca95d-4ab3-44d8-a76c-5713aad387d8" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "prompt constructed.\n" + ] + } + ], "source": [ "prompt = f\"\"\"\n", + "## SYSTEM ROLE\n", + "You are a helpful academic assistant helping a student understand arguments from the book *AI for Everyone? 
Critical Perspectives on AI*.\n", + "You are given excerpts from the book as context and a user question.\n", + "\n", + "## USER QUESTION\n", + "The user has asked:\n", + "{user_question}\n", + "\n", + "## CONTEXT\n", + "Here is the relevant content from the technical books:\n", + "'''\n", + "{formatted_context}\n", + "'''\n", + "\n", + "## GUIDELINES\n", + "1. **Accuracy**:\n", + " - Only use the content in the `CONTEXT` section to answer.\n", + " - Base your answer ONLY on the context above.\n", + " - If the context is not sufficient to fully answer, say that explicitly and explain what is missing.\n", + " - Emphasise the book's critical perspective on AI (power, inequalities, digital capitalism, labour, data justice, AI myths).\n", + " - When appropriate, connect different themes (e.g. AI ethics, data, capitalism, labour) instead of treating them in isolation.\n", + "\n", + " \n", + "2. **Transparency**:\n", + " - Reference the book's name and page numbers when providing information.\n", + " - Do not speculate or provide opinions.\n", + "\n", "\n", + "3. **Clarity**:\n", + " - Use simple, professional, and concise language.\n", + " - Format your response in Markdown for readability.\n", "\n", - "\"\"\"\n" + "## TASK\n", + "1. Answer the user's question **directly** if possible.\n", + "2. Point the user to relevant parts of the documentation.\n", + "3. 
Provide the response in the following format:\n", + "\n", + "## RESPONSE FORMAT\n", + "'''\n", + "# [Brief Title of the Answer]\n", + "[Answer in simple, clear text.]\n", + "\n", + "**Source**:\n", + "• [Book Title], Page(s): [...]\n", + "'''\n", + "\"\"\"\n", + "print(\"prompt constructed.\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "id": "0mjkQJ_ZxYZ3" }, @@ -497,7 +788,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": { "id": "ylypRWRlxYZ4" }, @@ -507,11 +798,11 @@ "client = openai.OpenAI()\n", "model_params = {\n", " 'model': 'gpt-4o',\n", - " 'temperature': , # Increase creativity\n", - " 'max_tokens': , # Allow for longer responses\n", - " 'top_p': , # Use nucleus sampling\n", - " 'frequency_penalty': , # Reduce repetition\n", - " 'presence_penalty': # Encourage new topics\n", + " 'temperature': 0.4, # Lower values give more focused, less random output\n", + " 'max_tokens': 2000, # Allow for longer responses\n", + " 'top_p': 0.9, # Use nucleus sampling\n", + " 'frequency_penalty': 0.3, # Reduce repetition\n", + " 'presence_penalty': 0.5 # Encourage new topics\n", "}" ] }, @@ -526,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "id": "4eXZO4pIxYZ4" }, @@ -538,12 +829,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": { "id": "wLPAcchBxYZ5", "outputId": "976c7800-16ed-41fe-c4cf-58f60d3230d2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'''\n", + "# The Relationship Between AI, Digital Capitalism, and Social Inequalities\n", + "\n", + "The book *AI for Everyone? Critical Perspectives on AI* explores the intricate relationship between AI, digital capitalism, and existing social inequalities. It critically examines how AI is embedded within the broader transformations of capitalism and how it perpetuates or exacerbates social inequalities. 
The text highlights several key points:\n", + "\n", + "1. **AI and Capitalism**: The book discusses how AI systems are deeply intertwined with capitalist structures, particularly in the way they rely on vast data resources and computational power controlled by a few dominant tech companies. These corporations, such as Amazon, Google, and Tencent, have become central players in the global economy due to their ability to harness AI technologies (Page 3).\n", + "\n", + "2. **Labour Implications**: AI's impact on labour is significant as it reorganizes employment through automated hiring systems and algorithmic management tools. This reorganization often limits workers' influence over decisions that affect their lives, thus reinforcing existing power imbalances within capitalist economies (Page 271).\n", + "\n", + "3. **Social Inequalities**: The book also addresses how AI can contribute to social inequalities by disproportionately affecting marginalized communities. Since these groups are already heavily surveilled, any biases inherent in AI systems tend to exacerbate their struggles (Page 271). Moreover, the data demands of AI can lead to more invasive practices that further entrench existing societal fissures.\n", + "\n", + "4. **Technological Determinism and Myths**: There is a critique of the deterministic belief that technology alone can solve societal issues like inequality. The text argues that such views overlook the socio-political contexts in which technologies like AI operate and fail to address the root causes of inequality (Page 21).\n", + "\n", + "Overall, the book calls for critical reflection on these dynamics to uncover myths surrounding AI and to consider whose interests are served by its development.\n", + "\n", + "**Source**:\n", + "• *AI for Everyone? 
Critical Perspectives on AI*, Pages: 3, 21, 271\n", + "'''\n" + ] + } + ], "source": [ "answer = completion.choices[0].message.content\n", "print(answer)" @@ -595,7 +911,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": { "id": "nCXL9Cz1xYaV" }, @@ -615,7 +931,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "id": "9y3E0YWExYaV" }, @@ -636,14 +952,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": { "id": "i7SkWPpnxYaW", "outputId": "28e82563-edba-4b41-acad-ec27e5ba134f" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Snippet 1:\n", + "lar and scholarly discussions and investigates the normative projections of what \n", + "\u001b[1m\u001b[32mAI\u001b[0m should be and what it should do. This section poses critical questions about \n", + "how \u001b[1m\u001b[32mAI\u001b[0m needs to debunk the myths surr\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], "source": [ - "query_keywords = [] # add your keywords\n", + "query_keywords = [\"AI\",\n", + " \"digital capitalism\",\n", + " \"capitalism\",\n", + " \"social inequalities\",\n", + " \"inequalities\",\n", + " \"power\",\n", + " \"labour\",\n", + " \"data justice\"] # keywords to highlight in the retrieved snippet\n", "for i, doc in enumerate(retrieved_docs[:1]):\n", " snippet = doc.page_content[:200]\n", " highlighted = highlight_keywords(snippet, query_keywords)\n", @@ -687,7 +1022,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "llm", + "display_name": "base", "language": "python", "name": "python3" }, @@ -701,7 +1036,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.12.7" } }, "nbformat": 4,