diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..85b9578 --- /dev/null +++ b/.gitignore @@ -0,0 +1,46 @@ +# virtual environment +.venv/ +venv/ +env/ + +# Credentials and configs +.env +.env.local +*.pem +*.key + +# Chroma_db +chroma_db_LAB/ + +# Python cache and builds +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Jupyter/IPython +.ipynb_checkpoints/ +.ipynb_checkpoints +*.ipynb_checkpoints + +# Logs and dumps +*.log +logs/ +.mongo/ +mongo/ diff --git a/README.md b/README.md index 690f54b..1da9bed 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,21 @@ -![logo_ironhack_blue 7](https://user-images.githubusercontent.com/23629340/40541063-a07a0a8a-601a-11e8-91b5-2f13e4e6b441.png) - -# Lab | Intro to RAG - -## Introduction - -The goal of this lab is to help you practice the concepts you learned in the lesson and provide you with some hands-on experience using RAG. - -## Getting Started - -In this lab you will be working on [main.ipynb](your-code/main.ipynb). To launch it, first navigate to the directory that contains `main.ipynb` in Terminal, then execute `jupyter notebook`. In the webpage that is automatically opened, click the `main.ipynb` link to launch it. - -When you are on `main.ipynb`, read the instructions for each cell and provide your answers. Make sure to test your answers in each cell and save. Jupyter Notebook should automatically save your work progress. But it's a good idea to periodically save your work manually just in case. - -## Deliverables - -- `main.ipynb` with your responses to each of the exercises. - -## Submission - +![logo_ironhack_blue 7](https://user-images.githubusercontent.com/23629340/40541063-a07a0a8a-601a-11e8-91b5-2f13e4e6b441.png) + +# Lab | Intro to RAG + +## Introduction + +The goal of this lab is to help you practice the concepts you learned in the lesson and provide you with some hands-on experience using RAG. + +## Getting Started + +In this lab you will be working on [main.ipynb](your-code/main.ipynb). To launch it, first navigate to the directory that contains `main.ipynb` in Terminal, then execute `jupyter notebook`. In the webpage that is automatically opened, click the `main.ipynb` link to launch it. + +When you are on `main.ipynb`, read the instructions for each cell and provide your answers. Make sure to test your answers in each cell and save. Jupyter Notebook should automatically save your work progress. But it's a good idea to periodically save your work manually just in case. + +## Deliverables + +- `main.ipynb` with your responses to each of the exercises. + +## Submission + Upon completion, add your deliverables to git. Then commit git and push your branch to the remote. \ No newline at end of file diff --git a/your-code/main.ipynb b/your-code/main.ipynb index e3a225a..5076df1 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -64,20 +64,21 @@ "outputs": [], "source": [ "%pip install langchain langchain_community pypdf\n", - "%pip install termcolor langchain_openai langchain-huggingface sentence-transformers chromadb langchain_chroma tiktoken openai python-dotenv\n" + "%pip install termcolor langchain_openai langchain-huggingface sentence-transformers chromadb langchain_chroma tiktoken openai python-dotenv\n", + "%pip install langchain-text-splitters" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "id": "6heKZkQUxYZr" }, "outputs": [], "source": [ "import os\n", - "from langchain.document_loaders import PyPDFLoader\n", - "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter\n", + "from langchain_community.document_loaders import PyPDFLoader\n", + "from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter\n", "import warnings\n", "warnings.filterwarnings('ignore')\n" ] @@ -96,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "id": "cuREtJRixYZt" }, @@ -104,7 +105,7 @@ "source": [ "# File path for the document\n", "\n", - "file_path = \"LAB/ai-for-everyone.pdf\"" + "file_path = \"../ai-for-everyone.pdf\"" ] }, { @@ -122,12 +123,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "id": "_b5Z_45UxYZu", "outputId": "a600d69f-14fe-4492-f236-97261d6ff36c" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "297" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Load and split the document\n", "loader = PyPDFLoader(file_path)\n", @@ -168,9 +180,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1096" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "text_splitter = RecursiveCharacterTextSplitter(\n", " chunk_size=1000,\n", @@ -285,31 +308,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "id": "L0xDxElwxYZw" }, "outputs": [], "source": [ - "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain_openai import OpenAIEmbeddings\n", "from dotenv import load_dotenv" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": { "id": "_WRIo3_0xYZx", "outputId": "78bfbbf3-9d25-4e31-bdbc-3e932e6bbfec" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "load_dotenv()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": { "id": "MNZfTng5xYZz", "outputId": "db1a7c85-ef9f-447e-92cd-9d097e959847" @@ -343,23 +377,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { "id": "brKe6wUgxYZ0" }, "outputs": [], "source": [ - "from langchain.vectorstores import Chroma" + "from langchain_community.vectorstores import Chroma" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": { "id": "VkjHR-RkxYZ0", "outputId": "bc11bda9-f283-457a-f584-5a06b95c4dd9" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ChromaDB created with document embeddings.\n" + ] + } + ], "source": [ "db = Chroma.from_documents(chunks, embeddings, persist_directory=\"./chroma_db_LAB\")\n", "print(\"ChromaDB created with document embeddings.\")" @@ -383,24 +425,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": { "id": "XiLv-TfrxYZ1" }, "outputs": [], "source": [ - "user_question = \"\" # User question\n", - "retrieved_docs = db.similarity_search(user_question, k=10) # k is the number of documents to retrieve" + "user_question = \"What practical implications for those who develop AI systems (such as data scientists and machine learning engineers) are discussed in AI for Everyone? Critical Perspectives in terms of ethics, responsibility, and social impact?\" # User question\n", + "retrieved_docs = db.similarity_search(user_question, k=5) # k is the number of documents to retrieve" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": { "id": "qgWsh50JxYZ1", "outputId": "c8640c5d-5955-471f-fdd2-37096f5f68c7" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "uses that essentially have technological solutions, preferably \n", + "through further data collection and algorithmic sophistication. We see this for \n", + "example with the growing industry that now concerns itself with ‘fairness’ in \n", + "the design of systems, creating more inclusive data-sets and algorithms that \n", + "can account for more diverse experiences, or the development of ‘bias mitiga-\n", + "tion’ tools (Zelevansky 2019). Such projects have drawn attention to some of \n", + "the contentious assumptions that are embedded in the design of technological \n", + "systems, but have also been accused of advancing technical fixes that serve to \n", + "legitimise the industry (Gangadharan and Niklas 2019). \n", + "The growing debate surrounding ethical challenges and the bias of algorith-\n", + "mic processes has helped spur on an engagement with data-driven technologies \n", + "as socio-technical systems that have an impact on people’s lives. Some of this is\n", + "Document 2:\n", + " we also see in the US, the mere fact of AI ethics principles hav-\n", + "ing the binding force of law is insufficient to establish their ‘goodness’ .\n", + "It is also insufficient to focus solely on social norms in the form of unen -\n", + "forceable ethical principles as these can indeed result in ethics washing. We \n", + "agree with Powles and Nissenbaum (2018) who see a focus on ‘solving’ issues \n", + "of fairness, accountability and transparency as foremost among these discus -\n", + "sions through code (and to some extent social norms) as problematic, and one \n", + "which obscures the broader structural problems which are at the root of bias \n", + "in machine learning. Again, Ihde’s critique of the ‘designer fallacy’ mentioned \n", + "above also applies here regarding the unintended consequences of attempting \n", + "to ‘design out’ bias and other problems in digital technologies. Furthermore, \n", + "broader existential questions about whether a particular system or technol -\n", + "Document 3:\n", + " full of examples of how technology is being developed by (pre-\n", + "dominantly) white middle-class men, thereby excluding people of colour and \n", + "minority communities. Wright (2019) also connects democracy and freedom \n", + "in order to reflect the value of self-determination. In this sense, members of \n", + "society should be given the possibility to participate meaningfully in decisions \n", + "that affect their lives. As AI becomes more omnipresent, people should have \n", + "a say about this. Principles such as fairness, accountability and transparency \n", + "(ACM 2020) are key when we want technological development not only to \n", + "represent the people but also guaranteeing control by the people to counterbal-\n", + "ance the power of the state and corporations.\n", + "#Principle 3: AI Should Be Beneficial to Everyone\n", + "The third and last principle states that developments in AI should contribute \n", + "to the well-being of everyone in society. This matches with Wright’s (2019)\n" + ] + } + ], "source": [ "# Display top results\n", "for i, doc in enumerate(retrieved_docs[:3]): # Display top 3 results\n", @@ -418,7 +507,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { "id": "2iB3lZqHxYZ2" }, @@ -434,12 +523,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "id": "2okzmuADxYZ2", "outputId": "0aa6cdca-188d-40e0-f5b4-8888d3549ea4" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Context formatted for GPT model.\n" + ] + } + ], "source": [ "# Generate a formatted context from the retrieved documents\n", "formatted_context = _get_document_prompt(retrieved_docs)\n", @@ -464,22 +561,90 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": { "id": "tqxVh9s3xYZ3", "outputId": "97cca95d-4ab3-44d8-a76c-5713aad387d8" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt constructed.\n" + ] + } + ], "source": [ "prompt = f\"\"\"\n", + "## SYSTEM ROLE\n", + "You are a knowledgeable and critical chatbot designed to analyze the book *AI for Everyone? Critical Perspectives* (University of Westminster Press, 2021)\n", + "Your answers must be based exclusively on the provided context extracted from this book, with a focus on ethics, power, inequalities, and the social implications of AI for practitioners\n", + "\n", + "\n", + "## USER QUESTION\n", + "The user has asked:\n", + "\"{user_question}\"\n", + "\n", + "\n", + "## CONTEXT\n", + "Here is the relevant content from the book:\n", + "'''\n", + "{formatted_context}\n", + "'''\n", + "\n", + "\n", + "## GUIDELINES\n", + "1. **Accuracy**:\n", + " - Only use the content in the `CONTEXT` section to answer.\n", + " - If the answer cannot be found, explicitly state: \"The provided context does not contain this information.\"\n", + " - Prioritize discussions of power, capitalism, inequalities, labour, data justice, AI ethics, and their implications for how AI systems are designed and deployed\n", "\n", + "2. **Perspective**:\n", + " - Reflect the critical tone of the book: question hype, techno-solutionism, and purely corporate framings of “AI for everyone”\n", + " - Highlight who benefits and who is harmed by AI systems, and how this relates to developers’ responsibilities\n", "\n", - "\"\"\"\n" + "3. **Practicality for Developers**:\n", + " - Translate theoretical or critical arguments into concrete implications for data scientists and machine learning engineers (e.g. data practices, model design, documentation, attention to labour conditions, governance, and accountability)\n", + " - When possible, turn insights into actionable recommendations or principles for practice.\n", + "\n", + "4. **Transparency**:\n", + " - Refer to the book and, if available in the context, mention chapter titles or section headings instead of page numbers.\n", + " - Do not speculate beyond the context or add external information.\n", + "\n", + "5. **Clarity**:\n", + " - Use simple, professional, and concise language.\n", + " - Format your response in Markdown for readability.\n", + "\n", + "\n", + "## TASK\n", + "1. Answer the user's question **directly** based only on the given context.\n", + "2. Explicitly connect the book's critical perspectives to the practical work of AI developers.\n", + "3. Provide the response in the following format:\n", + "\n", + "\n", + "## RESPONSE FORMAT\n", + "'''\n", + "# [Brief, Informative Title]\n", + "\n", + "[Answer in clear, structured text, 3–6 short paragraphs.]\n", + "\n", + "## Practical Implications for AI Developers\n", + "- List 5–10 concrete, actionable recommendations for AI developers.\n", + "- Each bulletpoint must be a specific action or practice, not a placeholder.\n", + "\n", + "\n", + "**Source**:\n", + "• *AI for Everyone? Critical Perspectives* (ed. Pieter Verdegem, 2021), relevant chapters as provided in the context.\n", + "'''\n", + "\"\"\"\n", + "print(\"Prompt constructed.\")\n", + "\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": { "id": "0mjkQJ_ZxYZ3" }, @@ -497,7 +662,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": { "id": "ylypRWRlxYZ4" }, @@ -506,12 +671,12 @@ "# Set up GPT client and parameters\n", "client = openai.OpenAI()\n", "model_params = {\n", - " 'model': 'gpt-4o',\n", - " 'temperature': , # Increase creativity\n", - " 'max_tokens': , # Allow for longer responses\n", - " 'top_p': , # Use nucleus sampling\n", - " 'frequency_penalty': , # Reduce repetition\n", - " 'presence_penalty': # Encourage new topics\n", + " 'model': 'gpt-4o-mini',\n", + " 'temperature': 0.9, # Increase creativity\n", + " 'max_tokens': 3000, # Allow for longer responses\n", + " 'top_p': 0.9, # Use nucleus sampling\n", + " 'frequency_penalty': 0.3, # Reduce repetition\n", + " 'presence_penalty': 0.4 # Encourage new topics\n", "}" ] }, @@ -526,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": { "id": "4eXZO4pIxYZ4" }, @@ -538,17 +703,74 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": { "id": "wLPAcchBxYZ5", "outputId": "976c7800-16ed-41fe-c4cf-58f60d3230d2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Ethical Responsibilities in AI Development\n", + "\n", + "The book *AI for Everyone? Critical Perspectives* emphasizes the ethical implications and social responsibilities of those who develop AI systems, such as data scientists and machine learning engineers. A significant concern is the pervasive bias embedded within algorithmic processes that can reinforce existing inequalities. Developers must recognize that merely implementing technical solutions, such as bias mitigation tools, often oversimplifies the complex social dynamics at play, potentially legitimizing systemic issues instead of addressing them directly.\n", + "\n", + "Furthermore, discussions on ethical AI highlight the necessity for inclusivity in data collection and algorithm design. The historical context shows a dominance of perspectives from predominantly white, middle-class males, which has led to the exclusion of marginalized communities. As AI becomes more integrated into daily life, developers are called to ensure that their work reflects diverse experiences and enables meaningful participation from all societal members.\n", + "\n", + "In addition to fairness and transparency principles, developers must grapple with broader existential questions regarding the societal impact of their technologies. They should strive to develop AI systems that genuinely contribute to human well-being rather than simply serving corporate or state interests. By acknowledging these ethical dimensions, developers can better align their work with the principles of accountability and beneficial outcomes for society.\n", + "\n", + "## Practical Implications for AI Developers\n", + "- **Prioritize Inclusive Data Practices**: Actively seek out diverse datasets to ensure that AI models reflect varied perspectives and experiences.\n", + "- **Engage in Interdisciplinary Collaboration**: Work alongside ethicists, sociologists, and community representatives to address the socio-technical implications of AI systems.\n", + "- **Document Design Decisions**: Maintain transparent documentation detailing how design choices are made and the potential biases identified during development.\n", + "- **Adopt Ethical Guidelines**: Familiarize yourself with established ethical frameworks (e.g., Asilomar Principles) and incorporate them into project planning and execution.\n", + "- **Implement Bias Testing Procedures**: Develop formal methods to assess algorithmic bias regularly, akin to reliability and validity testing in traditional research.\n", + "- **Address Labor Conditions**: Advocate for fair labor practices within your teams and across supply chains involved in data gathering and model training.\n", + "- **Facilitate Public Engagement**: Create opportunities for community feedback on AI systems that affect public welfare, allowing for meaningful input in decision-making processes.\n", + "- **Explore Data Justice Principles**: Ensure that users are compensated fairly for their data contributions when applicable, fostering a sense of ownership over personal information.\n", + "- **Critically Assess Technical Solutions**: Resist techno-solutionism by questioning whether proposed technological fixes genuinely address deeper societal issues or merely serve as temporary patches.\n", + "- **Commit to Continuous Learning**: Stay informed about ongoing debates regarding ethics in AI and adapt practices accordingly based on emerging research and community needs.\n", + "\n", + "**Source**:\n", + "• *AI for Everyone? Critical Perspectives* (ed. Pieter Verdegem, 2021), relevant chapters as provided in the context.\n" + ] + } + ], "source": [ "answer = completion.choices[0].message.content\n", "print(answer)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ethical Responsibilities in AI Development\n", + "\n", + "The book *AI for Everyone? Critical Perspectives* emphasizes the ethical implications and social responsibilities of those who develop AI systems, such as data scientists and machine learning engineers. A significant concern is the pervasive bias embedded within algorithmic processes that can reinforce existing inequalities. Developers must recognize that merely implementing technical solutions, such as bias mitigation tools, often oversimplifies the complex social dynamics at play, potentially legitimizing systemic issues instead of addressing them directly.\n", + "\n", + "Furthermore, discussions on ethical AI highlight the necessity for inclusivity in data collection and algorithm design. The historical context shows a dominance of perspectives from predominantly white, middle-class males, which has led to the exclusion of marginalized communities. As AI becomes more integrated into daily life, developers are called to ensure that their work reflects diverse experiences and enables meaningful participation from all societal members.\n", + "\n", + "In addition to fairness and transparency principles, developers must grapple with broader existential questions regarding the societal impact of their technologies. They should strive to develop AI systems that genuinely contribute to human well-being rather than simply serving corporate or state interests. By acknowledging these ethical dimensions, developers can better align their work with the principles of accountability and beneficial outcomes for society.\n", + "\n", + "## Practical Implications for AI Developers\n", + "- **Prioritize Inclusive Data Practices**: Actively seek out diverse datasets to ensure that AI models reflect varied perspectives and experiences.\n", + "- **Engage in Interdisciplinary Collaboration**: Work alongside ethicists, sociologists, and community representatives to address the socio-technical implications of AI systems.\n", + "- **Document Design Decisions**: Maintain transparent documentation detailing how design choices are made and the potential biases identified during development.\n", + "- **Adopt Ethical Guidelines**: Familiarize yourself with established ethical frameworks (e.g., Asilomar Principles) and incorporate them into project planning and execution.\n", + "- **Implement Bias Testing Procedures**: Develop formal methods to assess algorithmic bias regularly, akin to reliability and validity testing in traditional research.\n", + "- **Address Labor Conditions**: Advocate for fair labor practices within your teams and across supply chains involved in data gathering and model training.\n", + "- **Facilitate Public Engagement**: Create opportunities for community feedback on AI systems that affect public welfare, allowing for meaningful input in decision-making processes.\n", + "- **Explore Data Justice Principles**: Ensure that users are compensated fairly for their data contributions when applicable, fostering a sense of ownership over personal information.\n", + "- **Critically Assess Technical Solutions**: Resist techno-solutionism by questioning whether proposed technological fixes genuinely address deeper societal issues or merely serve as temporary patches.\n", + "- **Commit to Continuous Learning**: Stay informed about ongoing debates regarding ethics in AI and adapt practices accordingly based on emerging research and community needs.\n", + "\n", + "**Source**:\n", + "• *AI for Everyone? Critical Perspectives* (ed. Pieter Verdegem, 2021), relevant chapters as provided in the context." + ] + }, { "cell_type": "markdown", "metadata": { @@ -595,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": { "id": "nCXL9Cz1xYaV" }, @@ -615,7 +837,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": { "id": "9y3E0YWExYaV" }, @@ -636,16 +858,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": { "id": "i7SkWPpnxYaW", "outputId": "28e82563-edba-4b41-acad-ec27e5ba134f" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Snippet 1:\n", + "274 AI for Everyone?\n", + "application; causes that essentially have technological solutions, preferably \n", + "through further data collection and algorithmic sophistication. We see this for \n", + "example with the growing industry that now concerns itself with ‘fairness’ in \n", + "the design of systems, creating more inclusive data-sets and algorithms that \n", + "can account for more diverse experiences, or the development of ‘\u001b[1m\u001b[32mbias\u001b[0m mitiga-\n", + "tion’ tools (Zelevansky 2019). Such projects have drawn attention to some of \n", + "the contentious assumptions that are embedded in the design of technological \n", + "systems, but have also been accused of advancing technical fixes that serve to \n", + "legitimise the industry (Gangadharan and Niklas 2019). \n", + "The growing debate surrounding ethical challenges and the \u001b[1m\u001b[32mbias\u001b[0m of algorith-\n", + "mic processes has helped spur on an engagement with data-driven technologies \n", + "as socio-technical systems that have an impact on people’s lives. Some of this is\n", + "--------------------------------------------------------------------------------\n", + "Snippet 2:\n", + "114 AI for Everyone?\n", + "(Ihde 2006). As we also see in the US, the mere fact of AI \u001b[1m\u001b[32methics\u001b[0m principles hav-\n", + "ing the binding force of law is insufficient to establish their ‘goodness’ .\n", + "It is also insufficient to focus solely on social norms in the form of unen -\n", + "forceable ethical principles as these can indeed result in \u001b[1m\u001b[32methics\u001b[0m washing. We \n", + "agree with Powles and Nissenbaum (2018) who see a focus on ‘solving’ issues \n", + "of fairness, accountability and transparency as foremost among these discus -\n", + "sions through code (and to some extent social norms) as problematic, and one \n", + "which obscures the broader structural problems which are at the root of \u001b[1m\u001b[32mbias\u001b[0m \n", + "in machine learning. Again, Ihde’s critique of the ‘designer fallacy’ mentioned \n", + "above also applies here regarding the unintended consequences of attempting \n", + "to ‘design out’ \u001b[1m\u001b[32mbias\u001b[0m and other problems in digital technologies. Furthermore, \n", + "broader existential questions about whether a particular system or technol -\n", + "--------------------------------------------------------------------------------\n", + "Snippet 3:\n", + "14 AI for Everyone?\n", + "history of AI is full of examples of how technology is being developed by (pre-\n", + "dominantly) white middle-class men, thereby excluding people of colour and \n", + "minority communities. Wright (2019) also connects democracy and freedom \n", + "in order to reflect the value of self-determination. In this sense, members of \n", + "society should be given the possibility to participate meaningfully in decisions \n", + "that affect their lives. As AI becomes more omnipresent, people should have \n", + "a say about this. Principles such as fairness, accountability and transparency \n", + "(ACM 2020) are key when we want technological development not only to \n", + "represent the people but also guaranteeing control by the people to counterbal-\n", + "ance the \u001b[1m\u001b[32mpower\u001b[0m of the state and corporations.\n", + "#Principle 3: AI Should Be Beneficial to Everyone\n", + "The third and last principle states that developments in AI should contribute \n", + "to the well-being of everyone in society. This matches with Wright’s (2019)\n", + "--------------------------------------------------------------------------------\n", + "Snippet 4:\n", + "30 AI for Everyone?\n", + "Future research will be needed to address the various challenges with regards to \n", + "the development of artificial intelligence. Which formal method can be used \n", + "to test for algorithmic \u001b[1m\u001b[32mbias\u001b[0m? Can we identify simple to use measures to assess \n", + "\u001b[1m\u001b[32mbias\u001b[0m, similar to the way we assess reliability and validity? What is the best way to \n", + "bridge (deep) learning and privacy? Should learning be conducted on the user \n", + "side (with algorithms requiring new data)? Or should data be transferred to a \n", + "trusted intermediary who performs the analysis on behalf of firms? Do users \n", + "need to be compensated in one way or another for data or resources provided? \n", + "Moreover, how can the refusal to share data lead to \u001b[1m\u001b[32mbias\u001b[0mes in the data available \n", + "for learning? Which data sources can and should be used for algorithmic learn-\n", + "ing? Are there certain types of data that should be ‘off-limits’? What role will \n", + "interdisciplinary AI teams play in establishing coexistence between humans\n", + "--------------------------------------------------------------------------------\n", + "Snippet 5:\n", + "8 AI for Everyone?\n", + "talks about AI that needs to ‘contribute to society and to human well-being’ , \n", + "while IEEE (2020) has come up with principles for ethically aligned design . \n", + "General principles include human rights, well-being, data agency, effectiveness, \n", + "transparency, accountability, awareness of misuse and competence. \n", + "Often cited are the Asilomar AI Principles. The Asilomar Conference on Benefi-\n", + "cial AI was organised by the Future of Life Institute (2017) and brought together \n", + "more than 100 AI researchers from academia and industry and thought leaders \n", + "in economics, law, \u001b[1m\u001b[32methics\u001b[0m and philosophy to address and formulate principles \n", + "of beneficial AI. The resulting Asilomar AI principles are organised around \n", + "(1) research issues, (2) \u001b[1m\u001b[32methics\u001b[0m and values and (3) longer-term issues (Future of \n", + "Life Institute 2017). The first category, research issues, sets out some guidelines \n", + "in terms of research goals, funding and culture. Secondly, thirteen specific eth-\n", + "--------------------------------------------------------------------------------\n" + ] + } + ], "source": [ - "query_keywords = [] # add your keywords\n", - "for i, doc in enumerate(retrieved_docs[:1]):\n", - " snippet = doc.page_content[:200]\n", + "query_keywords = [\n", + " \"developers\",\n", + " \"engineers\",\n", + " \"labour\",\n", + " \"workers\",\n", + " \"governance\",\n", + " \"data justice\",\n", + " \"bias\",\n", + " \"discrimination\",\n", + " \"ethics\",\n", + " \"power\",\n", + " \"inequalities\"\n", + "]\n", + " # add your keywords\n", + "for i, doc in enumerate(retrieved_docs[:]):\n", + " snippet = doc.page_content[:]\n", " highlighted = highlight_keywords(snippet, query_keywords)\n", " print(f\"Snippet {i+1}:\\n{highlighted}\\n{'-'*80}\")" ] @@ -687,7 +1005,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "llm", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -701,7 +1019,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.12.3" } }, "nbformat": 4,