diff --git a/Fake_News_Detector_Improved.ipynb b/Fake_News_Detector_Improved.ipynb
new file mode 100644
index 0000000..dbe7c65
--- /dev/null
+++ b/Fake_News_Detector_Improved.ipynb
@@ -0,0 +1,647 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "b6edf3ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import re\n",
+ "import pickle\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+ "from sklearn.naive_bayes import MultinomialNB\n",
+ "from sklearn.metrics import accuracy_score, confusion_matrix\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "a8b8f15a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def preprocess(text):\n",
+ "    \"\"\"Normalize raw article text for TF-IDF vectorization.\n",
+ "\n",
+ "    Lower-cases the input and replaces every run of non-word characters\n",
+ "    (punctuation, symbols, whitespace) with a single space.\n",
+ "\n",
+ "    Args:\n",
+ "        text: Raw article text. Non-string values, such as the NaN that\n",
+ "            pandas produces for an empty CSV cell, are treated as empty text.\n",
+ "\n",
+ "    Returns:\n",
+ "        The cleaned text with no leading or trailing whitespace.\n",
+ "    \"\"\"\n",
+ "    if not isinstance(text, str):\n",
+ "        # A missing value would otherwise crash on .lower() in the middle of .apply().\n",
+ "        return ''\n",
+ "    # Whitespace is itself non-word, so a single \\W+ pass both strips\n",
+ "    # punctuation and collapses repeated spaces.\n",
+ "    return re.sub(r'\\W+', ' ', text.lower()).strip()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "cb9f81aa",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read the two source tables: genuine articles first, fabricated ones second\n",
+ "true, fake = (\n",
+ "    pd.read_csv(csv_path) for csv_path in ('data/True.csv', 'data/Fake.csv')\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "610f29ba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Attach the target class to every row: 1 for the True.csv rows, 0 for Fake.csv\n",
+ "true['label'], fake['label'] = 1, 0\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "e5bd1bae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Clean the article bodies so both classes go through the same normalization\n",
+ "true['text'], fake['text'] = (\n",
+ "    frame['text'].apply(preprocess) for frame in (true, fake)\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "7013c671",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Downsample the larger class so both classes contribute the same number of rows\n",
+ "min_len = min(map(len, (true, fake)))\n",
+ "true, fake = (\n",
+ "    frame.sample(n=min_len, random_state=42) for frame in (true, fake)\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "90325fb1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Stack both classes, shuffle the rows reproducibly, and renumber the index\n",
+ "df = (\n",
+ "    pd.concat([true, fake])\n",
+ "    .sample(frac=1, random_state=42)\n",
+ "    .reset_index(drop=True)\n",
+ ")\n",
+ "\n",
+ "# Features are the cleaned article text; targets are the 0/1 labels\n",
+ "X, y = df['text'], df['label']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "2565508c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.2, random_state=42\n",
+ ")\n",
+ "\n",
+ "# TF-IDF over unigrams and bigrams with English stop words removed;\n",
+ "# min_df / max_df prune very rare and near-ubiquitous terms.\n",
+ "vectorizer = TfidfVectorizer(\n",
+ "    ngram_range=(1, 2),\n",
+ "    stop_words='english',\n",
+ "    min_df=2,\n",
+ "    max_df=0.9,\n",
+ ")\n",
+ "\n",
+ "# Fit on the training split only; the test split is merely transformed\n",
+ "X_train_vec = vectorizer.fit_transform(X_train)\n",
+ "X_test_vec = vectorizer.transform(X_test)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "2fdcccc5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "<pre>MultinomialNB()</pre>\n",
+ "<b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.<br />\n",
+ "On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b>"
+ ],
+ "text/plain": [
+ "MultinomialNB()"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Train a multinomial Naive Bayes classifier on the TF-IDF features\n",
+ "model = MultinomialNB().fit(X_train_vec, y_train)\n",
+ "model  # last expression, so the notebook displays the fitted estimator\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "dcb27ef0",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy: 0.9540095716119995\n",
+ "Confusion Matrix:\n",
+ " [[3921 284]\n",
+ " [ 110 4252]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Predict the held-out split and report each metric in turn\n",
+ "y_pred = model.predict(X_test_vec)\n",
+ "for caption, metric in (\n",
+ "    (\"Accuracy:\", accuracy_score),\n",
+ "    (\"Confusion Matrix:\\n\", confusion_matrix),\n",
+ "):\n",
+ "    print(caption, metric(y_test, y_pred))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "066dc8e6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Persist the fitted classifier and vectorizer for reuse outside the notebook.\n",
+ "# The with-blocks guarantee each file is flushed and closed, even if\n",
+ "# pickling raises part-way through.\n",
+ "with open(\"model.pkl\", \"wb\") as model_file:\n",
+ "    pickle.dump(model, model_file)\n",
+ "with open(\"vectorizer.pkl\", \"wb\") as vectorizer_file:\n",
+ "    pickle.dump(vectorizer, vectorizer_file)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ed3218e5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Smoke-test the trained model on a few hand-written headlines.\n",
+ "# (This code used to sit inside a triple-quoted string, so it never ran\n",
+ "# and the cell only echoed its own source text.)\n",
+ "sample_news = [\n",
+ "    \"NASA launches Artemis mission to return to the Moon.\",\n",
+ "    \"Chocolate cures cancer, scientists claim in new study.\",\n",
+ "    \"Government confirms alien contact in leaked documents.\",\n",
+ "    \"Apple unveils new iPhone with revolutionary AI chip.\",\n",
+ "    \"United Nations announces climate change mitigation fund.\",\n",
+ "    \"Time traveler from 3030 visits Earth to warn of zombie apocalypse.\",\n",
+ "]\n",
+ "\n",
+ "# Apply the same cleaning and the already-fitted vectorizer before predicting\n",
+ "processed = [preprocess(news) for news in sample_news]\n",
+ "vec = vectorizer.transform(processed)\n",
+ "preds = model.predict(vec)\n",
+ "\n",
+ "for news, pred in zip(sample_news, preds):\n",
+ "    # Label 1 was assigned to the True.csv rows, 0 to the Fake.csv rows\n",
+ "    verdict = 'Real News ✅' if pred == 1 else 'Fake News ❌'\n",
+ "    print(f\"News: {news}\\nPrediction: {verdict}\\n\")\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}