coding-blocks-archives
diff --git a/‎class_05/.ipynb_checkpoints/NaiveBayes-checkpoint.ipynb
+323 b/‎class_05/.ipynb_checkpoints/NaiveBayes-checkpoint.ipynb
+323
diff --git a/‎class_05/.ipynb_checkpoints/Untitled-checkpoint.ipynb
-47 b/‎class_05/.ipynb_checkpoints/Untitled-checkpoint.ipynb
-47
@@ -0,0 +1,323 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from matplotlib import pyplot as plt\n",
+    "%matplotlib inline\n",
+    "import random"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1 3 3 2 1 2 2 2 3 3 2 2 2 1 3 2 2 3 2 2]\n",
+      "[2 1 3 1 2 3 3 1 1 1 1 3 2 1 2 2 1 2 2 1]\n",
+      "[1 0 1 1 0 1 1 1 0 1 0 1 1 0 0 0 1 0 0 0]\n"
+     ]
+    }
+   ],
+   "source": [
+    "set1 = [1, 2, 3]\n",
+    "lab_set = [0, 1]\n",
+    "\n",
+    "n_exmp = 20\n",
+    "\n",
+    "var1 = np.asarray([random.choice(set1) for ix in range(n_exmp)])\n",
+    "var2 = np.asarray([random.choice(set1) for ix in range(n_exmp)])\n",
+    "lab = np.asarray([random.choice(lab_set) for ix in range(n_exmp)])\n",
+    "\n",
+    "print var1\n",
+    "print var2\n",
+    "print lab"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.45\n"
+     ]
+    }
+   ],
+   "source": [
+    "def probability(val, n_sample):\n",
+    "    cnt = 0\n",
+    "    \n",
+    "    for ix in range(n_sample.shape[0]):\n",
+    "        if n_sample[ix] == val:\n",
+    "            cnt += 1\n",
+    "    return float(cnt)/float(n_sample.shape[0])\n",
+    "\n",
+    "def conditional_probability(c1, data, c2):\n",
+    "    n = lab.shape[0]\n",
+    "    count_c2 = 0\n",
+    "    count_c1_c2 = 0\n",
+    "    for ix in range(n):\n",
+    "        if lab[ix] == c2:\n",
+    "            count_c2 += 1\n",
+    "            if lab[ix] == c2 and data[ix] == c1:\n",
+    "                count_c1_c2 += 1\n",
+    "    return float(count_c1_c2)/float(count_c2)\n",
+    "            \n",
+    "\n",
+    "print probability(1, var2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def classify(inp):\n",
+    "    # for given input\n",
+    "    # decide probability for class 0\n",
+    "    \n",
+    "    prod_cond_prob = 1\n",
+    "    prod_pos_prob = 1\n",
+    "    \n",
+    "    prod_cond_prob *= conditional_probability(inp[0], var1, 0)\n",
+    "    prod_cond_prob *= conditional_probability(inp[1], var2, 0)\n",
+    "    \n",
+    "    prod_pos_prob *= probability(inp[0], var1)\n",
+    "    prod_pos_prob *= probability(inp[1], var2)\n",
+    "    \n",
+    "    p0 = probability(0, lab) * prod_cond_prob / prod_pos_prob\n",
+    "    \n",
+    "    prod_cond_prob = 1\n",
+    "    prod_pos_prob = 1\n",
+    "    \n",
+    "    prod_cond_prob *= conditional_probability(inp[0], var1, 1)\n",
+    "    prod_cond_prob *= conditional_probability(inp[1], var2, 1)\n",
+    "    \n",
+    "    print conditional_probability(inp[0], var1, 1)\n",
+    "    print conditional_probability(inp[1], var2, 1)\n",
+    "    \n",
+    "    prod_pos_prob *= probability(inp[0], var1)\n",
+    "    prod_pos_prob *= probability(inp[1], var2)\n",
+    "    \n",
+    "    print probability(inp[0], var1)\n",
+    "    print probability(inp[1], var2)\n",
+    "\n",
+    "    \n",
+    "    p1 = (probability(1, lab) * prod_cond_prob) / prod_pos_prob\n",
+    "    \n",
+    "    return p0, p1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1 3 3 2 1 2 2 2 3 3 2 2 2 1 3 2 2 3 2 2]\n",
+      "[2 1 3 1 2 3 3 1 1 1 1 3 2 1 2 2 1 2 2 1]\n",
+      "[1 0 1 1 0 1 1 1 0 1 0 1 1 0 0 0 1 0 0 0]\n",
+      "0.7\n",
+      "0.4\n",
+      "0.55\n",
+      "0.2\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(0.0, 1.2727272727272725)"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "print var1\n",
+    "print var2\n",
+    "print lab\n",
+    "\n",
+    "classify([2, 3])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "import sklearn\n",
+    "import pandas as pd\n",
+    "from sklearn.naive_bayes import GaussianNB"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(10000, 784) (5000, 784)\n",
+      "(10000,) (5000,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "ds = pd.read_csv('./train.csv')\n",
+    "data_X = ds.values[:10000, 1:]\n",
+    "data_y = ds.values[:10000, 0]\n",
+    "\n",
+    "test_X = ds.values[10000:15000, 1:]\n",
+    "test_y = ds.values[10000:15000, 0]\n",
+    "\n",
+    "print data_X.shape, test_X.shape\n",
+    "print data_y.shape, test_y.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.55820000000000003"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "gnb = GaussianNB()\n",
+    "\n",
+    "gnb.fit(data_X, data_y)\n",
+    "\n",
+    "gnb.score(test_X, test_y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "pca = PCA(n_components=319)\n",
+    "\n",
+    "dt = pca.fit_transform(ds.values[:15000, 1:])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(10000, 442)\n"
+     ]
+    }
+   ],
+   "source": [
+    "trans_X = dt[:10000, :]\n",
+    "test_trans_X = dt[10000:, :]\n",
+    "\n",
+    "print trans_X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.67859999999999998"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "gnb = GaussianNB()\n",
+    "\n",
+    "gnb.fit(trans_X, data_y)\n",
+    "\n",
+    "gnb.score(test_trans_X, test_y)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}