From 6153dfeb472cb48882eb9467a013169517ce3e39 Mon Sep 17 00:00:00 2001
From: unknown <josef.eiglsperger@web.de>
Date: Thu, 27 Jan 2022 12:03:58 +0100
Subject: [PATCH 1/5] Added support for selecting more hyperparameters

---
 model/ctabgan.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/model/ctabgan.py b/model/ctabgan.py
index fde96e7..df7fc93 100644
--- a/model/ctabgan.py
+++ b/model/ctabgan.py
@@ -21,11 +21,17 @@ def __init__(self,
                  mixed_columns= {'capital-loss':[0.0],'capital-gain':[0.0]},
                  integer_columns = ['age', 'fnlwgt','capital-gain', 'capital-loss','hours-per-week'],
                  problem_type= {"Classification": 'income'},
-                 epochs = 1):
+                 epochs = 1,
+                 batch_size=500,
+                 class_dim=(256, 256, 256, 256),
+                 random_dim=100,
+                 num_channels=64,
+                 l2scale=1e-5):
 
         self.__name__ = 'CTABGAN'
               
-        self.synthesizer = CTABGANSynthesizer(epochs = epochs)
+        self.synthesizer = CTABGANSynthesizer(epochs = epochs, batch_size = batch_size, class_dim = class_dim, random_dim = random_dim,
+                                              num_channels = num_channels, l2scale = l2scale)
         self.raw_df = pd.read_csv(raw_csv_path)
         self.test_ratio = test_ratio
         self.categorical_columns = categorical_columns
@@ -33,20 +39,20 @@ def __init__(self,
         self.mixed_columns = mixed_columns
         self.integer_columns = integer_columns
         self.problem_type = problem_type
-        
+
     def fit(self):
-        
+
         start_time = time.time()
         self.data_prep = DataPrep(self.raw_df,self.categorical_columns,self.log_columns,self.mixed_columns,self.integer_columns,self.problem_type,self.test_ratio)
-        self.synthesizer.fit(train_data=self.data_prep.df, categorical = self.data_prep.column_types["categorical"], 
+        self.synthesizer.fit(train_data=self.data_prep.df, categorical = self.data_prep.column_types["categorical"],
         mixed = self.data_prep.column_types["mixed"],type=self.problem_type)
         end_time = time.time()
         print('Finished training in',end_time-start_time," seconds.")
 
 
-    def generate_samples(self):
-        
-        sample = self.synthesizer.sample(len(self.raw_df)) 
+    def generate_samples(self, num_samples):
+
+        sample = self.synthesizer.sample(num_samples)
         sample_df = self.data_prep.inverse_prep(sample)
         
         return sample_df

From 25f821e1219ca4ef27914b923cf3164b3fec80ac Mon Sep 17 00:00:00 2001
From: unknown <josef.eiglsperger@web.de>
Date: Thu, 27 Jan 2022 12:48:20 +0100
Subject: [PATCH 2/5] updated the jupyter notebook example

---
 Experiment_Script_Adult.ipynb | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/Experiment_Script_Adult.ipynb b/Experiment_Script_Adult.ipynb
index 31336b8..dfda79c 100644
--- a/Experiment_Script_Adult.ipynb
+++ b/Experiment_Script_Adult.ipynb
@@ -27,9 +27,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  0%|                                                                                                                                                 | 0/300 [00:00<?, ?it/s]"
+     ]
+    }
+   ],
    "source": [
     "synthesizer =  CTABGAN(raw_csv_path = real_path,\n",
     "                 test_ratio = 0.20,\n",
@@ -38,11 +54,16 @@
     "                 mixed_columns= {'capital-loss':[0.0],'capital-gain':[0.0]},\n",
     "                 integer_columns = ['age', 'fnlwgt','capital-gain', 'capital-loss','hours-per-week'],\n",
     "                 problem_type= {\"Classification\": 'income'},\n",
-    "                 epochs = 300) \n",
+    "                 epochs = 300,\n",
+    "                 batch_size = 500,\n",
+    "                 class_dim = (256, 256, 256, 256),\n",
+    "                 random_dim = 100,\n",
+    "                 num_channels = 64,\n",
+    "                 l2scale = 1e-5) \n",
     "\n",
     "for i in range(num_exp):\n",
     "    synthesizer.fit()\n",
-    "    syn = synthesizer.generate_samples()\n",
+    "    syn = synthesizer.generate_samples(100)\n",
     "    syn.to_csv(fake_file_root+\"/\"+dataset+\"/\"+ dataset+\"_fake_{exp}.csv\".format(exp=i), index= False)"
    ]
   },

From 298114639b3163d588cf105b9e8ed779e8d0697f Mon Sep 17 00:00:00 2001
From: unknown <josef.eiglsperger@web.de>
Date: Thu, 3 Feb 2022 14:58:19 +0100
Subject: [PATCH 3/5] Updated the Jupyter notebook example

---
 Experiment_Script_Adult.ipynb | 196 ++--------------------------------
 1 file changed, 9 insertions(+), 187 deletions(-)

diff --git a/Experiment_Script_Adult.ipynb b/Experiment_Script_Adult.ipynb
index dfda79c..a98cf49 100644
--- a/Experiment_Script_Adult.ipynb
+++ b/Experiment_Script_Adult.ipynb
@@ -41,8 +41,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\r",
-      "  0%|                                                                                                                                                 | 0/300 [00:00<?, ?it/s]"
+      "  1%|█▋                                                                                                                               | 4/300 [3:00:00<219:14:43, 2666.50s/it]"
      ]
     }
    ],
@@ -69,7 +68,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -78,77 +77,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Acc</th>\n",
-       "      <th>AUC</th>\n",
-       "      <th>F1_Score</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>lr</th>\n",
-       "      <td>8.383663</td>\n",
-       "      <td>0.076099</td>\n",
-       "      <td>-0.016126</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>dt</th>\n",
-       "      <td>11.372710</td>\n",
-       "      <td>0.116821</td>\n",
-       "      <td>0.126888</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>rf</th>\n",
-       "      <td>11.587675</td>\n",
-       "      <td>0.119969</td>\n",
-       "      <td>0.120840</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mlp</th>\n",
-       "      <td>11.700276</td>\n",
-       "      <td>0.095017</td>\n",
-       "      <td>0.088192</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "           Acc       AUC  F1_Score\n",
-       "lr    8.383663  0.076099 -0.016126\n",
-       "dt   11.372710  0.116821  0.126888\n",
-       "rf   11.587675  0.119969  0.120840\n",
-       "mlp  11.700276  0.095017  0.088192"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "classifiers_list = [\"lr\",\"dt\",\"rf\",\"mlp\"]\n",
     "result_mat = get_utility_metrics(real_path,fake_paths,\"MinMax\",classifiers_list, test_ratio = 0.20)\n",
@@ -160,59 +91,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Average WD (Continuous Columns</th>\n",
-       "      <th>Average JSD (Categorical Columns)</th>\n",
-       "      <th>Correlation Distance</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.018747</td>\n",
-       "      <td>0.085125</td>\n",
-       "      <td>1.847952</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   Average WD (Continuous Columns  Average JSD (Categorical Columns)  \\\n",
-       "0                        0.018747                           0.085125   \n",
-       "\n",
-       "   Correlation Distance  \n",
-       "0              1.847952  "
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "adult_categorical = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country', 'income']\n",
     "stat_res_avg = []\n",
@@ -227,68 +108,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>DCR between Real and Fake (5th perc)</th>\n",
-       "      <th>DCR within Real(5th perc)</th>\n",
-       "      <th>DCR within Fake (5th perc)</th>\n",
-       "      <th>NNDR between Real and Fake (5th perc)</th>\n",
-       "      <th>NNDR within Real (5th perc)</th>\n",
-       "      <th>NNDR within Fake (5th perc)</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.569546</td>\n",
-       "      <td>0.216545</td>\n",
-       "      <td>0.451031</td>\n",
-       "      <td>0.634042</td>\n",
-       "      <td>0.442052</td>\n",
-       "      <td>0.567227</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   DCR between Real and Fake (5th perc)  DCR within Real(5th perc)  \\\n",
-       "0                              0.569546                   0.216545   \n",
-       "\n",
-       "   DCR within Fake (5th perc)  NNDR between Real and Fake (5th perc)  \\\n",
-       "0                    0.451031                               0.634042   \n",
-       "\n",
-       "   NNDR within Real (5th perc)  NNDR within Fake (5th perc)  \n",
-       "0                     0.442052                     0.567227  "
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "priv_res_avg = []\n",
     "for fake_path in fake_paths:\n",
@@ -302,7 +124,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [

From b749b0f465f6c2f159f24a1af085e6ed7ea16f04 Mon Sep 17 00:00:00 2001
From: unknown <josef.eiglsperger@web.de>
Date: Thu, 3 Feb 2022 15:00:12 +0100
Subject: [PATCH 4/5] Updated the Jupyter notebook example

---
 .../Experiment_Script_Adult-checkpoint.ipynb  | 328 ++++++++++++++++++
 1 file changed, 328 insertions(+)
 create mode 100644 .ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb

diff --git a/.ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb b/.ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb
new file mode 100644
index 0000000..38d90e0
--- /dev/null
+++ b/.ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb
@@ -0,0 +1,328 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from model.ctabgan import CTABGAN\n",
+    "from model.eval.evaluation import get_utility_metrics,stat_sim,privacy_metrics\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import glob"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_exp = 1\n",
+    "dataset = \"Adult\"\n",
+    "real_path = \"Real_Datasets/Adult.csv\"\n",
+    "fake_file_root = \"Fake_Datasets\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "synthesizer =  CTABGAN(raw_csv_path = real_path,\n",
+    "                 test_ratio = 0.20,\n",
+    "                 categorical_columns = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country', 'income'], \n",
+    "                 log_columns = [],\n",
+    "                 mixed_columns= {'capital-loss':[0.0],'capital-gain':[0.0]},\n",
+    "                 integer_columns = ['age', 'fnlwgt','capital-gain', 'capital-loss','hours-per-week'],\n",
+    "                 problem_type= {\"Classification\": 'income'},\n",
+    "                 epochs = 300,\n",
+    "                 batch_size = 500,\n",
+    "                 class_dim = (256, 256, 256, 256),\n",
+    "                 random_dim = 100,\n",
+    "                 num_channels = 64,\n",
+    "                 l2scale = l2scale) \n",
+    "\n",
+    "for i in range(num_exp):\n",
+    "    synthesizer.fit()\n",
+    "    syn = synthesizer.generate_samples()\n",
+    "    syn.to_csv(fake_file_root+\"/\"+dataset+\"/\"+ dataset+\"_fake_{exp}.csv\".format(exp=i), index= False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fake_paths = glob.glob(fake_file_root+\"/\"+dataset+\"/\"+\"*\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Acc</th>\n",
+       "      <th>AUC</th>\n",
+       "      <th>F1_Score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>lr</th>\n",
+       "      <td>8.383663</td>\n",
+       "      <td>0.076099</td>\n",
+       "      <td>-0.016126</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>dt</th>\n",
+       "      <td>11.372710</td>\n",
+       "      <td>0.116821</td>\n",
+       "      <td>0.126888</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>rf</th>\n",
+       "      <td>11.587675</td>\n",
+       "      <td>0.119969</td>\n",
+       "      <td>0.120840</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mlp</th>\n",
+       "      <td>11.700276</td>\n",
+       "      <td>0.095017</td>\n",
+       "      <td>0.088192</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "           Acc       AUC  F1_Score\n",
+       "lr    8.383663  0.076099 -0.016126\n",
+       "dt   11.372710  0.116821  0.126888\n",
+       "rf   11.587675  0.119969  0.120840\n",
+       "mlp  11.700276  0.095017  0.088192"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "classifiers_list = [\"lr\",\"dt\",\"rf\",\"mlp\"]\n",
+    "result_mat = get_utility_metrics(real_path,fake_paths,\"MinMax\",classifiers_list, test_ratio = 0.20)\n",
+    "\n",
+    "result_df  = pd.DataFrame(result_mat,columns=[\"Acc\",\"AUC\",\"F1_Score\"])\n",
+    "result_df.index = classifiers_list\n",
+    "result_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Average WD (Continuous Columns</th>\n",
+       "      <th>Average JSD (Categorical Columns)</th>\n",
+       "      <th>Correlation Distance</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.018747</td>\n",
+       "      <td>0.085125</td>\n",
+       "      <td>1.847952</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Average WD (Continuous Columns  Average JSD (Categorical Columns)  \\\n",
+       "0                        0.018747                           0.085125   \n",
+       "\n",
+       "   Correlation Distance  \n",
+       "0              1.847952  "
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "adult_categorical = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country', 'income']\n",
+    "stat_res_avg = []\n",
+    "for fake_path in fake_paths:\n",
+    "    stat_res = stat_sim(real_path,fake_path,adult_categorical)\n",
+    "    stat_res_avg.append(stat_res)\n",
+    "\n",
+    "stat_columns = [\"Average WD (Continuous Columns\",\"Average JSD (Categorical Columns)\",\"Correlation Distance\"]\n",
+    "stat_results = pd.DataFrame(np.array(stat_res_avg).mean(axis=0).reshape(1,3),columns=stat_columns)\n",
+    "stat_results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>DCR between Real and Fake (5th perc)</th>\n",
+       "      <th>DCR within Real(5th perc)</th>\n",
+       "      <th>DCR within Fake (5th perc)</th>\n",
+       "      <th>NNDR between Real and Fake (5th perc)</th>\n",
+       "      <th>NNDR within Real (5th perc)</th>\n",
+       "      <th>NNDR within Fake (5th perc)</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.569546</td>\n",
+       "      <td>0.216545</td>\n",
+       "      <td>0.451031</td>\n",
+       "      <td>0.634042</td>\n",
+       "      <td>0.442052</td>\n",
+       "      <td>0.567227</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   DCR between Real and Fake (5th perc)  DCR within Real(5th perc)  \\\n",
+       "0                              0.569546                   0.216545   \n",
+       "\n",
+       "   DCR within Fake (5th perc)  NNDR between Real and Fake (5th perc)  \\\n",
+       "0                    0.451031                               0.634042   \n",
+       "\n",
+       "   NNDR within Real (5th perc)  NNDR within Fake (5th perc)  \n",
+       "0                     0.442052                     0.567227  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "priv_res_avg = []\n",
+    "for fake_path in fake_paths:\n",
+    "    priv_res = privacy_metrics(real_path,fake_path)\n",
+    "    priv_res_avg.append(priv_res)\n",
+    "    \n",
+    "privacy_columns = [\"DCR between Real and Fake (5th perc)\",\"DCR within Real(5th perc)\",\"DCR within Fake (5th perc)\",\"NNDR between Real and Fake (5th perc)\",\"NNDR within Real (5th perc)\",\"NNDR within Fake (5th perc)\"]\n",
+    "privacy_results = pd.DataFrame(np.array(priv_res_avg).mean(axis=0).reshape(1,6),columns=privacy_columns)\n",
+    "privacy_results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "syn.to_csv(fake_file_root+\"/\"+dataset+\"/\"+ dataset+\"_fake_{exp}.csv\".format(exp=i), index= False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "5bbd4e8a0020626d1955d6e7d647b883363040a056d10513dec12a340be08610"
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 0c78b34566cbd9b4709fa92e4f97f310fd07f745 Mon Sep 17 00:00:00 2001
From: Zepp3 <36689688+Zepp3@users.noreply.github.com>
Date: Thu, 3 Feb 2022 15:20:47 +0100
Subject: [PATCH 5/5] Delete Experiment_Script_Adult-checkpoint.ipynb

---
 .../Experiment_Script_Adult-checkpoint.ipynb  | 328 ------------------
 1 file changed, 328 deletions(-)
 delete mode 100644 .ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb

diff --git a/.ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb b/.ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb
deleted file mode 100644
index 38d90e0..0000000
--- a/.ipynb_checkpoints/Experiment_Script_Adult-checkpoint.ipynb
+++ /dev/null
@@ -1,328 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from model.ctabgan import CTABGAN\n",
-    "from model.eval.evaluation import get_utility_metrics,stat_sim,privacy_metrics\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import glob"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "num_exp = 1\n",
-    "dataset = \"Adult\"\n",
-    "real_path = \"Real_Datasets/Adult.csv\"\n",
-    "fake_file_root = \"Fake_Datasets\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "synthesizer =  CTABGAN(raw_csv_path = real_path,\n",
-    "                 test_ratio = 0.20,\n",
-    "                 categorical_columns = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country', 'income'], \n",
-    "                 log_columns = [],\n",
-    "                 mixed_columns= {'capital-loss':[0.0],'capital-gain':[0.0]},\n",
-    "                 integer_columns = ['age', 'fnlwgt','capital-gain', 'capital-loss','hours-per-week'],\n",
-    "                 problem_type= {\"Classification\": 'income'},\n",
-    "                 epochs = 300,\n",
-    "                 batch_size = 500,\n",
-    "                 class_dim = (256, 256, 256, 256),\n",
-    "                 random_dim = 100,\n",
-    "                 num_channels = 64,\n",
-    "                 l2scale = l2scale) \n",
-    "\n",
-    "for i in range(num_exp):\n",
-    "    synthesizer.fit()\n",
-    "    syn = synthesizer.generate_samples()\n",
-    "    syn.to_csv(fake_file_root+\"/\"+dataset+\"/\"+ dataset+\"_fake_{exp}.csv\".format(exp=i), index= False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fake_paths = glob.glob(fake_file_root+\"/\"+dataset+\"/\"+\"*\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Acc</th>\n",
-       "      <th>AUC</th>\n",
-       "      <th>F1_Score</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>lr</th>\n",
-       "      <td>8.383663</td>\n",
-       "      <td>0.076099</td>\n",
-       "      <td>-0.016126</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>dt</th>\n",
-       "      <td>11.372710</td>\n",
-       "      <td>0.116821</td>\n",
-       "      <td>0.126888</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>rf</th>\n",
-       "      <td>11.587675</td>\n",
-       "      <td>0.119969</td>\n",
-       "      <td>0.120840</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>mlp</th>\n",
-       "      <td>11.700276</td>\n",
-       "      <td>0.095017</td>\n",
-       "      <td>0.088192</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "           Acc       AUC  F1_Score\n",
-       "lr    8.383663  0.076099 -0.016126\n",
-       "dt   11.372710  0.116821  0.126888\n",
-       "rf   11.587675  0.119969  0.120840\n",
-       "mlp  11.700276  0.095017  0.088192"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "classifiers_list = [\"lr\",\"dt\",\"rf\",\"mlp\"]\n",
-    "result_mat = get_utility_metrics(real_path,fake_paths,\"MinMax\",classifiers_list, test_ratio = 0.20)\n",
-    "\n",
-    "result_df  = pd.DataFrame(result_mat,columns=[\"Acc\",\"AUC\",\"F1_Score\"])\n",
-    "result_df.index = classifiers_list\n",
-    "result_df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Average WD (Continuous Columns</th>\n",
-       "      <th>Average JSD (Categorical Columns)</th>\n",
-       "      <th>Correlation Distance</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.018747</td>\n",
-       "      <td>0.085125</td>\n",
-       "      <td>1.847952</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   Average WD (Continuous Columns  Average JSD (Categorical Columns)  \\\n",
-       "0                        0.018747                           0.085125   \n",
-       "\n",
-       "   Correlation Distance  \n",
-       "0              1.847952  "
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "adult_categorical = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country', 'income']\n",
-    "stat_res_avg = []\n",
-    "for fake_path in fake_paths:\n",
-    "    stat_res = stat_sim(real_path,fake_path,adult_categorical)\n",
-    "    stat_res_avg.append(stat_res)\n",
-    "\n",
-    "stat_columns = [\"Average WD (Continuous Columns\",\"Average JSD (Categorical Columns)\",\"Correlation Distance\"]\n",
-    "stat_results = pd.DataFrame(np.array(stat_res_avg).mean(axis=0).reshape(1,3),columns=stat_columns)\n",
-    "stat_results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>DCR between Real and Fake (5th perc)</th>\n",
-       "      <th>DCR within Real(5th perc)</th>\n",
-       "      <th>DCR within Fake (5th perc)</th>\n",
-       "      <th>NNDR between Real and Fake (5th perc)</th>\n",
-       "      <th>NNDR within Real (5th perc)</th>\n",
-       "      <th>NNDR within Fake (5th perc)</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.569546</td>\n",
-       "      <td>0.216545</td>\n",
-       "      <td>0.451031</td>\n",
-       "      <td>0.634042</td>\n",
-       "      <td>0.442052</td>\n",
-       "      <td>0.567227</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   DCR between Real and Fake (5th perc)  DCR within Real(5th perc)  \\\n",
-       "0                              0.569546                   0.216545   \n",
-       "\n",
-       "   DCR within Fake (5th perc)  NNDR between Real and Fake (5th perc)  \\\n",
-       "0                    0.451031                               0.634042   \n",
-       "\n",
-       "   NNDR within Real (5th perc)  NNDR within Fake (5th perc)  \n",
-       "0                     0.442052                     0.567227  "
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "priv_res_avg = []\n",
-    "for fake_path in fake_paths:\n",
-    "    priv_res = privacy_metrics(real_path,fake_path)\n",
-    "    priv_res_avg.append(priv_res)\n",
-    "    \n",
-    "privacy_columns = [\"DCR between Real and Fake (5th perc)\",\"DCR within Real(5th perc)\",\"DCR within Fake (5th perc)\",\"NNDR between Real and Fake (5th perc)\",\"NNDR within Real (5th perc)\",\"NNDR within Fake (5th perc)\"]\n",
-    "privacy_results = pd.DataFrame(np.array(priv_res_avg).mean(axis=0).reshape(1,6),columns=privacy_columns)\n",
-    "privacy_results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "syn.to_csv(fake_file_root+\"/\"+dataset+\"/\"+ dataset+\"_fake_{exp}.csv\".format(exp=i), index= False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "5bbd4e8a0020626d1955d6e7d647b883363040a056d10513dec12a340be08610"
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}