diff --git a/notebooks/Colorization_fcn_unet.ipynb b/notebooks/Colorization_fcn_unet.ipynb new file mode 100644 index 0000000..34060b3 --- /dev/null +++ b/notebooks/Colorization_fcn_unet.ipynb @@ -0,0 +1,482 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fully Convolutional Network & U-Net\n", + "\n", + "contributors:\n", + "\n", + " -Skander Jemaa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import importlib\n", + "from scipy.misc import imread, imresize, imsave, fromimage, toimage\n", + "from scipy.optimize import fmin_l_bfgs_b\n", + "import numpy as np\n", + "import time\n", + "import random\n", + "from PIL import Image\n", + "import os\n", + "import pickle\n", + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "import keras\n", + "from keras.models import Model\n", + "from keras.layers import Input\n", + "from keras.layers.convolutional import Convolution2D, AveragePooling2D, MaxPooling2D\n", + "from keras.layers import Conv2D, UpSampling2D, Lambda, ZeroPadding2D\n", + "from keras.models import Sequential\n", + "from vgg16_avg import VGG16_Avg\n", + "import sys\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import skimage.io as io\n", + "import skimage.transform as trans\n", + "import random as r\n", + "from keras.optimizers import Adam" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example of Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file ='images/Spongesbob/img/frame_0001.png'\n", + "\n", + "img = io.imread(file)/255\n", + "greyscale = rgb2grey(img)\n", + "canny = feature.canny(greyscale)\n", + "\n", + "plt.figure(figsize=(15,10))\n", + "\n", + "plt.subplot(131)\n", + "plt.title('Input')\n", + "plt.imshow(greyscale,cmap = 
'gray')\n", + "\n", + "plt.subplot(132)\n", + "plt.title('Ground Truth')\n", + "skimage.io.imshow(img)\n", + "\n", + "plt.subplot(133)\n", + "plt.title('Prediction')\n", + "plt.imshow(canny,cmap = 'gray')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.array(greyscale).shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data loading and Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def create_data(src, mask, label=False):\n", + " files = glob(src + mask, recursive=True)\n", + " imgs = []\n", + " imgs_grey = []\n", + " for file in files:\n", + " feat_grey = []\n", + " img = io.imread(file)\n", + " if label:\n", + " img[img == 4] = 1\n", + " img[img != 1] = 0\n", + " img = img.astype('float32')\n", + " else:\n", + " img = img/255\n", + " imgs.append(img)\n", + " img_grey = rgb2grey(img)\n", + " feat_grey.append(img_grey)\n", + " canny = feature.canny(img_grey)\n", + " feat_grey.append(canny)\n", + " imgs_grey.append(feat_grey)\n", + " name = 'y'\n", + " np.save(name, np.array(imgs).astype('float32'))\n", + " print('Saved', len(files), 'to', name)\n", + " name = 'x'\n", + " np.save(name, np.array(imgs_grey)[..., np.newaxis].astype('float32'))\n", + " print('Saved', len(files), 'to', name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "create_data('images/Spongebob/img/', '**.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x = np.load('x.npy')\n", + "print('x: ', x.shape)\n", + "y = np.load('y.npy')\n", + "print('y:', y.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Models\n", + "\n", + "## Fully Convolutional Network" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def colorization_net(arr):\n", + " inputs = Input(arr.shape[1:])\n", + " conv1 = Conv2D(32, 3, 3, activation='relu', border_mode='same')(inputs)\n", + " conv1 = Conv2D(32, 3, 3, activation='relu', border_mode='same')(conv1)\n", + " pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)\n", + " \n", + " conv2 = Conv2D(64, 3, 3, activation='relu', border_mode='same')(pool1)\n", + " conv2 = Conv2D(64, 3, 3, activation='relu', border_mode='same')(conv2)\n", + " pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)\n", + " \n", + " conv3 = Conv2D(128, 3, 3, activation='relu', border_mode='same')(pool2)\n", + " conv3 = Conv2D(128, 3, 3, activation='relu', border_mode='same')(conv3)\n", + " pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)\n", + " \n", + " conv4 = Conv2D(256, 3, 3, activation='relu', border_mode='same')(pool3)\n", + " conv4 = Conv2D(256, 3, 3, activation='relu', border_mode='same')(conv4)\n", + " pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)\n", + " \n", + " up1 = UpSampling2D(size=(2, 2))(pool4)\n", + " conv5 = Convolution2D(128, 2, 2, activation='relu', border_mode='same')(up1)\n", + " \n", + " up2 = UpSampling2D(size=(2, 2))(conv5)\n", + " conv6 = Convolution2D(64, 2, 2, activation='relu', border_mode='same')(up2)\n", + " \n", + " up3 = UpSampling2D(size=(2, 2))(conv6)\n", + " conv7 = Convolution2D(33, 2, 2, activation='relu', border_mode='same')(up3)\n", + "\n", + " up4 = UpSampling2D(size=(2, 2))(conv7)\n", + " conv8 = Convolution2D(3, 2, 2, activation='relu', border_mode='same')(up4)\n", + " \n", + " outputs = conv8\n", + " \n", + " return outputs #model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# UNet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def conv_block(x, filters, size, stride=(2,2), mode='same', act=True):\n", + " x = Convolution2D(filters, size, size, 
subsample=stride, border_mode=mode)(x)\n", + " x = BatchNormalization(mode=2)(x)\n", + " return Activation('relu')(x) if act else x\n", + "\n", + "def res_block(ip, nf=64):\n", + " x = conv_block(ip, nf, 3, (1,1))\n", + " x = conv_block(x, nf, 3, (1,1), act=False)\n", + " return merge([x, ip], mode='sum')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from keras.models import Input, Model\n", + "from keras.layers import Conv2D, Concatenate, MaxPooling2D, Reshape\n", + "from keras.layers import UpSampling2D, Activation, Permute\n", + "\n", + "def level_block(m, dim, depth, factor, acti):\n", + " if depth > 0:\n", + " n = Conv2D(dim, 3, activation=acti, padding='same')(m)\n", + " n = Conv2D(dim, 3, activation=acti, padding='same')(n)\n", + " m = MaxPooling2D()(n)\n", + " m = level_block(m, int(factor*dim), depth-1, factor, acti)\n", + " m = UpSampling2D()(m)\n", + " m = Conv2D(dim, 2, activation=acti, padding='same')(m)\n", + " m = Concatenate(axis=3)([n, m])\n", + " m = Conv2D(dim, 3, activation=acti, padding='same')(m)\n", + " return Conv2D(dim, 3, activation=acti, padding='same')(m)\n", + "\n", + "def UNet(img_shape, n_out=1, dim=64, depth=4, factor=2, acti='relu', flatten=False):\n", + " i = Input(shape=img_shape)\n", + " o = level_block(i, dim, depth, factor, acti)\n", + " o = Conv2D(n_out, (1, 1))(o)\n", + " if flatten:\n", + " o = Reshape(n_out, img_shape[0] * img_shape[1])(o)\n", + " o = Permute((2, 1))(o)\n", + " o = Activation('relu')(o)\n", + " return o" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training \n", + "\n", + "## Fully Convolutional" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model = colorization_net(x)\n", + "model.compile(optimizer=Adam(lr=0.01), loss='mse')\n", + "model.fit(x, y, validation_split=0.2, epochs=50, 
batch_size=8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## U-Net" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model_u = UNet(x.shape[1:], n_out = 3, dim=8, factor=1)\n", + "model_u.load_weights('unet.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model_u.compile(optimizer=Adam(lr=0.001), loss='mae')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_u.fit(x[:500], y[:500], validation_split=0.2, epochs=100, batch_size=8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "model_u.compile(optimizer=Adam(lr=0.0001), loss='mse')\n", + "model_u.fit(x[:500], y[:500], validation_split=0.2, epochs=100, batch_size=8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "model_u.save_weights('unet.h5')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Results\n", + "\n", + "## Fully Convolutional" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "pred = model.predict(x[:50])\n", + "pred = np.clip(pred,0,255)/255" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import random as r\n", + "for n in range(20):\n", + " i = int(r.random() * pred.shape[0])\n", + " plt.figure(figsize=(15,10))\n", + "\n", + " plt.subplot(131)\n", + " plt.title('Input')\n", + " plt.imshow(x[i,:,:,0],cmap = 'gray')\n", + "\n", + " plt.subplot(132)\n", + " plt.title('Ground Truth')\n", + " skimage.io.imshow(y[i]/255)\n", + "\n", + " 
plt.subplot(133)\n", + " plt.title('Prediction')\n", + " plt.imshow(pred[i])\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## U-Net" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "pred_u = model_u.predict(x[:50])\n", + "pred_u = np.clip(pred_u,0,255)/255" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "import random as r\n", + "import skimage\n", + "for n in range(20):\n", + " i = int(r.random() * pred_u.shape[0])\n", + " plt.figure(figsize=(15,10))\n", + "\n", + " plt.subplot(131)\n", + " plt.title('Input')\n", + " plt.imshow(x[i,:,:,0],cmap = 'gray')\n", + "\n", + " plt.subplot(132)\n", + " plt.title('Ground Truth')\n", + " skimage.io.imshow(y[i])\n", + "\n", + " plt.subplot(133)\n", + " plt.title('Prediction')\n", + " plt.imshow(pred_u[i])\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}