From 92f05370af7e17f4383afa9a42865da3ae217330 Mon Sep 17 00:00:00 2001
From: developerpawandeep
Date: Thu, 12 Dec 2019 16:26:27 +0530
Subject: [PATCH 1/2] inference

---
 colab_inference/howtorun.txt | 40 ++++++++++++++++++++++++++++++++++++
 colab_inference/infer2.ipynb |  1 +
 2 files changed, 41 insertions(+)
 create mode 100644 colab_inference/howtorun.txt
 create mode 100644 colab_inference/infer2.ipynb

diff --git a/colab_inference/howtorun.txt b/colab_inference/howtorun.txt
new file mode 100644
index 0000000..728dd0b
--- /dev/null
+++ b/colab_inference/howtorun.txt
@@ -0,0 +1,40 @@
+# These steps are already in the notebook; just run the code cells
+###################################################################################
+## Install these versions
+
+!pip install Torch==0.3.1
+!pip install Torchvision==0.2.1
+
+## Clone the repo
+
+!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git
+
+## Mount Google Drive and change directory to the repo
+
+from google.colab import drive
+drive.mount('/content/drive')
+%cd /content/FaceDetection-DSFD
+
+###################################################################################
+
+# Do these steps manually (set the path to the weight file, to the images you want to test, and to the save folder)
+## Change the paths
+
+Change the path of trained_model in the code cell,
+and the path of the save folder, etc.
+
+It looks like this:
+
+widerface_root="WIDERFace_ROOT"
+trained_model = "/content/drive/My Drive/WIDERFace_DSFD_RES152.pth"
+save_folder = "eval_tools/"
+visual_threshold = 0.1
+cuda = True
+img_root="./data/worlds-largest-selfie.jpg"
+
+Change the path to your images in the next code cell.
+
+It looks like this:
+
+filesdir='/content/drive/My Drive/FolderSeconds/'
+
diff --git a/colab_inference/infer2.ipynb b/colab_inference/infer2.ipynb
new file mode 100644
index 0000000..a313d66
--- /dev/null
+++ b/colab_inference/infer2.ipynb
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"infer2.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.4"}},"cells":[{"cell_type":"code","metadata":{"colab_type":"code","id":"oJ-O1t4bATth","outputId":"bd85fe80-4caf-4212-c8f9-88f329e71799","scrolled":true,"executionInfo":{"status":"ok","timestamp":1576064509480,"user_tz":-330,"elapsed":6062,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!pip install Torch==0.3.1\n"],"execution_count":4,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: Torch==0.3.1 in /usr/local/lib/python3.6/dist-packages (0.3.1)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (3.13)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (1.17.4)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"NTekWu-OAp2C","outputId":"f6b43008-016a-4410-d8af-b96567b9d2bc","executionInfo":{"status":"ok","timestamp":1576064513862,"user_tz":-330,"elapsed":10432,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":136}},"source":["!pip install Torchvision==0.2.1"],"execution_count":5,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: 
Torchvision==0.2.1 in /usr/local/lib/python3.6/dist-packages (0.2.1)\n","Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.12.0)\n","Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (4.3.0)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.17.4)\n","Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (0.3.1)\n","Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->Torchvision==0.2.1) (0.46)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from torch->Torchvision==0.2.1) (3.13)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PKyc13d4Ascv","outputId":"357a473f-eab7-44fe-cfc2-cca8704a3089","executionInfo":{"status":"ok","timestamp":1576064517783,"user_tz":-330,"elapsed":14343,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git"],"execution_count":6,"outputs":[{"output_type":"stream","text":["fatal: destination path 'FaceDetection-DSFD' already exists and is not an empty directory.\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"EQXrLdu5A3fG","outputId":"e849c788-4b01-4e16-d852-debc8ddb183d","executionInfo":{"status":"ok","timestamp":1576064517784,"user_tz":-330,"elapsed":14333,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":7,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"cm2Zo2f7A5E6","outputId":"d5c13c6c-9579-4847-c5a6-ffe1623d2728","executionInfo":{"status":"ok","timestamp":1576064517786,"user_tz":-330,"elapsed":14324,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["%cd /content/FaceDetection-DSFD"],"execution_count":8,"outputs":[{"output_type":"stream","text":["/content/FaceDetection-DSFD\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IjT9EwG-HKpM","colab_type":"code","colab":{}},"source":["%cp /content/drive/My\\ Drive/WIDERFace_DSFD_RES152.pth /content/FaceDetection-DSFD/weights"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PV5ozxH8C-Yo","outputId":"fe0855ac-cb42-46be-f3f1-527e1a1ea9ca","executionInfo":{"status":"ok","timestamp":1576064542478,"user_tz":-330,"elapsed":38997,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!python demo.py"],"execution_count":10,"outputs":[{"output_type":"stream","text":["loading pretrained resnet model\n","Finished loading model!\n","650\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"qGaZJNuaW80k","colab":{}},"source":["from __future__ import print_function \n","import sys\n","import 
os\n","import argparse\n","import torch\n","import torch.nn as nn\n","import torch.backends.cudnn as cudnn\n","import torchvision.transforms as transforms\n","from torch.autograd import Variable\n","from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap\n","from PIL import Image\n","from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform\n","from data import *\n","import torch.utils.data as data\n","from face_ssd import build_ssd\n","#from resnet50_ssd import build_sfd\n","import pdb\n","import numpy as np\n","import cv2\n","import math\n","import matplotlib.pyplot as plt\n","import time\n","plt.switch_backend('agg')\n","\n","\n","widerface_root=\"WIDERFace_ROOT\"\n","trained_model = \"/content/drive/My Drive/WIDERFace_DSFD_RES152.pth\"\n","save_folder = \"eval_tools/\"\n","visual_threshold = 0.1\n","cuda = True\n","img_root=\"./data/worlds-largest-selfie.jpg\"\n","\n","\n","\n","if cuda and torch.cuda.is_available():\n"," torch.set_default_tensor_type('torch.cuda.FloatTensor')\n","else:\n"," torch.set_default_tensor_type('torch.FloatTensor')\n","if not os.path.exists(save_folder):\n"," os.mkdir(save_folder)\n","\n","\n","def bbox_vote(det):\n"," order = det[:, 4].ravel().argsort()[::-1]\n"," det = det[order, :]\n"," while det.shape[0] > 0:\n"," # IOU\n"," area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)\n"," xx1 = np.maximum(det[0, 0], det[:, 0])\n"," yy1 = np.maximum(det[0, 1], det[:, 1])\n"," xx2 = np.minimum(det[0, 2], det[:, 2])\n"," yy2 = np.minimum(det[0, 3], det[:, 3])\n"," w = np.maximum(0.0, xx2 - xx1 + 1)\n"," h = np.maximum(0.0, yy2 - yy1 + 1)\n"," inter = w * h\n"," o = inter / (area[0] + area[:] - inter)\n"," # get needed merge det and delete these det\n"," merge_index = np.where(o >= 0.3)[0]\n"," det_accu = det[merge_index, :]\n"," det = np.delete(det, merge_index, 0)\n"," if merge_index.shape[0] <= 1:\n"," continue\n"," det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))\n"," max_score = np.max(det_accu[:, 4])\n"," det_accu_sum = np.zeros((1, 5))\n"," det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])\n"," det_accu_sum[:, 4] = max_score\n"," try:\n"," dets = np.row_stack((dets, det_accu_sum))\n"," except:\n"," dets = det_accu_sum\n"," dets = dets[0:750, :]\n"," return dets\n","\n","def write_to_txt(f, det , event , im_name):\n"," f.write('{:s}\\n'.format(event + '/' + im_name))\n"," f.write('{:d}\\n'.format(det.shape[0]))\n"," for i in range(det.shape[0]):\n"," xmin = det[i][0]\n"," ymin = det[i][1]\n"," xmax = det[i][2]\n"," ymax = det[i][3]\n"," score = det[i][4] \n"," f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\\n'.\n"," format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))\n","\n","def infer(net , img , transform , thresh , cuda , shrink):\n"," if shrink != 1:\n"," img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)\n"," x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)\n"," x = Variable(x.unsqueeze(0) , volatile=True)\n"," if cuda:\n"," x = x.cuda()\n"," #print (shrink , x.shape)\n"," y = net(x) # forward pass\n"," detections = y.data\n"," # scale each detection back up to the image\n"," scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,\n"," img.shape[1]/shrink, img.shape[0]/shrink] )\n"," det = []\n"," for i in range(detections.size(1)):\n"," j = 0\n"," while detections[0, i, j, 0] >= thresh:\n"," score = detections[0, i, j, 0]\n"," #label_name = 
labelmap[i-1]\n"," pt = (detections[0, i, j, 1:]*scale).cpu().numpy()\n"," coords = (pt[0], pt[1], pt[2], pt[3]) \n"," det.append([pt[0], pt[1], pt[2], pt[3], score])\n"," j += 1\n"," if (len(det)) == 0:\n"," det = [ [0.1,0.1,0.2,0.2,0.01] ]\n"," det = np.array(det)\n","\n"," keep_index = np.where(det[:, 4] >= 0)[0]\n"," det = det[keep_index, :]\n"," return det\n","\n","def infer_flip(net , img , transform , thresh , cuda , shrink):\n"," img = cv2.flip(img, 1)\n"," det = infer(net , img , transform , thresh , cuda , shrink)\n"," det_t = np.zeros(det.shape)\n"," det_t[:, 0] = img.shape[1] - det[:, 2]\n"," det_t[:, 1] = det[:, 1]\n"," det_t[:, 2] = img.shape[1] - det[:, 0]\n"," det_t[:, 3] = det[:, 3]\n"," det_t[:, 4] = det[:, 4]\n"," return det_t\n","\n","\n","def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > 2:\n"," bt *= 2\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= 2\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," return det_s, det_b\n","\n","\n","def vis_detections(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","def output(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," str_=\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," str_+=\"empty\"\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," \n"," xmin=bbox[0]\n"," ymin=bbox[1]\n"," xmax=bbox[2]\n"," ymax=bbox[3]\n"," 
boxes=[xmin,ymin,xmax,ymax]\n"," str_+=\" \"+str(xmin)+\" \"+str(ymin)+\" \"+str(xmax)+\" \"+str(ymax)\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," txt.write(str_)\n"," txt.write(\"\\n\")\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","\n","\n","\n","\n","\n","def test_oneimage():\n"," # load net\n"," cfg = widerface_640\n"," num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n"," net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n"," net.load_state_dict(torch.load(trained_model))\n"," net.cuda()\n"," net.eval()\n"," print('Finished loading model!')\n","\n"," # evaluation\n"," cuda = cuda\n"," transform = TestBaseTransform((104, 117, 123))\n"," thresh=cfg['conf_thresh']\n"," #save_path = save_folder\n"," #num_images = len(testset)\n"," \n"," # load data\n"," path = img_root\n"," img_id = 'face'\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," vis_detections(img , det , img_id, visual_threshold)\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Y9HYBbXgXyR_","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":428},"outputId":"35b5ee12-fb0d-4b2b-9dfb-5f0e357f7cf0","executionInfo":{"status":"ok","timestamp":1576069257013,"user_tz":-330,"elapsed":150415,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}}},"source":["cfg = widerface_640\n","num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n","net = build_ssd('test', cfg['min_dim'], num_classes) # initialize 
SSD\n","net.load_state_dict(torch.load(trained_model))\n","net.cuda()\n","net.eval() \n","cuda = cuda\n","transform = TestBaseTransform((104, 117, 123))\n","thresh=cfg['conf_thresh']\n","\n","\n","textfile = 'bbox_op.txt'\n","filesdir='/content/drive/My Drive/FolderSeconds/'\n","with open(textfile,'w') as txt:\n"," for so in (sorted(os.listdir(filesdir))):\n"," path = os.path.join(filesdir,so)\n"," img_id = so\n"," print(img_id)\n"," # load data\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," output(img , det , img_id, visual_threshold)\n","\n","\n","\n","\n","\n","\n"],"execution_count":41,"outputs":[{"output_type":"stream","text":["ERROR: You specified size [1440, 2560]. However, currently only SSD640 (size=640) is supported!\n","loading pretrained resnet model\n","frame1320.jpg\n","2\n","frame20460.jpg\n","1\n","frame2310.jpg\n","4\n","frame25080.jpg\n","5\n","frame25740.jpg\n","3\n","frame2640.jpg\n","1\n","frame330.jpg\n","1\n","frame660.jpg\n","4\n","frame6930.jpg\n","1\n","frame990.jpg\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:199: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. 
(To control this warning, see the rcParam `figure.max_open_warning`).\n"],"name":"stderr"},{"output_type":"stream","text":["2\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"WfufuuBwjLmh","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]}
\ No newline at end of file

From 5244f5cb9a2ac53e6788b186831de3bfd53bf519 Mon Sep 17 00:00:00 2001
From: developerpawandeep
Date: Thu, 12 Dec 2019 16:48:08 +0530
Subject: [PATCH 2/2] minor changes

---
 colab_inference/infer2.ipynb              |   1 -
 colab_inference/inference_dual_shot.ipynb | 637 ++++++++++++++++++++++
 colab_inference/inference_dual_shot.py    | 365 +++++++++++++
 3 files changed, 1002 insertions(+), 1 deletion(-)
 delete mode 100644 colab_inference/infer2.ipynb
 create mode 100644 colab_inference/inference_dual_shot.ipynb
 create mode 100644 colab_inference/inference_dual_shot.py

diff --git a/colab_inference/infer2.ipynb b/colab_inference/infer2.ipynb
deleted file mode 100644
index a313d66..0000000
--- a/colab_inference/infer2.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"infer2.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.4"}},"cells":[{"cell_type":"code","metadata":{"colab_type":"code","id":"oJ-O1t4bATth","outputId":"bd85fe80-4caf-4212-c8f9-88f329e71799","scrolled":true,"executionInfo":{"status":"ok","timestamp":1576064509480,"user_tz":-330,"elapsed":6062,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!pip install Torch==0.3.1\n"],"execution_count":4,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: Torch==0.3.1 in /usr/local/lib/python3.6/dist-packages (0.3.1)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (3.13)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (1.17.4)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"NTekWu-OAp2C","outputId":"f6b43008-016a-4410-d8af-b96567b9d2bc","executionInfo":{"status":"ok","timestamp":1576064513862,"user_tz":-330,"elapsed":10432,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":136}},"source":["!pip install Torchvision==0.2.1"],"execution_count":5,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: Torchvision==0.2.1 in /usr/local/lib/python3.6/dist-packages (0.2.1)\n","Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.12.0)\n","Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (4.3.0)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.17.4)\n","Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (0.3.1)\n","Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->Torchvision==0.2.1) (0.46)\n","Requirement already satisfied: pyyaml in 
/usr/local/lib/python3.6/dist-packages (from torch->Torchvision==0.2.1) (3.13)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PKyc13d4Ascv","outputId":"357a473f-eab7-44fe-cfc2-cca8704a3089","executionInfo":{"status":"ok","timestamp":1576064517783,"user_tz":-330,"elapsed":14343,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git"],"execution_count":6,"outputs":[{"output_type":"stream","text":["fatal: destination path 'FaceDetection-DSFD' already exists and is not an empty directory.\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"EQXrLdu5A3fG","outputId":"e849c788-4b01-4e16-d852-debc8ddb183d","executionInfo":{"status":"ok","timestamp":1576064517784,"user_tz":-330,"elapsed":14333,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":7,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"cm2Zo2f7A5E6","outputId":"d5c13c6c-9579-4847-c5a6-ffe1623d2728","executionInfo":{"status":"ok","timestamp":1576064517786,"user_tz":-330,"elapsed":14324,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["%cd /content/FaceDetection-DSFD"],"execution_count":8,"outputs":[{"output_type":"stream","text":["/content/FaceDetection-DSFD\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IjT9EwG-HKpM","colab_type":"code","colab":{}},"source":["%cp /content/drive/My\\ Drive/WIDERFace_DSFD_RES152.pth /content/FaceDetection-DSFD/weights"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PV5ozxH8C-Yo","outputId":"fe0855ac-cb42-46be-f3f1-527e1a1ea9ca","executionInfo":{"status":"ok","timestamp":1576064542478,"user_tz":-330,"elapsed":38997,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!python demo.py"],"execution_count":10,"outputs":[{"output_type":"stream","text":["loading pretrained resnet model\n","Finished loading model!\n","650\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"qGaZJNuaW80k","colab":{}},"source":["from __future__ import print_function \n","import sys\n","import os\n","import argparse\n","import torch\n","import torch.nn as nn\n","import torch.backends.cudnn as cudnn\n","import torchvision.transforms as transforms\n","from torch.autograd import Variable\n","from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap\n","from PIL import Image\n","from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform\n","from data import *\n","import torch.utils.data as data\n","from face_ssd import build_ssd\n","#from resnet50_ssd import build_sfd\n","import pdb\n","import numpy as np\n","import cv2\n","import math\n","import matplotlib.pyplot as plt\n","import 
time\n","plt.switch_backend('agg')\n","\n","\n","widerface_root=\"WIDERFace_ROOT\"\n","trained_model = \"/content/drive/My Drive/WIDERFace_DSFD_RES152.pth\"\n","save_folder = \"eval_tools/\"\n","visual_threshold = 0.1\n","cuda = True\n","img_root=\"./data/worlds-largest-selfie.jpg\"\n","\n","\n","\n","if cuda and torch.cuda.is_available():\n"," torch.set_default_tensor_type('torch.cuda.FloatTensor')\n","else:\n"," torch.set_default_tensor_type('torch.FloatTensor')\n","if not os.path.exists(save_folder):\n"," os.mkdir(save_folder)\n","\n","\n","def bbox_vote(det):\n"," order = det[:, 4].ravel().argsort()[::-1]\n"," det = det[order, :]\n"," while det.shape[0] > 0:\n"," # IOU\n"," area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)\n"," xx1 = np.maximum(det[0, 0], det[:, 0])\n"," yy1 = np.maximum(det[0, 1], det[:, 1])\n"," xx2 = np.minimum(det[0, 2], det[:, 2])\n"," yy2 = np.minimum(det[0, 3], det[:, 3])\n"," w = np.maximum(0.0, xx2 - xx1 + 1)\n"," h = np.maximum(0.0, yy2 - yy1 + 1)\n"," inter = w * h\n"," o = inter / (area[0] + area[:] - inter)\n"," # get needed merge det and delete these det\n"," merge_index = np.where(o >= 0.3)[0]\n"," det_accu = det[merge_index, :]\n"," det = np.delete(det, merge_index, 0)\n"," if merge_index.shape[0] <= 1:\n"," continue\n"," det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))\n"," max_score = np.max(det_accu[:, 4])\n"," det_accu_sum = np.zeros((1, 5))\n"," det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])\n"," det_accu_sum[:, 4] = max_score\n"," try:\n"," dets = np.row_stack((dets, det_accu_sum))\n"," except:\n"," dets = det_accu_sum\n"," dets = dets[0:750, :]\n"," return dets\n","\n","def write_to_txt(f, det , event , im_name):\n"," f.write('{:s}\\n'.format(event + '/' + im_name))\n"," f.write('{:d}\\n'.format(det.shape[0]))\n"," for i in range(det.shape[0]):\n"," xmin = det[i][0]\n"," ymin = det[i][1]\n"," xmax = det[i][2]\n"," ymax = det[i][3]\n"," score = det[i][4] \n"," f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\\n'.\n"," format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))\n","\n","def infer(net , img , transform , thresh , cuda , shrink):\n"," if shrink != 1:\n"," img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)\n"," x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)\n"," x = Variable(x.unsqueeze(0) , volatile=True)\n"," if cuda:\n"," x = x.cuda()\n"," #print (shrink , x.shape)\n"," y = net(x) # forward pass\n"," detections = y.data\n"," # scale each detection back up to the image\n"," scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,\n"," img.shape[1]/shrink, img.shape[0]/shrink] )\n"," det = []\n"," for i in range(detections.size(1)):\n"," j = 0\n"," while detections[0, i, j, 0] >= thresh:\n"," score = detections[0, i, j, 0]\n"," #label_name = labelmap[i-1]\n"," pt = (detections[0, i, j, 1:]*scale).cpu().numpy()\n"," coords = (pt[0], pt[1], pt[2], pt[3]) \n"," det.append([pt[0], pt[1], pt[2], pt[3], score])\n"," j += 1\n"," if (len(det)) == 0:\n"," det = [ [0.1,0.1,0.2,0.2,0.01] ]\n"," det = np.array(det)\n","\n"," keep_index = np.where(det[:, 4] >= 0)[0]\n"," det = det[keep_index, :]\n"," return det\n","\n","def infer_flip(net , img , transform , thresh , cuda , shrink):\n"," img = cv2.flip(img, 1)\n"," det = infer(net , img , transform , thresh , cuda , shrink)\n"," det_t = np.zeros(det.shape)\n"," det_t[:, 0] = img.shape[1] - det[:, 2]\n"," det_t[:, 1] = det[:, 1]\n"," det_t[:, 2] = img.shape[1] - det[:, 
0]\n"," det_t[:, 3] = det[:, 3]\n"," det_t[:, 4] = det[:, 4]\n"," return det_t\n","\n","\n","def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > 2:\n"," bt *= 2\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= 2\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," return det_s, det_b\n","\n","\n","def vis_detections(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","def output(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," str_=\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," str_+=\"empty\"\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," \n"," xmin=bbox[0]\n"," ymin=bbox[1]\n"," xmax=bbox[2]\n"," ymax=bbox[3]\n"," boxes=[xmin,ymin,xmax,ymax]\n"," str_+=\" \"+str(xmin)+\" \"+str(ymin)+\" \"+str(xmax)+\" \"+str(ymax)\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," txt.write(str_)\n"," txt.write(\"\\n\")\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," 
plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","\n","\n","\n","\n","\n","def test_oneimage():\n"," # load net\n"," cfg = widerface_640\n"," num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n"," net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n"," net.load_state_dict(torch.load(trained_model))\n"," net.cuda()\n"," net.eval()\n"," print('Finished loading model!')\n","\n"," # evaluation\n"," cuda = cuda\n"," transform = TestBaseTransform((104, 117, 123))\n"," thresh=cfg['conf_thresh']\n"," #save_path = save_folder\n"," #num_images = len(testset)\n"," \n"," # load data\n"," path = img_root\n"," img_id = 'face'\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," vis_detections(img , det , img_id, visual_threshold)\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Y9HYBbXgXyR_","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":428},"outputId":"35b5ee12-fb0d-4b2b-9dfb-5f0e357f7cf0","executionInfo":{"status":"ok","timestamp":1576069257013,"user_tz":-330,"elapsed":150415,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}}},"source":["cfg = widerface_640\n","num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n","net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n","net.load_state_dict(torch.load(trained_model))\n","net.cuda()\n","net.eval() \n","cuda = cuda\n","transform = TestBaseTransform((104, 117, 123))\n","thresh=cfg['conf_thresh']\n","\n","\n","textfile = 'bbox_op.txt'\n","filesdir='/content/drive/My Drive/FolderSeconds/'\n","with open(textfile,'w') as txt:\n"," for so in (sorted(os.listdir(filesdir))):\n"," path = os.path.join(filesdir,so)\n"," img_id = so\n"," print(img_id)\n"," # load data\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , 
shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," output(img , det , img_id, visual_threshold)\n","\n","\n","\n","\n","\n","\n"],"execution_count":41,"outputs":[{"output_type":"stream","text":["ERROR: You specified size [1440, 2560]. However, currently only SSD640 (size=640) is supported!\n","loading pretrained resnet model\n","frame1320.jpg\n","2\n","frame20460.jpg\n","1\n","frame2310.jpg\n","4\n","frame25080.jpg\n","5\n","frame25740.jpg\n","3\n","frame2640.jpg\n","1\n","frame330.jpg\n","1\n","frame660.jpg\n","4\n","frame6930.jpg\n","1\n","frame990.jpg\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:199: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. 
(To control this warning, see the rcParam `figure.max_open_warning`).\n"],"name":"stderr"},{"output_type":"stream","text":["2\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"WfufuuBwjLmh","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]}
\ No newline at end of file
diff --git a/colab_inference/inference_dual_shot.ipynb b/colab_inference/inference_dual_shot.ipynb
new file mode 100644
index 0000000..2b7a73b
--- /dev/null
+++ b/colab_inference/inference_dual_shot.ipynb
@@ -0,0 +1,637 @@
+{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "inference_dual_shot.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "oJ-O1t4bATth", + "outputId": "2cdd9c48-c62c-45b6-e992-1a678bcb1f89", + "scrolled": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + } + }, + "source": [ + "!pip install Torch==0.3.1" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting Torch==0.3.1\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/5b/a5/e8b50b55b1abac9f1e3346c4242f1e42a82d368a8442cbd50c532922f6c4/torch-0.3.1-cp36-cp36m-manylinux1_x86_64.whl (496.4MB)\n", + "\u001b[K |████████████████████████████████| 496.4MB 37kB/s \n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (1.17.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (3.13)\n", + "\u001b[31mERROR: torchvision 0.4.2 has requirement torch==1.3.1, but you'll have torch 0.3.1 which is incompatible.\u001b[0m\n", + "\u001b[31mERROR: fastai 1.0.59 has requirement torch>=1.0.0, but you'll have torch 0.3.1 which is incompatible.\u001b[0m\n", + "Installing collected packages: Torch\n", + " Found existing installation: torch 1.3.1\n", + " Uninstalling torch-1.3.1:\n", + " Successfully uninstalled torch-1.3.1\n", + "Successfully installed Torch-0.3.1\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "NTekWu-OAp2C", + "outputId": "e3551f83-5a90-48b7-b3f3-176aa314ec01", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "source": [ + "!pip install Torchvision==0.2.1" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting Torchvision==0.2.1\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ca/0d/f00b2885711e08bd71242ebe7b96561e6f6d01fdb4b9dcf4d37e2e13c5e1/torchvision-0.2.1-py2.py3-none-any.whl (54kB)\n", + "\r\u001b[K |██████ | 10kB 27.8MB/s eta 0:00:01\r\u001b[K |████████████ | 20kB 34.6MB/s eta 0:00:01\r\u001b[K |██████████████████ | 30kB 40.7MB/s eta 0:00:01\r\u001b[K |████████████████████████ | 40kB 43.3MB/s eta 0:00:01\r\u001b[K |██████████████████████████████ | 51kB 37.2MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 61kB 8.9MB/s \n", + "\u001b[?25hRequirement already satisfied: torch in 
/usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (0.3.1)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.12.0)\n", + "Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (4.3.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.17.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from torch->Torchvision==0.2.1) (3.13)\n", + "Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->Torchvision==0.2.1) (0.46)\n", + "\u001b[31mERROR: fastai 1.0.59 has requirement torch>=1.0.0, but you'll have torch 0.3.1 which is incompatible.\u001b[0m\n", + "Installing collected packages: Torchvision\n", + " Found existing installation: torchvision 0.4.2\n", + " Uninstalling torchvision-0.4.2:\n", + " Successfully uninstalled torchvision-0.4.2\n", + "Successfully installed Torchvision-0.2.1\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "PKyc13d4Ascv", + "outputId": "7a65dbd6-e724-42ba-9a06-7657c8ddd6e0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + } + }, + "source": [ + "!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'FaceDetection-DSFD'...\n", + "remote: Enumerating objects: 3, done.\u001b[K\n", + "remote: Counting objects: 100% (3/3), done.\u001b[K\n", + "remote: Compressing objects: 100% (3/3), done.\u001b[K\n", + "remote: Total 244 (delta 0), reused 1 (delta 0), pack-reused 241\u001b[K\n", + "Receiving objects: 100% (244/244), 17.62 MiB | 4.61 MiB/s, done.\n", + "Resolving deltas: 100% (75/75), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "EQXrLdu5A3fG", + "outputId": "0a231613-ee22-4e87-98f9-467279f235f7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 122 + } + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n", + "\n", + "Enter your authorization code:\n", + "··········\n", + "Mounted at /content/drive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "cm2Zo2f7A5E6", + "outputId": "898ef801-395b-4f91-feb6-26c0499e2c99", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "source": [ + "%cd /content/FaceDetection-DSFD" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/content/FaceDetection-DSFD\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "qGaZJNuaW80k", + "colab": {} + }, + "source": [ + "from __future__ import 
print_function \n", + "import sys\n", + "import os\n", + "import argparse\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.backends.cudnn as cudnn\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable\n", + "from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap\n", + "from PIL import Image\n", + "from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform\n", + "from data import *\n", + "import torch.utils.data as data\n", + "from face_ssd import build_ssd\n", + "#from resnet50_ssd import build_sfd\n", + "import pdb\n", + "import numpy as np\n", + "import cv2\n", + "import math\n", + "import matplotlib.pyplot as plt\n", + "import time\n", + "plt.switch_backend('agg')\n", + "\n", + "\n", + "widerface_root=\"WIDERFace_ROOT\"\n", + "trained_model = \"/content/drive/My Drive/WIDERFace_DSFD_RES152.pth\"\n", + "save_folder = \"eval_tools/\"\n", + "visual_threshold = 0.1\n", + "cuda = True\n", + "img_root=\"./data/worlds-largest-selfie.jpg\"\n", + "\n", + "\n", + "\n", + "if cuda and torch.cuda.is_available():\n", + " torch.set_default_tensor_type('torch.cuda.FloatTensor')\n", + "else:\n", + " torch.set_default_tensor_type('torch.FloatTensor')\n", + "if not os.path.exists(save_folder):\n", + " os.mkdir(save_folder)\n", + "\n", + "\n", + "def bbox_vote(det):\n", + " order = det[:, 4].ravel().argsort()[::-1]\n", + " det = det[order, :]\n", + " while det.shape[0] > 0:\n", + " # IOU\n", + " area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)\n", + " xx1 = np.maximum(det[0, 0], det[:, 0])\n", + " yy1 = np.maximum(det[0, 1], det[:, 1])\n", + " xx2 = np.minimum(det[0, 2], det[:, 2])\n", + " yy2 = np.minimum(det[0, 3], det[:, 3])\n", + " w = np.maximum(0.0, xx2 - xx1 + 1)\n", + " h = np.maximum(0.0, yy2 - yy1 + 1)\n", + " inter = w * h\n", + " o = inter / (area[0] + area[:] - inter)\n", + " # get needed merge det and delete these det\n", + " merge_index = np.where(o >= 0.3)[0]\n", + " det_accu = det[merge_index, :]\n", + " det = np.delete(det, merge_index, 0)\n", + " if merge_index.shape[0] <= 1:\n", + " continue\n", + " det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))\n", + " max_score = np.max(det_accu[:, 4])\n", + " det_accu_sum = np.zeros((1, 5))\n", + " det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])\n", + " det_accu_sum[:, 4] = max_score\n", + " try:\n", + " dets = np.row_stack((dets, det_accu_sum))\n", + " except:\n", + " dets = det_accu_sum\n", + " dets = dets[0:750, :]\n", + " return dets\n", + "\n", + "def write_to_txt(f, det , event , im_name):\n", + " f.write('{:s}\\n'.format(event + '/' + im_name))\n", + " f.write('{:d}\\n'.format(det.shape[0]))\n", + " for i in range(det.shape[0]):\n", + " xmin = det[i][0]\n", + " ymin = det[i][1]\n", + " xmax = det[i][2]\n", + " ymax = det[i][3]\n", + " score = det[i][4] \n", + " f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\\n'.\n", + " format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))\n", + "\n", + "def infer(net , img , transform , thresh , cuda , shrink):\n", + " if shrink != 1:\n", + " img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)\n", + " x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)\n", + " x = Variable(x.unsqueeze(0) , volatile=True)\n", + " if cuda:\n", + " x = x.cuda()\n", + " #print (shrink , x.shape)\n", + " y = net(x) # forward pass\n", + " detections = y.data\n", + 
" # scale each detection back up to the image\n", + " scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,\n", + " img.shape[1]/shrink, img.shape[0]/shrink] )\n", + " det = []\n", + " for i in range(detections.size(1)):\n", + " j = 0\n", + " while detections[0, i, j, 0] >= thresh:\n", + " score = detections[0, i, j, 0]\n", + " #label_name = labelmap[i-1]\n", + " pt = (detections[0, i, j, 1:]*scale).cpu().numpy()\n", + " coords = (pt[0], pt[1], pt[2], pt[3]) \n", + " det.append([pt[0], pt[1], pt[2], pt[3], score])\n", + " j += 1\n", + " if (len(det)) == 0:\n", + " det = [ [0.1,0.1,0.2,0.2,0.01] ]\n", + " det = np.array(det)\n", + "\n", + " keep_index = np.where(det[:, 4] >= 0)[0]\n", + " det = det[keep_index, :]\n", + " return det\n", + "\n", + "def infer_flip(net , img , transform , thresh , cuda , shrink):\n", + " img = cv2.flip(img, 1)\n", + " det = infer(net , img , transform , thresh , cuda , shrink)\n", + " det_t = np.zeros(det.shape)\n", + " det_t[:, 0] = img.shape[1] - det[:, 2]\n", + " det_t[:, 1] = det[:, 1]\n", + " det_t[:, 2] = img.shape[1] - det[:, 0]\n", + " det_t[:, 3] = det[:, 3]\n", + " det_t[:, 4] = det[:, 4]\n", + " return det_t\n", + "\n", + "\n", + "def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):\n", + " # shrink detecting and shrink only detect big face\n", + " st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n", + " det_s = infer(net , img , transform , thresh , cuda , st)\n", + " index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n", + " det_s = det_s[index, :]\n", + " # enlarge one times\n", + " bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n", + " det_b = infer(net , img , transform , thresh , cuda , bt)\n", + " # enlarge small iamge x times for small face\n", + " if max_im_shrink > 2:\n", + " bt *= 2\n", + " while bt < max_im_shrink:\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n", + " bt *= 2\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n", + " # enlarge only detect small face\n", + " if bt > 1:\n", + " index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n", + " det_b = det_b[index, :]\n", + " else:\n", + " index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n", + " det_b = det_b[index, :]\n", + " return det_s, det_b\n", + "\n", + "\n", + "def vis_detections(im, dets, image_name , thresh=0.5):\n", + " \"\"\"Draw detected bounding boxes.\"\"\"\n", + " class_name = 'face'\n", + " inds = np.where(dets[:, -1] >= thresh)[0]\n", + " if len(inds) == 0:\n", + " return\n", + " print (len(inds))\n", + " im = im[:, :, (2, 1, 0)]\n", + " fig, ax = plt.subplots(figsize=(12, 12))\n", + " ax.imshow(im, aspect='equal')\n", + " for i in inds:\n", + " bbox = dets[i, :4]\n", + " score = dets[i, -1]\n", + " ax.add_patch(\n", + " plt.Rectangle((bbox[0], bbox[1]),\n", + " bbox[2] - bbox[0],\n", + " bbox[3] - bbox[1], fill=False,\n", + " edgecolor='red', linewidth=2.5)\n", + " )\n", + " '''\n", + " ax.text(bbox[0], bbox[1] - 5,\n", + " '{:s} {:.3f}'.format(class_name, score),\n", + " bbox=dict(facecolor='blue', alpha=0.5),\n", + " fontsize=10, color='white')\n", + " '''\n", + " ax.set_title(('{} detections with '\n", + " 'p({} | box) >= {:.1f}').format(class_name, class_name,\n", + " thresh),\n", + " fontsize=10)\n", + " plt.axis('off')\n", + " 
plt.tight_layout()\n", + " plt.savefig(save_folder+image_name, dpi=fig.dpi)\n", + "\n", + "def output(im, dets, image_name , thresh=0.5):\n", + " \"\"\"Draw detected bounding boxes.\"\"\"\n", + " str_=\"\"\n", + " class_name = 'face'\n", + " inds = np.where(dets[:, -1] >= thresh)[0]\n", + " if len(inds) == 0:\n", + " str_+=\"empty\"\n", + " return\n", + " print (len(inds))\n", + " im = im[:, :, (2, 1, 0)]\n", + " fig, ax = plt.subplots(figsize=(12, 12))\n", + " ax.imshow(im, aspect='equal')\n", + " for i in inds:\n", + " bbox = dets[i, :4]\n", + " score = dets[i, -1]\n", + " \n", + " xmin=bbox[0]\n", + " ymin=bbox[1]\n", + " xmax=bbox[2]\n", + " ymax=bbox[3]\n", + " boxes=[xmin,ymin,xmax,ymax]\n", + " str_+=\" \"+str(xmin)+\" \"+str(ymin)+\" \"+str(xmax)+\" \"+str(ymax)\n", + " ax.add_patch(\n", + " plt.Rectangle((bbox[0], bbox[1]),\n", + " bbox[2] - bbox[0],\n", + " bbox[3] - bbox[1], fill=False,\n", + " edgecolor='red', linewidth=2.5)\n", + " )\n", + " '''\n", + " ax.text(bbox[0], bbox[1] - 5,\n", + " '{:s} {:.3f}'.format(class_name, score),\n", + " bbox=dict(facecolor='blue', alpha=0.5),\n", + " fontsize=10, color='white')\n", + " '''\n", + " txt.write(str_)\n", + " txt.write(\"\\n\")\n", + " ax.set_title(('{} detections with '\n", + " 'p({} | box) >= {:.1f}').format(class_name, class_name,\n", + " thresh),\n", + " fontsize=10)\n", + " plt.axis('off')\n", + " plt.tight_layout()\n", + " plt.savefig(save_folder+image_name, dpi=fig.dpi)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "def test_oneimage():\n", + " # load net\n", + " cfg = widerface_640\n", + " num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n", + " net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n", + " net.load_state_dict(torch.load(trained_model))\n", + " net.cuda()\n", + " net.eval()\n", + " print('Finished loading model!')\n", + "\n", + " # evaluation\n", + " cuda = cuda\n", + " transform = TestBaseTransform((104, 117, 123))\n", + " thresh=cfg['conf_thresh']\n", + " #save_path = save_folder\n", + " #num_images = len(testset)\n", + " \n", + " # load data\n", + " path = img_root\n", + " img_id = 'face'\n", + " img = cv2.imread(path, cv2.IMREAD_COLOR)\n", + "\n", + " max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n", + " shrink = max_im_shrink if max_im_shrink < 1 else 1\n", + "\n", + " det0 = infer(net , img , transform , thresh , cuda , shrink)\n", + " det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n", + " # shrink detecting and shrink only detect big face\n", + " st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n", + " det_s = infer(net , img , transform , thresh , cuda , st)\n", + " index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n", + " det_s = det_s[index, :]\n", + " # enlarge one times\n", + " factor = 2\n", + " bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n", + " det_b = infer(net , img , transform , thresh , cuda , bt)\n", + " # enlarge small iamge x times for small face\n", + " if max_im_shrink > factor:\n", + " bt *= factor\n", + " while bt < max_im_shrink:\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n", + " bt *= factor\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n", + " # enlarge only detect small face\n", + " if bt > 1:\n", + " index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n", 
+ "        det_b = det_b[index, :]\n",
+ "    else:\n",
+ "        index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n",
+ "        det_b = det_b[index, :]\n",
+ "    det = np.row_stack((det0, det1, det_s, det_b))\n",
+ "    det = bbox_vote(det)\n",
+ "    vis_detections(img , det , img_id, visual_threshold)\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Y9HYBbXgXyR_",
+ "colab_type": "code",
+ "outputId": "91046e68-1648-4d07-c546-8432732c782f",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 680
+ }
+ },
+ "source": [
+ "cfg = widerface_640\n",
+ "num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n",
+ "net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n",
+ "net.load_state_dict(torch.load(trained_model))\n",
+ "net.cuda()\n",
+ "net.eval()\n",
+ "transform = TestBaseTransform((104, 117, 123))\n",
+ "thresh = cfg['conf_thresh']\n",
+ "\n",
+ "\n",
+ "textfile = 'bbox_op.txt'\n",
+ "filesdir = '/content/drive/My Drive/FolderSeconds/'\n",
+ "with open(textfile, 'w') as txt:\n",
+ "    for so in sorted(os.listdir(filesdir)):\n",
+ "        path = os.path.join(filesdir, so)\n",
+ "        img_id = so\n",
+ "        print(img_id)\n",
+ "        # load data\n",
+ "        img = cv2.imread(path, cv2.IMREAD_COLOR)\n",
+ "\n",
+ "        max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n",
+ "        shrink = max_im_shrink if max_im_shrink < 1 else 1\n",
+ "\n",
+ "        det0 = infer(net , img , transform , thresh , cuda , shrink)\n",
+ "        det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n",
+ "        # shrunk pass: detect only big faces\n",
+ "        st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n",
+ "        det_s = infer(net , img , transform , thresh , cuda , st)\n",
+ "        index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n",
+ "        det_s = det_s[index, :]\n",
+ "        # first enlarged pass (up to 2x)\n",
+ "        factor = 2\n",
+ "        bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n",
+ "        det_b = infer(net , img , transform , thresh , cuda , bt)\n",
+ "        # keep doubling the small image to catch small faces\n",
+ "        if max_im_shrink > factor:\n",
+ "            bt *= factor\n",
+ "            while bt < max_im_shrink:\n",
+ "                det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n",
+ "                bt *= factor\n",
+ "            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n",
+ "        # enlarged passes keep only small faces\n",
+ "        if bt > 1:\n",
+ "            index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n",
+ "            det_b = det_b[index, :]\n",
+ "        else:\n",
+ "            index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n",
+ "            det_b = det_b[index, :]\n",
+ "        det = np.row_stack((det0, det1, det_s, det_b))\n",
+ "        det = bbox_vote(det)\n",
+ "        output(img , det , img_id, visual_threshold)\n"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "loading pretrained resnet model\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Downloading: \"https://download.pytorch.org/models/resnet152-b121ed2d.pth\" to /root/.torch/models/resnet152-b121ed2d.pth\n",
+ "100%|██████████| 241530880/241530880 [00:02<00:00, 101989518.91it/s]\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "frame1320.jpg\n",
+
"2\n", + "frame20460.jpg\n", + "1\n", + "frame2310.jpg\n", + "4\n", + "frame25080.jpg\n", + "5\n", + "frame25740.jpg\n", + "3\n", + "frame2640.jpg\n", + "1\n", + "frame330.jpg\n", + "1\n", + "frame660.jpg\n" + ], + "name": "stdout" + }, + { + "output_type": "error", + "ename": "KeyboardInterrupt", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mdet0\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mdet1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfer_flip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0;31m# shrink detecting and shrink only detect big face\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmax_im_shrink\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m0.75\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m0.5\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmax_im_shrink\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36minfer_flip\u001b[0;34m(net, img, transform, thresh, cuda, shrink)\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minfer_flip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcv2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 120\u001b[0;31m \u001b[0mdet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 121\u001b[0m \u001b[0mdet_t\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0mdet_t\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mdet\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36minfer\u001b[0;34m(net, img, transform, thresh, cuda, shrink)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;31m#print (shrink , x.shape)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 95\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# forward pass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 96\u001b[0m \u001b[0mdetections\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;31m# scale each detection back up to the image\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 355\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 357\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 358\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/content/FaceDetection-DSFD/face_ssd.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'feature_maps'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeaturemap_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'min_dim'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimage_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 331\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpriors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minit_priors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 332\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrefine\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 333\u001b[0m output = self.detect(\n", + "\u001b[0;32m/content/FaceDetection-DSFD/face_ssd.py\u001b[0m in \u001b[0;36minit_priors\u001b[0;34m(self, cfg, min_size, max_size)\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minit_priors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mcfg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mmin_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'min_sizes'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'max_sizes'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0mpriorbox\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPriorBox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mmin_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m \u001b[0mprior\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mVariable\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0mpriorbox\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mvolatile\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 188\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mprior\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/content/FaceDetection-DSFD/layers/functions/prior_box.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;31m# back to torch land\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 72\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclip\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclamp_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WfufuuBwjLmh", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/colab_inference/inference_dual_shot.py b/colab_inference/inference_dual_shot.py new file mode 100644 index 0000000..cfa0851 --- /dev/null +++ b/colab_inference/inference_dual_shot.py @@ -0,0 +1,365 @@ +# -*- coding: utf-8 -*- +"""inference_dual_shot.ipynb + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1V1tS5-T3WOybFZmo7Z8tOLaPxub5Waet +""" + +!pip install Torch==0.3.1 + +!pip install Torchvision==0.2.1 + +!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git + +from google.colab import drive +drive.mount('/content/drive') + +# Commented out IPython magic to ensure Python compatibility. 
+# %cd /content/FaceDetection-DSFD

+from __future__ import print_function
+import sys
+import os
+import argparse
+import torch
+import torch.nn as nn
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+from torch.autograd import Variable
+from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap
+from PIL import Image
+from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform
+from data import *
+import torch.utils.data as data
+from face_ssd import build_ssd
+#from resnet50_ssd import build_sfd
+import pdb
+import numpy as np
+import cv2
+import math
+import matplotlib.pyplot as plt
+import time
+plt.switch_backend('agg')
+
+
+widerface_root = "WIDERFace_ROOT"  # note: unused below
+trained_model = "/content/drive/My Drive/WIDERFace_DSFD_RES152.pth"
+save_folder = "eval_tools/"
+visual_threshold = 0.1
+cuda = True
+img_root = "./data/worlds-largest-selfie.jpg"
+
+
+if cuda and torch.cuda.is_available():
+    torch.set_default_tensor_type('torch.cuda.FloatTensor')
+else:
+    torch.set_default_tensor_type('torch.FloatTensor')
+if not os.path.exists(save_folder):
+    os.mkdir(save_folder)
+
+
+def bbox_vote(det):
+    order = det[:, 4].ravel().argsort()[::-1]
+    det = det[order, :]
+    while det.shape[0] > 0:
+        # IoU of the current top-scoring box against every remaining box
+        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
+        xx1 = np.maximum(det[0, 0], det[:, 0])
+        yy1 = np.maximum(det[0, 1], det[:, 1])
+        xx2 = np.minimum(det[0, 2], det[:, 2])
+        yy2 = np.minimum(det[0, 3], det[:, 3])
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        o = inter / (area[0] + area[:] - inter)
+        # pop every box that overlaps the top box with IoU >= 0.3
+        merge_index = np.where(o >= 0.3)[0]
+        det_accu = det[merge_index, :]
+        det = np.delete(det, merge_index, 0)
+        if merge_index.shape[0] <= 1:
+            # a box with no voting partners is dropped
+            continue
+        # fuse the group into one box: score-weighted average of the
+        # coordinates, keeping the maximum score
+        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
+        max_score = np.max(det_accu[:, 4])
+        det_accu_sum = np.zeros((1, 5))
+        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
+        det_accu_sum[:, 4] = max_score
+        try:
+            dets = np.row_stack((dets, det_accu_sum))
+        except NameError:  # first fused group: dets does not exist yet
+            dets = det_accu_sum
+    dets = dets[0:750, :]  # keep at most 750 detections
+    return dets
+
+def write_to_txt(f, det , event , im_name):
+    # WIDER FACE evaluation format: one "x y width height score" line per box
+    f.write('{:s}\n'.format(event + '/' + im_name))
+    f.write('{:d}\n'.format(det.shape[0]))
+    for i in range(det.shape[0]):
+        xmin = det[i][0]
+        ymin = det[i][1]
+        xmax = det[i][2]
+        ymax = det[i][3]
+        score = det[i][4]
+        f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.
+                format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))
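+
+# A minimal, uncalled sanity check for bbox_vote above (illustration only;
+# the toy coordinates are hypothetical and nothing in this script calls it).
+# The two heavily overlapping boxes vote together and are fused into one
+# score-weighted box; the isolated third box has no voting partner and is
+# dropped by the `merge_index.shape[0] <= 1` branch.
+def _bbox_vote_smoke_test():
+    toy = np.array([[10., 10., 50., 50., 0.9],
+                    [12., 12., 52., 52., 0.6],
+                    [200., 200., 240., 240., 0.8]])
+    merged = bbox_vote(toy)
+    assert merged.shape == (1, 5)  # one fused box survives
+    assert merged[0, 4] == 0.9     # the fused box keeps the max score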
+
+def infer(net , img , transform , thresh , cuda , shrink):
+    if shrink != 1:
+        img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)
+    x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)
+    x = Variable(x.unsqueeze(0) , volatile=True)
+    if cuda:
+        x = x.cuda()
+    #print (shrink , x.shape)
+    y = net(x)  # forward pass
+    detections = y.data
+    # scale each detection back up to the image
+    scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,
+                         img.shape[1]/shrink, img.shape[0]/shrink] )
+    det = []
+    for i in range(detections.size(1)):
+        j = 0
+        while detections[0, i, j, 0] >= thresh:
+            score = detections[0, i, j, 0]
+            #label_name = labelmap[i-1]
+            pt = (detections[0, i, j, 1:]*scale).cpu().numpy()
+            coords = (pt[0], pt[1], pt[2], pt[3])
+            det.append([pt[0], pt[1], pt[2], pt[3], score])
+            j += 1
+    if len(det) == 0:
+        # placeholder box so downstream numpy code never sees an empty array
+        det = [ [0.1, 0.1, 0.2, 0.2, 0.01] ]
+    det = np.array(det)
+
+    keep_index = np.where(det[:, 4] >= 0)[0]
+    det = det[keep_index, :]
+    return det
+
+def infer_flip(net , img , transform , thresh , cuda , shrink):
+    # detect on the horizontally mirrored image, then mirror the x
+    # coordinates of the boxes back
+    img = cv2.flip(img, 1)
+    det = infer(net , img , transform , thresh , cuda , shrink)
+    det_t = np.zeros(det.shape)
+    det_t[:, 0] = img.shape[1] - det[:, 2]
+    det_t[:, 1] = det[:, 1]
+    det_t[:, 2] = img.shape[1] - det[:, 0]
+    det_t[:, 3] = det[:, 3]
+    det_t[:, 4] = det[:, 4]
+    return det_t
+
+
+def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):
+    # shrunk pass: detect only big faces
+    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+    det_s = infer(net , img , transform , thresh , cuda , st)
+    index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
+    det_s = det_s[index, :]
+    # first enlarged pass (up to 2x)
+    bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+    det_b = infer(net , img , transform , thresh , cuda , bt)
+    # keep doubling the small image to catch small faces
+    if max_im_shrink > 2:
+        bt *= 2
+        while bt < max_im_shrink:
+            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))
+            bt *= 2
+        det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))
+    # enlarged passes keep only small faces
+    if bt > 1:
+        index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
+        det_b = det_b[index, :]
+    else:
+        index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
+        det_b = det_b[index, :]
+    return det_s, det_b
+
+
+def vis_detections(im, dets, image_name , thresh=0.5):
+    """Draw detected bounding boxes."""
+    class_name = 'face'
+    inds = np.where(dets[:, -1] >= thresh)[0]
+    if len(inds) == 0:
+        return
+    print (len(inds))
+    im = im[:, :, (2, 1, 0)]
+    fig, ax = plt.subplots(figsize=(12, 12))
+    ax.imshow(im, aspect='equal')
+    for i in inds:
+        bbox = dets[i, :4]
+        score = dets[i, -1]
+        ax.add_patch(
+            plt.Rectangle((bbox[0], bbox[1]),
+                          bbox[2] - bbox[0],
+                          bbox[3] - bbox[1], fill=False,
+                          edgecolor='red', linewidth=2.5)
+            )
+    '''
+    ax.text(bbox[0], bbox[1] - 5,
+            '{:s} {:.3f}'.format(class_name, score),
+            bbox=dict(facecolor='blue', alpha=0.5),
+            fontsize=10, color='white')
+    '''
+    ax.set_title(('{} detections with '
+                  'p({} | box) >= {:.1f}').format(class_name, class_name,
+                                                  thresh),
+                  fontsize=10)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.savefig(save_folder+image_name, dpi=fig.dpi)
+
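+
+# Uncalled illustration of the coordinate mapping infer_flip relies on: a
+# box found on the horizontally mirrored image is mapped back with
+# x' = width - x, so the left and right edges swap roles. The sample box
+# and width here are made-up values, not pipeline data.
+def _unflip_box_demo():
+    width = 100
+    flipped = np.array([[20., 30., 40., 60., 0.8]])  # xmin, ymin, xmax, ymax, score
+    restored = np.zeros(flipped.shape)
+    restored[:, 0] = width - flipped[:, 2]  # new xmin from the old xmax
+    restored[:, 1] = flipped[:, 1]
+    restored[:, 2] = width - flipped[:, 0]  # new xmax from the old xmin
+    restored[:, 3] = flipped[:, 3]
+    restored[:, 4] = flipped[:, 4]
+    assert restored[0, 0] == 60.0 and restored[0, 2] == 80.0
+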
+def output(im, dets, image_name , thresh=0.5):
+    """Draw detected bounding boxes and append their coordinates to the
+    global txt file, one line per image."""
+    str_=""
+    class_name = 'face'
+    inds = np.where(dets[:, -1] >= thresh)[0]
+    if len(inds) == 0:
+        # record images with no detections before bailing out
+        str_+="empty"
+        txt.write(str_)
+        txt.write("\n")
+        return
+    print (len(inds))
+    im = im[:, :, (2, 1, 0)]
+    fig, ax = plt.subplots(figsize=(12, 12))
+    ax.imshow(im, aspect='equal')
+    for i in inds:
+        bbox = dets[i, :4]
+        score = dets[i, -1]
+
+        xmin=bbox[0]
+        ymin=bbox[1]
+        xmax=bbox[2]
+        ymax=bbox[3]
+        boxes=[xmin,ymin,xmax,ymax]
+        str_+=" "+str(xmin)+" "+str(ymin)+" "+str(xmax)+" "+str(ymax)
+        ax.add_patch(
+            plt.Rectangle((bbox[0], bbox[1]),
+                          bbox[2] - bbox[0],
+                          bbox[3] - bbox[1], fill=False,
+                          edgecolor='red', linewidth=2.5)
+            )
+    '''
+    ax.text(bbox[0], bbox[1] - 5,
+            '{:s} {:.3f}'.format(class_name, score),
+            bbox=dict(facecolor='blue', alpha=0.5),
+            fontsize=10, color='white')
+    '''
+    txt.write(str_)
+    txt.write("\n")
+    ax.set_title(('{} detections with '
+                  'p({} | box) >= {:.1f}').format(class_name, class_name,
+                                                  thresh),
+                  fontsize=10)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.savefig(save_folder+image_name, dpi=fig.dpi)
+
+
+# Single-image variant; defined for reference but not invoked below (the
+# batch loop at the bottom of this file is what actually runs).
+def test_oneimage():
+    # load net
+    cfg = widerface_640
+    num_classes = len(WIDERFace_CLASSES) + 1 # +1 background
+    net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD
+    net.load_state_dict(torch.load(trained_model))
+    net.cuda()
+    net.eval()
+    print('Finished loading model!')
+
+    # evaluation (the global cuda flag is used as-is)
+    transform = TestBaseTransform((104, 117, 123))
+    thresh = cfg['conf_thresh']
+
+    # load data
+    path = img_root
+    img_id = 'face'
+    img = cv2.imread(path, cv2.IMREAD_COLOR)
+
+    max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5
+    shrink = max_im_shrink if max_im_shrink < 1 else 1
+
+    det0 = infer(net , img , transform , thresh , cuda , shrink)
+    det1 = infer_flip(net , img , transform , thresh , cuda , shrink)
+    # shrunk pass: detect only big faces
+    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+    det_s = infer(net , img , transform , thresh , cuda , st)
+    index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
+    det_s = det_s[index, :]
+    # first enlarged pass (up to 2x)
+    factor = 2
+    bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+    det_b = infer(net , img , transform , thresh , cuda , bt)
+    # keep doubling the small image to catch small faces
+    if max_im_shrink > factor:
+        bt *= factor
+        while bt < max_im_shrink:
+            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))
+            bt *= factor
+        det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))
+    # enlarged passes keep only small faces
+    if bt > 1:
+        index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
+        det_b = det_b[index, :]
+    else:
+        index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
+        det_b = det_b[index, :]
+    det = np.row_stack((det0, det1, det_s, det_b))
+    det = bbox_vote(det)
+    vis_detections(img , det , img_id, visual_threshold)
+
+
+cfg = widerface_640
+num_classes = len(WIDERFace_CLASSES) + 1 # +1 background
+net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD
+net.load_state_dict(torch.load(trained_model))
+net.cuda()
+net.eval()
+transform = TestBaseTransform((104, 117, 123))
+thresh = cfg['conf_thresh']
+
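+
+# The batch loop below runs the same multi-scale schedule as test_oneimage
+# on every image in filesdir. This helper is an uncalled illustration of
+# the scales that schedule visits; the height/width defaults are made-up
+# example values, not something the pipeline reads.
+def _scale_schedule_demo(height=1080, width=1920):
+    max_im_shrink = ((2000.0 * 2000.0) / (height * width)) ** 0.5
+    scales = [max_im_shrink if max_im_shrink < 1 else 1]  # base pass (the flip pass reuses it)
+    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+    scales.append(st)                                     # shrunk pass for big faces
+    bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+    scales.append(bt)                                     # first enlarged pass
+    if max_im_shrink > 2:                                 # keep doubling for small faces
+        bt *= 2
+        while bt < max_im_shrink:
+            scales.append(bt)
+            bt *= 2
+        scales.append(max_im_shrink)
+    return scales                                         # e.g. [1, 0.5, 1.39] for a 1080x1920 frame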
+
+textfile = 'bbox_op.txt'
+filesdir = '/content/drive/My Drive/FolderSeconds/'
+with open(textfile, 'w') as txt:
+    for so in sorted(os.listdir(filesdir)):
+        path = os.path.join(filesdir, so)
+        img_id = so
+        print(img_id)
+        # load data
+        img = cv2.imread(path, cv2.IMREAD_COLOR)
+
+        max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5
+        shrink = max_im_shrink if max_im_shrink < 1 else 1
+
+        det0 = infer(net , img , transform , thresh , cuda , shrink)
+        det1 = infer_flip(net , img , transform , thresh , cuda , shrink)
+        # shrunk pass: detect only big faces
+        st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+        det_s = infer(net , img , transform , thresh , cuda , st)
+        index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
+        det_s = det_s[index, :]
+        # first enlarged pass (up to 2x)
+        factor = 2
+        bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+        det_b = infer(net , img , transform , thresh , cuda , bt)
+        # keep doubling the small image to catch small faces
+        if max_im_shrink > factor:
+            bt *= factor
+            while bt < max_im_shrink:
+                det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))
+                bt *= factor
+            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))
+        # enlarged passes keep only small faces
+        if bt > 1:
+            index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
+            det_b = det_b[index, :]
+        else:
+            index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
+            det_b = det_b[index, :]
+        det = np.row_stack((det0, det1, det_s, det_b))
+        det = bbox_vote(det)
+        output(img , det , img_id, visual_threshold)
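+
+# bbox_op.txt ends up with one line per image: either "empty" (no faces at
+# or above visual_threshold) or space-separated xmin ymin xmax ymax values,
+# four numbers per retained detection. A minimal, hypothetical reader for
+# that file (illustration only; nothing above calls it):
+def read_bbox_op(path='bbox_op.txt'):
+    boxes_per_line = []
+    with open(path) as f:
+        for line in f:
+            vals = line.split()
+            if not vals or vals[0] == 'empty':
+                boxes_per_line.append([])
+                continue
+            nums = list(map(float, vals))
+            # regroup the flat list into [xmin, ymin, xmax, ymax] boxes
+            boxes_per_line.append([nums[i:i + 4] for i in range(0, len(nums), 4)])
+    return boxes_per_line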