From 92f05370af7e17f4383afa9a42865da3ae217330 Mon Sep 17 00:00:00 2001
From: developerpawandeep
Date: Thu, 12 Dec 2019 16:26:27 +0530
Subject: [PATCH 1/2] inference

---
 colab_inference/howtorun.txt | 40 ++++++++++++++++++++++++++++++++++++
 colab_inference/infer2.ipynb |  1 +
 2 files changed, 41 insertions(+)
 create mode 100644 colab_inference/howtorun.txt
 create mode 100644 colab_inference/infer2.ipynb

diff --git a/colab_inference/howtorun.txt b/colab_inference/howtorun.txt
new file mode 100644
index 0000000..728dd0b
--- /dev/null
+++ b/colab_inference/howtorun.txt
@@ -0,0 +1,40 @@
+# These steps are already in the notebook; just run the code cells
+###################################################################################
+## Install these versions
+
+!pip install Torch==0.3.1
+!pip install Torchvision==0.2.1
+
+## Clone the repo
+
+!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git
+
+## Mount Google Drive and change directory to the repo
+
+from google.colab import drive
+drive.mount('/content/drive')
+%cd /content/FaceDetection-DSFD
+
+###################################################################################
+
+# Do these steps manually (set the path to the weight file, to the images you want to test, and to the save folder)
+## Change the paths
+
+Change the path of trained_model in the code cell,
+and the path of the save folder, etc.
+
+It looks like this:
+
+widerface_root="WIDERFace_ROOT"
+trained_model = "/content/drive/My Drive/WIDERFace_DSFD_RES152.pth"
+save_folder = "eval_tools/"
+visual_threshold = 0.1
+cuda = True
+img_root="./data/worlds-largest-selfie.jpg"
+
+Change the path to your images in the next code cell.
+
+It looks like this:
+
+filesdir='/content/drive/My Drive/FolderSeconds/'
+
diff --git a/colab_inference/infer2.ipynb b/colab_inference/infer2.ipynb
new file mode 100644
index 0000000..a313d66
--- /dev/null
+++ b/colab_inference/infer2.ipynb
@@ -0,0 +1 @@
+{"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"infer2.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.4"}},"cells":[{"cell_type":"code","metadata":{"colab_type":"code","id":"oJ-O1t4bATth","outputId":"bd85fe80-4caf-4212-c8f9-88f329e71799","scrolled":true,"executionInfo":{"status":"ok","timestamp":1576064509480,"user_tz":-330,"elapsed":6062,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!pip install Torch==0.3.1\n"],"execution_count":4,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: Torch==0.3.1 in /usr/local/lib/python3.6/dist-packages (0.3.1)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (3.13)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (1.17.4)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"NTekWu-OAp2C","outputId":"f6b43008-016a-4410-d8af-b96567b9d2bc","executionInfo":{"status":"ok","timestamp":1576064513862,"user_tz":-330,"elapsed":10432,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":136}},"source":["!pip install Torchvision==0.2.1"],"execution_count":5,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: 
Torchvision==0.2.1 in /usr/local/lib/python3.6/dist-packages (0.2.1)\n","Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.12.0)\n","Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (4.3.0)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.17.4)\n","Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (0.3.1)\n","Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->Torchvision==0.2.1) (0.46)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from torch->Torchvision==0.2.1) (3.13)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PKyc13d4Ascv","outputId":"357a473f-eab7-44fe-cfc2-cca8704a3089","executionInfo":{"status":"ok","timestamp":1576064517783,"user_tz":-330,"elapsed":14343,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git"],"execution_count":6,"outputs":[{"output_type":"stream","text":["fatal: destination path 'FaceDetection-DSFD' already exists and is not an empty directory.\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"EQXrLdu5A3fG","outputId":"e849c788-4b01-4e16-d852-debc8ddb183d","executionInfo":{"status":"ok","timestamp":1576064517784,"user_tz":-330,"elapsed":14333,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":7,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"cm2Zo2f7A5E6","outputId":"d5c13c6c-9579-4847-c5a6-ffe1623d2728","executionInfo":{"status":"ok","timestamp":1576064517786,"user_tz":-330,"elapsed":14324,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["%cd /content/FaceDetection-DSFD"],"execution_count":8,"outputs":[{"output_type":"stream","text":["/content/FaceDetection-DSFD\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IjT9EwG-HKpM","colab_type":"code","colab":{}},"source":["%cp /content/drive/My\\ Drive/WIDERFace_DSFD_RES152.pth /content/FaceDetection-DSFD/weights"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PV5ozxH8C-Yo","outputId":"fe0855ac-cb42-46be-f3f1-527e1a1ea9ca","executionInfo":{"status":"ok","timestamp":1576064542478,"user_tz":-330,"elapsed":38997,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!python demo.py"],"execution_count":10,"outputs":[{"output_type":"stream","text":["loading pretrained resnet model\n","Finished loading model!\n","650\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"qGaZJNuaW80k","colab":{}},"source":["from __future__ import print_function \n","import sys\n","import 
os\n","import argparse\n","import torch\n","import torch.nn as nn\n","import torch.backends.cudnn as cudnn\n","import torchvision.transforms as transforms\n","from torch.autograd import Variable\n","from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap\n","from PIL import Image\n","from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform\n","from data import *\n","import torch.utils.data as data\n","from face_ssd import build_ssd\n","#from resnet50_ssd import build_sfd\n","import pdb\n","import numpy as np\n","import cv2\n","import math\n","import matplotlib.pyplot as plt\n","import time\n","plt.switch_backend('agg')\n","\n","\n","widerface_root=\"WIDERFace_ROOT\"\n","trained_model = \"/content/drive/My Drive/WIDERFace_DSFD_RES152.pth\"\n","save_folder = \"eval_tools/\"\n","visual_threshold = 0.1\n","cuda = True\n","img_root=\"./data/worlds-largest-selfie.jpg\"\n","\n","\n","\n","if cuda and torch.cuda.is_available():\n"," torch.set_default_tensor_type('torch.cuda.FloatTensor')\n","else:\n"," torch.set_default_tensor_type('torch.FloatTensor')\n","if not os.path.exists(save_folder):\n"," os.mkdir(save_folder)\n","\n","\n","def bbox_vote(det):\n"," order = det[:, 4].ravel().argsort()[::-1]\n"," det = det[order, :]\n"," while det.shape[0] > 0:\n"," # IOU\n"," area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)\n"," xx1 = np.maximum(det[0, 0], det[:, 0])\n"," yy1 = np.maximum(det[0, 1], det[:, 1])\n"," xx2 = np.minimum(det[0, 2], det[:, 2])\n"," yy2 = np.minimum(det[0, 3], det[:, 3])\n"," w = np.maximum(0.0, xx2 - xx1 + 1)\n"," h = np.maximum(0.0, yy2 - yy1 + 1)\n"," inter = w * h\n"," o = inter / (area[0] + area[:] - inter)\n"," # get needed merge det and delete these det\n"," merge_index = np.where(o >= 0.3)[0]\n"," det_accu = det[merge_index, :]\n"," det = np.delete(det, merge_index, 0)\n"," if merge_index.shape[0] <= 1:\n"," continue\n"," det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))\n"," max_score = np.max(det_accu[:, 4])\n"," det_accu_sum = np.zeros((1, 5))\n"," det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])\n"," det_accu_sum[:, 4] = max_score\n"," try:\n"," dets = np.row_stack((dets, det_accu_sum))\n"," except:\n"," dets = det_accu_sum\n"," dets = dets[0:750, :]\n"," return dets\n","\n","def write_to_txt(f, det , event , im_name):\n"," f.write('{:s}\\n'.format(event + '/' + im_name))\n"," f.write('{:d}\\n'.format(det.shape[0]))\n"," for i in range(det.shape[0]):\n"," xmin = det[i][0]\n"," ymin = det[i][1]\n"," xmax = det[i][2]\n"," ymax = det[i][3]\n"," score = det[i][4] \n"," f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\\n'.\n"," format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))\n","\n","def infer(net , img , transform , thresh , cuda , shrink):\n"," if shrink != 1:\n"," img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)\n"," x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)\n"," x = Variable(x.unsqueeze(0) , volatile=True)\n"," if cuda:\n"," x = x.cuda()\n"," #print (shrink , x.shape)\n"," y = net(x) # forward pass\n"," detections = y.data\n"," # scale each detection back up to the image\n"," scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,\n"," img.shape[1]/shrink, img.shape[0]/shrink] )\n"," det = []\n"," for i in range(detections.size(1)):\n"," j = 0\n"," while detections[0, i, j, 0] >= thresh:\n"," score = detections[0, i, j, 0]\n"," #label_name = 
labelmap[i-1]\n"," pt = (detections[0, i, j, 1:]*scale).cpu().numpy()\n"," coords = (pt[0], pt[1], pt[2], pt[3]) \n"," det.append([pt[0], pt[1], pt[2], pt[3], score])\n"," j += 1\n"," if (len(det)) == 0:\n"," det = [ [0.1,0.1,0.2,0.2,0.01] ]\n"," det = np.array(det)\n","\n"," keep_index = np.where(det[:, 4] >= 0)[0]\n"," det = det[keep_index, :]\n"," return det\n","\n","def infer_flip(net , img , transform , thresh , cuda , shrink):\n"," img = cv2.flip(img, 1)\n"," det = infer(net , img , transform , thresh , cuda , shrink)\n"," det_t = np.zeros(det.shape)\n"," det_t[:, 0] = img.shape[1] - det[:, 2]\n"," det_t[:, 1] = det[:, 1]\n"," det_t[:, 2] = img.shape[1] - det[:, 0]\n"," det_t[:, 3] = det[:, 3]\n"," det_t[:, 4] = det[:, 4]\n"," return det_t\n","\n","\n","def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > 2:\n"," bt *= 2\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= 2\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," return det_s, det_b\n","\n","\n","def vis_detections(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","def output(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," str_=\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," str_+=\"empty\"\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," \n"," xmin=bbox[0]\n"," ymin=bbox[1]\n"," xmax=bbox[2]\n"," ymax=bbox[3]\n"," 
boxes=[xmin,ymin,xmax,ymax]\n"," str_+=\" \"+str(xmin)+\" \"+str(ymin)+\" \"+str(xmax)+\" \"+str(ymax)\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," txt.write(str_)\n"," txt.write(\"\\n\")\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","\n","\n","\n","\n","\n","def test_oneimage():\n"," # load net\n"," cfg = widerface_640\n"," num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n"," net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n"," net.load_state_dict(torch.load(trained_model))\n"," net.cuda()\n"," net.eval()\n"," print('Finished loading model!')\n","\n"," # evaluation\n"," cuda = cuda\n"," transform = TestBaseTransform((104, 117, 123))\n"," thresh=cfg['conf_thresh']\n"," #save_path = save_folder\n"," #num_images = len(testset)\n"," \n"," # load data\n"," path = img_root\n"," img_id = 'face'\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," vis_detections(img , det , img_id, visual_threshold)\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Y9HYBbXgXyR_","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":428},"outputId":"35b5ee12-fb0d-4b2b-9dfb-5f0e357f7cf0","executionInfo":{"status":"ok","timestamp":1576069257013,"user_tz":-330,"elapsed":150415,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}}},"source":["cfg = widerface_640\n","num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n","net = build_ssd('test', cfg['min_dim'], num_classes) # initialize 
SSD\n","net.load_state_dict(torch.load(trained_model))\n","net.cuda()\n","net.eval() \n","cuda = cuda\n","transform = TestBaseTransform((104, 117, 123))\n","thresh=cfg['conf_thresh']\n","\n","\n","textfile = 'bbox_op.txt'\n","filesdir='/content/drive/My Drive/FolderSeconds/'\n","with open(textfile,'w') as txt:\n"," for so in (sorted(os.listdir(filesdir))):\n"," path = os.path.join(filesdir,so)\n"," img_id = so\n"," print(img_id)\n"," # load data\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," output(img , det , img_id, visual_threshold)\n","\n","\n","\n","\n","\n","\n"],"execution_count":41,"outputs":[{"output_type":"stream","text":["ERROR: You specified size [1440, 2560]. However, currently only SSD640 (size=640) is supported!\n","loading pretrained resnet model\n","frame1320.jpg\n","2\n","frame20460.jpg\n","1\n","frame2310.jpg\n","4\n","frame25080.jpg\n","5\n","frame25740.jpg\n","3\n","frame2640.jpg\n","1\n","frame330.jpg\n","1\n","frame660.jpg\n","4\n","frame6930.jpg\n","1\n","frame990.jpg\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:199: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. 
(To control this warning, see the rcParam `figure.max_open_warning`).\n"],"name":"stderr"},{"output_type":"stream","text":["2\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"WfufuuBwjLmh","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]}
\ No newline at end of file

From 5244f5cb9a2ac53e6788b186831de3bfd53bf519 Mon Sep 17 00:00:00 2001
From: developerpawandeep
Date: Thu, 12 Dec 2019 16:48:08 +0530
Subject: [PATCH 2/2] minor changes

---
 colab_inference/infer2.ipynb              |   1 -
 colab_inference/inference_dual_shot.ipynb | 637 ++++++++++++++++++++++
 colab_inference/inference_dual_shot.py    | 365 +++++++++++++
 3 files changed, 1002 insertions(+), 1 deletion(-)
 delete mode 100644 colab_inference/infer2.ipynb
 create mode 100644 colab_inference/inference_dual_shot.ipynb
 create mode 100644 colab_inference/inference_dual_shot.py

diff --git a/colab_inference/infer2.ipynb b/colab_inference/infer2.ipynb
deleted file mode 100644
index a313d66..0000000
--- a/colab_inference/infer2.ipynb
+++ /dev/null
@@ -1 +0,0 @@
-{"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"infer2.ipynb","provenance":[],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.4"}},"cells":[{"cell_type":"code","metadata":{"colab_type":"code","id":"oJ-O1t4bATth","outputId":"bd85fe80-4caf-4212-c8f9-88f329e71799","scrolled":true,"executionInfo":{"status":"ok","timestamp":1576064509480,"user_tz":-330,"elapsed":6062,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!pip install Torch==0.3.1\n"],"execution_count":4,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: Torch==0.3.1 in /usr/local/lib/python3.6/dist-packages (0.3.1)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (3.13)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (1.17.4)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"NTekWu-OAp2C","outputId":"f6b43008-016a-4410-d8af-b96567b9d2bc","executionInfo":{"status":"ok","timestamp":1576064513862,"user_tz":-330,"elapsed":10432,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":136}},"source":["!pip install Torchvision==0.2.1"],"execution_count":5,"outputs":[{"output_type":"stream","text":["Requirement already satisfied: Torchvision==0.2.1 in /usr/local/lib/python3.6/dist-packages (0.2.1)\n","Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.12.0)\n","Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (4.3.0)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.17.4)\n","Requirement already satisfied: torch in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (0.3.1)\n","Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->Torchvision==0.2.1) (0.46)\n","Requirement already satisfied: pyyaml in 
/usr/local/lib/python3.6/dist-packages (from torch->Torchvision==0.2.1) (3.13)\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PKyc13d4Ascv","outputId":"357a473f-eab7-44fe-cfc2-cca8704a3089","executionInfo":{"status":"ok","timestamp":1576064517783,"user_tz":-330,"elapsed":14343,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git"],"execution_count":6,"outputs":[{"output_type":"stream","text":["fatal: destination path 'FaceDetection-DSFD' already exists and is not an empty directory.\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"EQXrLdu5A3fG","outputId":"e849c788-4b01-4e16-d852-debc8ddb183d","executionInfo":{"status":"ok","timestamp":1576064517784,"user_tz":-330,"elapsed":14333,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":7,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"cm2Zo2f7A5E6","outputId":"d5c13c6c-9579-4847-c5a6-ffe1623d2728","executionInfo":{"status":"ok","timestamp":1576064517786,"user_tz":-330,"elapsed":14324,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["%cd /content/FaceDetection-DSFD"],"execution_count":8,"outputs":[{"output_type":"stream","text":["/content/FaceDetection-DSFD\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"IjT9EwG-HKpM","colab_type":"code","colab":{}},"source":["%cp /content/drive/My\\ Drive/WIDERFace_DSFD_RES152.pth /content/FaceDetection-DSFD/weights"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"colab_type":"code","id":"PV5ozxH8C-Yo","outputId":"fe0855ac-cb42-46be-f3f1-527e1a1ea9ca","executionInfo":{"status":"ok","timestamp":1576064542478,"user_tz":-330,"elapsed":38997,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["!python demo.py"],"execution_count":10,"outputs":[{"output_type":"stream","text":["loading pretrained resnet model\n","Finished loading model!\n","650\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab_type":"code","id":"qGaZJNuaW80k","colab":{}},"source":["from __future__ import print_function \n","import sys\n","import os\n","import argparse\n","import torch\n","import torch.nn as nn\n","import torch.backends.cudnn as cudnn\n","import torchvision.transforms as transforms\n","from torch.autograd import Variable\n","from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap\n","from PIL import Image\n","from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform\n","from data import *\n","import torch.utils.data as data\n","from face_ssd import build_ssd\n","#from resnet50_ssd import build_sfd\n","import pdb\n","import numpy as np\n","import cv2\n","import math\n","import matplotlib.pyplot as plt\n","import 
time\n","plt.switch_backend('agg')\n","\n","\n","widerface_root=\"WIDERFace_ROOT\"\n","trained_model = \"/content/drive/My Drive/WIDERFace_DSFD_RES152.pth\"\n","save_folder = \"eval_tools/\"\n","visual_threshold = 0.1\n","cuda = True\n","img_root=\"./data/worlds-largest-selfie.jpg\"\n","\n","\n","\n","if cuda and torch.cuda.is_available():\n"," torch.set_default_tensor_type('torch.cuda.FloatTensor')\n","else:\n"," torch.set_default_tensor_type('torch.FloatTensor')\n","if not os.path.exists(save_folder):\n"," os.mkdir(save_folder)\n","\n","\n","def bbox_vote(det):\n"," order = det[:, 4].ravel().argsort()[::-1]\n"," det = det[order, :]\n"," while det.shape[0] > 0:\n"," # IOU\n"," area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)\n"," xx1 = np.maximum(det[0, 0], det[:, 0])\n"," yy1 = np.maximum(det[0, 1], det[:, 1])\n"," xx2 = np.minimum(det[0, 2], det[:, 2])\n"," yy2 = np.minimum(det[0, 3], det[:, 3])\n"," w = np.maximum(0.0, xx2 - xx1 + 1)\n"," h = np.maximum(0.0, yy2 - yy1 + 1)\n"," inter = w * h\n"," o = inter / (area[0] + area[:] - inter)\n"," # get needed merge det and delete these det\n"," merge_index = np.where(o >= 0.3)[0]\n"," det_accu = det[merge_index, :]\n"," det = np.delete(det, merge_index, 0)\n"," if merge_index.shape[0] <= 1:\n"," continue\n"," det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))\n"," max_score = np.max(det_accu[:, 4])\n"," det_accu_sum = np.zeros((1, 5))\n"," det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])\n"," det_accu_sum[:, 4] = max_score\n"," try:\n"," dets = np.row_stack((dets, det_accu_sum))\n"," except:\n"," dets = det_accu_sum\n"," dets = dets[0:750, :]\n"," return dets\n","\n","def write_to_txt(f, det , event , im_name):\n"," f.write('{:s}\\n'.format(event + '/' + im_name))\n"," f.write('{:d}\\n'.format(det.shape[0]))\n"," for i in range(det.shape[0]):\n"," xmin = det[i][0]\n"," ymin = det[i][1]\n"," xmax = det[i][2]\n"," ymax = det[i][3]\n"," score = det[i][4] \n"," f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\\n'.\n"," format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))\n","\n","def infer(net , img , transform , thresh , cuda , shrink):\n"," if shrink != 1:\n"," img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)\n"," x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)\n"," x = Variable(x.unsqueeze(0) , volatile=True)\n"," if cuda:\n"," x = x.cuda()\n"," #print (shrink , x.shape)\n"," y = net(x) # forward pass\n"," detections = y.data\n"," # scale each detection back up to the image\n"," scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,\n"," img.shape[1]/shrink, img.shape[0]/shrink] )\n"," det = []\n"," for i in range(detections.size(1)):\n"," j = 0\n"," while detections[0, i, j, 0] >= thresh:\n"," score = detections[0, i, j, 0]\n"," #label_name = labelmap[i-1]\n"," pt = (detections[0, i, j, 1:]*scale).cpu().numpy()\n"," coords = (pt[0], pt[1], pt[2], pt[3]) \n"," det.append([pt[0], pt[1], pt[2], pt[3], score])\n"," j += 1\n"," if (len(det)) == 0:\n"," det = [ [0.1,0.1,0.2,0.2,0.01] ]\n"," det = np.array(det)\n","\n"," keep_index = np.where(det[:, 4] >= 0)[0]\n"," det = det[keep_index, :]\n"," return det\n","\n","def infer_flip(net , img , transform , thresh , cuda , shrink):\n"," img = cv2.flip(img, 1)\n"," det = infer(net , img , transform , thresh , cuda , shrink)\n"," det_t = np.zeros(det.shape)\n"," det_t[:, 0] = img.shape[1] - det[:, 2]\n"," det_t[:, 1] = det[:, 1]\n"," det_t[:, 2] = img.shape[1] - det[:, 
0]\n"," det_t[:, 3] = det[:, 3]\n"," det_t[:, 4] = det[:, 4]\n"," return det_t\n","\n","\n","def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > 2:\n"," bt *= 2\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= 2\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," return det_s, det_b\n","\n","\n","def vis_detections(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","def output(im, dets, image_name , thresh=0.5):\n"," \"\"\"Draw detected bounding boxes.\"\"\"\n"," str_=\"\"\n"," class_name = 'face'\n"," inds = np.where(dets[:, -1] >= thresh)[0]\n"," if len(inds) == 0:\n"," str_+=\"empty\"\n"," return\n"," print (len(inds))\n"," im = im[:, :, (2, 1, 0)]\n"," fig, ax = plt.subplots(figsize=(12, 12))\n"," ax.imshow(im, aspect='equal')\n"," for i in inds:\n"," bbox = dets[i, :4]\n"," score = dets[i, -1]\n"," \n"," xmin=bbox[0]\n"," ymin=bbox[1]\n"," xmax=bbox[2]\n"," ymax=bbox[3]\n"," boxes=[xmin,ymin,xmax,ymax]\n"," str_+=\" \"+str(xmin)+\" \"+str(ymin)+\" \"+str(xmax)+\" \"+str(ymax)\n"," ax.add_patch(\n"," plt.Rectangle((bbox[0], bbox[1]),\n"," bbox[2] - bbox[0],\n"," bbox[3] - bbox[1], fill=False,\n"," edgecolor='red', linewidth=2.5)\n"," )\n"," '''\n"," ax.text(bbox[0], bbox[1] - 5,\n"," '{:s} {:.3f}'.format(class_name, score),\n"," bbox=dict(facecolor='blue', alpha=0.5),\n"," fontsize=10, color='white')\n"," '''\n"," txt.write(str_)\n"," txt.write(\"\\n\")\n"," ax.set_title(('{} detections with '\n"," 'p({} | box) >= {:.1f}').format(class_name, class_name,\n"," thresh),\n"," fontsize=10)\n"," plt.axis('off')\n"," plt.tight_layout()\n"," 
plt.savefig(save_folder+image_name, dpi=fig.dpi)\n","\n","\n","\n","\n","\n","\n","def test_oneimage():\n"," # load net\n"," cfg = widerface_640\n"," num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n"," net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n"," net.load_state_dict(torch.load(trained_model))\n"," net.cuda()\n"," net.eval()\n"," print('Finished loading model!')\n","\n"," # evaluation\n"," cuda = cuda\n"," transform = TestBaseTransform((104, 117, 123))\n"," thresh=cfg['conf_thresh']\n"," #save_path = save_folder\n"," #num_images = len(testset)\n"," \n"," # load data\n"," path = img_root\n"," img_id = 'face'\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," vis_detections(img , det , img_id, visual_threshold)\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Y9HYBbXgXyR_","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":428},"outputId":"35b5ee12-fb0d-4b2b-9dfb-5f0e357f7cf0","executionInfo":{"status":"ok","timestamp":1576069257013,"user_tz":-330,"elapsed":150415,"user":{"displayName":"Face Detection","photoUrl":"","userId":"01433683970229031124"}}},"source":["cfg = widerface_640\n","num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n","net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n","net.load_state_dict(torch.load(trained_model))\n","net.cuda()\n","net.eval() \n","cuda = cuda\n","transform = TestBaseTransform((104, 117, 123))\n","thresh=cfg['conf_thresh']\n","\n","\n","textfile = 'bbox_op.txt'\n","filesdir='/content/drive/My Drive/FolderSeconds/'\n","with open(textfile,'w') as txt:\n"," for so in (sorted(os.listdir(filesdir))):\n"," path = os.path.join(filesdir,so)\n"," img_id = so\n"," print(img_id)\n"," # load data\n"," img = cv2.imread(path, cv2.IMREAD_COLOR)\n","\n"," max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n"," shrink = max_im_shrink if max_im_shrink < 1 else 1\n","\n"," det0 = infer(net , img , transform , thresh , cuda , 
shrink)\n"," det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n"," # shrink detecting and shrink only detect big face\n"," st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n"," det_s = infer(net , img , transform , thresh , cuda , st)\n"," index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n"," det_s = det_s[index, :]\n"," # enlarge one times\n"," factor = 2\n"," bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n"," det_b = infer(net , img , transform , thresh , cuda , bt)\n"," # enlarge small iamge x times for small face\n"," if max_im_shrink > factor:\n"," bt *= factor\n"," while bt < max_im_shrink:\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n"," bt *= factor\n"," det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n"," # enlarge only detect small face\n"," if bt > 1:\n"," index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n"," det_b = det_b[index, :]\n"," else:\n"," index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n"," det_b = det_b[index, :]\n"," det = np.row_stack((det0, det1, det_s, det_b))\n"," det = bbox_vote(det)\n"," output(img , det , img_id, visual_threshold)\n","\n","\n","\n","\n","\n","\n"],"execution_count":41,"outputs":[{"output_type":"stream","text":["ERROR: You specified size [1440, 2560]. However, currently only SSD640 (size=640) is supported!\n","loading pretrained resnet model\n","frame1320.jpg\n","2\n","frame20460.jpg\n","1\n","frame2310.jpg\n","4\n","frame25080.jpg\n","5\n","frame25740.jpg\n","3\n","frame2640.jpg\n","1\n","frame330.jpg\n","1\n","frame660.jpg\n","4\n","frame6930.jpg\n","1\n","frame990.jpg\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:199: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. 
(To control this warning, see the rcParam `figure.max_open_warning`).\n"],"name":"stderr"},{"output_type":"stream","text":["2\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"WfufuuBwjLmh","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]}
\ No newline at end of file
diff --git a/colab_inference/inference_dual_shot.ipynb b/colab_inference/inference_dual_shot.ipynb
new file mode 100644
index 0000000..2b7a73b
--- /dev/null
+++ b/colab_inference/inference_dual_shot.ipynb
@@ -0,0 +1,637 @@
+{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "inference_dual_shot.ipynb", + "provenance": [], + "collapsed_sections": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "oJ-O1t4bATth", + "outputId": "2cdd9c48-c62c-45b6-e992-1a678bcb1f89", + "scrolled": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + } + }, + "source": [ + "!pip install Torch==0.3.1" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting Torch==0.3.1\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/5b/a5/e8b50b55b1abac9f1e3346c4242f1e42a82d368a8442cbd50c532922f6c4/torch-0.3.1-cp36-cp36m-manylinux1_x86_64.whl (496.4MB)\n", + "\u001b[K |████████████████████████████████| 496.4MB 37kB/s \n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (1.17.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from Torch==0.3.1) (3.13)\n", + "\u001b[31mERROR: torchvision 0.4.2 has requirement torch==1.3.1, but you'll have torch 0.3.1 which is incompatible.\u001b[0m\n", + "\u001b[31mERROR: fastai 1.0.59 has requirement torch>=1.0.0, but you'll have torch 0.3.1 which is incompatible.\u001b[0m\n", + "Installing collected packages: Torch\n", + " Found existing installation: torch 1.3.1\n", + " Uninstalling torch-1.3.1:\n", + " Successfully uninstalled torch-1.3.1\n", + "Successfully installed Torch-0.3.1\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "NTekWu-OAp2C", + "outputId": "e3551f83-5a90-48b7-b3f3-176aa314ec01", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 292 + } + }, + "source": [ + "!pip install Torchvision==0.2.1" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting Torchvision==0.2.1\n", + "\u001b[?25l Downloading https://files.pythonhosted.org/packages/ca/0d/f00b2885711e08bd71242ebe7b96561e6f6d01fdb4b9dcf4d37e2e13c5e1/torchvision-0.2.1-py2.py3-none-any.whl (54kB)\n", + "\r\u001b[K |██████ | 10kB 27.8MB/s eta 0:00:01\r\u001b[K |████████████ | 20kB 34.6MB/s eta 0:00:01\r\u001b[K |██████████████████ | 30kB 40.7MB/s eta 0:00:01\r\u001b[K |████████████████████████ | 40kB 43.3MB/s eta 0:00:01\r\u001b[K |██████████████████████████████ | 51kB 37.2MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 61kB 8.9MB/s \n", + "\u001b[?25hRequirement already satisfied: torch in 
/usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (0.3.1)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.12.0)\n", + "Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (4.3.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from Torchvision==0.2.1) (1.17.4)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from torch->Torchvision==0.2.1) (3.13)\n", + "Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=4.1.1->Torchvision==0.2.1) (0.46)\n", + "\u001b[31mERROR: fastai 1.0.59 has requirement torch>=1.0.0, but you'll have torch 0.3.1 which is incompatible.\u001b[0m\n", + "Installing collected packages: Torchvision\n", + " Found existing installation: torchvision 0.4.2\n", + " Uninstalling torchvision-0.4.2:\n", + " Successfully uninstalled torchvision-0.4.2\n", + "Successfully installed Torchvision-0.2.1\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "PKyc13d4Ascv", + "outputId": "7a65dbd6-e724-42ba-9a06-7657c8ddd6e0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + } + }, + "source": [ + "!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'FaceDetection-DSFD'...\n", + "remote: Enumerating objects: 3, done.\u001b[K\n", + "remote: Counting objects: 100% (3/3), done.\u001b[K\n", + "remote: Compressing objects: 100% (3/3), done.\u001b[K\n", + "remote: Total 244 (delta 0), reused 1 (delta 0), pack-reused 241\u001b[K\n", + "Receiving objects: 100% (244/244), 17.62 MiB | 4.61 MiB/s, done.\n", + "Resolving deltas: 100% (75/75), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "EQXrLdu5A3fG", + "outputId": "0a231613-ee22-4e87-98f9-467279f235f7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 122 + } + }, + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n", + "\n", + "Enter your authorization code:\n", + "··········\n", + "Mounted at /content/drive\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "cm2Zo2f7A5E6", + "outputId": "898ef801-395b-4f91-feb6-26c0499e2c99", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "source": [ + "%cd /content/FaceDetection-DSFD" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/content/FaceDetection-DSFD\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "qGaZJNuaW80k", + "colab": {} + }, + "source": [ + "from __future__ import 
print_function \n", + "import sys\n", + "import os\n", + "import argparse\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.backends.cudnn as cudnn\n", + "import torchvision.transforms as transforms\n", + "from torch.autograd import Variable\n", + "from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap\n", + "from PIL import Image\n", + "from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform\n", + "from data import *\n", + "import torch.utils.data as data\n", + "from face_ssd import build_ssd\n", + "#from resnet50_ssd import build_sfd\n", + "import pdb\n", + "import numpy as np\n", + "import cv2\n", + "import math\n", + "import matplotlib.pyplot as plt\n", + "import time\n", + "plt.switch_backend('agg')\n", + "\n", + "\n", + "widerface_root=\"WIDERFace_ROOT\"\n", + "trained_model = \"/content/drive/My Drive/WIDERFace_DSFD_RES152.pth\"\n", + "save_folder = \"eval_tools/\"\n", + "visual_threshold = 0.1\n", + "cuda = True\n", + "img_root=\"./data/worlds-largest-selfie.jpg\"\n", + "\n", + "\n", + "\n", + "if cuda and torch.cuda.is_available():\n", + " torch.set_default_tensor_type('torch.cuda.FloatTensor')\n", + "else:\n", + " torch.set_default_tensor_type('torch.FloatTensor')\n", + "if not os.path.exists(save_folder):\n", + " os.mkdir(save_folder)\n", + "\n", + "\n", + "def bbox_vote(det):\n", + " order = det[:, 4].ravel().argsort()[::-1]\n", + " det = det[order, :]\n", + " while det.shape[0] > 0:\n", + " # IOU\n", + " area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)\n", + " xx1 = np.maximum(det[0, 0], det[:, 0])\n", + " yy1 = np.maximum(det[0, 1], det[:, 1])\n", + " xx2 = np.minimum(det[0, 2], det[:, 2])\n", + " yy2 = np.minimum(det[0, 3], det[:, 3])\n", + " w = np.maximum(0.0, xx2 - xx1 + 1)\n", + " h = np.maximum(0.0, yy2 - yy1 + 1)\n", + " inter = w * h\n", + " o = inter / (area[0] + area[:] - inter)\n", + " # get needed merge det and delete these det\n", + " merge_index = np.where(o >= 0.3)[0]\n", + " det_accu = det[merge_index, :]\n", + " det = np.delete(det, merge_index, 0)\n", + " if merge_index.shape[0] <= 1:\n", + " continue\n", + " det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))\n", + " max_score = np.max(det_accu[:, 4])\n", + " det_accu_sum = np.zeros((1, 5))\n", + " det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])\n", + " det_accu_sum[:, 4] = max_score\n", + " try:\n", + " dets = np.row_stack((dets, det_accu_sum))\n", + " except:\n", + " dets = det_accu_sum\n", + " dets = dets[0:750, :]\n", + " return dets\n", + "\n", + "def write_to_txt(f, det , event , im_name):\n", + " f.write('{:s}\\n'.format(event + '/' + im_name))\n", + " f.write('{:d}\\n'.format(det.shape[0]))\n", + " for i in range(det.shape[0]):\n", + " xmin = det[i][0]\n", + " ymin = det[i][1]\n", + " xmax = det[i][2]\n", + " ymax = det[i][3]\n", + " score = det[i][4] \n", + " f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\\n'.\n", + " format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))\n", + "\n", + "def infer(net , img , transform , thresh , cuda , shrink):\n", + " if shrink != 1:\n", + " img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)\n", + " x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)\n", + " x = Variable(x.unsqueeze(0) , volatile=True)\n", + " if cuda:\n", + " x = x.cuda()\n", + " #print (shrink , x.shape)\n", + " y = net(x) # forward pass\n", + " detections = y.data\n", + 
" # scale each detection back up to the image\n", + " scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,\n", + " img.shape[1]/shrink, img.shape[0]/shrink] )\n", + " det = []\n", + " for i in range(detections.size(1)):\n", + " j = 0\n", + " while detections[0, i, j, 0] >= thresh:\n", + " score = detections[0, i, j, 0]\n", + " #label_name = labelmap[i-1]\n", + " pt = (detections[0, i, j, 1:]*scale).cpu().numpy()\n", + " coords = (pt[0], pt[1], pt[2], pt[3]) \n", + " det.append([pt[0], pt[1], pt[2], pt[3], score])\n", + " j += 1\n", + " if (len(det)) == 0:\n", + " det = [ [0.1,0.1,0.2,0.2,0.01] ]\n", + " det = np.array(det)\n", + "\n", + " keep_index = np.where(det[:, 4] >= 0)[0]\n", + " det = det[keep_index, :]\n", + " return det\n", + "\n", + "def infer_flip(net , img , transform , thresh , cuda , shrink):\n", + " img = cv2.flip(img, 1)\n", + " det = infer(net , img , transform , thresh , cuda , shrink)\n", + " det_t = np.zeros(det.shape)\n", + " det_t[:, 0] = img.shape[1] - det[:, 2]\n", + " det_t[:, 1] = det[:, 1]\n", + " det_t[:, 2] = img.shape[1] - det[:, 0]\n", + " det_t[:, 3] = det[:, 3]\n", + " det_t[:, 4] = det[:, 4]\n", + " return det_t\n", + "\n", + "\n", + "def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):\n", + " # shrink detecting and shrink only detect big face\n", + " st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n", + " det_s = infer(net , img , transform , thresh , cuda , st)\n", + " index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n", + " det_s = det_s[index, :]\n", + " # enlarge one times\n", + " bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n", + " det_b = infer(net , img , transform , thresh , cuda , bt)\n", + " # enlarge small iamge x times for small face\n", + " if max_im_shrink > 2:\n", + " bt *= 2\n", + " while bt < max_im_shrink:\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n", + " bt *= 2\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n", + " # enlarge only detect small face\n", + " if bt > 1:\n", + " index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n", + " det_b = det_b[index, :]\n", + " else:\n", + " index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n", + " det_b = det_b[index, :]\n", + " return det_s, det_b\n", + "\n", + "\n", + "def vis_detections(im, dets, image_name , thresh=0.5):\n", + " \"\"\"Draw detected bounding boxes.\"\"\"\n", + " class_name = 'face'\n", + " inds = np.where(dets[:, -1] >= thresh)[0]\n", + " if len(inds) == 0:\n", + " return\n", + " print (len(inds))\n", + " im = im[:, :, (2, 1, 0)]\n", + " fig, ax = plt.subplots(figsize=(12, 12))\n", + " ax.imshow(im, aspect='equal')\n", + " for i in inds:\n", + " bbox = dets[i, :4]\n", + " score = dets[i, -1]\n", + " ax.add_patch(\n", + " plt.Rectangle((bbox[0], bbox[1]),\n", + " bbox[2] - bbox[0],\n", + " bbox[3] - bbox[1], fill=False,\n", + " edgecolor='red', linewidth=2.5)\n", + " )\n", + " '''\n", + " ax.text(bbox[0], bbox[1] - 5,\n", + " '{:s} {:.3f}'.format(class_name, score),\n", + " bbox=dict(facecolor='blue', alpha=0.5),\n", + " fontsize=10, color='white')\n", + " '''\n", + " ax.set_title(('{} detections with '\n", + " 'p({} | box) >= {:.1f}').format(class_name, class_name,\n", + " thresh),\n", + " fontsize=10)\n", + " plt.axis('off')\n", + " 
plt.tight_layout()\n", + " plt.savefig(save_folder+image_name, dpi=fig.dpi)\n", + "\n", + "def output(im, dets, image_name , thresh=0.5):\n", + " \"\"\"Draw detected bounding boxes.\"\"\"\n", + " str_=\"\"\n", + " class_name = 'face'\n", + " inds = np.where(dets[:, -1] >= thresh)[0]\n", + " if len(inds) == 0:\n", + " str_+=\"empty\"\n", + " return\n", + " print (len(inds))\n", + " im = im[:, :, (2, 1, 0)]\n", + " fig, ax = plt.subplots(figsize=(12, 12))\n", + " ax.imshow(im, aspect='equal')\n", + " for i in inds:\n", + " bbox = dets[i, :4]\n", + " score = dets[i, -1]\n", + " \n", + " xmin=bbox[0]\n", + " ymin=bbox[1]\n", + " xmax=bbox[2]\n", + " ymax=bbox[3]\n", + " boxes=[xmin,ymin,xmax,ymax]\n", + " str_+=\" \"+str(xmin)+\" \"+str(ymin)+\" \"+str(xmax)+\" \"+str(ymax)\n", + " ax.add_patch(\n", + " plt.Rectangle((bbox[0], bbox[1]),\n", + " bbox[2] - bbox[0],\n", + " bbox[3] - bbox[1], fill=False,\n", + " edgecolor='red', linewidth=2.5)\n", + " )\n", + " '''\n", + " ax.text(bbox[0], bbox[1] - 5,\n", + " '{:s} {:.3f}'.format(class_name, score),\n", + " bbox=dict(facecolor='blue', alpha=0.5),\n", + " fontsize=10, color='white')\n", + " '''\n", + " txt.write(str_)\n", + " txt.write(\"\\n\")\n", + " ax.set_title(('{} detections with '\n", + " 'p({} | box) >= {:.1f}').format(class_name, class_name,\n", + " thresh),\n", + " fontsize=10)\n", + " plt.axis('off')\n", + " plt.tight_layout()\n", + " plt.savefig(save_folder+image_name, dpi=fig.dpi)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "def test_oneimage():\n", + " # load net\n", + " cfg = widerface_640\n", + " num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n", + " net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n", + " net.load_state_dict(torch.load(trained_model))\n", + " net.cuda()\n", + " net.eval()\n", + " print('Finished loading model!')\n", + "\n", + " # evaluation\n", + " cuda = cuda\n", + " transform = TestBaseTransform((104, 117, 123))\n", + " thresh=cfg['conf_thresh']\n", + " #save_path = save_folder\n", + " #num_images = len(testset)\n", + " \n", + " # load data\n", + " path = img_root\n", + " img_id = 'face'\n", + " img = cv2.imread(path, cv2.IMREAD_COLOR)\n", + "\n", + " max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n", + " shrink = max_im_shrink if max_im_shrink < 1 else 1\n", + "\n", + " det0 = infer(net , img , transform , thresh , cuda , shrink)\n", + " det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n", + " # shrink detecting and shrink only detect big face\n", + " st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n", + " det_s = infer(net , img , transform , thresh , cuda , st)\n", + " index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n", + " det_s = det_s[index, :]\n", + " # enlarge one times\n", + " factor = 2\n", + " bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n", + " det_b = infer(net , img , transform , thresh , cuda , bt)\n", + " # enlarge small iamge x times for small face\n", + " if max_im_shrink > factor:\n", + " bt *= factor\n", + " while bt < max_im_shrink:\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n", + " bt *= factor\n", + " det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n", + " # enlarge only detect small face\n", + " if bt > 1:\n", + " index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n", 
+ "        det_b = det_b[index, :]\n",
+ "    else:\n",
+ "        index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n",
+ "        det_b = det_b[index, :]\n",
+ "    det = np.row_stack((det0, det1, det_s, det_b))\n",
+ "    det = bbox_vote(det)\n",
+ "    vis_detections(img , det , img_id, visual_threshold)\n"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Y9HYBbXgXyR_",
+ "colab_type": "code",
+ "outputId": "91046e68-1648-4d07-c546-8432732c782f",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 680
+ }
+ },
+ "source": [
+ "cfg = widerface_640\n",
+ "num_classes = len(WIDERFace_CLASSES) + 1 # +1 background\n",
+ "net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD\n",
+ "net.load_state_dict(torch.load(trained_model))\n",
+ "net.cuda()\n",
+ "net.eval()\n",
+ "transform = TestBaseTransform((104, 117, 123))\n",
+ "thresh = cfg['conf_thresh']\n",
+ "\n",
+ "\n",
+ "textfile = 'bbox_op.txt'\n",
+ "filesdir = '/content/drive/My Drive/FolderSeconds/'\n",
+ "with open(textfile, 'w') as txt:\n",
+ "    for so in sorted(os.listdir(filesdir)):\n",
+ "        path = os.path.join(filesdir, so)\n",
+ "        img_id = so\n",
+ "        print(img_id)\n",
+ "        # load data\n",
+ "        img = cv2.imread(path, cv2.IMREAD_COLOR)\n",
+ "\n",
+ "        max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5\n",
+ "        shrink = max_im_shrink if max_im_shrink < 1 else 1\n",
+ "\n",
+ "        det0 = infer(net , img , transform , thresh , cuda , shrink)\n",
+ "        det1 = infer_flip(net , img , transform , thresh , cuda , shrink)\n",
+ "        # shrunk pass: detect only big faces\n",
+ "        st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink\n",
+ "        det_s = infer(net , img , transform , thresh , cuda , st)\n",
+ "        index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]\n",
+ "        det_s = det_s[index, :]\n",
+ "        # first enlarged pass (up to 2x)\n",
+ "        factor = 2\n",
+ "        bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2\n",
+ "        det_b = infer(net , img , transform , thresh , cuda , bt)\n",
+ "        # keep doubling the small image to catch small faces\n",
+ "        if max_im_shrink > factor:\n",
+ "            bt *= factor\n",
+ "            while bt < max_im_shrink:\n",
+ "                det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))\n",
+ "                bt *= factor\n",
+ "            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))\n",
+ "        # enlarged passes keep only small faces\n",
+ "        if bt > 1:\n",
+ "            index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]\n",
+ "            det_b = det_b[index, :]\n",
+ "        else:\n",
+ "            index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]\n",
+ "            det_b = det_b[index, :]\n",
+ "        det = np.row_stack((det0, det1, det_s, det_b))\n",
+ "        det = bbox_vote(det)\n",
+ "        output(img , det , img_id, visual_threshold)\n"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "loading pretrained resnet model\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Downloading: \"https://download.pytorch.org/models/resnet152-b121ed2d.pth\" to /root/.torch/models/resnet152-b121ed2d.pth\n",
+ "100%|██████████| 241530880/241530880 [00:02<00:00, 101989518.91it/s]\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "frame1320.jpg\n",
+
"2\n", + "frame20460.jpg\n", + "1\n", + "frame2310.jpg\n", + "4\n", + "frame25080.jpg\n", + "5\n", + "frame25740.jpg\n", + "3\n", + "frame2640.jpg\n", + "1\n", + "frame330.jpg\n", + "1\n", + "frame660.jpg\n" + ], + "name": "stdout" + }, + { + "output_type": "error", + "ename": "KeyboardInterrupt", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mdet0\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mdet1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfer_flip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0;31m# shrink detecting and shrink only detect big face\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0.5\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmax_im_shrink\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m0.75\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;36m0.5\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mmax_im_shrink\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36minfer_flip\u001b[0;34m(net, img, transform, thresh, cuda, shrink)\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minfer_flip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcv2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mflip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 120\u001b[0;31m \u001b[0mdet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minfer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mimg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mcuda\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mshrink\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 121\u001b[0m \u001b[0mdet_t\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[0mdet_t\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mdet\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36minfer\u001b[0;34m(net, img, transform, thresh, cuda, shrink)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;31m#print (shrink , x.shape)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 95\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnet\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# forward pass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 96\u001b[0m \u001b[0mdetections\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;31m# scale each detection back up to the image\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 355\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 356\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 357\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 358\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/content/FaceDetection-DSFD/face_ssd.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 329\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'feature_maps'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfeaturemap_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'min_dim'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mimage_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 331\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpriors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minit_priors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 332\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrefine\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 333\u001b[0m output = self.detect(\n", + "\u001b[0;32m/content/FaceDetection-DSFD/face_ssd.py\u001b[0m in \u001b[0;36minit_priors\u001b[0;34m(self, cfg, min_size, max_size)\u001b[0m\n\u001b[1;32m 185\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minit_priors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mcfg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mmin_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'min_sizes'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'max_sizes'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 186\u001b[0m \u001b[0mpriorbox\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPriorBox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mmin_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 187\u001b[0;31m \u001b[0mprior\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mVariable\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0mpriorbox\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0mvolatile\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 188\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mprior\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/content/FaceDetection-DSFD/layers/functions/prior_box.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[0;31m# back to torch land\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 72\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclip\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclamp_\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "WfufuuBwjLmh", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/colab_inference/inference_dual_shot.py b/colab_inference/inference_dual_shot.py new file mode 100644 index 0000000..cfa0851 --- /dev/null +++ b/colab_inference/inference_dual_shot.py @@ -0,0 +1,365 @@ +# -*- coding: utf-8 -*- +"""inference_dual_shot.ipynb + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1V1tS5-T3WOybFZmo7Z8tOLaPxub5Waet +""" + +!pip install Torch==0.3.1 + +!pip install Torchvision==0.2.1 + +!git clone https://github.com/TencentYoutuResearch/FaceDetection-DSFD.git + +from google.colab import drive +drive.mount('/content/drive') + +# Commented out IPython magic to ensure Python compatibility. 
+# %cd /content/FaceDetection-DSFD

+from __future__ import print_function
+import sys
+import os
+import argparse
+import torch
+import torch.nn as nn
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+from torch.autograd import Variable
+from data import WIDERFace_ROOT , WIDERFace_CLASSES as labelmap
+from PIL import Image
+from data import WIDERFaceDetection, WIDERFaceAnnotationTransform, WIDERFace_CLASSES, WIDERFace_ROOT, BaseTransform , TestBaseTransform
+from data import *
+import torch.utils.data as data
+from face_ssd import build_ssd
+#from resnet50_ssd import build_sfd
+import pdb
+import numpy as np
+import cv2
+import math
+import matplotlib.pyplot as plt
+import time
+plt.switch_backend('agg')
+
+
+widerface_root = "WIDERFace_ROOT"  # note: unused below
+trained_model = "/content/drive/My Drive/WIDERFace_DSFD_RES152.pth"
+save_folder = "eval_tools/"
+visual_threshold = 0.1
+cuda = True
+img_root = "./data/worlds-largest-selfie.jpg"
+
+
+if cuda and torch.cuda.is_available():
+    torch.set_default_tensor_type('torch.cuda.FloatTensor')
+else:
+    torch.set_default_tensor_type('torch.FloatTensor')
+if not os.path.exists(save_folder):
+    os.mkdir(save_folder)
+
+
+def bbox_vote(det):
+    order = det[:, 4].ravel().argsort()[::-1]
+    det = det[order, :]
+    while det.shape[0] > 0:
+        # IoU of the current top-scoring box against every remaining box
+        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
+        xx1 = np.maximum(det[0, 0], det[:, 0])
+        yy1 = np.maximum(det[0, 1], det[:, 1])
+        xx2 = np.minimum(det[0, 2], det[:, 2])
+        yy2 = np.minimum(det[0, 3], det[:, 3])
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        o = inter / (area[0] + area[:] - inter)
+        # pop every box that overlaps the top box with IoU >= 0.3
+        merge_index = np.where(o >= 0.3)[0]
+        det_accu = det[merge_index, :]
+        det = np.delete(det, merge_index, 0)
+        if merge_index.shape[0] <= 1:
+            # a box with no voting partners is dropped
+            continue
+        # fuse the group into one box: score-weighted average of the
+        # coordinates, keeping the maximum score
+        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
+        max_score = np.max(det_accu[:, 4])
+        det_accu_sum = np.zeros((1, 5))
+        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
+        det_accu_sum[:, 4] = max_score
+        try:
+            dets = np.row_stack((dets, det_accu_sum))
+        except NameError:  # first fused group: dets does not exist yet
+            dets = det_accu_sum
+    dets = dets[0:750, :]  # keep at most 750 detections
+    return dets
+
+def write_to_txt(f, det , event , im_name):
+    # WIDER FACE evaluation format: one "x y width height score" line per box
+    f.write('{:s}\n'.format(event + '/' + im_name))
+    f.write('{:d}\n'.format(det.shape[0]))
+    for i in range(det.shape[0]):
+        xmin = det[i][0]
+        ymin = det[i][1]
+        xmax = det[i][2]
+        ymax = det[i][3]
+        score = det[i][4]
+        f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.
+                format(xmin, ymin, (xmax - xmin + 1), (ymax - ymin + 1), score))
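+
+# A minimal, uncalled sanity check for bbox_vote above (illustration only;
+# the toy coordinates are hypothetical and nothing in this script calls it).
+# The two heavily overlapping boxes vote together and are fused into one
+# score-weighted box; the isolated third box has no voting partner and is
+# dropped by the `merge_index.shape[0] <= 1` branch.
+def _bbox_vote_smoke_test():
+    toy = np.array([[10., 10., 50., 50., 0.9],
+                    [12., 12., 52., 52., 0.6],
+                    [200., 200., 240., 240., 0.8]])
+    merged = bbox_vote(toy)
+    assert merged.shape == (1, 5)  # one fused box survives
+    assert merged[0, 4] == 0.9     # the fused box keeps the max score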
+
+def infer(net , img , transform , thresh , cuda , shrink):
+    if shrink != 1:
+        img = cv2.resize(img, None, None, fx=shrink, fy=shrink, interpolation=cv2.INTER_LINEAR)
+    x = torch.from_numpy(transform(img)[0]).permute(2, 0, 1)
+    x = Variable(x.unsqueeze(0) , volatile=True)
+    if cuda:
+        x = x.cuda()
+    #print (shrink , x.shape)
+    y = net(x)  # forward pass
+    detections = y.data
+    # scale each detection back up to the image
+    scale = torch.Tensor([ img.shape[1]/shrink, img.shape[0]/shrink,
+                         img.shape[1]/shrink, img.shape[0]/shrink] )
+    det = []
+    for i in range(detections.size(1)):
+        j = 0
+        while detections[0, i, j, 0] >= thresh:
+            score = detections[0, i, j, 0]
+            #label_name = labelmap[i-1]
+            pt = (detections[0, i, j, 1:]*scale).cpu().numpy()
+            coords = (pt[0], pt[1], pt[2], pt[3])
+            det.append([pt[0], pt[1], pt[2], pt[3], score])
+            j += 1
+    if len(det) == 0:
+        # placeholder box so downstream numpy code never sees an empty array
+        det = [ [0.1, 0.1, 0.2, 0.2, 0.01] ]
+    det = np.array(det)
+
+    keep_index = np.where(det[:, 4] >= 0)[0]
+    det = det[keep_index, :]
+    return det
+
+def infer_flip(net , img , transform , thresh , cuda , shrink):
+    # detect on the horizontally mirrored image, then mirror the x
+    # coordinates of the boxes back
+    img = cv2.flip(img, 1)
+    det = infer(net , img , transform , thresh , cuda , shrink)
+    det_t = np.zeros(det.shape)
+    det_t[:, 0] = img.shape[1] - det[:, 2]
+    det_t[:, 1] = det[:, 1]
+    det_t[:, 2] = img.shape[1] - det[:, 0]
+    det_t[:, 3] = det[:, 3]
+    det_t[:, 4] = det[:, 4]
+    return det_t
+
+
+def infer_multi_scale_sfd(net , img , transform , thresh , cuda , max_im_shrink):
+    # shrunk pass: detect only big faces
+    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+    det_s = infer(net , img , transform , thresh , cuda , st)
+    index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
+    det_s = det_s[index, :]
+    # first enlarged pass (up to 2x)
+    bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+    det_b = infer(net , img , transform , thresh , cuda , bt)
+    # keep doubling the small image to catch small faces
+    if max_im_shrink > 2:
+        bt *= 2
+        while bt < max_im_shrink:
+            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))
+            bt *= 2
+        det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))
+    # enlarged passes keep only small faces
+    if bt > 1:
+        index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
+        det_b = det_b[index, :]
+    else:
+        index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
+        det_b = det_b[index, :]
+    return det_s, det_b
+
+
+def vis_detections(im, dets, image_name , thresh=0.5):
+    """Draw detected bounding boxes."""
+    class_name = 'face'
+    inds = np.where(dets[:, -1] >= thresh)[0]
+    if len(inds) == 0:
+        return
+    print (len(inds))
+    im = im[:, :, (2, 1, 0)]
+    fig, ax = plt.subplots(figsize=(12, 12))
+    ax.imshow(im, aspect='equal')
+    for i in inds:
+        bbox = dets[i, :4]
+        score = dets[i, -1]
+        ax.add_patch(
+            plt.Rectangle((bbox[0], bbox[1]),
+                          bbox[2] - bbox[0],
+                          bbox[3] - bbox[1], fill=False,
+                          edgecolor='red', linewidth=2.5)
+            )
+    '''
+    ax.text(bbox[0], bbox[1] - 5,
+            '{:s} {:.3f}'.format(class_name, score),
+            bbox=dict(facecolor='blue', alpha=0.5),
+            fontsize=10, color='white')
+    '''
+    ax.set_title(('{} detections with '
+                  'p({} | box) >= {:.1f}').format(class_name, class_name,
+                                                  thresh),
+                  fontsize=10)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.savefig(save_folder+image_name, dpi=fig.dpi)
+
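+
+# Uncalled illustration of the coordinate mapping infer_flip relies on: a
+# box found on the horizontally mirrored image is mapped back with
+# x' = width - x, so the left and right edges swap roles. The sample box
+# and width here are made-up values, not pipeline data.
+def _unflip_box_demo():
+    width = 100
+    flipped = np.array([[20., 30., 40., 60., 0.8]])  # xmin, ymin, xmax, ymax, score
+    restored = np.zeros(flipped.shape)
+    restored[:, 0] = width - flipped[:, 2]  # new xmin from the old xmax
+    restored[:, 1] = flipped[:, 1]
+    restored[:, 2] = width - flipped[:, 0]  # new xmax from the old xmin
+    restored[:, 3] = flipped[:, 3]
+    restored[:, 4] = flipped[:, 4]
+    assert restored[0, 0] == 60.0 and restored[0, 2] == 80.0
+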
+def output(im, dets, image_name , thresh=0.5):
+    """Draw detected bounding boxes and append their coordinates to the
+    global txt file, one line per image."""
+    str_=""
+    class_name = 'face'
+    inds = np.where(dets[:, -1] >= thresh)[0]
+    if len(inds) == 0:
+        # record images with no detections before bailing out
+        str_+="empty"
+        txt.write(str_)
+        txt.write("\n")
+        return
+    print (len(inds))
+    im = im[:, :, (2, 1, 0)]
+    fig, ax = plt.subplots(figsize=(12, 12))
+    ax.imshow(im, aspect='equal')
+    for i in inds:
+        bbox = dets[i, :4]
+        score = dets[i, -1]
+
+        xmin=bbox[0]
+        ymin=bbox[1]
+        xmax=bbox[2]
+        ymax=bbox[3]
+        boxes=[xmin,ymin,xmax,ymax]
+        str_+=" "+str(xmin)+" "+str(ymin)+" "+str(xmax)+" "+str(ymax)
+        ax.add_patch(
+            plt.Rectangle((bbox[0], bbox[1]),
+                          bbox[2] - bbox[0],
+                          bbox[3] - bbox[1], fill=False,
+                          edgecolor='red', linewidth=2.5)
+            )
+    '''
+    ax.text(bbox[0], bbox[1] - 5,
+            '{:s} {:.3f}'.format(class_name, score),
+            bbox=dict(facecolor='blue', alpha=0.5),
+            fontsize=10, color='white')
+    '''
+    txt.write(str_)
+    txt.write("\n")
+    ax.set_title(('{} detections with '
+                  'p({} | box) >= {:.1f}').format(class_name, class_name,
+                                                  thresh),
+                  fontsize=10)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.savefig(save_folder+image_name, dpi=fig.dpi)
+
+
+# Single-image variant; defined for reference but not invoked below (the
+# batch loop at the bottom of this file is what actually runs).
+def test_oneimage():
+    # load net
+    cfg = widerface_640
+    num_classes = len(WIDERFace_CLASSES) + 1 # +1 background
+    net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD
+    net.load_state_dict(torch.load(trained_model))
+    net.cuda()
+    net.eval()
+    print('Finished loading model!')
+
+    # evaluation (the global cuda flag is used as-is)
+    transform = TestBaseTransform((104, 117, 123))
+    thresh = cfg['conf_thresh']
+
+    # load data
+    path = img_root
+    img_id = 'face'
+    img = cv2.imread(path, cv2.IMREAD_COLOR)
+
+    max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5
+    shrink = max_im_shrink if max_im_shrink < 1 else 1
+
+    det0 = infer(net , img , transform , thresh , cuda , shrink)
+    det1 = infer_flip(net , img , transform , thresh , cuda , shrink)
+    # shrunk pass: detect only big faces
+    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+    det_s = infer(net , img , transform , thresh , cuda , st)
+    index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
+    det_s = det_s[index, :]
+    # first enlarged pass (up to 2x)
+    factor = 2
+    bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+    det_b = infer(net , img , transform , thresh , cuda , bt)
+    # keep doubling the small image to catch small faces
+    if max_im_shrink > factor:
+        bt *= factor
+        while bt < max_im_shrink:
+            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))
+            bt *= factor
+        det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))
+    # enlarged passes keep only small faces
+    if bt > 1:
+        index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
+        det_b = det_b[index, :]
+    else:
+        index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
+        det_b = det_b[index, :]
+    det = np.row_stack((det0, det1, det_s, det_b))
+    det = bbox_vote(det)
+    vis_detections(img , det , img_id, visual_threshold)
+
+
+cfg = widerface_640
+num_classes = len(WIDERFace_CLASSES) + 1 # +1 background
+net = build_ssd('test', cfg['min_dim'], num_classes) # initialize SSD
+net.load_state_dict(torch.load(trained_model))
+net.cuda()
+net.eval()
+transform = TestBaseTransform((104, 117, 123))
+thresh = cfg['conf_thresh']
+
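+
+# The batch loop below runs the same multi-scale schedule as test_oneimage
+# on every image in filesdir. This helper is an uncalled illustration of
+# the scales that schedule visits; the height/width defaults are made-up
+# example values, not something the pipeline reads.
+def _scale_schedule_demo(height=1080, width=1920):
+    max_im_shrink = ((2000.0 * 2000.0) / (height * width)) ** 0.5
+    scales = [max_im_shrink if max_im_shrink < 1 else 1]  # base pass (the flip pass reuses it)
+    st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+    scales.append(st)                                     # shrunk pass for big faces
+    bt = min(2, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+    scales.append(bt)                                     # first enlarged pass
+    if max_im_shrink > 2:                                 # keep doubling for small faces
+        bt *= 2
+        while bt < max_im_shrink:
+            scales.append(bt)
+            bt *= 2
+        scales.append(max_im_shrink)
+    return scales                                         # e.g. [1, 0.5, 1.39] for a 1080x1920 frame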
+
+textfile = 'bbox_op.txt'
+filesdir = '/content/drive/My Drive/FolderSeconds/'
+with open(textfile, 'w') as txt:
+    for so in sorted(os.listdir(filesdir)):
+        path = os.path.join(filesdir, so)
+        img_id = so
+        print(img_id)
+        # load data
+        img = cv2.imread(path, cv2.IMREAD_COLOR)
+
+        max_im_shrink = ( (2000.0*2000.0) / (img.shape[0] * img.shape[1])) ** 0.5
+        shrink = max_im_shrink if max_im_shrink < 1 else 1
+
+        det0 = infer(net , img , transform , thresh , cuda , shrink)
+        det1 = infer_flip(net , img , transform , thresh , cuda , shrink)
+        # shrunk pass: detect only big faces
+        st = 0.5 if max_im_shrink >= 0.75 else 0.5 * max_im_shrink
+        det_s = infer(net , img , transform , thresh , cuda , st)
+        index = np.where(np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1) > 30)[0]
+        det_s = det_s[index, :]
+        # first enlarged pass (up to 2x)
+        factor = 2
+        bt = min(factor, max_im_shrink) if max_im_shrink > 1 else (st + max_im_shrink) / 2
+        det_b = infer(net , img , transform , thresh , cuda , bt)
+        # keep doubling the small image to catch small faces
+        if max_im_shrink > factor:
+            bt *= factor
+            while bt < max_im_shrink:
+                det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , bt)))
+                bt *= factor
+            det_b = np.row_stack((det_b, infer(net , img , transform , thresh , cuda , max_im_shrink) ))
+        # enlarged passes keep only small faces
+        if bt > 1:
+            index = np.where(np.minimum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
+            det_b = det_b[index, :]
+        else:
+            index = np.where(np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
+            det_b = det_b[index, :]
+        det = np.row_stack((det0, det1, det_s, det_b))
+        det = bbox_vote(det)
+        output(img , det , img_id, visual_threshold)
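+
+# bbox_op.txt ends up with one line per image: either "empty" (no faces at
+# or above visual_threshold) or space-separated xmin ymin xmax ymax values,
+# four numbers per retained detection. A minimal, hypothetical reader for
+# that file (illustration only; nothing above calls it):
+def read_bbox_op(path='bbox_op.txt'):
+    boxes_per_line = []
+    with open(path) as f:
+        for line in f:
+            vals = line.split()
+            if not vals or vals[0] == 'empty':
+                boxes_per_line.append([])
+                continue
+            nums = list(map(float, vals))
+            # regroup the flat list into [xmin, ymin, xmax, ymax] boxes
+            boxes_per_line.append([nums[i:i + 4] for i in range(0, len(nums), 4)])
+    return boxes_per_line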