diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..6773dced4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+*.pyc
+*__pycache__*
+*core.*
+_ext
+tmp
+*.o*
+*~
+*.idea
+*.mp4
+*.h5
+*.pth
+*.egg-info
+
+/build
+/dist
\ No newline at end of file
diff --git a/README.md b/README.md
index 1974b3640..86f32575b 100644
--- a/README.md
+++ b/README.md
@@ -20,14 +20,14 @@ We also provide a set of Face Detector for edge device in [here](https://github.
 | Pytorch (original image scale) | 90.70% | 88.16% | 73.82% |
 | Mxnet | 88.72% | 86.97% | 79.19% |
 | Mxnet(original image scale) | 89.58% | 87.11% | 69.12% |
-<p align="center"><img src="curve/Widerface.jpg" width="640"\></p>
+<p align="center"><img src="retinaface/curve/Widerface.jpg" width="640"\></p>
 
 ## FDDB Performance.
 | FDDB(pytorch) | performance |
 |:-|:-:|
 | Mobilenet0.25 | 98.64% |
 | Resnet50 | 99.22% |
-<p align="center"><img src="curve/FDDB.png" width="640"\></p>
+<p align="center"><img src="retinaface/curve/FDDB.png" width="640"\></p>
 
 ### Contents
 - [Installation](#installation)
@@ -112,7 +112,7 @@ python test_fddb.py --trained_model weight_file --network mobile0.25 or resnet50
 
 3. Download [eval_tool](https://bitbucket.org/marcopede/face-eval) to evaluate the performance.
 
-<p align="center"><img src="curve/1.jpg" width="640"\></p>
+<p align="center"><img src="retinaface/curve/1.jpg" width="640"\></p>
 
 ## TensorRT
 -[TensorRT](https://github.com/wang-xinyu/tensorrtx/tree/master/retinaface)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..2a84a692b
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[build-system]
+requires = [
+    "setuptools>=64.0.0",
+    "wheel",
+]
+build-backend = "setuptools.build_meta"
+
diff --git a/retinaface/__init__.py b/retinaface/__init__.py
new file mode 100644
index 000000000..162f22da6
--- /dev/null
+++ b/retinaface/__init__.py
@@ -0,0 +1 @@
+from .inference_framework import RetinaFaceDetector
diff --git a/convert_to_onnx.py b/retinaface/convert_to_onnx.py
similarity index 72%
rename from convert_to_onnx.py
rename to retinaface/convert_to_onnx.py
index e7f32799a..c7f9121a7 100755
--- a/convert_to_onnx.py
+++ b/retinaface/convert_to_onnx.py
@@ -1,16 +1,10 @@
 from __future__ import print_function
-import os
+
 import argparse
 import torch
-import torch.backends.cudnn as cudnn
-import numpy as np
+
 from data import cfg_mnet, cfg_re50
-from layers.functions.prior_box import PriorBox
-from utils.nms.py_cpu_nms import py_cpu_nms
-import cv2
 from models.retinaface import RetinaFace
-from utils.box_utils import decode, decode_landm
-from utils.timer import Timer
 
 
 parser = argparse.ArgumentParser(description='Test')
@@ -18,7 +12,7 @@
                     type=str, help='Trained state_dict file path to open')
 parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50')
 parser.add_argument('--long_side', default=640, help='when origin_size is false, long_side is scaled size(320 or 640 for long side)')
-parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference')
+parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
 
 args = parser.parse_args()
 
@@ -43,13 +37,17 @@ def remove_prefix(state_dict, prefix):
     return {f(key): value for key, value in state_dict.items()}
 
 
-def load_model(model, pretrained_path, load_to_cpu):
+def load_model(model, pretrained_path, device):
     print('Loading pretrained model from {}'.format(pretrained_path))
-    if load_to_cpu:
-        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
-    else:
+    if 'cuda' in device or device=='gpu':
         device = torch.cuda.current_device()
         pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
+    elif device=='mps':
+        device = torch.device('mps')
+        pretrained_dict = torch.load(pretrained_path, map_location=device)
+    else:
+        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
+    
     if "state_dict" in pretrained_dict.keys():
         pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
     else:
@@ -66,13 +64,31 @@ def load_model(model, pretrained_path, load_to_cpu):
         cfg = cfg_mnet
     elif args.network == "resnet50":
         cfg = cfg_re50
+
+    if args.cpu:
+        print('--> load model and config files to CPU')
+        device = "cpu"
+    elif torch.cuda.is_available():
+        print('--> load model and config files to GPU')
+        device = "cuda"
+    elif torch.backends.mps.is_available():  # documented MPS availability check
+        print('--> load model and config files to MPS')
+        device = "mps"
+    else:
+        raise RuntimeError('No GPU or MPS found. Please use "--cpu"')
+
     # net and model
     net = RetinaFace(cfg=cfg, phase = 'test')
-    net = load_model(net, args.trained_model, args.cpu)
+    net = load_model(net, args.trained_model, device=device)
     net.eval()
-    print('Finished loading model!')
-    print(net)
-    device = torch.device("cpu" if args.cpu else "cuda")
+    print('--> Finished loading model!')
+    # print(net)
+
+    if device == "cuda" and torch.cuda.is_available():
+        torch.backends.cudnn.benchmark = True  # the `cudnn` alias import was removed from this file
+
+    # device = torch.device("cpu" if args.cpu else "cuda")
+    device = torch.device(device)
     net = net.to(device)
 
     # ------------------------ export -----------------------------
@@ -82,7 +98,7 @@ def load_model(model, pretrained_path, load_to_cpu):
     output_names = ["output0"]
     inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device)
 
-    torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False,
+    torch_out = torch.onnx.export(net, inputs, output_onnx, export_params=True, verbose=False,
                                    input_names=input_names, output_names=output_names)
 
 
diff --git a/curve/1.jpg b/retinaface/curve/1.jpg
similarity index 100%
rename from curve/1.jpg
rename to retinaface/curve/1.jpg
diff --git a/curve/FDDB.png b/retinaface/curve/FDDB.png
similarity index 100%
rename from curve/FDDB.png
rename to retinaface/curve/FDDB.png
diff --git a/curve/Widerface.jpg b/retinaface/curve/Widerface.jpg
similarity index 100%
rename from curve/Widerface.jpg
rename to retinaface/curve/Widerface.jpg
diff --git a/curve/test.jpg b/retinaface/curve/test.jpg
similarity index 100%
rename from curve/test.jpg
rename to retinaface/curve/test.jpg
diff --git a/data/FDDB/img_list.txt b/retinaface/data/FDDB/img_list.txt
similarity index 100%
rename from data/FDDB/img_list.txt
rename to retinaface/data/FDDB/img_list.txt
diff --git a/data/__init__.py b/retinaface/data/__init__.py
similarity index 100%
rename from data/__init__.py
rename to retinaface/data/__init__.py
diff --git a/data/config.py b/retinaface/data/config.py
similarity index 100%
rename from data/config.py
rename to retinaface/data/config.py
diff --git a/data/data_augment.py b/retinaface/data/data_augment.py
similarity index 99%
rename from data/data_augment.py
rename to retinaface/data/data_augment.py
index c1b52ae19..8e0622150 100644
--- a/data/data_augment.py
+++ b/retinaface/data/data_augment.py
@@ -1,7 +1,7 @@
 import cv2
 import numpy as np
 import random
-from utils.box_utils import matrix_iof
+from retinaface.utils.box_utils import matrix_iof
 
 
 def _crop(image, boxes, labels, landm, img_dim):
diff --git a/data/wider_face.py b/retinaface/data/wider_face.py
similarity index 98%
rename from data/wider_face.py
rename to retinaface/data/wider_face.py
index 22f56efdc..73ccfe8a5 100644
--- a/data/wider_face.py
+++ b/retinaface/data/wider_face.py
@@ -1,6 +1,3 @@
-import os
-import os.path
-import sys
 import torch
 import torch.utils.data as data
 import cv2
diff --git a/detect.py b/retinaface/detect.py
similarity index 54%
rename from detect.py
rename to retinaface/detect.py
index 2e822400e..a5238129d 100755
--- a/detect.py
+++ b/retinaface/detect.py
@@ -1,30 +1,18 @@
 from __future__ import print_function
-import os
+
 import argparse
 import torch
 import torch.backends.cudnn as cudnn
 import numpy as np
-from data import cfg_mnet, cfg_re50
-from layers.functions.prior_box import PriorBox
-from utils.nms.py_cpu_nms import py_cpu_nms
-import cv2
-from models.retinaface import RetinaFace
-from utils.box_utils import decode, decode_landm
 import time
+import cv2
 
-parser = argparse.ArgumentParser(description='Retinaface')
+from retinaface.data import cfg_mnet, cfg_re50
+from retinaface.layers.functions.prior_box import PriorBox
+from retinaface.utils.nms.py_cpu_nms import py_cpu_nms
+from retinaface.models.retinaface import RetinaFace
+from retinaface.utils.box_utils import decode, decode_landm
 
-parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth',
-                    type=str, help='Trained state_dict file path to open')
-parser.add_argument('--network', default='resnet50', help='Backbone network mobile0.25 or resnet50')
-parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
-parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold')
-parser.add_argument('--top_k', default=5000, type=int, help='top_k')
-parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold')
-parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k')
-parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results')
-parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold')
-args = parser.parse_args()
 
 
 def check_keys(model, pretrained_state_dict):
@@ -33,58 +21,114 @@ def check_keys(model, pretrained_state_dict):
     used_pretrained_keys = model_keys & ckpt_keys
     unused_pretrained_keys = ckpt_keys - model_keys
     missing_keys = model_keys - ckpt_keys
-    print('Missing keys:{}'.format(len(missing_keys)))
-    print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
-    print('Used keys:{}'.format(len(used_pretrained_keys)))
     assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
     return True
 
 
 def remove_prefix(state_dict, prefix):
     ''' Old style model is stored with all names of parameters sharing common prefix 'module.' '''
-    print('remove prefix \'{}\''.format(prefix))
     f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
     return {f(key): value for key, value in state_dict.items()}
 
-
-def load_model(model, pretrained_path, load_to_cpu):
+def load_model(model, pretrained_path, device, url_file_name=None):
     print('Loading pretrained model from {}'.format(pretrained_path))
-    if load_to_cpu:
-        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
-    else:
+
+    url_flag = False
+    if pretrained_path[:8] == 'https://':
+        url_flag = True
+
+    if 'cuda' in device or device=='gpu':
         device = torch.cuda.current_device()
-        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
+        if url_flag:
+            pretrained_dict = torch.hub.load_state_dict_from_url(pretrained_path,
+                                                                map_location=lambda storage, loc: storage.cuda(device),
+                                                                file_name=url_file_name)
+        else:
+            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
+    elif device=='mps':
+        device = torch.device('mps')
+        if url_flag:
+            pretrained_dict = torch.hub.load_state_dict_from_url(pretrained_path,
+                                                                map_location=device,
+                                                                file_name=url_file_name)
+        else:
+            pretrained_dict = torch.load(pretrained_path, map_location=device)
+    else:
+        if url_flag:
+            pretrained_dict = torch.hub.load_state_dict_from_url(pretrained_path,
+                                                                map_location=lambda storage, loc: storage,
+                                                                file_name=url_file_name)
+        else:
+            pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
+    
     if "state_dict" in pretrained_dict.keys():
         pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
     else:
         pretrained_dict = remove_prefix(pretrained_dict, 'module.')
+
     check_keys(model, pretrained_dict)
     model.load_state_dict(pretrained_dict, strict=False)
+    
     return model
 
 
 if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='Retinaface')
+
+    parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth',
+                        type=str, help='Trained state_dict file path to open')
+    parser.add_argument('--network', default='resnet50', help='Backbone network mobile0.25 or resnet50')
+    parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference')
+    parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold')
+    parser.add_argument('--top_k', default=5000, type=int, help='top_k')
+    parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold')
+    parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k')
+    parser.add_argument('-s', '--save_image', action="store_true", default=False, help='show detection results')
+    parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold')
+    args = parser.parse_args()
+
     torch.set_grad_enabled(False)
     cfg = None
     if args.network == "mobile0.25":
         cfg = cfg_mnet
     elif args.network == "resnet50":
         cfg = cfg_re50
+
+    if args.cpu:
+        print('--> load model and config files to CPU')
+        device = "cpu"
+    elif torch.cuda.is_available():
+        print('--> load model and config files to GPU')
+        device = "cuda"
+    elif torch.backends.mps.is_available():  # documented MPS availability check
+        print('--> load model and config files to MPS')
+        device = "mps"
+    else:
+        raise RuntimeError('No GPU or MPS found. Please use "--cpu"')
+
     # net and model
     net = RetinaFace(cfg=cfg, phase = 'test')
-    net = load_model(net, args.trained_model, args.cpu)
+    net = load_model(net, args.trained_model, device=device)
     net.eval()
-    print('Finished loading model!')
-    print(net)
-    cudnn.benchmark = True
-    device = torch.device("cpu" if args.cpu else "cuda")
+    print('--> Finished loading model!')
+    # print(net)
+
+    if device == "cuda" and torch.cuda.is_available():  # call it: the bare function object is always truthy
+        cudnn.benchmark = True
+
+    # device = torch.device("cpu" if args.cpu else "cuda")
+    device = torch.device(device)
     net = net.to(device)
 
     resize = 1
 
+    total_time = 0
+    n_loops = 100
+
     # testing begin
-    for i in range(100):
-        image_path = "./curve/test.jpg"
+    for i in range(n_loops):
+        image_path = "curve/test.jpg"
         img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
 
         img = np.float32(img_raw)
@@ -99,7 +143,11 @@ def load_model(model, pretrained_path, load_to_cpu):
 
         tic = time.time()
         loc, conf, landms = net(img)  # forward pass
-        print('net forward time: {:.4f}'.format(time.time() - tic))
+
+        time_det = time.time() - tic
+        # print('net forward time: {:.4f}'.format(time_det))
+
+        total_time += time_det
 
         priorbox = PriorBox(cfg, image_size=(im_height, im_width))
         priors = priorbox.forward()
@@ -166,3 +214,5 @@ def load_model(model, pretrained_path, load_to_cpu):
             name = "test.jpg"
             cv2.imwrite(name, img_raw)
 
+    avg_time = total_time / n_loops
+    print(f'--> Average time: {avg_time:.4f} for {n_loops} loops')
\ No newline at end of file
diff --git a/retinaface/inference_framework.py b/retinaface/inference_framework.py
new file mode 100644
index 000000000..54806dba7
--- /dev/null
+++ b/retinaface/inference_framework.py
@@ -0,0 +1,173 @@
+import torch
+import numpy as np
+
+# NOTE(review): this switches autograd off for the whole process at import
+# time, not just for the detector; kept as-is because callers may rely on it.
+torch.set_grad_enabled(False)
+
+# My libs
+import retinaface.models.retinaface as rf_model
+import retinaface.detect as rf_detect
+import retinaface.data.config as rf_config
+import retinaface.layers.functions.prior_box as rf_priors
+import retinaface.utils.box_utils as rf_ubox
+import retinaface.utils.nms.py_cpu_nms as rf_nms
+
+
+# Default configs
+cfg_postreat_dft = {'resize': 1.,
+                    'score_thr': 0.75,
+                    'top_k': 5000,
+                    'nms_thr': 0.4,
+                    'keep_top_k': 50}
+
+
+class RetinaFaceDetector:
+    """RetinaFace face detector: model loading, pre- and post-processing.
+
+    Args:
+        model: Backbone name, ``'mobile0.25'`` or ``'resnet50'``.
+        device: Device name (``'cpu'``, ``'cuda'``, ``'cuda:N'``, ``'mps'``;
+            ``'gpu'`` is an alias of ``'cuda'``) or a ``torch.device``.
+        extra_features: Outputs computed besides ``'bbox'``; only
+            ``'landmarks'`` is recognised.
+        cfg_postreat: Post-processing options; keys that are left out fall
+            back to ``cfg_postreat_dft``.
+
+    Raises:
+        ValueError: If ``model`` is not a known backbone name.
+    """
+
+    def __init__(self,
+                 model='mobile0.25',
+                 device='cuda',
+                 extra_features=('landmarks',),
+                 cfg_postreat=cfg_postreat_dft):
+
+        # Set model configuration
+        if model == "mobile0.25":
+            base_cfg = rf_config.cfg_mnet
+            trained_model = "https://drive.google.com/uc?export=download&confirm=yes&id=1nxhtpdVLbmheUTwyIb733MrL53X4SQgQ"
+            url_model_name = "retinaface_mobile025.pth"
+        elif model == "resnet50":
+            base_cfg = rf_config.cfg_re50
+            trained_model = "https://drive.google.com/uc?export=download&confirm=yes&id=1a9SqFRkeTuJUwqerElCWJFrotZuDGVtT"
+            url_model_name = "retinaface_resnet50.pth"
+        else:
+            raise ValueError('Model configuration not found')
+
+        # Work on a private copy: the config dicts are module-level objects
+        # shared by every detector, so writing 'postreat' into them would
+        # leak one instance's settings into all the others.
+        cfg = dict(base_cfg)
+        cfg['postreat'] = {**cfg_postreat_dft, **(cfg_postreat or {})}
+
+        # load_model() dispatches on a device *string* and accepts 'gpu',
+        # which torch.device() rejects, so normalise the name once here.
+        device_name = str(device)
+        if device_name == 'gpu':
+            device_name = 'cuda'
+
+        # Load net and model
+        net = rf_model.RetinaFace(cfg=cfg, phase='test')
+        net = rf_detect.load_model(net, trained_model, device=device_name, url_file_name=url_model_name)
+        net.eval()
+        print('RetinaFace loaded!')
+
+        # Define detector variables
+        self.device = torch.device(device_name)
+        self.net = net.to(self.device)
+        self.cfg = cfg
+        self.features = ['bbox'] + list(extra_features)
+        self.scale = {}
+        self.prior_data = None
+        self._input_shape = None  # (height, width) the priors were built for
+
+    def set_input_shape(self, im_height, im_width):
+        """Precompute the scale tensors and prior boxes for one input size.
+
+        Args:
+            im_height: Height in pixels of the images given to ``inference``.
+            im_width: Width in pixels of the images given to ``inference``.
+        """
+        # Scales
+        self.scale['bbox'] = torch.tensor([im_width, im_height] * 2,
+                                          dtype=torch.float32, device=self.device)
+
+        if 'landmarks' in self.features:
+            self.scale['landmarks'] = torch.tensor([im_width, im_height] * 5,
+                                                   dtype=torch.float32, device=self.device)
+
+        # Load priors
+        priorbox = rf_priors.PriorBox(self.cfg, image_size=(im_height, im_width))
+        self.prior_data = priorbox.forward().to(self.device)
+        self._input_shape = (im_height, im_width)
+
+    def inference(self, image):
+        """Detect faces in one image.
+
+        Args:
+            image: ``(H, W, 3)`` array; presumably BGR as loaded by OpenCV,
+                given the per-channel mean that is subtracted -- TODO confirm.
+
+        Returns:
+            Dict with ``'bbox'`` (``(N, 5)`` float32 rows: box followed by
+            score) and, when requested, ``'landmarks'`` (``(N, 5, 2)``).
+        """
+        shape = tuple(np.shape(image)[:2])
+        if shape != self._input_shape:
+            # Priors and scales depend on the input size: (re)build them
+            # rather than failing on a missing or stale configuration.
+            self.set_input_shape(*shape)
+
+        img = self._pretreatment(image)
+        loc, conf, lnd = self._net_forward(img)
+        return self._postreatment(loc, conf, lnd)
+
+    def _pretreatment(self, img_raw):
+        """Turn an ``(H, W, 3)`` image into a mean-subtracted NCHW tensor."""
+        # np.array always copies, so the in-place subtraction below never
+        # modifies the caller's image, whatever its dtype.
+        img = np.array(img_raw, dtype=np.float32)
+        img -= (104, 117, 123)
+        img = img.transpose(2, 0, 1)
+        return torch.from_numpy(img).unsqueeze(0).to(self.device)
+
+    def _net_forward(self, img):
+        """Run the network and return ``(loc, conf, landms)``."""
+        # Explicit no_grad so inference stays gradient-free even if autograd
+        # was re-enabled elsewhere after this module was imported.
+        with torch.no_grad():
+            loc, conf, landms = self.net(img)
+        return loc, conf, landms
+
+    def _postreatment(self, loc, conf, landms):
+        """Decode raw outputs into score-filtered, NMS-pruned detections."""
+        cfg_post = self.cfg['postreat']
+        variance = self.cfg['variance']
+
+        boxes = rf_ubox.decode(loc.data.squeeze(0), self.prior_data, variance)
+        boxes = (boxes * self.scale['bbox'] / cfg_post['resize']).cpu().numpy()
+        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
+
+        # Ignore low scores, then keep top-K before NMS
+        inds = np.where(scores > cfg_post['score_thr'])[0]
+        order = scores[inds].argsort()[::-1][:cfg_post['top_k']]
+        selected = inds[order]
+
+        # NMS
+        dets = np.hstack((boxes[selected], scores[selected][:, np.newaxis]))
+        dets = dets.astype(np.float32, copy=False)
+        keep = rf_nms.py_cpu_nms(dets, cfg_post['nms_thr'])
+
+        # keep top-K faster NMS
+        keep = keep[:cfg_post['keep_top_k']]
+        features = {'bbox': dets[keep, :]}
+
+        if 'landmarks' in self.features:
+            # Decoded only on request: scale['landmarks'] is not set otherwise.
+            landms = rf_ubox.decode_landm(landms.data.squeeze(0), self.prior_data, variance)
+            landms = (landms * self.scale['landmarks'] / cfg_post['resize']).cpu().numpy()
+            features['landmarks'] = landms[selected][keep].reshape((-1, 5, 2))
+
+        return features
diff --git a/layers/__init__.py b/retinaface/layers/__init__.py
similarity index 100%
rename from layers/__init__.py
rename to retinaface/layers/__init__.py
diff --git a/models/__init__.py b/retinaface/layers/functions/__init__.py
similarity index 100%
rename from models/__init__.py
rename to retinaface/layers/functions/__init__.py
diff --git a/layers/functions/prior_box.py b/retinaface/layers/functions/prior_box.py
similarity index 98%
rename from layers/functions/prior_box.py
rename to retinaface/layers/functions/prior_box.py
index 80c7f8583..a3a9723ab 100644
--- a/layers/functions/prior_box.py
+++ b/retinaface/layers/functions/prior_box.py
@@ -1,6 +1,5 @@
 import torch
 from itertools import product as product
-import numpy as np
 from math import ceil
 
 
diff --git a/layers/modules/__init__.py b/retinaface/layers/modules/__init__.py
similarity index 100%
rename from layers/modules/__init__.py
rename to retinaface/layers/modules/__init__.py
diff --git a/layers/modules/multibox_loss.py b/retinaface/layers/modules/multibox_loss.py
similarity index 97%
rename from layers/modules/multibox_loss.py
rename to retinaface/layers/modules/multibox_loss.py
index 096620480..73158af70 100644
--- a/layers/modules/multibox_loss.py
+++ b/retinaface/layers/modules/multibox_loss.py
@@ -1,9 +1,8 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.autograd import Variable
-from utils.box_utils import match, log_sum_exp
-from data import cfg_mnet
+from retinaface.utils.box_utils import match, log_sum_exp
+from retinaface.data import cfg_mnet
 GPU = cfg_mnet['gpu_train']
 
 class MultiBoxLoss(nn.Module):
diff --git a/utils/__init__.py b/retinaface/models/__init__.py
similarity index 100%
rename from utils/__init__.py
rename to retinaface/models/__init__.py
diff --git a/models/net.py b/retinaface/models/net.py
similarity index 97%
rename from models/net.py
rename to retinaface/models/net.py
index beb6040b2..e7a13d85d 100644
--- a/models/net.py
+++ b/retinaface/models/net.py
@@ -1,10 +1,7 @@
-import time
 import torch
 import torch.nn as nn
-import torchvision.models._utils as _utils
-import torchvision.models as models
 import torch.nn.functional as F
-from torch.autograd import Variable
+
 
 def conv_bn(inp, oup, stride = 1, leaky = 0):
     return nn.Sequential(
diff --git a/models/retinaface.py b/retinaface/models/retinaface.py
similarity index 85%
rename from models/retinaface.py
rename to retinaface/models/retinaface.py
index d530bd839..eb7544240 100644
--- a/models/retinaface.py
+++ b/retinaface/models/retinaface.py
@@ -1,13 +1,11 @@
 import torch
 import torch.nn as nn
-import torchvision.models.detection.backbone_utils as backbone_utils
 import torchvision.models._utils as _utils
 import torch.nn.functional as F
-from collections import OrderedDict
 
-from models.net import MobileNetV1 as MobileNetV1
-from models.net import FPN as FPN
-from models.net import SSH as SSH
+from retinaface.models.net import MobileNetV1 as MobileNetV1
+from retinaface.models.net import FPN as FPN
+from retinaface.models.net import SSH as SSH
 
 
 
@@ -56,15 +54,6 @@ def __init__(self, cfg = None, phase = 'train'):
         backbone = None
         if cfg['name'] == 'mobilenet0.25':
             backbone = MobileNetV1()
-            if cfg['pretrain']:
-                checkpoint = torch.load("./weights/mobilenetV1X0.25_pretrain.tar", map_location=torch.device('cpu'))
-                from collections import OrderedDict
-                new_state_dict = OrderedDict()
-                for k, v in checkpoint['state_dict'].items():
-                    name = k[7:]  # remove module.
-                    new_state_dict[name] = v
-                # load params
-                backbone.load_state_dict(new_state_dict)
         elif cfg['name'] == 'Resnet50':
             import torchvision.models as models
             backbone = models.resnet50(pretrained=cfg['pretrain'])
diff --git a/test_fddb.py b/retinaface/test_fddb.py
similarity index 89%
rename from test_fddb.py
rename to retinaface/test_fddb.py
index 98fef43b8..d466ea097 100755
--- a/test_fddb.py
+++ b/retinaface/test_fddb.py
@@ -49,13 +49,17 @@ def remove_prefix(state_dict, prefix):
     return {f(key): value for key, value in state_dict.items()}
 
 
-def load_model(model, pretrained_path, load_to_cpu):
+def load_model(model, pretrained_path, device):
     print('Loading pretrained model from {}'.format(pretrained_path))
-    if load_to_cpu:
-        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
-    else:
+    if 'cuda' in device or device=='gpu':
         device = torch.cuda.current_device()
         pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
+    elif device=='mps':
+        device = torch.device('mps')
+        pretrained_dict = torch.load(pretrained_path, map_location=device)
+    else:
+        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
+
     if "state_dict" in pretrained_dict.keys():
         pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
     else:
@@ -72,16 +76,32 @@ def load_model(model, pretrained_path, load_to_cpu):
         cfg = cfg_mnet
     elif args.network == "resnet50":
         cfg = cfg_re50
+    
+    if args.cpu:
+        print('--> load model and config files to CPU')
+        device = "cpu"
+    elif torch.cuda.is_available():
+        print('--> load model and config files to GPU')
+        device = "cuda"
+    elif torch.backends.mps.is_available():  # documented MPS availability check
+        print('--> load model and config files to MPS')
+        device = "mps"
+    else:
+        raise RuntimeError('No GPU or MPS found. Please use "--cpu"')
+
     # net and model
     net = RetinaFace(cfg=cfg, phase = 'test')
-    net = load_model(net, args.trained_model, args.cpu)
+    net = load_model(net, args.trained_model, device=device)
     net.eval()
-    print('Finished loading model!')
-    print(net)
-    cudnn.benchmark = True
-    device = torch.device("cpu" if args.cpu else "cuda")
-    net = net.to(device)
+    print('--> Finished loading model!')
+    # print(net)
 
+    if device == "cuda" and torch.cuda.is_available():  # call it: the bare function object is always truthy
+        cudnn.benchmark = True
+
+    # device = torch.device("cpu" if args.cpu else "cuda")
+    device = torch.device(device)
+    net = net.to(device)
 
     # save file
     if not os.path.exists(args.save_folder):
diff --git a/test_widerface.py b/retinaface/test_widerface.py
similarity index 90%
rename from test_widerface.py
rename to retinaface/test_widerface.py
index baf7c42cd..df7323d39 100755
--- a/test_widerface.py
+++ b/retinaface/test_widerface.py
@@ -50,13 +50,17 @@ def remove_prefix(state_dict, prefix):
     return {f(key): value for key, value in state_dict.items()}
 
 
-def load_model(model, pretrained_path, load_to_cpu):
+def load_model(model, pretrained_path, device):
     print('Loading pretrained model from {}'.format(pretrained_path))
-    if load_to_cpu:
-        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
-    else:
+    if 'cuda' in device or device=='gpu':
         device = torch.cuda.current_device()
         pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device))
+    elif device=='mps':
+        device = torch.device('mps')
+        pretrained_dict = torch.load(pretrained_path, map_location=device)
+    else:
+        pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage)
+
     if "state_dict" in pretrained_dict.keys():
         pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
     else:
@@ -74,14 +78,31 @@ def load_model(model, pretrained_path, load_to_cpu):
         cfg = cfg_mnet
     elif args.network == "resnet50":
         cfg = cfg_re50
+
+    if args.cpu:
+        print('--> load model and config files to CPU')
+        device = "cpu"
+    elif torch.cuda.is_available():
+        print('--> load model and config files to GPU')
+        device = "cuda"
+    elif torch.backends.mps.is_available():  # documented MPS availability check
+        print('--> load model and config files to MPS')
+        device = "mps"
+    else:
+        raise RuntimeError('No GPU or MPS found. Please use "--cpu"')
+
     # net and model
     net = RetinaFace(cfg=cfg, phase = 'test')
-    net = load_model(net, args.trained_model, args.cpu)
+    net = load_model(net, args.trained_model, device=device)
     net.eval()
-    print('Finished loading model!')
-    print(net)
-    cudnn.benchmark = True
-    device = torch.device("cpu" if args.cpu else "cuda")
+    print('--> Finished loading model!')
+    # print(net)
+
+    if device == "cuda" and torch.cuda.is_available():  # call it: the bare function object is always truthy
+        cudnn.benchmark = True
+
+    # device = torch.device("cpu" if args.cpu else "cuda")
+    device = torch.device(device)
     net = net.to(device)
 
     # testing dataset
diff --git a/train.py b/retinaface/train.py
similarity index 100%
rename from train.py
rename to retinaface/train.py
diff --git a/utils/nms/__init__.py b/retinaface/utils/__init__.py
similarity index 100%
rename from utils/nms/__init__.py
rename to retinaface/utils/__init__.py
diff --git a/utils/box_utils.py b/retinaface/utils/box_utils.py
similarity index 100%
rename from utils/box_utils.py
rename to retinaface/utils/box_utils.py
diff --git a/retinaface/utils/nms/__init__.py b/retinaface/utils/nms/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/utils/nms/py_cpu_nms.py b/retinaface/utils/nms/py_cpu_nms.py
similarity index 100%
rename from utils/nms/py_cpu_nms.py
rename to retinaface/utils/nms/py_cpu_nms.py
diff --git a/utils/timer.py b/retinaface/utils/timer.py
similarity index 100%
rename from utils/timer.py
rename to retinaface/utils/timer.py
diff --git a/widerface_evaluate/README.md b/retinaface/widerface_evaluate/README.md
similarity index 100%
rename from widerface_evaluate/README.md
rename to retinaface/widerface_evaluate/README.md
diff --git a/retinaface/widerface_evaluate/__init__.py b/retinaface/widerface_evaluate/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/widerface_evaluate/box_overlaps.pyx b/retinaface/widerface_evaluate/box_overlaps.pyx
similarity index 100%
rename from widerface_evaluate/box_overlaps.pyx
rename to retinaface/widerface_evaluate/box_overlaps.pyx
diff --git a/widerface_evaluate/evaluation.py b/retinaface/widerface_evaluate/evaluation.py
similarity index 99%
rename from widerface_evaluate/evaluation.py
rename to retinaface/widerface_evaluate/evaluation.py
index a9b17448a..95f8a9855 100644
--- a/widerface_evaluate/evaluation.py
+++ b/retinaface/widerface_evaluate/evaluation.py
@@ -12,7 +12,6 @@
 import numpy as np
 from scipy.io import loadmat
 from bbox import bbox_overlaps
-from IPython import embed
 
 
 def get_gt_boxes(gt_dir):
diff --git a/widerface_evaluate/ground_truth/wider_easy_val.mat b/retinaface/widerface_evaluate/ground_truth/wider_easy_val.mat
similarity index 100%
rename from widerface_evaluate/ground_truth/wider_easy_val.mat
rename to retinaface/widerface_evaluate/ground_truth/wider_easy_val.mat
diff --git a/widerface_evaluate/ground_truth/wider_face_val.mat b/retinaface/widerface_evaluate/ground_truth/wider_face_val.mat
similarity index 100%
rename from widerface_evaluate/ground_truth/wider_face_val.mat
rename to retinaface/widerface_evaluate/ground_truth/wider_face_val.mat
diff --git a/widerface_evaluate/ground_truth/wider_hard_val.mat b/retinaface/widerface_evaluate/ground_truth/wider_hard_val.mat
similarity index 100%
rename from widerface_evaluate/ground_truth/wider_hard_val.mat
rename to retinaface/widerface_evaluate/ground_truth/wider_hard_val.mat
diff --git a/widerface_evaluate/ground_truth/wider_medium_val.mat b/retinaface/widerface_evaluate/ground_truth/wider_medium_val.mat
similarity index 100%
rename from widerface_evaluate/ground_truth/wider_medium_val.mat
rename to retinaface/widerface_evaluate/ground_truth/wider_medium_val.mat
diff --git a/widerface_evaluate/setup.py b/retinaface/widerface_evaluate/setup.py
similarity index 100%
rename from widerface_evaluate/setup.py
rename to retinaface/widerface_evaluate/setup.py
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 000000000..f257c1996
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,39 @@
+# Configuration of the Python project
+
+# Configure setup.py
+[metadata]
+name = retinaface-py
+version = 0.0.1
+author = Jiankang Deng
+maintainer = Andres Prados Torreblanca
+maintainer_email = andresprator@gmail.com
+description = RetinaFace: Single-stage Dense Face Localisation in the Wild
+long_description = file: README.md
+long_description_content_type= text/markdown
+license = MIT License
+url = https://github.com/andresprados/Pytorch_Retinaface
+classifiers =
+        License :: OSI Approved :: MIT License
+        Intended Audience :: Developers
+        Intended Audience :: Science/Research
+        Operating System :: OS Independent
+        Programming Language :: Python
+        Programming Language :: Python :: 3
+        Topic :: Software Development :: Libraries
+        Topic :: Software Development :: Libraries :: Python Modules
+
+[options]
+packages = find:
+include_package_data = True
+python_requires = >= 3.6
+install_requires =
+    numpy
+    torch>=1.1.0
+    torchvision>=0.3.0
+    opencv-python
+
+[options.packages.find]
+exclude =
+    retinaface.curve*
+    retinaface.widerface_evaluate*
+