nwesem
diff --git a/‎.vscode/settings.json
+6 b/‎.vscode/settings.json
+6
diff --git a/‎CMakeLists.txt
+11-27 b/‎CMakeLists.txt
+11-27
diff --git a/‎README.md
+26-51 b/‎README.md
+26-51
diff --git a/‎src/baseEngine.cpp
+4-3 b/‎src/baseEngine.cpp
+4-3
diff --git a/‎src/common.h
+1-1 b/‎src/common.h
+1-1
diff --git a/‎src/faceNet.cpp
+26-9 b/‎src/faceNet.cpp
+26-9
diff --git a/‎src/faceNet.h
-1 b/‎src/faceNet.h
-1
diff --git a/‎src/main.cpp
+8-2 b/‎src/main.cpp
+8-2
@@ -0,0 +1,6 @@
+{
+    "files.associations": {
+        "cmath": "cpp",
+        "chrono": "cpp"
+    }
+}
@@ -1,34 +1,23 @@
 cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
-set(PROJECT_NAME mtcnn_facenet_cpp_tensorRT)
+set(PROJECT_NAME face_recogition_tensorRT)
 project(${PROJECT_NAME})# LANGUAGES CXX CUDA)
 
 set (CMAKE_CXX_STANDARD 11)
 
 # OpenCV
 find_package(OpenCV REQUIRED)
 
+# setup CUDA
 find_package(CUDA)
 message("-- CUDA version: ${CUDA_VERSION}")
 
 set(
 	CUDA_NVCC_FLAGS
 	${CUDA_NVCC_FLAGS}; 
     -O3 
-	-gencode arch=compute_53,code=sm_53
-	-gencode arch=compute_62,code=sm_62
+  -gencode arch=compute_87,code=sm_87
 )
 
-if(CUDA_VERSION_MAJOR GREATER 9)
-	message("-- CUDA ${CUDA_VERSION_MAJOR} detected, enabling SM_72")
-
-	set(
-		CUDA_NVCC_FLAGS
-		${CUDA_NVCC_FLAGS}; 
-		-gencode arch=compute_72,code=sm_72
-	)
-
-endif()
-
 # tensorRT
 message("CUDA_TOOLKIT_ROOT_DIR = ${CUDA_TOOLKIT_ROOT_DIR}")
 
@@ -38,14 +27,20 @@ find_path(TENSORRT_INCLUDE_DIR NvInfer.h
 find_path(TENSORRT_INCLUDE_DIR NvInferPlugin.h
   HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
   PATH_SUFFIXES include)
+find_path(TENSORRT_INCLUDE_DIR NvCaffeParser.h
+  HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
+  PATH_SUFFIXES include)
 MESSAGE(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
 find_library(TENSORRT_LIBRARY_INFER nvinfer
   HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
   PATH_SUFFIXES lib lib64 lib/x64 lib/aarch64-linux-gnu)
 find_library(TENSORRT_LIBRARY_INFER_PLUGIN nvinfer_plugin
   HINTS  ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
   PATH_SUFFIXES lib lib64 lib/x64 lib/aarch64-linux-gnu)
-  find_library(TENSORRT_LIBRARY_PARSER nvparsers
+find_library(TENSORRT_LIBRARY_CAFFE_PARSER nvcaffe_parser
+  HINTS  ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
+  PATH_SUFFIXES lib lib64 lib/x64 lib/aarch64-linux-gnu)
+find_library(TENSORRT_LIBRARY_PARSER nvparsers
   HINTS  ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
   PATH_SUFFIXES lib lib64 lib/x64 lib/aarch64-linux-gnu)
 set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_INFER_PLUGIN} ${TENSORRT_LIBRARY_PARSER})
@@ -58,22 +53,11 @@ if(NOT TENSORRT_FOUND)
     "Cannot find TensorRT library.")
 endif()
 
-# l2norm_helper plugin
-add_subdirectory(trt_l2norm_helper)
-include_directories(
-  trt_l2norm_helper
-  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-  ${TENSORRT_INCLUDE_DIR}
-)
-
 message("TENSORRT_LIBRARY = ${TENSORRT_LIBRARY}")
 
 AUX_SOURCE_DIRECTORY(./src DIR_SRCS)
 message("DIR_SRCS = ${DIR_SRCS}")
 cuda_add_executable(${PROJECT_NAME} ${DIR_SRCS})
 
-target_link_libraries(${PROJECT_NAME}
-  trt_l2norm_helper
-  ${TENSORRT_LIBRARY}
-)
+target_link_libraries(${PROJECT_NAME} ${TENSORRT_LIBRARY})
 target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
@@ -1,41 +1,29 @@
-# Face Recognition for NVIDIA Jetson (Nano) using TensorRT
-Face recognition with [Google FaceNet](https://arxiv.org/abs/1503.03832)
-architecture and retrained model by David Sandberg
-([github.com/davidsandberg/facenet](https://github.com/davidsandberg/facenet))
-using TensorRT and OpenCV. <br> This project is based on the
-implementation of l2norm helper functions which are needed in the output
-layer of the FaceNet model. Link to the repo:
-[github.com/r7vme/tensorrt_l2norm_helper](https://github.com/r7vme/tensorrt_l2norm_helper). <br>
-Moreover, this project uses an adapted version of [PKUZHOU's implementation](https://github.com/PKUZHOU/MTCNN_FaceDetection_TensorRT)
+# Face Recognition for NVIDIA Jetson AGX Orin using TensorRT
+- This project is based on the implementation of this repo:
+[Face Recognition for NVIDIA Jetson (Nano) using TensorRT](https://github.com/nwesem/mtcnn_facenet_cpp_tensorRT). The original author is no longer updating that project, and much of its content does not work with newer JetPack versions or newer Jetson devices, so I have slightly modified the original code to make it work for face recognition on the Jetson AGX Orin.
+- Face recognition with [Google FaceNet](https://arxiv.org/abs/1503.03832) architecture and retrained model by David Sandberg ([github.com/davidsandberg/facenet](https://github.com/davidsandberg/facenet)) using TensorRT and OpenCV.
+- Moreover, this project uses an adapted version of [PKUZHOU's implementation](https://github.com/PKUZHOU/MTCNN_FaceDetection_TensorRT)
 of the mtCNN for face detection. More info below.
 
 ## Hardware
-* NVIDIA Jetson Nano
-* Raspberry Pi v2 camera 
+- Nvidia Jetson AGX Orin DVK
+- Logitech C922 Pro HD Stream Webcam
 
-If you want to use a USB camera instead of Raspi Camera set the boolean _isCSICam_ to false in [main.cpp](./src/main.cpp).
+If you want to use a CSI camera instead of a USB camera, set the boolean _isCSICam_ to true in [main.cpp](./src/main.cpp).
 
 
 ## Dependencies
-cuda 10.2 + cudnn 8.0 <br> TensorRT 7.x <br> OpenCV 4.1.1 <br>
-TensorFlow r1.14 (for Python to convert model from .pb to .uff)
+- JetPack 5.1
+- CUDA 11.4.19 + cuDNN 8.6.0
+- TensorRT 8.5.2
+- OpenCV 4.5.4
+- Tensorflow 2.11
 
-## Update
-This master branch now uses Jetpack 4.4, so dependencies have slightly changed and tensorflow is not preinstalled anymore. So there is an extra step that takes a few minutes more than before. <br>
-In case you would like to use older versions of Jetpack there is a tag jp4.2.2, that can links to the older implementation.
 
 ## Installation
-#### 1. Install Cuda, CudNN, TensorRT, and TensorFlow for Python 
-You can check [NVIDIA website](https://developer.nvidia.com/) for help.
-Installation procedures are very well documented.<br><br>**If you are
-using NVIDIA Jetson (Nano, TX1/2, Xavier) with Jetpack 4.4**, most needed packages
-should be installed if the Jetson was correctly flashed using SDK
-Manager or the SD card image, you will only need to install cmake, openblas and tensorflow:
-```bash
-sudo apt install cmake libopenblas-dev
-```
-#### 2. Install Tensorflow
-The following shows the steps to install Tensorflow for Jetpack 4.4. This was copied from the official [NVIDIA documentation](https://docs.nvidia.com/deeplearning/frameworks/install-tf-jetson-platform/index.html). I'm assuming you don't need to install it in a virtual environment. If yes, please refer to the documentation linked above. If you are not installing this on a jetson, please refer to the official tensorflow documentation.
+
+#### 1. Install Tensorflow
+The following shows the steps to install TensorFlow for JetPack 5.1. This was copied from the official [NVIDIA documentation](https://docs.nvidia.com/deeplearning/frameworks/install-tf-jetson-platform/index.html). I'm assuming you don't need to install it in a virtual environment. If you do, please refer to the documentation linked above. If you are not installing this on a Jetson, please refer to the official TensorFlow documentation.
 
 ```bash
 # Install system packages required by TensorFlow:
@@ -44,13 +32,14 @@ sudo apt install libhdf5-serial-dev hdf5-tools libhdf5-dev zlib1g-dev zip libjpe
 
 # Install and upgrade pip3
 sudo apt install python3-pip
-sudo pip3 install -U pip testresources setuptools
+sudo python3 -m pip install --upgrade pip
+sudo pip3 install -U testresources setuptools==65.5.0
 
 # Install the Python package dependencies
-sudo pip3 install -U numpy==1.16.1 future==0.18.2 mock==3.0.5 h5py==2.10.0 keras_preprocessing==1.1.1 keras_applications==1.0.8 gast==0.2.2 futures protobuf pybind11
+sudo pip3 install -U numpy==1.22 future==0.18.2 mock==3.0.5 keras_preprocessing==1.1.2 keras_applications==1.0.8 gast==0.4.0 protobuf pybind11 cython pkgconfig packaging h5py==3.6.0
 
-# Install TensorFlow using the pip3 command. This command will install the latest version of TensorFlow compatible with JetPack 4.4.
-sudo pip3 install --pre --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v44 'tensorflow<2'
+# Install TensorFlow using the pip3 command. This command installs the TensorFlow build pinned below (2.11.0+nv23.01) for JetPack 5.1.
+sudo pip3 install --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v51 tensorflow==2.11.0+nv23.01
 ```
 
 
@@ -127,7 +116,7 @@ Put images of people in the imgs folder. Please only use images that contain one
 the OpenCV GUI, press "**N**" on your keyboard to add a new face. The camera input will stop until
 you have opened your terminal and put in the name of the person you want to add.
 ```bash
-./mtcnn_facenet_cpp_tensorRT
+./face_recogition_tensorRT
 ```
 Press "**Q**" to quit and to show the stats (fps).
 
@@ -136,26 +125,12 @@ now parses and serializes the model from .uff to a runtime engine
 (.engine file). 
 
 ## Performance
-Performance on **NVIDIA Jetson Nano**
-* ~60ms +/- 20ms for face detection using mtCNN
-* ~22ms +/- 2ms per face for facenet inference
-* **Total:** ~15fps
-
-Performance on **NVIDIA Jetson AGX Xavier**:
-* ~40ms +/- 20ms for mtCNN 
-* ~9ms +/- 1ms per face for inference of facenet
-* **Total:** ~22fps
+Performance on **NVIDIA Jetson AGX Orin**
+* ~24ms for face detection using mtCNN
+* ~4ms per face for facenet inference
+* **Total:** ~30fps
 
 ## License
 Please respect all licenses of OpenCV and the data the machine learning models (mtCNN and Google FaceNet)
 were trained on.
 
-## FAQ
-Sometimes the camera driver doesn't close properly that means you will have to restart the __nvargus-daemon__:
-```bash
-sudo systemctl restart nvargus-daemon
-``` 
-
-## Info
-Niclas Wesemann <br>
-[[email protected]](mailto:[email protected]) <br>
@@ -66,9 +66,10 @@ void baseEngine::caffeToGIEModel(const std::string &deployFile,                /
     else {
         // create the builder
         IBuilder *builder = createInferBuilder(gLogger);
+        IBuilderConfig* config = builder->createBuilderConfig();
 
         // parse the caffe model to populate the network, then set the outputs
-        INetworkDefinition *network = builder->createNetwork();
+        INetworkDefinition *network = builder->createNetworkV2(0U);
         ICaffeParser *parser = createCaffeParser();
 
         const IBlobNameToTensor *blobNameToTensor = parser->parse(deployFile.c_str(),
@@ -81,8 +82,8 @@ void baseEngine::caffeToGIEModel(const std::string &deployFile,                /
 
         // Build the engine
         builder->setMaxBatchSize(maxBatchSize);
-        builder->setMaxWorkspaceSize(1 << 25);
-        ICudaEngine *engine = builder->buildCudaEngine(*network);
+        config->setMaxWorkspaceSize(1 << 25);
+        ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);
         assert(engine);
 
         context = engine->createExecutionContext();
 
@@ -31,7 +31,7 @@
 class Logger : public nvinfer1::ILogger
 {
 public:
-    void log(nvinfer1::ILogger::Severity severity, const char* msg) override
+    void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override
     {
         // suppress info-level messages
         //if (severity == Severity::kINFO) return;
 
@@ -1,4 +1,6 @@
 #include "faceNet.h"
+#include <vector>
+#include <cmath>
 
 int FaceNetClassifier::m_classCount = 0;
 
@@ -51,10 +53,11 @@ void FaceNetClassifier::createOrLoadEngine() {
     }
     else {
         IBuilder *builder = createInferBuilder(m_gLogger);
-        INetworkDefinition *network = builder->createNetwork();
+        IBuilderConfig* config = builder->createBuilderConfig();
+        INetworkDefinition *network = builder->createNetworkV2(0U);
         IUffParser *parser = createUffParser();
-        parser->registerInput("input", DimsCHW(160, 160, 3), UffInputOrder::kNHWC);
-        parser->registerOutput("embeddings");
+        parser->registerInput("input", Dims3(160, 160, 3), UffInputOrder::kNHWC);
+        parser->registerOutput("Bottleneck/BatchNorm/batchnorm/add_1");
 
         if (!parser->parse(m_uffFile.c_str(), *network, m_dtype))
         {
@@ -68,23 +71,23 @@ void FaceNetClassifier::createOrLoadEngine() {
         /* build engine */
         if (m_dtype == DataType::kHALF)
         {
-            builder->setFp16Mode(true);
+            config->setFlag(BuilderFlag::kFP16);
         }
         else if (m_dtype == DataType::kINT8) {
-            builder->setInt8Mode(true);
+            config->setFlag(BuilderFlag::kINT8);
             // ToDo
             //builder->setInt8Calibrator()
         }
         builder->setMaxBatchSize(m_batchSize);
-        builder->setMaxWorkspaceSize(1<<30);
+        config->setMaxWorkspaceSize(1<<30);
         // strict will force selected datatype, even when another was faster
         //builder->setStrictTypeConstraints(true);
         // Disable DLA, because many layers are still not supported
         // and this causes additional latency.
         //builder->allowGPUFallback(true);
         //builder->setDefaultDeviceType(DeviceType::kDLA);
         //builder->setDLACore(1);
-        m_engine = builder->buildCudaEngine(*network);
+        m_engine = builder->buildEngineWithConfig(*network, *config);
 
         /* serialize engine and write to file */
         if(m_serializeEngine) {
@@ -155,7 +158,7 @@ void FaceNetClassifier::doInference(float* inputData, float* output) {
     int size_of_single_input = 3 * 160 * 160 * sizeof(float);
     int size_of_single_output = 128 * sizeof(float);
     int inputIndex = m_engine->getBindingIndex("input");
-    int outputIndex = m_engine->getBindingIndex("embeddings");
+    int outputIndex = m_engine->getBindingIndex("Bottleneck/BatchNorm/batchnorm/add_1");
 
     void* buffers[2];
 
@@ -262,10 +265,24 @@ FaceNetClassifier::~FaceNetClassifier() {
     // std::cout << "FaceNet was destructed" << std::endl;
 }
 
+// Returns vec scaled to unit Euclidean (L2) length, as a new vector of the same size.
+// An all-zero (or empty) input has norm 0 and is returned unchanged, rather than
+// being divided by zero, which would fill the result with NaN.
+std::vector<float> l2Normalize(const std::vector<float>& vec) {
+    float norm = 0.0f;
+    for (const auto& element : vec) norm += element * element;
+    norm = std::sqrt(norm);
+    if (norm == 0.0f) return vec;  // nothing to scale
+    std::vector<float> normalizedVec(vec);
+    for (auto& element : normalizedVec) element /= norm;
+    return normalizedVec;
+}
 
 // HELPER FUNCTIONS
 // Computes the distance between two std::vectors
-float vectors_distance(const std::vector<float>& a, const std::vector<float>& b) {
+float vectors_distance(const std::vector<float>& aa, const std::vector<float>& bb) {
+    std::vector<float>	a = l2Normalize(aa);
+    std::vector<float>	b = l2Normalize(bb);
     std::vector<double>	auxiliary;
     std::transform (a.begin(), a.end(), b.begin(), std::back_inserter(auxiliary),//
                     [](float element1, float element2) {return pow((element1-element2),2);});
 
@@ -15,7 +15,6 @@
 #include <NvInfer.h>
 #include <NvUffParser.h>
 #include <NvInferPlugin.h>
-#include <l2norm_helper.h>
 #include "common.h"
 #include "pBox.h"
 
 
@@ -3,7 +3,6 @@
 #include <chrono>
 #include <NvInfer.h>
 #include <NvInferPlugin.h>
-#include <l2norm_helper.h>
 #include <opencv2/highgui.hpp>
 #include "faceNet.h"
 #include "videoStreamer.h"
@@ -35,7 +34,7 @@ int main()
     int videoFrameHeight = 480;
     int maxFacesPerScene = 5;
     float knownPersonThreshold = 1.;
-    bool isCSICam = true;
+    bool isCSICam = false;
 
     // init facenet
     FaceNetClassifier faceNet = FaceNetClassifier(gLogger, dtype, uffFile, engineFile, batchSize, serializeEngine,
@@ -69,6 +68,7 @@ int main()
     // loop over frames with inference
     auto globalTimeStart = chrono::steady_clock::now();
     while (true) {
+        auto fps_start = chrono::steady_clock::now();
         videoStreamer.getFrame(frame);
         if (frame.empty()) {
             std::cout << "Empty frame! Exiting...\n Try restarting nvargus-daemon by "
@@ -86,6 +86,12 @@ int main()
         auto endFeatM = chrono::steady_clock::now();
         faceNet.resetVariables();
 
+        auto fps_end = chrono::steady_clock::now();
+        auto milliseconds = chrono::duration_cast<chrono::milliseconds>(fps_end-fps_start).count();
+        float fps = (1000/milliseconds);
+        std::string label = cv::format("FPS: %.2f ", fps);
+        cv::putText(frame, label, cv::Point(15, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 0), 2);
+        
         cv::imshow("VideoSource", frame);
         nbFrames++;
         outputBbox.clear();
Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,7 @@`
`31`	`31`	`class Logger : public nvinfer1::ILogger`
`32`	`32`	`{`
`33`	`33`	`public:`
`34`		`- void log(nvinfer1::ILogger::Severity severity, const char* msg) override`
	`34`	`+ void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override`
`35`	`35`	`{`
`36`	`36`	`// suppress info-level messages`
`37`	`37`	`//if (severity == Severity::kINFO) return;`