Commit 53e339ae authored by Sikhin VC

initial commit

parent bc7ca466
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.
[flake8]
ignore = W503, E203, E221, C901, C408, E741, C407, B006, B007, B017, B950, C416
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9
exclude = build
per-file-ignores =
**/__init__.py:F401,F403,E402
# Auto detect text files and perform LF normalization
* text=auto
name: CI
on:
push:
branches:
- main
pull_request:
jobs:
linter:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8==5.0.4 isort==5.10.1
python -m pip install black==22.6.0
flake8 --version
- name: Lint
run: |
echo "Running isort"
isort --profile black .
echo "Running black"
black --check .
echo "Running flake8"
flake8 .
test_cpu:
runs-on: ubuntu-latest
strategy:
matrix:
torch: [1.10.1, 1.11.0, 1.12.1, 1.13.1]
include:
- torch: 1.10.1
torchvision: 0.11.2
- torch: 1.11.0
torchvision: 0.12.0
- torch: 1.12.1
torchvision: 0.13.1
- torch: 1.13.1
torchvision: 0.14.1
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install -U pip
python -m pip install ninja opencv-python-headless onnx pytest-xdist codecov
python -m pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
python -m pip install Cython termcolor numpy tensorboard pycocotools matplotlib pyaml opencv-python tqdm pytorch-lightning torchmetrics codecov flake8 pytest timm
python -m pip install -r requirements.txt
- name: Setup
run: rm -rf .eggs && python setup.py develop
- name: Run unittests and generate coverage report
run: |
coverage run --branch --source nanodet -m pytest tests/
coverage xml
coverage report -m
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
if: matrix.torch == '1.12.1'
with:
file: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
name: codecov-umbrella
fail_ci_if_error: false
# test_cuda:
# runs-on: ubuntu-latest
# env:
# CUDA: 10.2.89-1
# CUDA_SHORT: 10.2
# UBUNTU_VERSION: ubuntu1804
# strategy:
# matrix:
# torch: [1.7.0, 1.8.0, 1.9.0]
# include:
# - torch: 1.7.0
# torchvision: 0.8.1
# - torch: 1.8.0
# torchvision: 0.9.0
# - torch: 1.9.0
# torchvision: 0.10.0
# steps:
# - name: Checkout
# uses: actions/checkout@v2
# - name: Set up Python 3.6
# uses: actions/setup-python@v2
# with:
# python-version: 3.6
# - name: Install CUDA
# run: |
# export INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
# wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
# sudo dpkg -i ${INSTALLER}
# wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
# sudo apt-key add 7fa2af80.pub
# sudo apt update -qq
# sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
# sudo apt clean
# export CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
# export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
# export PATH=${CUDA_HOME}/bin:${PATH}
# - name: Install dependencies
# run: |
# python -m pip install -U pip
# python -m pip install ninja opencv-python-headless onnx pytest-xdist codecov
# python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
# python -m pip install Cython termcolor numpy tensorboard pycocotools matplotlib pyaml opencv-python tqdm pytorch-lightning torchmetrics codecov flake8 pytest
# - name: Setup
# run: |
# rm -rf .eggs
# python setup.py check -m -s
# TORCH_CUDA_ARCH_LIST=7.0 pip install .
# - name: Run unittests and generate coverage report
# run: |
# coverage run --branch --source nanodet -m pytest tests/
# coverage xml
# coverage report -m
# - name: Upload coverage to Codecov
# uses: codecov/codecov-action@v1.0.10
# if: matrix.torch == '1.9.0'
# with:
# file: ./coverage.xml
# flags: unittests
# env_vars: OS,PYTHON
# name: codecov-umbrella
# fail_ci_if_error: false
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.vscode
.idea
.DS_Store
# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
# Pytorch
*.pth
*.py~
*.sh~
[tool.isort]
profile = "black"
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: requirements-txt-fixer
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
args: ["--profile", "black"]
- repo: https://github.com/psf/black
rev: 22.6.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
# nanodet_lightweight_object_detection
# NanoDet
- NanoDet is an FCOS-style, one-stage, anchor-free object detection model that uses Generalised Focal Loss as its classification and regression loss.
- FCOS (Fully Convolutional One-Stage): FCOS is an object detection framework that performs detection in a single stage using fully convolutional networks. Unlike two-stage detectors such as Faster R-CNN, which rely on a separate region proposal network (RPN) to generate candidate regions, FCOS eliminates the need for anchors and predicts detections directly on the feature map.
- Anchor-Free Object Detection: In traditional object detection models, anchor boxes are predefined boxes of various sizes and aspect ratios that are placed on the image. The model then predicts whether each anchor box contains an object and adjusts its position and size. Anchor-free object detection models, like FCOS, do not use predefined anchor boxes. Instead, they directly predict the bounding boxes and class probabilities for objects in the image.
- Generalised Focal Loss (GFL): The Focal Loss, introduced with the RetinaNet object detector, addresses the class imbalance problem in object detection datasets by assigning higher weights to hard examples (misclassified or difficult-to-classify examples) and lower weights to easy ones. Generalised Focal Loss (GFL) extends this idea from discrete labels to continuous targets: it jointly represents classification confidence and localisation quality (Quality Focal Loss) and learns a general distribution over box offsets (Distribution Focal Loss), making it a more versatile loss for object detection. A minimal PyTorch sketch of the quality focal loss appears after this list.
- Classification Loss: In object detection, the classification loss is a term that measures the discrepancy between the predicted class probabilities and the ground truth labels for each object. It helps the model learn to accurately classify objects into different classes or categories.
- Regression Loss: The regression loss measures the difference between the predicted bounding box coordinates (e.g., coordinates for the top-left and bottom-right corners) and the ground truth bounding box coordinates for each object. It allows the model to learn to accurately localise and predict the object's position and size.
- By combining the FCOS architecture with the Generalised Focal Loss (GFL), NanoDet achieves efficient and accurate object detection, making it suitable for scenarios with limited computational resources or real-time applications.
- For more details, refer to the original repository: [NanoDet](https://github.com/RangiLyu/nanodet.git)
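The quality focal loss used as `loss_qfl` in the config files later in this document can be sketched in a few lines of PyTorch. This is an illustrative sketch only (the function name and sum reduction are chosen for the example), not NanoDet's actual implementation:
```python
import torch
import torch.nn.functional as F

def quality_focal_loss(pred_logits, quality_targets, beta=2.0):
    """Sketch of QFL: cross-entropy against a soft IoU-quality target in [0, 1],
    down-weighted for easy examples."""
    sigma = pred_logits.sigmoid()
    # Binary cross-entropy against the soft (IoU-aware) target.
    bce = F.binary_cross_entropy_with_logits(pred_logits, quality_targets, reduction="none")
    # Modulating factor |y - sigma|^beta focuses training on hard examples.
    modulating = (quality_targets - sigma).abs().pow(beta)
    return (modulating * bce).sum()
```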
****
## Installation
### Requirements
* Linux or MacOS
* CUDA >= 10.2
* Python >= 3.7
* Pytorch >= 1.10.0, <2.0.0
### Step
1. Create a conda virtual environment and then activate it.
```shell script
conda create -n nanodet python=3.8 -y
conda activate nanodet
```
2. Install PyTorch
```shell script
conda install pytorch torchvision cudatoolkit=11.1 -c pytorch -c conda-forge
```
3. Clone this repository
```shell script
git clone https://github.com/RangiLyu/nanodet.git
cd nanodet
```
4. Install requirements
```shell script
pip install -r requirements.txt
```
- Note: if installing on Windows, first edit requirements.txt and change `pycocotools` to `pycocotools-windows`.
5. Set up NanoDet
```shell script
python setup.py develop
```
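After setup, a quick sanity check that the installation worked (the `nanodet` package is installed in development mode by the previous step; the CUDA check simply reports whether a GPU build of PyTorch is active):
```shell script
python -c "import torch, nanodet; print(torch.__version__, torch.cuda.is_available())"
```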
****
# Inferencing
* Inference images
```bash
python demo/demo.py image --config CONFIG_PATH --model MODEL_PATH --path IMAGE_PATH
```
* Inference video
```bash
python demo/demo.py video --config CONFIG_PATH --model MODEL_PATH --path VIDEO_PATH
```
* Inference webcam
```bash
python demo/demo.py webcam --config CONFIG_PATH --model MODEL_PATH --camid YOUR_CAMERA_ID
```
- When running inference, the following error may occur:
- OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized.
- To work around it, add the following line to demo.py:
os.environ['KMP_DUPLICATE_LIB_OK']='True'
- If CUDA is not available on your machine, also change 'cuda' to 'cpu' wherever it appears in the code.
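For reference, a minimal version of both workarounds looks like this. Placing the environment variable near the top of `demo/demo.py`, before PyTorch and OpenCV are imported, is an assumption about where it takes effect most reliably, and the device fallback is an illustration rather than the exact line used in the script:
```python
import os

# Allow duplicate OpenMP runtimes to coexist (works around OMP Error #15).
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

import torch

# Fall back to CPU automatically when no CUDA device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
```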
****
# Steps for Colab
1. Change the runtime to GPU
2. Mount drive
```shell script
from google.colab import drive
drive.mount('/content/drive')
```
3. Clone Repository
```shell script
!git clone https://github.com/SerinSV/Nanodet.git
```
4. Change into the cloned repository, then install the requirements and set up NanoDet
```shell script
!pip install -r requirements.txt
```
```shell script
!pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchtext==0.14.1 torchaudio==0.13.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu117
```
```shell script
!python setup.py develop
```
* Inference images
```bash
python demo/demo.py image --config CONFIG_PATH --model MODEL_PATH --path IMAGE_PATH
```
* Inference video
```bash
python demo/demo.py video --config CONFIG_PATH --model MODEL_PATH --path VIDEO_PATH
```
* Inference webcam
```bash
python demo/demo.py webcam --config CONFIG_PATH --model MODEL_PATH --camid YOUR_CAMERA_ID
```
In addition, we provide a notebook [here](https://github.com/SerinSV/Nanodet/blob/main/demo/Nanodet_colab.ipynb) that demonstrates how to run NanoDet in Colab.
****
## Model Zoo
NanoDet supports a variety of backbones. Go to the [***config*** folder](config/) to see the sample training config files.
Model | Backbone |Resolution|COCO mAP| FLOPS |Params | Pre-train weight |
:--------------------:|:------------------:|:--------:|:------:|:-----:|:-----:|:-----:|
NanoDet-m | ShuffleNetV2 1.0x | 320*320 | 20.6 | 0.72G | 0.95M | [Download](https://drive.google.com/file/d/1ZkYucuLusJrCb_i63Lid0kYyyLvEiGN3/view?usp=sharing) |
NanoDet-Plus-m-320 (***NEW***) | ShuffleNetV2 1.0x | 320*320 | 27.0 | 0.9G | 1.17M | [Weight](https://drive.google.com/file/d/1Dq0cTIdJDUhQxJe45z6rWncbZmOyh1Tv/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/1YvuEhahlgqxIhJu7bsL-fhaqubKcCWQc/view?usp=sharing)
NanoDet-Plus-m-416 (***NEW***) | ShuffleNetV2 1.0x | 416*416 | 30.4 | 1.52G | 1.17M | [Weight](https://drive.google.com/file/d/1FN3WK3FLjBm7oCqiwUcD3m3MjfqxuzXe/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/1gFjyrl7O8p5APr1ZOtWEm3tQNN35zi_W/view?usp=sharing)
NanoDet-Plus-m-1.5x-320 (***NEW***)| ShuffleNetV2 1.5x | 320*320 | 29.9 | 1.75G | 2.44M | [Weight](https://drive.google.com/file/d/1Xdlgu5lxiS3w6ER7GE1mZpY663wmpcyY/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/1qXR6t3TBMXlz6GlTU3fxiLA-eueYoGrW/view?usp=sharing)
NanoDet-Plus-m-1.5x-416 (***NEW***)| ShuffleNetV2 1.5x | 416*416 | 34.1 | 2.97G | 2.44M | [Weight](https://drive.google.com/file/d/16FJJJgUt5VrSKG7RM_ImdKKzhJ-Mu45I/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/17sdAUydlEXCrHMsxlDPLj5cGb-8-mmY6/view?usp=sharing)
*Notice*: The difference between `Weight` and `Checkpoint` is that the weight file only provides the parameters used at inference time, while the checkpoint also contains training-time parameters.
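One quick way to see the difference is to load a downloaded file with PyTorch and inspect its top-level keys. This is a generic sketch assuming ordinary files saved with `torch.save`; the file name is a placeholder:
```python
import torch

# Load on CPU so no GPU is needed just to inspect the file.
ckpt = torch.load("nanodet-plus-m_320.pth", map_location="cpu")  # placeholder path

# A plain weight file is typically a flat state_dict of tensors, while a
# training checkpoint wraps the weights together with extra training state.
if isinstance(ckpt, dict):
    print(list(ckpt.keys())[:10])
```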
****
## How to Train
1. **Prepare dataset**
If your dataset annotations are in Pascal VOC XML format, refer to [config/nanodet_custom_xml_dataset.yml](config/nanodet_custom_xml_dataset.yml)
Otherwise, if your dataset annotations are in YOLO format ([Darknet TXT](https://github.com/AlexeyAB/Yolo_mark/issues/60#issuecomment-401854885)), refer to [config/nanodet-plus-m_416-yolo.yml](config/nanodet-plus-m_416-yolo.yml)
Or convert your dataset annotations to MS COCO format [(COCO annotation format details)](https://cocodataset.org/#format-data).
2. **Prepare config file**
Copy and modify an example yml config file in the config/ folder.
Change ***save_dir*** to where you want to save the model.
Change ***num_classes*** in ***model->arch->head***.
Change the image path and annotation path in both ***data->train*** and ***data->val***.
Set gpu ids, num workers and batch size in ***device*** to fit your device.
Set ***total_epochs***, ***lr*** and ***lr_schedule*** according to your dataset and batch size.
If you want to modify the network, data augmentation or other settings, please refer to [Config File Detail](docs/config_file_detail.md)
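For example, the fields that most training runs need to touch are shown below (an abbreviated excerpt in the same layout as the full config files later in this document; all values are placeholders):
```yaml
save_dir: workspace/my_experiment          # where checkpoints and logs are written
model:
  arch:
    head:
      num_classes: 3                       # number of classes in your dataset
data:
  train:
    img_path: path/to/train/images
    ann_path: path/to/train/annotations.json
  val:
    img_path: path/to/val/images
    ann_path: path/to/val/annotations.json
device:
  gpu_ids: [0]
  workers_per_gpu: 8
  batchsize_per_gpu: 96
schedule:
  total_epochs: 300
  optimizer:
    lr: 0.001
```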
3. **Start training**
NanoDet now uses [pytorch lightning](https://github.com/PyTorchLightning/pytorch-lightning) for training.
For both single-GPU and multi-GPU training, run:
```shell script
python tools/train.py CONFIG_FILE_PATH
```
4. **Visualize Logs**
TensorBoard logs are saved in the `save_dir` that you set in the config file.
To visualize them, run:
```shell script
cd <YOUR_SAVE_DIR>
tensorboard --logdir ./
```
****
save_dir: workspace/convnext/nanodet-plus_convnext-nano_640
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: TIMMWrapper
model_name: convnext_nano
features_only: True
pretrained: True
# output_stride: 32
out_indices: [1, 2, 3]
fpn:
name: GhostPAN
in_channels: [160, 320, 640]
out_channels: 128
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: SiLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: SiLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 256
feat_channels: 256
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: SiLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [640,640] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.1, 2.0]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [640,640] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0, 1, 2, 3]
workers_per_gpu: 8
batchsize_per_gpu: 24
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
no_norm_decay: True
param_level_cfg:
backbone:
lr_mult: 0.1
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 50
lr_schedule:
name: CosineAnnealingLR
T_max: 50
eta_min: 0.0005
val_intervals: 5
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-EfficientNet-Lite0_320
# COCO mAP(0.5:0.95) = 0.247
# AP_50 = 0.404
# AP_75 = 0.250
# AP_small = 0.079
# AP_m = 0.243
# AP_l = 0.406
save_dir: workspace/efficient0_320
model:
arch:
name: OneStageDetector
backbone:
name: EfficientNetLite
model_name: efficientnet_lite0
out_stages: [2,4,6]
activation: ReLU6
fpn:
name: PAN
in_channels: [40, 112, 320]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
activation: ReLU6
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
device:
gpu_ids: [0]
workers_per_gpu: 12
batchsize_per_gpu: 150
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.15
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 190
lr_schedule:
name: MultiStepLR
milestones: [140,170,180,185]
gamma: 0.1
val_intervals: 1
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-EfficientNet-Lite1_416
# COCO mAP(0.5:0.95) = 0.303
# AP_50 = 0.471
# AP_75 = 0.313
# AP_small = 0.122
# AP_m = 0.321
# AP_l = 0.432
save_dir: workspace/efficient1_416_SGD
model:
arch:
name: OneStageDetector
backbone:
name: EfficientNetLite
model_name: efficientnet_lite1
out_stages: [2,4,6]
activation: ReLU6
pretrain: True
fpn:
name: PAN
in_channels: [40, 112, 320]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 3
activation: ReLU6
share_cls_reg: True
octave_base_scale: 8
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
device:
gpu_ids: [0]
workers_per_gpu: 12
batchsize_per_gpu: 100
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.07
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 170
lr_schedule:
name: MultiStepLR
milestones: [130,150,160,165]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-EfficientNet-Lite2_512
# COCO mAP(0.5:0.95) = 0.326
# AP_50 = 0.501
# AP_75 = 0.344
# AP_small = 0.152
# AP_m = 0.342
# AP_l = 0.481
save_dir: workspace/efficientlite2_512
model:
arch:
name: OneStageDetector
backbone:
name: EfficientNetLite
model_name: efficientnet_lite2
out_stages: [2,4,6]
activation: ReLU6
pretrain: True
fpn:
name: PAN
in_channels: [48, 120, 352]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 4
activation: ReLU6
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [512,512] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [512,512] #[w,h]
keep_ratio: True
pipeline:
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
device:
gpu_ids: [0]
workers_per_gpu: 12
batchsize_per_gpu: 60
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.06
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 135
lr_schedule:
name: MultiStepLR
milestones: [90,110,120,130]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-RepVGG-A0_416
save_dir: workspace/RepVGG-A0-416
model:
arch:
name: OneStageDetector
backbone:
name: RepVGG
arch: A0
out_stages: [2,3,4]
activation: ReLU
last_channel: 512
deploy: False
fpn:
name: PAN
in_channels: [96, 192, 512]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
conv_type: Conv
input_channel: 128
feat_channels: 128
stacked_convs: 2
activation: ReLU
share_cls_reg: True
octave_base_scale: 8
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 1
batchsize_per_gpu: 100
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.07
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 170
lr_schedule:
name: MultiStepLR
milestones: [130,150,160,165]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# NanoDet-m with transformer attention
# COCO mAP(0.5:0.95) = 0.217
# AP_50 = 0.363
# AP_75 = 0.218
# AP_small = 0.069
# AP_m = 0.214
# AP_l = 0.364
save_dir: workspace/nanodet_t
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: TAN # transformer attention network
in_channels: [116, 232, 464]
out_channels: 128
feature_hw: [20,20] # size for position embedding
num_heads: 8
num_encoders: 1
mlp_ratio: 4
dropout_ratio: 0.1
activation: LeakyReLU
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.8, 1.2]
saturation: [0.8, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 160
schedule:
resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 190
lr_schedule:
name: MultiStepLR
milestones: [140,170,180,185]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# NanoDet-g-416 is designed for edge NPU, GPU or TPU with high parallel computing power but low memory bandwidth
# COCO mAP(0.5:0.95) = 22.9
# Flops = 4.2B
# Params = 3.8M
# COCO pre-trained weight link: https://drive.google.com/file/d/10uW7oqZKw231l_tr4C1bJWkbCXgBf7av/view?usp=sharing
save_dir: workspace/nanodet_g
model:
arch:
name: OneStageDetector
backbone:
name: CustomCspNet
net_cfg: [[ 'Conv', 3, 32, 3, 2], # 1/2
[ 'MaxPool', 3, 2 ], # 1/4
[ 'CspBlock', 32, 1, 3, 1 ], # 1/4
[ 'CspBlock', 64, 2, 3, 2 ], # 1/8
[ 'CspBlock', 128, 2, 3, 2 ], # 1/16
[ 'CspBlock', 256, 3, 3, 2 ]] # 1/32
out_stages: [3,4,5]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [128, 256, 512]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
conv_type: Conv
activation: LeakyReLU
input_channel: 128
feat_channels: 128
stacked_convs: 1
share_cls_reg: True
octave_base_scale: 8
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 128
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.1
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 190
lr_schedule:
name: MultiStepLR
milestones: [130,160,175,185]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-m-0.5x
# COCO mAP(0.5:0.95) = 0.135
# AP_50 = 0.245
# AP_75 = 0.129
# AP_small = 0.036
# AP_m = 0.119
# AP_l = 0.232
save_dir: workspace/nanodet_m_0.5x
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 0.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [48, 96, 192]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 96
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.07
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 1000
ratio: 0.00001
total_epochs: 180
lr_schedule:
name: MultiStepLR
milestones: [130,160,175]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#nanodet-m-1.5x-416
# COCO mAP(0.5:0.95) = 0.268
# AP_50 = 0.424
# AP_75 = 0.276
# AP_small = 0.098
# AP_m = 0.277
# AP_l = 0.420
save_dir: workspace/nanodet_m_1.5x_416
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [176, 352, 704]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 176
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#nanodet-m-1.5x
# COCO mAP(0.5:0.95) = 0.235
# AP_50 = 0.384
# AP_75 = 0.239
# AP_small = 0.069
# AP_m = 0.235
# AP_l = 0.389
save_dir: workspace/nanodet_m_1.5x
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [176, 352, 704]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 192
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#nanodet-m-416
# COCO mAP(0.5:0.95) = 0.235
# AP_50 = 0.384
# AP_75 = 0.242
# AP_small = 0.082
# AP_m = 0.240
# AP_l = 0.375
save_dir: workspace/nanodet_m_416
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [116, 232, 464]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 192
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#Config File example
save_dir: workspace/nanodet_m
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [116, 232, 464]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 192
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m-1.5x_320
# COCO mAP(0.5:0.95) = 0.299
# AP_50 = 0.454
# AP_75 = 0.312
# AP_small = 0.102
# AP_m = 0.309
# AP_l = 0.493
save_dir: workspace/nanodet-plus-m-1.5x_320
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [176, 352, 704]
out_channels: 128
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 256
feat_channels: 256
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m-1.5x_416
# COCO mAP(0.5:0.95) = 0.341
# AP_50 = 0.506
# AP_75 = 0.357
# AP_small = 0.143
# AP_m = 0.363
# AP_l = 0.539
save_dir: workspace/nanodet-plus-m-1.5x_416
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [176, 352, 704]
out_channels: 128
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 256
feat_channels: 256
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m_320
# COCO mAP(0.5:0.95) = 0.270
# AP_50 = 0.418
# AP_75 = 0.281
# AP_small = 0.083
# AP_m = 0.278
# AP_l = 0.451
save_dir: workspace/nanodet-plus-m_320
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0] # Set like [0, 1, 2, 3] if you have multi-GPUs
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m_416
# COCO mAP(0.5:0.95) = 0.304
# AP_50 = 0.459
# AP_75 = 0.317
# AP_small = 0.106
# AP_m = 0.322
# AP_l = 0.477
save_dir: /content/drive/MyDrive/Nanodet_again/nanodet2/workspace/nanodet-custom
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
    # Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
class_names: &class_names ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
data:
train:
name: YoloDataset
img_path: coco/train2017
ann_path: coco/train2017
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: YoloDataset
img_path: coco/val2017
ann_path: coco/val2017
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
# nanodet-plus-m_416
# COCO mAP(0.5:0.95) = 0.304
# AP_50 = 0.459
# AP_75 = 0.317
# AP_small = 0.106
# AP_m = 0.322
# AP_l = 0.477
save_dir: /content/drive/MyDrive/Nanodet_again/nanodet2/workspace/nanodet-custom
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
    # Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m_416
# COCO mAP(0.5:0.95) = 0.304
# AP_50 = 0.459
# AP_75 = 0.317
# AP_small = 0.106
# AP_m = 0.322
# AP_l = 0.477
save_dir: /content/drive/MyDrive/Nanodet_again/nanodet2/workspace/nanodet-custom
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 10
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
    # Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 10
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
class_names: &class_names ['cat','chicken','cow','dog','fox','goat','horse','person','racoon','skunk']
data:
train:
name: YoloDataset
img_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/train/images
ann_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/train/labels
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: YoloDataset
img_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/valid/images
ann_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/valid/labels
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 32
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 2
import argparse
import os
import time
import cv2
import torch
from nanodet.data.batch_process import stack_batch_img
from nanodet.data.collate import naive_collate
from nanodet.data.transform import Pipeline
from nanodet.model.arch import build_model
from nanodet.util import Logger, cfg, load_config, load_model_weight
from nanodet.util.path import mkdir
image_ext = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]
video_ext = ["mp4", "mov", "avi", "mkv"]
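# Allow duplicate OpenMP runtimes to coexist; a common workaround for "OMP: Error #15" aborts when several libraries bundle libiomp5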
os.environ['KMP_DUPLICATE_LIB_OK']='True'
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"demo", default="image", help="demo type, eg. image, video and webcam"
)
parser.add_argument("--config", help="model config file path")
parser.add_argument("--model", help="model file path")
parser.add_argument("--path", default="./demo", help="path to images or video")
parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id")
parser.add_argument(
"--save_result",
action="store_true",
help="whether to save the inference result of image/video",
)
args = parser.parse_args()
return args
class Predictor(object):
def __init__(self, cfg, model_path, logger, device="cpu"):
self.cfg = cfg
self.device = device
model = build_model(cfg.model)
ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
load_model_weight(model, ckpt, logger)
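        # RepVGG backbones are trained with multi-branch blocks; convert them to the single-conv deploy form before inference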
if cfg.model.arch.backbone.name == "RepVGG":
deploy_config = cfg.model
deploy_config.arch.backbone.update({"deploy": True})
deploy_model = build_model(deploy_config)
from nanodet.model.backbone.repvgg import repvgg_det_model_convert
model = repvgg_det_model_convert(model, deploy_model)
self.model = model.to(device).eval()
self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
def inference(self, img):
img_info = {"id": 0}
if isinstance(img, str):
img_info["file_name"] = os.path.basename(img)
img = cv2.imread(img)
else:
img_info["file_name"] = None
height, width = img.shape[:2]
img_info["height"] = height
img_info["width"] = width
meta = dict(img_info=img_info, raw_img=img, img=img)
meta = self.pipeline(None, meta, self.cfg.data.val.input_size)
meta["img"] = torch.from_numpy(meta["img"].transpose(2, 0, 1)).to(self.device)
meta = naive_collate([meta])
meta["img"] = stack_batch_img(meta["img"], divisible=32)
with torch.no_grad():
t0 = time.time()
results = self.model.inference(meta)
t1 = time.time()
diff = t1 - t0
            fps = 1 / diff
            print("fps:", fps)
return meta, results
def visualize(self, dets, meta, class_names, score_thres, wait=0):
time1 = time.time()
result_img = self.model.head.show_result(
meta["raw_img"][0], dets, class_names, score_thres=score_thres, show=True
)
print("viz time: {:.3f}s".format(time.time() - time1))
# diff = time.time() - time1
# fps = 1000/ (diff)
# print("fps: ", str(fps))
return result_img
def get_image_list(path):
image_names = []
for maindir, subdir, file_name_list in os.walk(path):
for filename in file_name_list:
apath = os.path.join(maindir, filename)
ext = os.path.splitext(apath)[1]
if ext in image_ext:
image_names.append(apath)
return image_names
def main():
args = parse_args()
local_rank = 0
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
load_config(cfg, args.config)
logger = Logger(local_rank, use_tensorboard=False)
predictor = Predictor(cfg, args.model, logger, device="cpu")
logger.log('Press "Esc", "q" or "Q" to exit.')
current_time = time.localtime()
if args.demo == "image":
if os.path.isdir(args.path):
files = get_image_list(args.path)
else:
files = [args.path]
files.sort()
for image_name in files:
meta, res = predictor.inference(image_name)
result_image = predictor.visualize(res[0], meta, cfg.class_names, 0.35)
if args.save_result:
save_folder = os.path.join(
cfg.save_dir, time.strftime("%Y_%m_%d_%H_%M_%S", current_time)
)
mkdir(local_rank, save_folder)
save_file_name = os.path.join(save_folder, os.path.basename(image_name))
cv2.imwrite(save_file_name, result_image)
ch = cv2.waitKey(1)
if ch == 27 or ch == ord("q") or ch == ord("Q"):
break
elif args.demo == "video" or args.demo == "webcam":
cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float
fps = cap.get(cv2.CAP_PROP_FPS)
save_folder = os.path.join(
cfg.save_dir, time.strftime("%Y_%m_%d_%H_%M_%S", current_time)
)
mkdir(local_rank, save_folder)
save_path = (
os.path.join(save_folder, args.path.replace("\\", "/").split("/")[-1])
if args.demo == "video"
else os.path.join(save_folder, "camera.mp4")
)
print(f"save_path is {save_path}")
vid_writer = cv2.VideoWriter(
save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
)
while True:
ret_val, frame = cap.read()
if ret_val:
meta, res = predictor.inference(frame)
result_frame = predictor.visualize(res[0], meta, cfg.class_names, 0.35)
if args.save_result:
vid_writer.write(result_frame)
ch = cv2.waitKey(1)
if ch == 27 or ch == ord("q") or ch == ord("Q"):
break
else:
break
if __name__ == "__main__":
main()
*.iml
.gradle
/local.properties
/.idea
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
# NanoDet NCNN Android Demo
This repo is an Android object detection demo of NanoDet using
[Tencent's NCNN framework](https://github.com/Tencent/ncnn).
# Tutorial
## Step1.
Download ncnn-android-vulkan.zip from the ncnn repo, or build ncnn-android from source.
- [ncnn-android-vulkan.zip download link](https://github.com/Tencent/ncnn/releases)
## Step2.
Unzip ncnn-android-vulkan.zip into demo_android_ncnn/app/src/main/cpp, or change the ncnn_DIR path in demo_android_ncnn/app/src/main/cpp/CMakeLists.txt to point to your copy.
```bash
# e.g. change to `ncnn-20211208-android-vulkan` if you downloaded the 20211208 release
set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20211208-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
```
## Step3.
Copy the NanoDet ncnn model files from the models folder into demo_android_ncnn/app/src/main/assets and rename them to nanodet.param and nanodet.bin.
* [NanoDet ncnn model download link](https://drive.google.com/file/d/1cuVBJiFKwyq1-l3AwHoP2boTesUQP-6K/view?usp=sharing)
If you want to run yolov4-tiny or yolov5s, download them and put them in demo_android_ncnn/app/src/main/assets as well.
* [Yolov4 and v5 ncnn model download link](https://drive.google.com/file/d/1Qk_1fDvOcFmNppDnaMFW-xFpMgLDyeAs/view?usp=sharing)
## Step4.
Open the demo_android_ncnn folder in Android Studio and build it.
# Screenshot
![](Android_demo.jpg)
# Notice
* The FPS shown in the app includes pre-processing, post-processing and visualization, so it is not equal to the raw model inference time.
* If you meet an error like `No version of NDK matched the requested version`, set `ndkVersion` inside the `android { ... }` block to your installed NDK version.
* If you want to use a custom model, remember to change the hyperparameters in `demo_android_ncnn/app/src/main/cpp/NanoDet.h` to match your training config, as in the sketch below.
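
For orientation, here is a minimal sketch (illustrative only, assuming the custom 10-class, 416x416 config committed in this repo) of the `NanoDet.h` fields that have to stay in sync with the training YAML; the `NanoDetConfig` struct name is made up for the example and does not appear in the demo sources.

```cpp
// Illustrative sketch, not part of demo_android_ncnn: the NanoDet.h parameters that
// must mirror the training config. Values assume the custom 10-class 416x416 config
// in this commit; replace them with your own.
#include <string>
#include <vector>

struct NanoDetConfig {
    int input_size[2] = {416, 416};              // input height and width (input_size in the YAML)
    int num_class = 10;                          // head.num_classes in the YAML
    int reg_max = 7;                             // head.reg_max in the YAML
    std::vector<int> strides = {8, 16, 32, 64};  // head.strides in the YAML
    std::vector<std::string> labels = {          // class_names in the YAML
        "cat", "chicken", "cow", "dog", "fox",
        "goat", "horse", "person", "racoon", "skunk"};
};
```

If these values drift from the YAML, the on-device decoding (center priors, DFL bins, label lookup) will not line up with the exported model.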
# Reference
* [ncnn](https://github.com/tencent/ncnn)
* [YOLOv5_NCNN](https://github.com/WZTENG/YOLOv5_NCNN)
apply plugin: 'com.android.application'
android {
compileSdkVersion 29
buildToolsVersion "29.0.3"
defaultConfig {
applicationId "com.rangi.nanodet"
minSdkVersion 26
targetSdkVersion 29
versionCode 1
versionName "1.0"
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
externalNativeBuild {
cmake {
cppFlags ""
arguments '-DANDROID_PLATFORM=android-24', '-DANDROID_STL=c++_static', '-DANDROID_STL=c++_shared'
}
}
ndk {
moduleName "NcnnJniLog"
ldLibs "log", "z", "m"
abiFilters "armeabi-v7a", "arm64-v8a"
}
multiDexEnabled true
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
externalNativeBuild {
cmake {
path "src/main/cpp/CMakeLists.txt"
version "3.10.2"
}
}
sourceSets {
main {
jniLibs.srcDirs = ['libs']
}
}
repositories {
flatDir {
dirs 'libs'
}
}
}
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar'])
implementation 'androidx.appcompat:appcompat:1.1.0'
implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
testImplementation 'junit:junit:4.12'
androidTestImplementation 'androidx.test.ext:junit:1.1.1'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0'
    // Use the most recent version of CameraX, currently that is alpha05
def camerax_version = "1.0.0-alpha05"
//noinspection GradleDependency
implementation "androidx.camera:camera-core:${camerax_version}"
//noinspection GradleDependency
implementation "androidx.camera:camera-camera2:${camerax_version}"
implementation 'com.android.support:multidex:1.0.3'
// crash
implementation 'com.zxy.android:recovery:1.0.0'
// photoview
implementation 'com.github.chrisbanes:PhotoView:2.3.0'
// implementation 'com.bm.photoview:library:1.4.1'
}
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
package com.rangi.nanodet;
import android.content.Context;
import androidx.test.platform.app.InstrumentationRegistry;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import org.junit.Test;
import org.junit.runner.RunWith;
import static org.junit.Assert.*;
/**
* Instrumented test, which will execute on an Android device.
*
* @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
*/
@RunWith(AndroidJUnit4.class)
public class ExampleInstrumentedTest {
@Test
public void useAppContext() {
// Context of the app under test.
Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
assertEquals("gd.hq.yolov5", appContext.getPackageName());
}
}
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.rangi.nanodet">
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.CAMERA" />
<application
android:name="com.rangi.nanodet.NcnnApp"
android:allowBackup="true"
android:icon="@drawable/ncnn_icon"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:requestLegacyExternalStorage="true"
android:theme="@style/AppTheme">
<activity android:name="com.rangi.nanodet.WelcomeActivity">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
<activity
android:name="com.rangi.nanodet.MainActivity"
android:theme="@style/AppNoTitleTheme" />
</application>
</manifest>
cmake_minimum_required(VERSION 3.10)
set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20211208-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
find_package(ncnn REQUIRED)
add_library(yolov5 SHARED
jni_interface.cpp
YoloV5.cpp
YoloV4.cpp
NanoDet.cpp
)
target_link_libraries(yolov5 ncnn jnigraphics)
//
// Created by RangiLyu
// 2020 / 10 / 2
//
#include "NanoDet.h"
bool NanoDet::hasGPU = true;
NanoDet* NanoDet::detector = nullptr;
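// Fast approximation of exp(x) that writes the IEEE-754 bit pattern directly (Schraudolph's trick); 1.4426950409 is log2(e)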
inline float fast_exp(float x)
{
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x)
{
return 1.0f / (1.0f + fast_exp(-x));
}
template<typename _Tp>
int activation_function_softmax(const _Tp* src, _Tp* dst, int length)
{
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{ 0 };
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
static void generate_grid_center_priors(const int input_height, const int input_width, std::vector<int>& strides, std::vector<CenterPrior>& center_priors)
{
for (int i = 0; i < (int)strides.size(); i++)
{
int stride = strides[i];
int feat_w = ceil((float)input_width / stride);
int feat_h = ceil((float)input_height / stride);
for (int y = 0; y < feat_h; y++)
{
for (int x = 0; x < feat_w; x++)
{
CenterPrior ct;
ct.x = x;
ct.y = y;
ct.stride = stride;
center_priors.push_back(ct);
}
}
}
}
NanoDet::NanoDet(AAssetManager *mgr, const char *param, const char *bin, bool useGPU) {
this->Net = new ncnn::Net();
hasGPU = ncnn::get_gpu_count() > 0;
this->Net->opt.use_vulkan_compute = false; //hasGPU && useGPU; // gpu
this->Net->opt.use_fp16_arithmetic = true;
this->Net->opt.use_fp16_packed = true;
this->Net->opt.use_fp16_storage = true;
this->Net->load_param(mgr, param);
this->Net->load_model(mgr, bin);
}
NanoDet::~NanoDet()
{
delete this->Net;
}
void NanoDet::preprocess(JNIEnv *env, jobject image, ncnn::Mat& in)
{
in = ncnn::Mat::from_android_bitmap_resize(env, image, ncnn::Mat::PIXEL_RGBA2BGR, input_size[1], input_size[0]);
// in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h);
//in = ncnn::Mat::from_pixels_resize(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, this->input_width, this->input_height);
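    // mean/std match the normalize values in the training config ([103.53, 116.28, 123.675] and [57.375, 57.12, 58.395]); norm_vals are 1/std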
const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
in.substract_mean_normalize(mean_vals, norm_vals);
}
std::vector<BoxInfo> NanoDet::detect(JNIEnv *env, jobject image, float score_threshold, float nms_threshold) {
AndroidBitmapInfo img_size;
AndroidBitmap_getInfo(env, image, &img_size);
float width_ratio = (float) img_size.width / (float) this->input_size[1];
float height_ratio = (float) img_size.height / (float) this->input_size[0];
ncnn::Mat input;
this->preprocess(env, image, input);
auto ex = this->Net->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(4);
hasGPU = ncnn::get_gpu_count() > 0;
//ex.set_vulkan_compute(hasGPU);
ex.input("data", input);
std::vector<std::vector<BoxInfo>> results;
results.resize(this->num_class);
ncnn::Mat out;
ex.extract("output", out);
// printf("%d %d %d \n", out.w, out.h, out.c);
// generate center priors in format of (x, y, stride)
std::vector<CenterPrior> center_priors;
generate_grid_center_priors(this->input_size[0], this->input_size[1], this->strides, center_priors);
this->decode_infer(out, center_priors, score_threshold, results, width_ratio, height_ratio);
std::vector<BoxInfo> dets;
for (int i = 0; i < (int)results.size(); i++)
{
this->nms(results[i], nms_threshold);
for (auto box : results[i])
{
dets.push_back(box);
}
}
return dets;
}
void NanoDet::decode_infer(ncnn::Mat& feats, std::vector<CenterPrior>& center_priors, float threshold, std::vector<std::vector<BoxInfo>>& results, float width_ratio, float height_ratio)
{
const int num_points = center_priors.size();
//printf("num_points:%d\n", num_points);
//cv::Mat debug_heatmap = cv::Mat(feature_h, feature_w, CV_8UC3);
for (int idx = 0; idx < num_points; idx++)
{
const int ct_x = center_priors[idx].x;
const int ct_y = center_priors[idx].y;
const int stride = center_priors[idx].stride;
const float* scores = feats.row(idx);
float score = 0;
int cur_label = 0;
for (int label = 0; label < this->num_class; label++)
{
if (scores[label] > score)
{
score = scores[label];
cur_label = label;
}
}
if (score > threshold)
{
//std::cout << "label:" << cur_label << " score:" << score << std::endl;
const float* bbox_pred = feats.row(idx) + this->num_class;
results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, ct_x, ct_y, stride, width_ratio, height_ratio));
//debug_heatmap.at<cv::Vec3b>(row, col)[0] = 255;
//cv::imshow("debug", debug_heatmap);
}
}
}
BoxInfo NanoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride, float width_ratio, float height_ratio)
{
float ct_x = x * stride;
float ct_y = y * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
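    // Decode the DFL output: each of the 4 box-side distances is the expectation over
    // the (reg_max + 1) softmax bins, scaled by the stride of this feature level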
for (int i = 0; i < 4; i++)
{
float dis = 0;
float* dis_after_sm = new float[this->reg_max + 1];
activation_function_softmax(dfl_det + i * (this->reg_max + 1), dis_after_sm, this->reg_max + 1);
for (int j = 0; j < this->reg_max + 1; j++)
{
dis += j * dis_after_sm[j];
}
dis *= stride;
//std::cout << "dis:" << dis << std::endl;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
float xmin = (std::max)(ct_x - dis_pred[0], .0f) * width_ratio;
float ymin = (std::max)(ct_y - dis_pred[1], .0f) * height_ratio;
float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[1]) * width_ratio;
float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[0]) * height_ratio;
//std::cout << xmin << "," << ymin << "," << xmax << "," << xmax << "," << std::endl;
return BoxInfo { xmin, ymin, xmax, ymax, score, label };
}
void NanoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH)
{
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else {
j++;
}
}
}
}
//
// Created by RangiLyu
// 2020 / 10 / 2
//
#ifndef NANODET_H
#define NANODET_H
#include "net.h"
#include "YoloV5.h"
typedef struct HeadInfo_
{
std::string cls_layer;
std::string dis_layer;
int stride;
} HeadInfo;
typedef struct CenterPrior_
{
int x;
int y;
int stride;
} CenterPrior;
class NanoDet{
public:
NanoDet(AAssetManager *mgr, const char *param, const char *bin, bool useGPU);
~NanoDet();
std::vector<BoxInfo> detect(JNIEnv *env, jobject image, float score_threshold, float nms_threshold);
std::vector<std::string> labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
private:
void preprocess(JNIEnv *env, jobject image, ncnn::Mat& in);
void decode_infer(ncnn::Mat& feats, std::vector<CenterPrior>& center_priors, float threshold, std::vector<std::vector<BoxInfo>>& results, float width_ratio, float height_ratio);
BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride, float width_ratio, float height_ratio);
static void nms(std::vector<BoxInfo>& result, float nms_threshold);
ncnn::Net *Net;
    // modify these parameters to match your training config if you want to use your own model
int input_size[2] = {320, 320}; // input height and width
int num_class = 80; // number of classes. 80 for COCO
int reg_max = 7; // `reg_max` set in the training config. Default: 7.
std::vector<int> strides = { 8, 16, 32, 64 }; // strides of the multi-level feature.
public:
static NanoDet *detector;
static bool hasGPU;
};
#endif //NANODET_H
#include "YoloV4.h"
bool YoloV4::hasGPU = true;
YoloV4 *YoloV4::detector = nullptr;
YoloV4::YoloV4(AAssetManager *mgr, const char *param, const char *bin, bool useGPU) {
Net = new ncnn::Net();
    // opt must be set before loading the model
hasGPU = ncnn::get_gpu_count() > 0;
Net->opt.use_vulkan_compute = hasGPU && useGPU; // gpu
    Net->opt.use_fp16_arithmetic = true; // fp16 arithmetic for faster inference
Net->load_param(mgr, param);
Net->load_model(mgr, bin);
}
YoloV4::~YoloV4() {
delete Net;
}
std::vector<BoxInfo> YoloV4::detect(JNIEnv *env, jobject image, float threshold, float nms_threshold) {
AndroidBitmapInfo img_size;
AndroidBitmap_getInfo(env, image, &img_size);
ncnn::Mat in_net = ncnn::Mat::from_android_bitmap_resize(env, image, ncnn::Mat::PIXEL_RGBA2RGB, input_size,
input_size);
float norm[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
float mean[3] = {0, 0, 0};
in_net.substract_mean_normalize(mean, norm);
auto ex = Net->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(4);
hasGPU = ncnn::get_gpu_count() > 0;
ex.set_vulkan_compute(hasGPU);
ex.input(0, in_net);
std::vector<BoxInfo> result;
ncnn::Mat blob;
ex.extract("output", blob);
auto boxes = decode_infer(blob, {(int) img_size.width, (int) img_size.height}, input_size, num_class, threshold);
result.insert(result.begin(), boxes.begin(), boxes.end());
// nms(result,nms_threshold);
return result;
}
inline float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x) {
return 1.0f / (1.0f + fast_exp(-x));
}
std::vector<BoxInfo>
YoloV4::decode_infer(ncnn::Mat &data, const yolocv::YoloSize &frame_size, int net_size, int num_classes, float threshold) {
std::vector<BoxInfo> result;
for (int i = 0; i < data.h; i++) {
BoxInfo box;
const float *values = data.row(i);
box.label = values[0] - 1;
box.score = values[1];
box.x1 = values[2] * (float) frame_size.width;
box.y1 = values[3] * (float) frame_size.height;
box.x2 = values[4] * (float) frame_size.width;
box.y2 = values[5] * (float) frame_size.height;
result.push_back(box);
}
return result;
}
#ifndef YOLOV4_H
#define YOLOV4_H
#include "net.h"
#include "YoloV5.h"
class YoloV4 {
public:
YoloV4(AAssetManager *mgr, const char *param, const char *bin, bool useGPU);
~YoloV4();
std::vector<BoxInfo> detect(JNIEnv *env, jobject image, float threshold, float nms_threshold);
std::vector<std::string> labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
private:
static std::vector<BoxInfo>
decode_infer(ncnn::Mat &data, const yolocv::YoloSize &frame_size, int net_size, int num_classes, float threshold);
// static void nms(std::vector<BoxInfo>& result,float nms_threshold);
ncnn::Net *Net;
int input_size = 640 / 2;
int num_class = 80;
public:
static YoloV4 *detector;
static bool hasGPU;
};
#endif //YOLOV4_H
//
// Created by 邓昊晴 on 14/6/2020.
//
#include "YoloV5.h"
bool YoloV5::hasGPU = true;
YoloV5 *YoloV5::detector = nullptr;
YoloV5::YoloV5(AAssetManager *mgr, const char *param, const char *bin, bool useGPU) {
Net = new ncnn::Net();
    // opt must be set before loading the model
hasGPU = ncnn::get_gpu_count() > 0;
Net->opt.use_vulkan_compute = hasGPU && useGPU; // gpu
    Net->opt.use_fp16_arithmetic = true; // fp16 arithmetic for faster inference
Net->load_param(mgr, param);
Net->load_model(mgr, bin);
}
YoloV5::~YoloV5() {
delete Net;
}
std::vector<BoxInfo> YoloV5::detect(JNIEnv *env, jobject image, float threshold, float nms_threshold) {
AndroidBitmapInfo img_size;
AndroidBitmap_getInfo(env, image, &img_size);
// ncnn::Mat in_net = ncnn::Mat::from_android_bitmap_resize(env,image,ncnn::Mat::PIXEL_BGR2RGB,input_size/2,input_size/2);
ncnn::Mat in_net = ncnn::Mat::from_android_bitmap_resize(env, image, ncnn::Mat::PIXEL_RGBA2RGB, input_size / 2,
input_size / 2);
float norm[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
float mean[3] = {0, 0, 0};
in_net.substract_mean_normalize(mean, norm);
auto ex = Net->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(4);
hasGPU = ncnn::get_gpu_count() > 0;
ex.set_vulkan_compute(hasGPU);
ex.input(0, in_net);
std::vector<BoxInfo> result;
for (const auto &layer: layers) {
ncnn::Mat blob;
ex.extract(layer.name.c_str(), blob);
auto boxes = decode_infer(blob, layer.stride, {(int) img_size.width, (int) img_size.height}, input_size,
num_class, layer.anchors, threshold);
result.insert(result.begin(), boxes.begin(), boxes.end());
}
nms(result, nms_threshold);
return result;
}
inline float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x) {
return 1.0f / (1.0f + fast_exp(-x));
}
std::vector<BoxInfo>
YoloV5::decode_infer(ncnn::Mat &data, int stride, const yolocv::YoloSize &frame_size, int net_size, int num_classes,
const std::vector<yolocv::YoloSize> &anchors, float threshold) {
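    // YOLOv5-style decoding: box centers come from sigmoid offsets on the grid cell, widths/heights from (2*sigmoid)^2 times the anchor size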
std::vector<BoxInfo> result;
int grid_size = int(sqrt(data.h));
float *mat_data[data.c];
for (int i = 0; i < data.c; i++) {
mat_data[i] = data.channel(i);
}
float cx, cy, w, h;
for (int shift_y = 0; shift_y < grid_size; shift_y++) {
for (int shift_x = 0; shift_x < grid_size; shift_x++) {
int loc = shift_x + shift_y * grid_size;
for (int i = 0; i < 3; i++) {
float *record = mat_data[i];
float *cls_ptr = record + 5;
for (int cls = 0; cls < num_classes; cls++) {
float score = sigmoid(cls_ptr[cls]) * sigmoid(record[4]);
if (score > threshold) {
cx = (sigmoid(record[0]) * 2.f - 0.5f + (float) shift_x) * (float) stride;
cy = (sigmoid(record[1]) * 2.f - 0.5f + (float) shift_y) * (float) stride;
w = pow(sigmoid(record[2]) * 2.f, 2) * anchors[i].width;
h = pow(sigmoid(record[3]) * 2.f, 2) * anchors[i].height;
//printf("[grid size=%d, stride = %d]x y w h %f %f %f %f\n",grid_size,stride,record[0],record[1],record[2],record[3]);
BoxInfo box;
box.x1 = std::max(0, std::min(frame_size.width, int((cx - w / 2.f) * (float) frame_size.width / (float) net_size)));
box.y1 = std::max(0, std::min(frame_size.height, int((cy - h / 2.f) * (float) frame_size.height / (float) net_size)));
box.x2 = std::max(0, std::min(frame_size.width, int((cx + w / 2.f) * (float) frame_size.width / (float) net_size)));
box.y2 = std::max(0, std::min(frame_size.height, int((cy + h / 2.f) * (float) frame_size.height / (float) net_size)));
box.score = score;
box.label = cls;
result.push_back(box);
}
}
}
for (auto &ptr:mat_data) {
ptr += (num_classes + 5);
}
}
}
return result;
}
void YoloV5::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
} else {
j++;
}
}
}
}
//
// Created by 邓昊晴 on 14/6/2020.
//
#ifndef YOLOV5_H
#define YOLOV5_H
#include "net.h"
namespace yolocv {
typedef struct {
int width;
int height;
} YoloSize;
}
typedef struct {
std::string name;
int stride;
std::vector<yolocv::YoloSize> anchors;
} YoloLayerData;
typedef struct BoxInfo {
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class YoloV5 {
public:
YoloV5(AAssetManager *mgr, const char *param, const char *bin, bool useGPU);
~YoloV5();
std::vector<BoxInfo> detect(JNIEnv *env, jobject image, float threshold, float nms_threshold);
std::vector<std::string> labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
private:
static std::vector<BoxInfo>
decode_infer(ncnn::Mat &data, int stride, const yolocv::YoloSize &frame_size, int net_size, int num_classes,
const std::vector<yolocv::YoloSize> &anchors, float threshold);
static void nms(std::vector<BoxInfo> &result, float nms_threshold);
ncnn::Net *Net;
int input_size = 640;
int num_class = 80;
std::vector<YoloLayerData> layers{
{"394", 32, {{116, 90}, {156, 198}, {373, 326}}},
{"375", 16, {{30, 61}, {62, 45}, {59, 119}}},
{"output", 8, {{10, 13}, {16, 30}, {33, 23}}},
};
public:
static YoloV5 *detector;
static bool hasGPU;
};
#endif //YOLOV5_H
#include <jni.h>
#include <string>
#include <gpu.h>
#include <android/asset_manager_jni.h>
#include <android/log.h>
#include "YoloV5.h"
#include "YoloV4.h"
#include "NanoDet.h"
JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *reserved) {
ncnn::create_gpu_instance();
if (ncnn::get_gpu_count() > 0) {
YoloV5::hasGPU = true;
YoloV4::hasGPU = true;
NanoDet::hasGPU = true;
}
return JNI_VERSION_1_6;
}
JNIEXPORT void JNI_OnUnload(JavaVM *vm, void *reserved) {
ncnn::destroy_gpu_instance();
}
/*********************************************************************************************
NanoDet
********************************************************************************************/
extern "C" JNIEXPORT void JNICALL
Java_com_rangi_nanodet_NanoDet_init(JNIEnv *env, jclass, jobject assetManager, jboolean useGPU) {
if (NanoDet::detector == nullptr) {
AAssetManager *mgr = AAssetManager_fromJava(env, assetManager);
NanoDet::detector = new NanoDet(mgr, "nanodet.param", "nanodet.bin", useGPU);
}
}
extern "C" JNIEXPORT jobjectArray JNICALL
Java_com_rangi_nanodet_NanoDet_detect(JNIEnv *env, jclass, jobject image, jdouble threshold, jdouble nms_threshold) {
auto result = NanoDet::detector->detect(env, image, threshold, nms_threshold);
auto box_cls = env->FindClass("com/rangi/nanodet/Box");
auto cid = env->GetMethodID(box_cls, "<init>", "(FFFFIF)V");
jobjectArray ret = env->NewObjectArray(result.size(), box_cls, nullptr);
int i = 0;
for (auto &box:result) {
env->PushLocalFrame(1);
jobject obj = env->NewObject(box_cls, cid, box.x1, box.y1, box.x2, box.y2, box.label, box.score);
obj = env->PopLocalFrame(obj);
env->SetObjectArrayElement(ret, i++, obj);
}
return ret;
}
/*********************************************************************************************
Yolov5
********************************************************************************************/
extern "C" JNIEXPORT void JNICALL
Java_com_rangi_nanodet_YOLOv5_init(JNIEnv *env, jclass, jobject assetManager, jboolean useGPU) {
if (YoloV5::detector == nullptr) {
AAssetManager *mgr = AAssetManager_fromJava(env, assetManager);
YoloV5::detector = new YoloV5(mgr, "yolov5.param", "yolov5.bin", useGPU);
}
}
extern "C" JNIEXPORT jobjectArray JNICALL
Java_com_rangi_nanodet_YOLOv5_detect(JNIEnv *env, jclass, jobject image, jdouble threshold, jdouble nms_threshold) {
auto result = YoloV5::detector->detect(env, image, threshold, nms_threshold);
auto box_cls = env->FindClass("com/rangi/nanodet/Box");
auto cid = env->GetMethodID(box_cls, "<init>", "(FFFFIF)V");
jobjectArray ret = env->NewObjectArray(result.size(), box_cls, nullptr);
int i = 0;
for (auto &box:result) {
env->PushLocalFrame(1);
jobject obj = env->NewObject(box_cls, cid, box.x1, box.y1, box.x2, box.y2, box.label, box.score);
obj = env->PopLocalFrame(obj);
env->SetObjectArrayElement(ret, i++, obj);
}
return ret;
}
/*********************************************************************************************
YOLOv4-tiny
********************************************************************************************/
extern "C" JNIEXPORT void JNICALL
Java_com_rangi_nanodet_YOLOv4_init(JNIEnv *env, jclass, jobject assetManager, jboolean useGPU) {
if (YoloV4::detector == nullptr) {
AAssetManager *mgr = AAssetManager_fromJava(env, assetManager);
YoloV4::detector = new YoloV4(mgr, "yolov4-tiny-opt.param", "yolov4-tiny-opt.bin", useGPU);
}
}
extern "C" JNIEXPORT jobjectArray JNICALL
Java_com_rangi_nanodet_YOLOv4_detect(JNIEnv *env, jclass, jobject image, jdouble threshold, jdouble nms_threshold) {
auto result = YoloV4::detector->detect(env, image, threshold, nms_threshold);
auto box_cls = env->FindClass("com/rangi/nanodet/Box");
auto cid = env->GetMethodID(box_cls, "<init>", "(FFFFIF)V");
jobjectArray ret = env->NewObjectArray(result.size(), box_cls, nullptr);
int i = 0;
for (auto &box:result) {
env->PushLocalFrame(1);
jobject obj = env->NewObject(box_cls, cid, box.x1, box.y1, box.x2, box.y2, box.label, box.score);
obj = env->PopLocalFrame(obj);
env->SetObjectArrayElement(ret, i++, obj);
}
return ret;
}
package com.rangi.nanodet;
import androidx.annotation.NonNull;
class AppCrashHandler implements Thread.UncaughtExceptionHandler {
private Thread.UncaughtExceptionHandler uncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
@Override
public void uncaughtException(@NonNull Thread t, @NonNull Throwable e) {
uncaughtExceptionHandler.uncaughtException(t, e);
}
public static void register() {
Thread.setDefaultUncaughtExceptionHandler(new AppCrashHandler());
}
}
package com.rangi.nanodet;
import android.graphics.Color;
import android.graphics.RectF;
import java.util.Random;
public class Box {
public float x0,y0,x1,y1;
private int label;
private float score;
private static String[] labels={"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
public Box(float x0,float y0, float x1, float y1, int label, float score){
this.x0 = x0;
this.y0 = y0;
this.x1 = x1;
this.y1 = y1;
this.label = label;
this.score = score;
}
public RectF getRect(){
return new RectF(x0,y0,x1,y1);
}
public String getLabel(){
return labels[label];
}
public float getScore(){
return score;
}
public int getColor(){
Random random = new Random(label);
return Color.argb(255,random.nextInt(256),random.nextInt(256),random.nextInt(256));
}
}
package com.rangi.nanodet;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
public class NanoDet {
static {
System.loadLibrary("yolov5");
}
public static native void init(AssetManager manager, boolean useGPU);
public static native Box[] detect(Bitmap bitmap, double threshold, double nms_threshold);
}
package com.rangi.nanodet;
import android.app.Application;
import android.content.Context;
import android.util.Log;
import androidx.multidex.MultiDex;
import com.zxy.recovery.callback.RecoveryCallback;
import com.zxy.recovery.core.Recovery;
public class NcnnApp extends Application {
@Override
public void onCreate() {
super.onCreate();
        // crash recovery screen
initRecovery();
}
@Override
protected void attachBaseContext(Context base) {
super.attachBaseContext(base);
MultiDex.install(base);
}
private void initRecovery() {
Recovery.getInstance()
.debug(BuildConfig.DEBUG)
.recoverInBackground(true)
.recoverStack(true)
.mainPage(MainActivity.class)
.recoverEnabled(true)
.callback(new MyCrashCallback())
.silent(false, Recovery.SilentMode.RECOVER_ACTIVITY_STACK)
// .skip(TestActivity.class)
.init(this);
AppCrashHandler.register();
}
static final class MyCrashCallback implements RecoveryCallback {
@Override
public void stackTrace(String exceptionMessage) {
Log.e("wzt", "exceptionMessage:" + exceptionMessage);
}
@Override
public void cause(String cause) {
Log.e("wzt", "cause:" + cause);
}
@Override
public void exception(String exceptionType, String throwClassName, String throwMethodName, int throwLineNumber) {
Log.e("wzt", "exceptionClassName:" + exceptionType);
Log.e("wzt", "throwClassName:" + throwClassName);
Log.e("wzt", "throwMethodName:" + throwMethodName);
Log.e("wzt", "throwLineNumber:" + throwLineNumber);
}
@Override
public void throwable(Throwable throwable) {
}
}
}
package com.rangi.nanodet;
import androidx.appcompat.app.AlertDialog;
import androidx.appcompat.app.AppCompatActivity;
import android.content.Intent;
import android.os.Bundle;
import android.view.View;
import android.widget.Button;
import android.widget.CompoundButton;
import android.widget.ToggleButton;
public class WelcomeActivity extends AppCompatActivity {
private ToggleButton tbUseGpu;
private Button nanodet;
private Button yolov5s;
private Button yolov4tiny;
private boolean useGPU = false;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_welcome);
tbUseGpu = findViewById(R.id.tb_use_gpu);
tbUseGpu.setOnCheckedChangeListener(new CompoundButton.OnCheckedChangeListener() {
@Override
public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
useGPU = isChecked;
MainActivity.USE_GPU = useGPU;
if (useGPU) {
AlertDialog.Builder builder = new AlertDialog.Builder(WelcomeActivity.this);
builder.setTitle("Warning");
builder.setMessage("It may not work well in GPU mode, or errors may occur.");
builder.setCancelable(true);
builder.setPositiveButton("OK", null);
AlertDialog dialog = builder.create();
dialog.show();
}
}
});
nanodet = findViewById(R.id.btn_start_detect0);
nanodet.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
MainActivity.USE_MODEL = MainActivity.NANODET;
Intent intent = new Intent(WelcomeActivity.this, MainActivity.class);
WelcomeActivity.this.startActivity(intent);
}
});
yolov5s = findViewById(R.id.btn_start_detect1);
yolov5s.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
MainActivity.USE_MODEL = MainActivity.YOLOV5S;
Intent intent = new Intent(WelcomeActivity.this, MainActivity.class);
WelcomeActivity.this.startActivity(intent);
}
});
yolov4tiny = findViewById(R.id.btn_start_detect2);
yolov4tiny.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
MainActivity.USE_MODEL = MainActivity.YOLOV4_TINY;
Intent intent = new Intent(WelcomeActivity.this, MainActivity.class);
WelcomeActivity.this.startActivity(intent);
}
});
}
}
package com.rangi.nanodet;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
public class YOLOv4 {
static {
System.loadLibrary("yolov5"); // 存放在yolov5.so中
}
public static native void init(AssetManager manager, boolean useGPU);
public static native Box[] detect(Bitmap bitmap, double threshold, double nms_threshold);
}
package com.rangi.nanodet;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
public class YOLOv5 {
static {
System.loadLibrary("yolov5");
}
public static native void init(AssetManager manager, boolean useGPU);
public static native Box[] detect(Bitmap bitmap, double threshold, double nms_threshold);
}
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillType="evenOdd"
android:pathData="M32,64C32,64 38.39,52.99 44.13,50.95C51.37,48.37 70.14,49.57 70.14,49.57L108.26,87.69L108,109.01L75.97,107.97L32,64Z"
android:strokeWidth="1"
android:strokeColor="#00000000">
<aapt:attr name="android:fillColor">
<gradient
android:endX="78.5885"
android:endY="90.9159"
android:startX="48.7653"
android:startY="61.0927"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M66.94,46.02L66.94,46.02C72.44,50.07 76,56.61 76,64L32,64C32,56.61 35.56,50.11 40.98,46.06L36.18,41.19C35.45,40.45 35.45,39.3 36.18,38.56C36.91,37.81 38.05,37.81 38.78,38.56L44.25,44.05C47.18,42.57 50.48,41.71 54,41.71C57.48,41.71 60.78,42.57 63.68,44.05L69.11,38.56C69.84,37.81 70.98,37.81 71.71,38.56C72.44,39.3 72.44,40.45 71.71,41.19L66.94,46.02ZM62.94,56.92C64.08,56.92 65,56.01 65,54.88C65,53.76 64.08,52.85 62.94,52.85C61.8,52.85 60.88,53.76 60.88,54.88C60.88,56.01 61.8,56.92 62.94,56.92ZM45.06,56.92C46.2,56.92 47.13,56.01 47.13,54.88C47.13,53.76 46.2,52.85 45.06,52.85C43.92,52.85 43,53.76 43,54.88C43,56.01 43.92,56.92 45.06,56.92Z"
android:strokeWidth="1"
android:strokeColor="#00000000" />
</vector>
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
<item android:drawable="@drawable/gpu" android:state_checked="true" />
<item android:drawable="@drawable/cpu" android:state_checked="false" />
</selector>
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>