Commit 53e339ae authored by Sikhin VC

initial commit

parent bc7ca466
# This is an example .flake8 config, used when developing *Black* itself.
# Keep in sync with setup.cfg which is used for source packages.
[flake8]
ignore = W503, E203, E221, C901, C408, E741, C407, B006, B007, B017, B950, C416
max-line-length = 88
max-complexity = 18
select = B,C,E,F,W,T4,B9
exclude = build
per-file-ignores =
**/__init__.py:F401,F403,E402
# Auto detect text files and perform LF normalization
* text=auto
name: CI
on:
push:
branches:
- main
pull_request:
jobs:
linter:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8==5.0.4 isort==5.10.1
python -m pip install black==22.6.0
flake8 --version
- name: Lint
run: |
echo "Running isort"
isort --profile black .
echo "Running black"
black --check .
echo "Running flake8"
flake8 .
test_cpu:
runs-on: ubuntu-latest
strategy:
matrix:
torch: [1.10.1, 1.11.0, 1.12.1, 1.13.1]
include:
- torch: 1.10.1
torchvision: 0.11.2
- torch: 1.11.0
torchvision: 0.12.0
- torch: 1.12.1
torchvision: 0.13.1
- torch: 1.13.1
torchvision: 0.14.1
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install -U pip
python -m pip install ninja opencv-python-headless onnx pytest-xdist codecov
python -m pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
python -m pip install Cython termcolor numpy tensorboard pycocotools matplotlib pyaml opencv-python tqdm pytorch-lightning torchmetrics codecov flake8 pytest timm
python -m pip install -r requirements.txt
- name: Setup
run: rm -rf .eggs && python setup.py develop
- name: Run unittests and generate coverage report
run: |
coverage run --branch --source nanodet -m pytest tests/
coverage xml
coverage report -m
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
if: matrix.torch == '1.12.1'
with:
file: ./coverage.xml
flags: unittests
env_vars: OS,PYTHON
name: codecov-umbrella
fail_ci_if_error: false
# test_cuda:
# runs-on: ubuntu-latest
# env:
# CUDA: 10.2.89-1
# CUDA_SHORT: 10.2
# UBUNTU_VERSION: ubuntu1804
# strategy:
# matrix:
# torch: [1.7.0, 1.8.0, 1.9.0]
# include:
# - torch: 1.7.0
# torchvision: 0.8.1
# - torch: 1.8.0
# torchvision: 0.9.0
# - torch: 1.9.0
# torchvision: 0.10.0
# steps:
# - name: Checkout
# uses: actions/checkout@v2
# - name: Set up Python 3.6
# uses: actions/setup-python@v2
# with:
# python-version: 3.6
# - name: Install CUDA
# run: |
# export INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
# wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
# sudo dpkg -i ${INSTALLER}
# wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
# sudo apt-key add 7fa2af80.pub
# sudo apt update -qq
# sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
# sudo apt clean
# export CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
# export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH}
# export PATH=${CUDA_HOME}/bin:${PATH}
# - name: Install dependencies
# run: |
# python -m pip install -U pip
# python -m pip install ninja opencv-python-headless onnx pytest-xdist codecov
# python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
# python -m pip install Cython termcolor numpy tensorboard pycocotools matplotlib pyaml opencv-python tqdm pytorch-lightning torchmetrics codecov flake8 pytest
# - name: Setup
# run: |
# rm -rf .eggs
# python setup.py check -m -s
# TORCH_CUDA_ARCH_LIST=7.0 pip install .
# - name: Run unittests and generate coverage report
# run: |
# coverage run --branch --source nanodet -m pytest tests/
# coverage xml
# coverage report -m
# - name: Upload coverage to Codecov
# uses: codecov/codecov-action@v1.0.10
# if: matrix.torch == '1.9.0'
# with:
# file: ./coverage.xml
# flags: unittests
# env_vars: OS,PYTHON
# name: codecov-umbrella
# fail_ci_if_error: false
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.vscode
.idea
.DS_Store
# custom
*.pkl
*.pkl.json
*.log.json
work_dirs/
# Pytorch
*.pth
*.py~
*.sh~
[tool.isort]
profile = "black"
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: requirements-txt-fixer
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort
args: ["--profile", "black"]
- repo: https://github.com/psf/black
rev: 22.6.0
hooks:
- id: black
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
# nanodet_lightweight_object_detection
# NanoDet
- NanoDet is an FCOS-style, one-stage, anchor-free object detection model that uses Generalised Focal Loss as its classification and regression loss.
- FCOS (Fully Convolutional One-Stage): FCOS is an object detection framework that performs detection in a single stage using fully convolutional networks. Unlike two-stage detectors such as Faster R-CNN, which rely on a separate region proposal network (RPN) to generate candidate regions, FCOS eliminates the need for anchors and predicts detections directly on the feature map.
- Anchor-Free Object Detection: In traditional object detection models, anchor boxes are predefined boxes of various sizes and aspect ratios that are placed on the image. The model then predicts whether each anchor box contains an object and adjusts its position and size. Anchor-free object detection models, like FCOS, do not use predefined anchor boxes. Instead, they directly predict the bounding boxes and class probabilities for objects in the image.
- Generalised Focal Loss (GFL): The Focal Loss, introduced with the RetinaNet object detector, addresses the class imbalance problem in object detection datasets by assigning higher weights to hard examples (misclassified or difficult-to-classify examples) and lower weights to easy ones. Generalised Focal Loss (GFL) extends this idea from discrete labels to continuous targets: it jointly represents classification confidence and localisation quality (Quality Focal Loss) and learns a general distribution over box offsets (Distribution Focal Loss), making it a more versatile loss for object detection. A minimal PyTorch sketch of the quality focal loss appears after this list.
- Classification Loss: In object detection, the classification loss is a term that measures the discrepancy between the predicted class probabilities and the ground truth labels for each object. It helps the model learn to accurately classify objects into different classes or categories.
- Regression Loss: The regression loss measures the difference between the predicted bounding box coordinates (e.g., coordinates for the top-left and bottom-right corners) and the ground truth bounding box coordinates for each object. It allows the model to learn to accurately localise and predict the object's position and size.
- By combining the FCOS architecture with the Generalised Focal Loss (GFL), NanoDet achieves efficient and accurate object detection, making it suitable for scenarios with limited computational resources or real-time applications.
- For more details, refer to the original repository: [NanoDet](https://github.com/RangiLyu/nanodet.git)
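The quality focal loss used as `loss_qfl` in the config files later in this document can be sketched in a few lines of PyTorch. This is an illustrative sketch only (the function name and sum reduction are chosen for the example), not NanoDet's actual implementation:
```python
import torch
import torch.nn.functional as F

def quality_focal_loss(pred_logits, quality_targets, beta=2.0):
    """Sketch of QFL: cross-entropy against a soft IoU-quality target in [0, 1],
    down-weighted for easy examples."""
    sigma = pred_logits.sigmoid()
    # Binary cross-entropy against the soft (IoU-aware) target.
    bce = F.binary_cross_entropy_with_logits(pred_logits, quality_targets, reduction="none")
    # Modulating factor |y - sigma|^beta focuses training on hard examples.
    modulating = (quality_targets - sigma).abs().pow(beta)
    return (modulating * bce).sum()
```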
****
## Installation
### Requirements
* Linux or MacOS
* CUDA >= 10.2
* Python >= 3.7
* Pytorch >= 1.10.0, <2.0.0
### Step
1. Create a conda virtual environment and then activate it.
```shell script
conda create -n nanodet python=3.8 -y
conda activate nanodet
```
2. Install PyTorch
```shell script
conda install pytorch torchvision cudatoolkit=11.1 -c pytorch -c conda-forge
```
3. Clone this repository
```shell script
git clone https://github.com/RangiLyu/nanodet.git
cd nanodet
```
4. Install requirements
```shell script
pip install -r requirements.txt
```
- Note: if installing on Windows, first edit requirements.txt and change `pycocotools` to `pycocotools-windows`.
5. Set up NanoDet
```shell script
python setup.py develop
```
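After setup, a quick sanity check that the installation worked (the `nanodet` package is installed in development mode by the previous step; the CUDA check simply reports whether a GPU build of PyTorch is active):
```shell script
python -c "import torch, nanodet; print(torch.__version__, torch.cuda.is_available())"
```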
****
# Inferencing
* Inference images
```bash
python demo/demo.py image --config CONFIG_PATH --model MODEL_PATH --path IMAGE_PATH
```
* Inference video
```bash
python demo/demo.py video --config CONFIG_PATH --model MODEL_PATH --path VIDEO_PATH
```
* Inference webcam
```bash
python demo/demo.py webcam --config CONFIG_PATH --model MODEL_PATH --camid YOUR_CAMERA_ID
```
- When running inference, the following error may occur:
- OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized.
- To work around it, add the following line to demo.py:
os.environ['KMP_DUPLICATE_LIB_OK']='True'
- If CUDA is not available on your machine, also change 'cuda' to 'cpu' wherever it appears in the code.
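For reference, a minimal version of both workarounds looks like this. Placing the environment variable near the top of `demo/demo.py`, before PyTorch and OpenCV are imported, is an assumption about where it takes effect most reliably, and the device fallback is an illustration rather than the exact line used in the script:
```python
import os

# Allow duplicate OpenMP runtimes to coexist (works around OMP Error #15).
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

import torch

# Fall back to CPU automatically when no CUDA device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
```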
****
# Steps for Colab
1. Change the runtime to GPU
2. Mount drive
```shell script
from google.colab import drive
drive.mount('/content/drive')
```
3. Clone Repository
```shell script
!git clone https://github.com/SerinSV/Nanodet.git
```
4. Change into the cloned repository, then install the requirements and set up NanoDet
```shell script
!pip install -r requirements.txt
```
```shell script
!pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchtext==0.14.1 torchaudio==0.13.1 torchdata==0.5.1 --extra-index-url https://download.pytorch.org/whl/cu117
```
```shell script
!python setup.py develop
```
* Inference images
```bash
python demo/demo.py image --config CONFIG_PATH --model MODEL_PATH --path IMAGE_PATH
```
* Inference video
```bash
python demo/demo.py video --config CONFIG_PATH --model MODEL_PATH --path VIDEO_PATH
```
* Inference webcam
```bash
python demo/demo.py webcam --config CONFIG_PATH --model MODEL_PATH --camid YOUR_CAMERA_ID
```
In addition, we provide a notebook [here](https://github.com/SerinSV/Nanodet/blob/main/demo/Nanodet_colab.ipynb) that demonstrates how to run NanoDet in Colab.
****
## Model Zoo
NanoDet supports a variety of backbones. Go to the [***config*** folder](config/) to see the sample training config files.
Model | Backbone |Resolution|COCO mAP| FLOPS |Params | Pre-train weight |
:--------------------:|:------------------:|:--------:|:------:|:-----:|:-----:|:-----:|
NanoDet-m | ShuffleNetV2 1.0x | 320*320 | 20.6 | 0.72G | 0.95M | [Download](https://drive.google.com/file/d/1ZkYucuLusJrCb_i63Lid0kYyyLvEiGN3/view?usp=sharing) |
NanoDet-Plus-m-320 (***NEW***) | ShuffleNetV2 1.0x | 320*320 | 27.0 | 0.9G | 1.17M | [Weight](https://drive.google.com/file/d/1Dq0cTIdJDUhQxJe45z6rWncbZmOyh1Tv/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/1YvuEhahlgqxIhJu7bsL-fhaqubKcCWQc/view?usp=sharing)
NanoDet-Plus-m-416 (***NEW***) | ShuffleNetV2 1.0x | 416*416 | 30.4 | 1.52G | 1.17M | [Weight](https://drive.google.com/file/d/1FN3WK3FLjBm7oCqiwUcD3m3MjfqxuzXe/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/1gFjyrl7O8p5APr1ZOtWEm3tQNN35zi_W/view?usp=sharing)
NanoDet-Plus-m-1.5x-320 (***NEW***)| ShuffleNetV2 1.5x | 320*320 | 29.9 | 1.75G | 2.44M | [Weight](https://drive.google.com/file/d/1Xdlgu5lxiS3w6ER7GE1mZpY663wmpcyY/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/1qXR6t3TBMXlz6GlTU3fxiLA-eueYoGrW/view?usp=sharing)
NanoDet-Plus-m-1.5x-416 (***NEW***)| ShuffleNetV2 1.5x | 416*416 | 34.1 | 2.97G | 2.44M | [Weight](https://drive.google.com/file/d/16FJJJgUt5VrSKG7RM_ImdKKzhJ-Mu45I/view?usp=sharing) &#124; [Checkpoint](https://drive.google.com/file/d/17sdAUydlEXCrHMsxlDPLj5cGb-8-mmY6/view?usp=sharing)
*Notice*: The difference between `Weight` and `Checkpoint` is that the weight file only provides the parameters used at inference time, while the checkpoint also contains training-time parameters.
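One quick way to see the difference is to load a downloaded file with PyTorch and inspect its top-level keys. This is a generic sketch assuming ordinary files saved with `torch.save`; the file name is a placeholder:
```python
import torch

# Load on CPU so no GPU is needed just to inspect the file.
ckpt = torch.load("nanodet-plus-m_320.pth", map_location="cpu")  # placeholder path

# A plain weight file is typically a flat state_dict of tensors, while a
# training checkpoint wraps the weights together with extra training state.
if isinstance(ckpt, dict):
    print(list(ckpt.keys())[:10])
```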
****
## How to Train
1. **Prepare dataset**
If your dataset annotations are in Pascal VOC XML format, refer to [config/nanodet_custom_xml_dataset.yml](config/nanodet_custom_xml_dataset.yml)
Otherwise, if your dataset annotations are in YOLO format ([Darknet TXT](https://github.com/AlexeyAB/Yolo_mark/issues/60#issuecomment-401854885)), refer to [config/nanodet-plus-m_416-yolo.yml](config/nanodet-plus-m_416-yolo.yml)
Or convert your dataset annotations to MS COCO format [(COCO annotation format details)](https://cocodataset.org/#format-data).
2. **Prepare config file**
Copy and modify an example yml config file in the config/ folder.
Change ***save_dir*** to where you want to save the model.
Change ***num_classes*** in ***model->arch->head***.
Change the image path and annotation path in both ***data->train*** and ***data->val***.
Set gpu ids, num workers and batch size in ***device*** to fit your device.
Set ***total_epochs***, ***lr*** and ***lr_schedule*** according to your dataset and batch size.
If you want to modify the network, data augmentation or other settings, please refer to [Config File Detail](docs/config_file_detail.md)
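For example, the fields that most training runs need to touch are shown below (an abbreviated excerpt in the same layout as the full config files later in this document; all values are placeholders):
```yaml
save_dir: workspace/my_experiment          # where checkpoints and logs are written
model:
  arch:
    head:
      num_classes: 3                       # number of classes in your dataset
data:
  train:
    img_path: path/to/train/images
    ann_path: path/to/train/annotations.json
  val:
    img_path: path/to/val/images
    ann_path: path/to/val/annotations.json
device:
  gpu_ids: [0]
  workers_per_gpu: 8
  batchsize_per_gpu: 96
schedule:
  total_epochs: 300
  optimizer:
    lr: 0.001
```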
3. **Start training**
NanoDet now uses [pytorch lightning](https://github.com/PyTorchLightning/pytorch-lightning) for training.
For both single-GPU and multi-GPU training, run:
```shell script
python tools/train.py CONFIG_FILE_PATH
```
4. **Visualize Logs**
TensorBoard logs are saved in the `save_dir` that you set in the config file.
To visualize them, run:
```shell script
cd <YOUR_SAVE_DIR>
tensorboard --logdir ./
```
****
save_dir: workspace/convnext/nanodet-plus_convnext-nano_640
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: TIMMWrapper
model_name: convnext_nano
features_only: True
pretrained: True
# output_stride: 32
out_indices: [1, 2, 3]
fpn:
name: GhostPAN
in_channels: [160, 320, 640]
out_channels: 128
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: SiLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: SiLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 256
feat_channels: 256
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: SiLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [640,640] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.1, 2.0]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [640,640] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0, 1, 2, 3]
workers_per_gpu: 8
batchsize_per_gpu: 24
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
no_norm_decay: True
param_level_cfg:
backbone:
lr_mult: 0.1
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 50
lr_schedule:
name: CosineAnnealingLR
T_max: 50
eta_min: 0.0005
val_intervals: 5
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-EfficientNet-Lite0_320
# COCO mAP(0.5:0.95) = 0.247
# AP_50 = 0.404
# AP_75 = 0.250
# AP_small = 0.079
# AP_m = 0.243
# AP_l = 0.406
save_dir: workspace/efficient0_320
model:
arch:
name: OneStageDetector
backbone:
name: EfficientNetLite
model_name: efficientnet_lite0
out_stages: [2,4,6]
activation: ReLU6
fpn:
name: PAN
in_channels: [40, 112, 320]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
activation: ReLU6
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
device:
gpu_ids: [0]
workers_per_gpu: 12
batchsize_per_gpu: 150
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.15
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 190
lr_schedule:
name: MultiStepLR
milestones: [140,170,180,185]
gamma: 0.1
val_intervals: 1
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-EfficientNet-Lite1_416
# COCO mAP(0.5:0.95) = 0.303
# AP_50 = 0.471
# AP_75 = 0.313
# AP_small = 0.122
# AP_m = 0.321
# AP_l = 0.432
save_dir: workspace/efficient1_416_SGD
model:
arch:
name: OneStageDetector
backbone:
name: EfficientNetLite
model_name: efficientnet_lite1
out_stages: [2,4,6]
activation: ReLU6
pretrain: True
fpn:
name: PAN
in_channels: [40, 112, 320]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 3
activation: ReLU6
share_cls_reg: True
octave_base_scale: 8
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
device:
gpu_ids: [0]
workers_per_gpu: 12
batchsize_per_gpu: 100
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.07
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 170
lr_schedule:
name: MultiStepLR
milestones: [130,150,160,165]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-EfficientNet-Lite2_512
# COCO mAP(0.5:0.95) = 0.326
# AP_50 = 0.501
# AP_75 = 0.344
# AP_small = 0.152
# AP_m = 0.342
# AP_l = 0.481
save_dir: workspace/efficientlite2_512
model:
arch:
name: OneStageDetector
backbone:
name: EfficientNetLite
model_name: efficientnet_lite2
out_stages: [2,4,6]
activation: ReLU6
pretrain: True
fpn:
name: PAN
in_channels: [48, 120, 352]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 4
activation: ReLU6
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [512,512] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [512,512] #[w,h]
keep_ratio: True
pipeline:
normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]]
device:
gpu_ids: [0]
workers_per_gpu: 12
batchsize_per_gpu: 60
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.06
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 135
lr_schedule:
name: MultiStepLR
milestones: [90,110,120,130]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-RepVGG-A0_416
save_dir: workspace/RepVGG-A0-416
model:
arch:
name: OneStageDetector
backbone:
name: RepVGG
arch: A0
out_stages: [2,3,4]
activation: ReLU
last_channel: 512
deploy: False
fpn:
name: PAN
in_channels: [96, 192, 512]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
conv_type: Conv
input_channel: 128
feat_channels: 128
stacked_convs: 2
activation: ReLU
share_cls_reg: True
octave_base_scale: 8
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: /coco/train2017
ann_path: /coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: /coco/val2017
ann_path: /coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 1
batchsize_per_gpu: 100
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.07
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 170
lr_schedule:
name: MultiStepLR
milestones: [130,150,160,165]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# NanoDet-m with transformer attention
# COCO mAP(0.5:0.95) = 0.217
# AP_50 = 0.363
# AP_75 = 0.218
# AP_small = 0.069
# AP_m = 0.214
# AP_l = 0.364
save_dir: workspace/nanodet_t
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: TAN # transformer attention network
in_channels: [116, 232, 464]
out_channels: 128
feature_hw: [20,20] # size for position embedding
num_heads: 8
num_encoders: 1
mlp_ratio: 4
dropout_ratio: 0.1
activation: LeakyReLU
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.8, 1.2]
saturation: [0.8, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 160
schedule:
resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 190
lr_schedule:
name: MultiStepLR
milestones: [140,170,180,185]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# NanoDet-g-416 is designed for edge NPU, GPU or TPU with high parallel computing power but low memory bandwidth
# COCO mAP(0.5:0.95) = 22.9
# Flops = 4.2B
# Params = 3.8M
# COCO pre-trained weight link: https://drive.google.com/file/d/10uW7oqZKw231l_tr4C1bJWkbCXgBf7av/view?usp=sharing
save_dir: workspace/nanodet_g
model:
arch:
name: OneStageDetector
backbone:
name: CustomCspNet
net_cfg: [[ 'Conv', 3, 32, 3, 2], # 1/2
[ 'MaxPool', 3, 2 ], # 1/4
[ 'CspBlock', 32, 1, 3, 1 ], # 1/4
[ 'CspBlock', 64, 2, 3, 2 ], # 1/8
[ 'CspBlock', 128, 2, 3, 2 ], # 1/16
[ 'CspBlock', 256, 3, 3, 2 ]] # 1/32
out_stages: [3,4,5]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [128, 256, 512]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
conv_type: Conv
activation: LeakyReLU
input_channel: 128
feat_channels: 128
stacked_convs: 1
share_cls_reg: True
octave_base_scale: 8
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 10
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 128
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.1
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 500
ratio: 0.01
total_epochs: 190
lr_schedule:
name: MultiStepLR
milestones: [130,160,175,185]
gamma: 0.1
val_intervals: 5
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-m-0.5x
# COCO mAP(0.5:0.95) = 0.135
# AP_50 = 0.245
# AP_75 = 0.129
# AP_small = 0.036
# AP_m = 0.119
# AP_l = 0.232
save_dir: workspace/nanodet_m_0.5x
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 0.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [48, 96, 192]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.5]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 96
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.07
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 1000
ratio: 0.00001
total_epochs: 180
lr_schedule:
name: MultiStepLR
milestones: [130,160,175]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#nanodet-m-1.5x-416
# COCO mAP(0.5:0.95) = 0.268
# AP_50 = 0.424
# AP_75 = 0.276
# AP_small = 0.098
# AP_m = 0.277
# AP_l = 0.420
save_dir: workspace/nanodet_m_1.5x_416
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [176, 352, 704]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 176
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#nanodet-m-1.5x
# COCO mAP(0.5:0.95) = 0.235
# AP_50 = 0.384
# AP_75 = 0.239
# AP_small = 0.069
# AP_m = 0.235
# AP_l = 0.389
save_dir: workspace/nanodet_m_1.5x
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [176, 352, 704]
out_channels: 128
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 192
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#nanodet-m-416
# COCO mAP(0.5:0.95) = 0.235
# AP_50 = 0.384
# AP_75 = 0.242
# AP_small = 0.082
# AP_m = 0.240
# AP_l = 0.375
save_dir: workspace/nanodet_m_416
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [116, 232, 464]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.5, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 192
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
#Config File example
save_dir: workspace/nanodet_m
model:
arch:
name: OneStageDetector
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: PAN
in_channels: [116, 232, 464]
out_channels: 96
start_level: 0
num_outs: 3
head:
name: NanoDetHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
share_cls_reg: True
octave_base_scale: 5
scales_per_octave: 1
strides: [8, 16, 32]
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[1, 1], [1, 1]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: True
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 8
batchsize_per_gpu: 192
schedule:
# resume:
# load_model: YOUR_MODEL_PATH
optimizer:
name: SGD
lr: 0.14
momentum: 0.9
weight_decay: 0.0001
warmup:
name: linear
steps: 300
ratio: 0.1
total_epochs: 280
lr_schedule:
name: MultiStepLR
milestones: [240,260,275]
gamma: 0.1
val_intervals: 10
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 10
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m-1.5x_320
# COCO mAP(0.5:0.95) = 0.299
# AP_50 = 0.454
# AP_75 = 0.312
# AP_small = 0.102
# AP_m = 0.309
# AP_l = 0.493
save_dir: workspace/nanodet-plus-m-1.5x_320
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [176, 352, 704]
out_channels: 128
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 256
feat_channels: 256
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m-1.5x_416
# COCO mAP(0.5:0.95) = 0.341
# AP_50 = 0.506
# AP_75 = 0.357
# AP_small = 0.143
# AP_m = 0.363
# AP_l = 0.539
save_dir: workspace/nanodet-plus-m-1.5x_416
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.5x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [176, 352, 704]
out_channels: 128
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 128
feat_channels: 128
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 256
feat_channels: 256
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m_320
# COCO mAP(0.5:0.95) = 0.270
# AP_50 = 0.418
# AP_75 = 0.281
# AP_small = 0.083
# AP_m = 0.278
# AP_l = 0.451
save_dir: workspace/nanodet-plus-m_320
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
# Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [320,320] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0] # Set like [0, 1, 2, 3] if you have multi-GPUs
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m_416
# COCO mAP(0.5:0.95) = 0.304
# AP_50 = 0.459
# AP_75 = 0.317
# AP_small = 0.106
# AP_m = 0.322
# AP_l = 0.477
save_dir: /content/drive/MyDrive/Nanodet_again/nanodet2/workspace/nanodet-custom
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
    # Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
class_names: &class_names ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
data:
train:
name: YoloDataset
img_path: coco/train2017
ann_path: coco/train2017
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: YoloDataset
img_path: coco/val2017
ann_path: coco/val2017
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
# nanodet-plus-m_416
# COCO mAP(0.5:0.95) = 0.304
# AP_50 = 0.459
# AP_75 = 0.317
# AP_small = 0.106
# AP_m = 0.322
# AP_l = 0.477
save_dir: /content/drive/MyDrive/Nanodet_again/nanodet2/workspace/nanodet-custom
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 80
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
    # Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 80
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
data:
train:
name: CocoDataset
img_path: coco/train2017
ann_path: coco/annotations/instances_train2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: CocoDataset
img_path: coco/val2017
ann_path: coco/annotations/instances_val2017.json
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 96
precision: 32 # set to 16 to use AMP training
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 50
class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
# nanodet-plus-m_416
# COCO mAP(0.5:0.95) = 0.304
# AP_50 = 0.459
# AP_75 = 0.317
# AP_small = 0.106
# AP_m = 0.322
# AP_l = 0.477
save_dir: /content/drive/MyDrive/Nanodet_again/nanodet2/workspace/nanodet-custom
model:
weight_averager:
name: ExpMovingAverager
decay: 0.9998
arch:
name: NanoDetPlus
detach_epoch: 10
backbone:
name: ShuffleNetV2
model_size: 1.0x
out_stages: [2,3,4]
activation: LeakyReLU
fpn:
name: GhostPAN
in_channels: [116, 232, 464]
out_channels: 96
kernel_size: 5
num_extra_level: 1
use_depthwise: True
activation: LeakyReLU
head:
name: NanoDetPlusHead
num_classes: 10
input_channel: 96
feat_channels: 96
stacked_convs: 2
kernel_size: 5
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
norm_cfg:
type: BN
loss:
loss_qfl:
name: QualityFocalLoss
use_sigmoid: True
beta: 2.0
loss_weight: 1.0
loss_dfl:
name: DistributionFocalLoss
loss_weight: 0.25
loss_bbox:
name: GIoULoss
loss_weight: 2.0
    # Auxiliary head, only used at training time.
aux_head:
name: SimpleConvHead
num_classes: 10
input_channel: 192
feat_channels: 192
stacked_convs: 4
strides: [8, 16, 32, 64]
activation: LeakyReLU
reg_max: 7
class_names: &class_names ['cat','chicken','cow','dog','fox','goat','horse','person','racoon','skunk']
data:
train:
name: YoloDataset
img_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/train/images
ann_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/train/labels
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
perspective: 0.0
scale: [0.6, 1.4]
stretch: [[0.8, 1.2], [0.8, 1.2]]
rotation: 0
shear: 0
translate: 0.2
flip: 0.5
brightness: 0.2
contrast: [0.6, 1.4]
saturation: [0.5, 1.2]
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
val:
name: YoloDataset
img_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/valid/images
ann_path: /content/drive/MyDrive/Nanodet_again/yolo_dataset/valid/labels
class_names: *class_names
input_size: [416,416] #[w,h]
keep_ratio: False
pipeline:
normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
device:
gpu_ids: [0]
workers_per_gpu: 10
batchsize_per_gpu: 32
schedule:
# resume:
# load_model:
optimizer:
name: AdamW
lr: 0.001
weight_decay: 0.05
warmup:
name: linear
steps: 500
ratio: 0.0001
total_epochs: 300
lr_schedule:
name: CosineAnnealingLR
T_max: 300
eta_min: 0.00005
val_intervals: 10
grad_clip: 35
evaluator:
name: CocoDetectionEvaluator
save_key: mAP
log:
interval: 2
import argparse
import os
import time
import cv2
import torch
from nanodet.data.batch_process import stack_batch_img
from nanodet.data.collate import naive_collate
from nanodet.data.transform import Pipeline
from nanodet.model.arch import build_model
from nanodet.util import Logger, cfg, load_config, load_model_weight
from nanodet.util.path import mkdir
image_ext = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]
video_ext = ["mp4", "mov", "avi", "mkv"]
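# Allow duplicate OpenMP runtimes to coexist; a common workaround for "OMP: Error #15" aborts when several libraries bundle libiomp5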
os.environ['KMP_DUPLICATE_LIB_OK']='True'
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"demo", default="image", help="demo type, eg. image, video and webcam"
)
parser.add_argument("--config", help="model config file path")
parser.add_argument("--model", help="model file path")
parser.add_argument("--path", default="./demo", help="path to images or video")
parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id")
parser.add_argument(
"--save_result",
action="store_true",
help="whether to save the inference result of image/video",
)
args = parser.parse_args()
return args
class Predictor(object):
def __init__(self, cfg, model_path, logger, device="cpu"):
self.cfg = cfg
self.device = device
model = build_model(cfg.model)
ckpt = torch.load(model_path, map_location=lambda storage, loc: storage)
load_model_weight(model, ckpt, logger)
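        # RepVGG backbones are trained with multi-branch blocks; convert them to the single-conv deploy form before inference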
if cfg.model.arch.backbone.name == "RepVGG":
deploy_config = cfg.model
deploy_config.arch.backbone.update({"deploy": True})
deploy_model = build_model(deploy_config)
from nanodet.model.backbone.repvgg import repvgg_det_model_convert
model = repvgg_det_model_convert(model, deploy_model)
self.model = model.to(device).eval()
self.pipeline = Pipeline(cfg.data.val.pipeline, cfg.data.val.keep_ratio)
def inference(self, img):
img_info = {"id": 0}
if isinstance(img, str):
img_info["file_name"] = os.path.basename(img)
img = cv2.imread(img)
else:
img_info["file_name"] = None
height, width = img.shape[:2]
img_info["height"] = height
img_info["width"] = width
meta = dict(img_info=img_info, raw_img=img, img=img)
meta = self.pipeline(None, meta, self.cfg.data.val.input_size)
meta["img"] = torch.from_numpy(meta["img"].transpose(2, 0, 1)).to(self.device)
meta = naive_collate([meta])
meta["img"] = stack_batch_img(meta["img"], divisible=32)
with torch.no_grad():
t0 = time.time()
results = self.model.inference(meta)
t1 = time.time()
diff = t1 - t0
            fps = 1 / diff
            print("fps:", fps)
return meta, results
def visualize(self, dets, meta, class_names, score_thres, wait=0):
time1 = time.time()
result_img = self.model.head.show_result(
meta["raw_img"][0], dets, class_names, score_thres=score_thres, show=True
)
print("viz time: {:.3f}s".format(time.time() - time1))
# diff = time.time() - time1
# fps = 1000/ (diff)
# print("fps: ", str(fps))
return result_img
def get_image_list(path):
image_names = []
for maindir, subdir, file_name_list in os.walk(path):
for filename in file_name_list:
apath = os.path.join(maindir, filename)
ext = os.path.splitext(apath)[1]
if ext in image_ext:
image_names.append(apath)
return image_names
def main():
args = parse_args()
local_rank = 0
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True
load_config(cfg, args.config)
logger = Logger(local_rank, use_tensorboard=False)
predictor = Predictor(cfg, args.model, logger, device="cpu")
logger.log('Press "Esc", "q" or "Q" to exit.')
current_time = time.localtime()
if args.demo == "image":
if os.path.isdir(args.path):
files = get_image_list(args.path)
else:
files = [args.path]
files.sort()
for image_name in files:
meta, res = predictor.inference(image_name)
result_image = predictor.visualize(res[0], meta, cfg.class_names, 0.35)
if args.save_result:
save_folder = os.path.join(
cfg.save_dir, time.strftime("%Y_%m_%d_%H_%M_%S", current_time)
)
mkdir(local_rank, save_folder)
save_file_name = os.path.join(save_folder, os.path.basename(image_name))
cv2.imwrite(save_file_name, result_image)
ch = cv2.waitKey(1)
if ch == 27 or ch == ord("q") or ch == ord("Q"):
break
elif args.demo == "video" or args.demo == "webcam":
cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid)
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float
fps = cap.get(cv2.CAP_PROP_FPS)
save_folder = os.path.join(
cfg.save_dir, time.strftime("%Y_%m_%d_%H_%M_%S", current_time)
)
mkdir(local_rank, save_folder)
save_path = (
os.path.join(save_folder, args.path.replace("\\", "/").split("/")[-1])
if args.demo == "video"
else os.path.join(save_folder, "camera.mp4")
)
print(f"save_path is {save_path}")
vid_writer = cv2.VideoWriter(
save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
)
while True:
ret_val, frame = cap.read()
if ret_val:
meta, res = predictor.inference(frame)
result_frame = predictor.visualize(res[0], meta, cfg.class_names, 0.35)
if args.save_result:
vid_writer.write(result_frame)
ch = cv2.waitKey(1)
if ch == 27 or ch == ord("q") or ch == ord("Q"):
break
else:
break
if __name__ == "__main__":
main()
*.iml
.gradle
/local.properties
/.idea
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
# NanoDet NCNN Android Demo
This repo is an Android object detection demo of NanoDet using
[Tencent's NCNN framework](https://github.com/Tencent/ncnn).
# Tutorial
## Step1.
Download ncnn-android-vulkan.zip from the ncnn repo, or build ncnn-android from source.
- [ncnn-android-vulkan.zip download link](https://github.com/Tencent/ncnn/releases)
## Step2.
Unzip ncnn-android-vulkan.zip into demo_android_ncnn/app/src/main/cpp, or change the ncnn_DIR path in demo_android_ncnn/app/src/main/cpp/CMakeLists.txt to point to your copy.
```bash
# e.g. change to `ncnn-20211208-android-vulkan` if you downloaded the 20211208 release
set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20211208-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
```
## Step3.
Copy the NanoDet ncnn model files from the models folder into demo_android_ncnn/app/src/main/assets and rename them to nanodet.param and nanodet.bin.
* [NanoDet ncnn model download link](https://drive.google.com/file/d/1cuVBJiFKwyq1-l3AwHoP2boTesUQP-6K/view?usp=sharing)
If you want to run yolov4-tiny or yolov5s, download them and put them in demo_android_ncnn/app/src/main/assets as well.
* [Yolov4 and v5 ncnn model download link](https://drive.google.com/file/d/1Qk_1fDvOcFmNppDnaMFW-xFpMgLDyeAs/view?usp=sharing)
## Step4.
Open the demo_android_ncnn folder in Android Studio and build it.
# Screenshot
![](Android_demo.jpg)
# Notice
* The FPS shown in the app includes pre-processing, post-processing and visualization, so it is not equal to the raw model inference time.
* If you meet an error like `No version of NDK matched the requested version`, set `ndkVersion` inside the `android { ... }` block to your installed NDK version.
* If you want to use a custom model, remember to change the hyperparameters in `demo_android_ncnn/app/src/main/cpp/NanoDet.h` to match your training config, as in the sketch below.
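
For orientation, here is a minimal sketch (illustrative only, assuming the custom 10-class, 416x416 config committed in this repo) of the `NanoDet.h` fields that have to stay in sync with the training YAML; the `NanoDetConfig` struct name is made up for the example and does not appear in the demo sources.

```cpp
// Illustrative sketch, not part of demo_android_ncnn: the NanoDet.h parameters that
// must mirror the training config. Values assume the custom 10-class 416x416 config
// in this commit; replace them with your own.
#include <string>
#include <vector>

struct NanoDetConfig {
    int input_size[2] = {416, 416};              // input height and width (input_size in the YAML)
    int num_class = 10;                          // head.num_classes in the YAML
    int reg_max = 7;                             // head.reg_max in the YAML
    std::vector<int> strides = {8, 16, 32, 64};  // head.strides in the YAML
    std::vector<std::string> labels = {          // class_names in the YAML
        "cat", "chicken", "cow", "dog", "fox",
        "goat", "horse", "person", "racoon", "skunk"};
};
```

If these values drift from the YAML, the on-device decoding (center priors, DFL bins, label lookup) will not line up with the exported model.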
# Reference
* [ncnn](https://github.com/tencent/ncnn)
* [YOLOv5_NCNN](https://github.com/WZTENG/YOLOv5_NCNN)
apply plugin: 'com.android.application'
android {
compileSdkVersion 29
buildToolsVersion "29.0.3"
defaultConfig {
applicationId "com.rangi.nanodet"
minSdkVersion 26
targetSdkVersion 29
versionCode 1
versionName "1.0"
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
externalNativeBuild {
cmake {
cppFlags ""
arguments '-DANDROID_PLATFORM=android-24', '-DANDROID_STL=c++_static', '-DANDROID_STL=c++_shared'
}
}
ndk {
moduleName "NcnnJniLog"
ldLibs "log", "z", "m"
abiFilters "armeabi-v7a", "arm64-v8a"
}
multiDexEnabled true
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
externalNativeBuild {
cmake {
path "src/main/cpp/CMakeLists.txt"
version "3.10.2"
}
}
sourceSets {
main {
jniLibs.srcDirs = ['libs']
}
}
repositories {
flatDir {
dirs 'libs'
}
}
}
dependencies {
implementation fileTree(dir: 'libs', include: ['*.jar'])
implementation 'androidx.appcompat:appcompat:1.1.0'
implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
testImplementation 'junit:junit:4.12'
androidTestImplementation 'androidx.test.ext:junit:1.1.1'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.2.0'
    // Use the most recent version of CameraX, currently that is alpha05
def camerax_version = "1.0.0-alpha05"
//noinspection GradleDependency
implementation "androidx.camera:camera-core:${camerax_version}"
//noinspection GradleDependency
implementation "androidx.camera:camera-camera2:${camerax_version}"
implementation 'com.android.support:multidex:1.0.3'
// crash
implementation 'com.zxy.android:recovery:1.0.0'
// photoview
implementation 'com.github.chrisbanes:PhotoView:2.3.0'
// implementation 'com.bm.photoview:library:1.4.1'
}
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
package com.rangi.nanodet;
import android.content.Context;
import androidx.test.platform.app.InstrumentationRegistry;
import androidx.test.ext.junit.runners.AndroidJUnit4;
import org.junit.Test;
import org.junit.runner.RunWith;
import static org.junit.Assert.*;
/**
* Instrumented test, which will execute on an Android device.
*
* @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
*/
@RunWith(AndroidJUnit4.class)
public class ExampleInstrumentedTest {
@Test
public void useAppContext() {
// Context of the app under test.
Context appContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
assertEquals("gd.hq.yolov5", appContext.getPackageName());
}
}
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.rangi.nanodet">
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.CAMERA" />
<application
android:name="com.rangi.nanodet.NcnnApp"
android:allowBackup="true"
android:icon="@drawable/ncnn_icon"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:requestLegacyExternalStorage="true"
android:theme="@style/AppTheme">
<activity android:name="com.rangi.nanodet.WelcomeActivity">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
<activity
android:name="com.rangi.nanodet.MainActivity"
android:theme="@style/AppNoTitleTheme" />
</application>
</manifest>
cmake_minimum_required(VERSION 3.10)
set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20211208-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
find_package(ncnn REQUIRED)
add_library(yolov5 SHARED
jni_interface.cpp
YoloV5.cpp
YoloV4.cpp
NanoDet.cpp
)
target_link_libraries(yolov5 ncnn jnigraphics)
//
// Created by RangiLyu
// 2020 / 10 / 2
//
#include "NanoDet.h"
bool NanoDet::hasGPU = true;
NanoDet* NanoDet::detector = nullptr;
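// Fast approximation of exp(x) that writes the IEEE-754 bit pattern directly (Schraudolph's trick); 1.4426950409 is log2(e)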
inline float fast_exp(float x)
{
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x)
{
return 1.0f / (1.0f + fast_exp(-x));
}
template<typename _Tp>
int activation_function_softmax(const _Tp* src, _Tp* dst, int length)
{
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{ 0 };
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
static void generate_grid_center_priors(const int input_height, const int input_width, std::vector<int>& strides, std::vector<CenterPrior>& center_priors)
{
for (int i = 0; i < (int)strides.size(); i++)
{
int stride = strides[i];
int feat_w = ceil((float)input_width / stride);
int feat_h = ceil((float)input_height / stride);
for (int y = 0; y < feat_h; y++)
{
for (int x = 0; x < feat_w; x++)
{
CenterPrior ct;
ct.x = x;
ct.y = y;
ct.stride = stride;
center_priors.push_back(ct);
}
}
}
}
NanoDet::NanoDet(AAssetManager *mgr, const char *param, const char *bin, bool useGPU) {
this->Net = new ncnn::Net();
hasGPU = ncnn::get_gpu_count() > 0;
this->Net->opt.use_vulkan_compute = false; //hasGPU && useGPU; // gpu
this->Net->opt.use_fp16_arithmetic = true;
this->Net->opt.use_fp16_packed = true;
this->Net->opt.use_fp16_storage = true;
this->Net->load_param(mgr, param);
this->Net->load_model(mgr, bin);
}
NanoDet::~NanoDet()
{
delete this->Net;
}
void NanoDet::preprocess(JNIEnv *env, jobject image, ncnn::Mat& in)
{
in = ncnn::Mat::from_android_bitmap_resize(env, image, ncnn::Mat::PIXEL_RGBA2BGR, input_size[1], input_size[0]);
// in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h);
//in = ncnn::Mat::from_pixels_resize(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h, this->input_width, this->input_height);
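    // mean/std match the normalize values in the training config ([103.53, 116.28, 123.675] and [57.375, 57.12, 58.395]); norm_vals are 1/std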
const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
in.substract_mean_normalize(mean_vals, norm_vals);
}
std::vector<BoxInfo> NanoDet::detect(JNIEnv *env, jobject image, float score_threshold, float nms_threshold) {
AndroidBitmapInfo img_size;
AndroidBitmap_getInfo(env, image, &img_size);
float width_ratio = (float) img_size.width / (float) this->input_size[1];
float height_ratio = (float) img_size.height / (float) this->input_size[0];
ncnn::Mat input;
this->preprocess(env, image, input);
auto ex = this->Net->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(4);
hasGPU = ncnn::get_gpu_count() > 0;
//ex.set_vulkan_compute(hasGPU);
ex.input("data", input);
std::vector<std::vector<BoxInfo>> results;
results.resize(this->num_class);
ncnn::Mat out;
ex.extract("output", out);
// printf("%d %d %d \n", out.w, out.h, out.c);
// generate center priors in format of (x, y, stride)
std::vector<CenterPrior> center_priors;
generate_grid_center_priors(this->input_size[0], this->input_size[1], this->strides, center_priors);
this->decode_infer(out, center_priors, score_threshold, results, width_ratio, height_ratio);
std::vector<BoxInfo> dets;
for (int i = 0; i < (int)results.size(); i++)
{
this->nms(results[i], nms_threshold);
for (auto box : results[i])
{
dets.push_back(box);
}
}
return dets;
}
void NanoDet::decode_infer(ncnn::Mat& feats, std::vector<CenterPrior>& center_priors, float threshold, std::vector<std::vector<BoxInfo>>& results, float width_ratio, float height_ratio)
{
const int num_points = center_priors.size();
//printf("num_points:%d\n", num_points);
//cv::Mat debug_heatmap = cv::Mat(feature_h, feature_w, CV_8UC3);
for (int idx = 0; idx < num_points; idx++)
{
const int ct_x = center_priors[idx].x;
const int ct_y = center_priors[idx].y;
const int stride = center_priors[idx].stride;
const float* scores = feats.row(idx);
float score = 0;
int cur_label = 0;
for (int label = 0; label < this->num_class; label++)
{
if (scores[label] > score)
{
score = scores[label];
cur_label = label;
}
}
if (score > threshold)
{
//std::cout << "label:" << cur_label << " score:" << score << std::endl;
const float* bbox_pred = feats.row(idx) + this->num_class;
results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, ct_x, ct_y, stride, width_ratio, height_ratio));
//debug_heatmap.at<cv::Vec3b>(row, col)[0] = 255;
//cv::imshow("debug", debug_heatmap);
}
}
}
BoxInfo NanoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride, float width_ratio, float height_ratio)
{
float ct_x = x * stride;
float ct_y = y * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
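    // Decode the DFL output: each of the 4 box-side distances is the expectation over
    // the (reg_max + 1) softmax bins, scaled by the stride of this feature level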
for (int i = 0; i < 4; i++)
{
float dis = 0;
float* dis_after_sm = new float[this->reg_max + 1];
activation_function_softmax(dfl_det + i * (this->reg_max + 1), dis_after_sm, this->reg_max + 1);
for (int j = 0; j < this->reg_max + 1; j++)
{
dis += j * dis_after_sm[j];
}
dis *= stride;
//std::cout << "dis:" << dis << std::endl;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
float xmin = (std::max)(ct_x - dis_pred[0], .0f) * width_ratio;
float ymin = (std::max)(ct_y - dis_pred[1], .0f) * height_ratio;
float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[1]) * width_ratio;
float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[0]) * height_ratio;
//std::cout << xmin << "," << ymin << "," << xmax << "," << xmax << "," << std::endl;
return BoxInfo { xmin, ymin, xmax, ymax, score, label };
}
void NanoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH)
{
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else {
j++;
}
}
}
}
//
// Created by RangiLyu
// 2020 / 10 / 2
//
#ifndef NANODET_H
#define NANODET_H
#include "net.h"
#include "YoloV5.h"
typedef struct HeadInfo_
{
std::string cls_layer;
std::string dis_layer;
int stride;
} HeadInfo;
typedef struct CenterPrior_
{
int x;
int y;
int stride;
} CenterPrior;
class NanoDet{
public:
NanoDet(AAssetManager *mgr, const char *param, const char *bin, bool useGPU);
~NanoDet();
std::vector<BoxInfo> detect(JNIEnv *env, jobject image, float score_threshold, float nms_threshold);
std::vector<std::string> labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
private:
void preprocess(JNIEnv *env, jobject image, ncnn::Mat& in);
void decode_infer(ncnn::Mat& feats, std::vector<CenterPrior>& center_priors, float threshold, std::vector<std::vector<BoxInfo>>& results, float width_ratio, float height_ratio);
BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride, float width_ratio, float height_ratio);
static void nms(std::vector<BoxInfo>& result, float nms_threshold);
ncnn::Net *Net;
    // modify these parameters to match your training config if you want to use your own model
int input_size[2] = {320, 320}; // input height and width
int num_class = 80; // number of classes. 80 for COCO
int reg_max = 7; // `reg_max` set in the training config. Default: 7.
std::vector<int> strides = { 8, 16, 32, 64 }; // strides of the multi-level feature.
public:
static NanoDet *detector;
static bool hasGPU;
};
#endif //NANODET_H
#include "YoloV4.h"
bool YoloV4::hasGPU = true;
YoloV4 *YoloV4::detector = nullptr;
YoloV4::YoloV4(AAssetManager *mgr, const char *param, const char *bin, bool useGPU) {
Net = new ncnn::Net();
    // opt must be set before loading the model
hasGPU = ncnn::get_gpu_count() > 0;
Net->opt.use_vulkan_compute = hasGPU && useGPU; // gpu
    Net->opt.use_fp16_arithmetic = true; // fp16 arithmetic for faster inference
Net->load_param(mgr, param);
Net->load_model(mgr, bin);
}
YoloV4::~YoloV4() {
delete Net;
}
std::vector<BoxInfo> YoloV4::detect(JNIEnv *env, jobject image, float threshold, float nms_threshold) {
AndroidBitmapInfo img_size;
AndroidBitmap_getInfo(env, image, &img_size);
ncnn::Mat in_net = ncnn::Mat::from_android_bitmap_resize(env, image, ncnn::Mat::PIXEL_RGBA2RGB, input_size,
input_size);
float norm[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
float mean[3] = {0, 0, 0};
in_net.substract_mean_normalize(mean, norm);
auto ex = Net->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(4);
hasGPU = ncnn::get_gpu_count() > 0;
ex.set_vulkan_compute(hasGPU);
ex.input(0, in_net);
std::vector<BoxInfo> result;
ncnn::Mat blob;
ex.extract("output", blob);
auto boxes = decode_infer(blob, {(int) img_size.width, (int) img_size.height}, input_size, num_class, threshold);
result.insert(result.begin(), boxes.begin(), boxes.end());
// nms(result,nms_threshold);
return result;
}
inline float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x) {
return 1.0f / (1.0f + fast_exp(-x));
}
std::vector<BoxInfo>
YoloV4::decode_infer(ncnn::Mat &data, const yolocv::YoloSize &frame_size, int net_size, int num_classes, float threshold) {
std::vector<BoxInfo> result;
for (int i = 0; i < data.h; i++) {
BoxInfo box;
const float *values = data.row(i);
box.label = values[0] - 1;
box.score = values[1];
box.x1 = values[2] * (float) frame_size.width;
box.y1 = values[3] * (float) frame_size.height;
box.x2 = values[4] * (float) frame_size.width;
box.y2 = values[5] * (float) frame_size.height;
result.push_back(box);
}
return result;
}
#ifndef YOLOV4_H
#define YOLOV4_H
#include "net.h"
#include "YoloV5.h"
class YoloV4 {
public:
YoloV4(AAssetManager *mgr, const char *param, const char *bin, bool useGPU);
~YoloV4();
std::vector<BoxInfo> detect(JNIEnv *env, jobject image, float threshold, float nms_threshold);
std::vector<std::string> labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
private:
static std::vector<BoxInfo>
decode_infer(ncnn::Mat &data, const yolocv::YoloSize &frame_size, int net_size, int num_classes, float threshold);
// static void nms(std::vector<BoxInfo>& result,float nms_threshold);
ncnn::Net *Net;
int input_size = 640 / 2;
int num_class = 80;
public:
static YoloV4 *detector;
static bool hasGPU;
};
#endif //YOLOV4_H
//
// Created by 邓昊晴 on 14/6/2020.
//
#include "YoloV5.h"
bool YoloV5::hasGPU = true;
YoloV5 *YoloV5::detector = nullptr;
YoloV5::YoloV5(AAssetManager *mgr, const char *param, const char *bin, bool useGPU) {
Net = new ncnn::Net();
    // opt must be set before loading the model
hasGPU = ncnn::get_gpu_count() > 0;
Net->opt.use_vulkan_compute = hasGPU && useGPU; // gpu
    Net->opt.use_fp16_arithmetic = true; // fp16 arithmetic for faster inference
Net->load_param(mgr, param);
Net->load_model(mgr, bin);
}
YoloV5::~YoloV5() {
delete Net;
}
std::vector<BoxInfo> YoloV5::detect(JNIEnv *env, jobject image, float threshold, float nms_threshold) {
AndroidBitmapInfo img_size;
AndroidBitmap_getInfo(env, image, &img_size);
// ncnn::Mat in_net = ncnn::Mat::from_android_bitmap_resize(env,image,ncnn::Mat::PIXEL_BGR2RGB,input_size/2,input_size/2);
ncnn::Mat in_net = ncnn::Mat::from_android_bitmap_resize(env, image, ncnn::Mat::PIXEL_RGBA2RGB, input_size / 2,
input_size / 2);
float norm[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
float mean[3] = {0, 0, 0};
in_net.substract_mean_normalize(mean, norm);
auto ex = Net->create_extractor();
ex.set_light_mode(true);
ex.set_num_threads(4);
hasGPU = ncnn::get_gpu_count() > 0;
ex.set_vulkan_compute(hasGPU);
ex.input(0, in_net);
std::vector<BoxInfo> result;
for (const auto &layer: layers) {
ncnn::Mat blob;
ex.extract(layer.name.c_str(), blob);
auto boxes = decode_infer(blob, layer.stride, {(int) img_size.width, (int) img_size.height}, input_size,
num_class, layer.anchors, threshold);
result.insert(result.begin(), boxes.begin(), boxes.end());
}
nms(result, nms_threshold);
return result;
}
inline float fast_exp(float x) {
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x) {
return 1.0f / (1.0f + fast_exp(-x));
}
std::vector<BoxInfo>
YoloV5::decode_infer(ncnn::Mat &data, int stride, const yolocv::YoloSize &frame_size, int net_size, int num_classes,
const std::vector<yolocv::YoloSize> &anchors, float threshold) {
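    // YOLOv5-style decoding: box centers come from sigmoid offsets on the grid cell, widths/heights from (2*sigmoid)^2 times the anchor size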
std::vector<BoxInfo> result;
int grid_size = int(sqrt(data.h));
float *mat_data[data.c];
for (int i = 0; i < data.c; i++) {
mat_data[i] = data.channel(i);
}
float cx, cy, w, h;
for (int shift_y = 0; shift_y < grid_size; shift_y++) {
for (int shift_x = 0; shift_x < grid_size; shift_x++) {
int loc = shift_x + shift_y * grid_size;
for (int i = 0; i < 3; i++) {
float *record = mat_data[i];
float *cls_ptr = record + 5;
for (int cls = 0; cls < num_classes; cls++) {
float score = sigmoid(cls_ptr[cls]) * sigmoid(record[4]);
if (score > threshold) {
cx = (sigmoid(record[0]) * 2.f - 0.5f + (float) shift_x) * (float) stride;
cy = (sigmoid(record[1]) * 2.f - 0.5f + (float) shift_y) * (float) stride;
w = pow(sigmoid(record[2]) * 2.f, 2) * anchors[i].width;
h = pow(sigmoid(record[3]) * 2.f, 2) * anchors[i].height;
//printf("[grid size=%d, stride = %d]x y w h %f %f %f %f\n",grid_size,stride,record[0],record[1],record[2],record[3]);
BoxInfo box;
box.x1 = std::max(0, std::min(frame_size.width, int((cx - w / 2.f) * (float) frame_size.width / (float) net_size)));
box.y1 = std::max(0, std::min(frame_size.height, int((cy - h / 2.f) * (float) frame_size.height / (float) net_size)));
box.x2 = std::max(0, std::min(frame_size.width, int((cx + w / 2.f) * (float) frame_size.width / (float) net_size)));
box.y2 = std::max(0, std::min(frame_size.height, int((cy + h / 2.f) * (float) frame_size.height / (float) net_size)));
box.score = score;
box.label = cls;
result.push_back(box);
}
}
}
for (auto &ptr:mat_data) {
ptr += (num_classes + 5);
}
}
}
return result;
}
void YoloV5::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = std::max(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = std::max(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = std::min(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = std::min(input_boxes[i].y2, input_boxes[j].y2);
float w = std::max(float(0), xx2 - xx1 + 1);
float h = std::max(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
} else {
j++;
}
}
}
}
//
// Created by 邓昊晴 on 14/6/2020.
//
#ifndef YOLOV5_H
#define YOLOV5_H
#include "net.h"
namespace yolocv {
typedef struct {
int width;
int height;
} YoloSize;
}
typedef struct {
std::string name;
int stride;
std::vector<yolocv::YoloSize> anchors;
} YoloLayerData;
typedef struct BoxInfo {
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class YoloV5 {
public:
YoloV5(AAssetManager *mgr, const char *param, const char *bin, bool useGPU);
~YoloV5();
std::vector<BoxInfo> detect(JNIEnv *env, jobject image, float threshold, float nms_threshold);
std::vector<std::string> labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
private:
static std::vector<BoxInfo>
decode_infer(ncnn::Mat &data, int stride, const yolocv::YoloSize &frame_size, int net_size, int num_classes,
const std::vector<yolocv::YoloSize> &anchors, float threshold);
static void nms(std::vector<BoxInfo> &result, float nms_threshold);
ncnn::Net *Net;
int input_size = 640;
int num_class = 80;
std::vector<YoloLayerData> layers{
{"394", 32, {{116, 90}, {156, 198}, {373, 326}}},
{"375", 16, {{30, 61}, {62, 45}, {59, 119}}},
{"output", 8, {{10, 13}, {16, 30}, {33, 23}}},
};
public:
static YoloV5 *detector;
static bool hasGPU;
};
#endif //YOLOV5_H
#include <jni.h>
#include <string>
#include <gpu.h>
#include <android/asset_manager_jni.h>
#include <android/log.h>
#include "YoloV5.h"
#include "YoloV4.h"
#include "NanoDet.h"
JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *reserved) {
ncnn::create_gpu_instance();
if (ncnn::get_gpu_count() > 0) {
YoloV5::hasGPU = true;
YoloV4::hasGPU = true;
NanoDet::hasGPU = true;
}
return JNI_VERSION_1_6;
}
JNIEXPORT void JNI_OnUnload(JavaVM *vm, void *reserved) {
ncnn::destroy_gpu_instance();
}
/*********************************************************************************************
NanoDet
********************************************************************************************/
extern "C" JNIEXPORT void JNICALL
Java_com_rangi_nanodet_NanoDet_init(JNIEnv *env, jclass, jobject assetManager, jboolean useGPU) {
if (NanoDet::detector == nullptr) {
AAssetManager *mgr = AAssetManager_fromJava(env, assetManager);
NanoDet::detector = new NanoDet(mgr, "nanodet.param", "nanodet.bin", useGPU);
}
}
extern "C" JNIEXPORT jobjectArray JNICALL
Java_com_rangi_nanodet_NanoDet_detect(JNIEnv *env, jclass, jobject image, jdouble threshold, jdouble nms_threshold) {
auto result = NanoDet::detector->detect(env, image, threshold, nms_threshold);
auto box_cls = env->FindClass("com/rangi/nanodet/Box");
auto cid = env->GetMethodID(box_cls, "<init>", "(FFFFIF)V");
jobjectArray ret = env->NewObjectArray(result.size(), box_cls, nullptr);
int i = 0;
for (auto &box:result) {
env->PushLocalFrame(1);
jobject obj = env->NewObject(box_cls, cid, box.x1, box.y1, box.x2, box.y2, box.label, box.score);
obj = env->PopLocalFrame(obj);
env->SetObjectArrayElement(ret, i++, obj);
}
return ret;
}
/*********************************************************************************************
Yolov5
********************************************************************************************/
extern "C" JNIEXPORT void JNICALL
Java_com_rangi_nanodet_YOLOv5_init(JNIEnv *env, jclass, jobject assetManager, jboolean useGPU) {
if (YoloV5::detector == nullptr) {
AAssetManager *mgr = AAssetManager_fromJava(env, assetManager);
YoloV5::detector = new YoloV5(mgr, "yolov5.param", "yolov5.bin", useGPU);
}
}
extern "C" JNIEXPORT jobjectArray JNICALL
Java_com_rangi_nanodet_YOLOv5_detect(JNIEnv *env, jclass, jobject image, jdouble threshold, jdouble nms_threshold) {
auto result = YoloV5::detector->detect(env, image, threshold, nms_threshold);
auto box_cls = env->FindClass("com/rangi/nanodet/Box");
auto cid = env->GetMethodID(box_cls, "<init>", "(FFFFIF)V");
jobjectArray ret = env->NewObjectArray(result.size(), box_cls, nullptr);
int i = 0;
for (auto &box:result) {
env->PushLocalFrame(1);
jobject obj = env->NewObject(box_cls, cid, box.x1, box.y1, box.x2, box.y2, box.label, box.score);
obj = env->PopLocalFrame(obj);
env->SetObjectArrayElement(ret, i++, obj);
}
return ret;
}
/*********************************************************************************************
YOLOv4-tiny
********************************************************************************************/
extern "C" JNIEXPORT void JNICALL
Java_com_rangi_nanodet_YOLOv4_init(JNIEnv *env, jclass, jobject assetManager, jboolean useGPU) {
if (YoloV4::detector == nullptr) {
AAssetManager *mgr = AAssetManager_fromJava(env, assetManager);
YoloV4::detector = new YoloV4(mgr, "yolov4-tiny-opt.param", "yolov4-tiny-opt.bin", useGPU);
}
}
extern "C" JNIEXPORT jobjectArray JNICALL
Java_com_rangi_nanodet_YOLOv4_detect(JNIEnv *env, jclass, jobject image, jdouble threshold, jdouble nms_threshold) {
auto result = YoloV4::detector->detect(env, image, threshold, nms_threshold);
auto box_cls = env->FindClass("com/rangi/nanodet/Box");
auto cid = env->GetMethodID(box_cls, "<init>", "(FFFFIF)V");
jobjectArray ret = env->NewObjectArray(result.size(), box_cls, nullptr);
int i = 0;
for (auto &box:result) {
env->PushLocalFrame(1);
jobject obj = env->NewObject(box_cls, cid, box.x1, box.y1, box.x2, box.y2, box.label, box.score);
obj = env->PopLocalFrame(obj);
env->SetObjectArrayElement(ret, i++, obj);
}
return ret;
}
package com.rangi.nanodet;
import androidx.annotation.NonNull;
class AppCrashHandler implements Thread.UncaughtExceptionHandler {
private Thread.UncaughtExceptionHandler uncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler();
@Override
public void uncaughtException(@NonNull Thread t, @NonNull Throwable e) {
uncaughtExceptionHandler.uncaughtException(t, e);
}
public static void register() {
Thread.setDefaultUncaughtExceptionHandler(new AppCrashHandler());
}
}
package com.rangi.nanodet;
import android.graphics.Color;
import android.graphics.RectF;
import java.util.Random;
public class Box {
public float x0,y0,x1,y1;
private int label;
private float score;
private static String[] labels={"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
public Box(float x0,float y0, float x1, float y1, int label, float score){
this.x0 = x0;
this.y0 = y0;
this.x1 = x1;
this.y1 = y1;
this.label = label;
this.score = score;
}
public RectF getRect(){
return new RectF(x0,y0,x1,y1);
}
public String getLabel(){
return labels[label];
}
public float getScore(){
return score;
}
public int getColor(){
Random random = new Random(label);
return Color.argb(255,random.nextInt(256),random.nextInt(256),random.nextInt(256));
}
}
package com.rangi.nanodet;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
public class NanoDet {
static {
System.loadLibrary("yolov5");
}
public static native void init(AssetManager manager, boolean useGPU);
public static native Box[] detect(Bitmap bitmap, double threshold, double nms_threshold);
}
package com.rangi.nanodet;
import android.app.Application;
import android.content.Context;
import android.util.Log;
import androidx.multidex.MultiDex;
import com.zxy.recovery.callback.RecoveryCallback;
import com.zxy.recovery.core.Recovery;
public class NcnnApp extends Application {
@Override
public void onCreate() {
super.onCreate();
        // crash recovery screen
initRecovery();
}
@Override
protected void attachBaseContext(Context base) {
super.attachBaseContext(base);
MultiDex.install(base);
}
private void initRecovery() {
Recovery.getInstance()
.debug(BuildConfig.DEBUG)
.recoverInBackground(true)
.recoverStack(true)
.mainPage(MainActivity.class)
.recoverEnabled(true)
.callback(new MyCrashCallback())
.silent(false, Recovery.SilentMode.RECOVER_ACTIVITY_STACK)
// .skip(TestActivity.class)
.init(this);
AppCrashHandler.register();
}
static final class MyCrashCallback implements RecoveryCallback {
@Override
public void stackTrace(String exceptionMessage) {
Log.e("wzt", "exceptionMessage:" + exceptionMessage);
}
@Override
public void cause(String cause) {
Log.e("wzt", "cause:" + cause);
}
@Override
public void exception(String exceptionType, String throwClassName, String throwMethodName, int throwLineNumber) {
Log.e("wzt", "exceptionClassName:" + exceptionType);
Log.e("wzt", "throwClassName:" + throwClassName);
Log.e("wzt", "throwMethodName:" + throwMethodName);
Log.e("wzt", "throwLineNumber:" + throwLineNumber);
}
@Override
public void throwable(Throwable throwable) {
}
}
}
package com.rangi.nanodet;
import androidx.appcompat.app.AlertDialog;
import androidx.appcompat.app.AppCompatActivity;
import android.content.Intent;
import android.os.Bundle;
import android.view.View;
import android.widget.Button;
import android.widget.CompoundButton;
import android.widget.ToggleButton;
public class WelcomeActivity extends AppCompatActivity {
private ToggleButton tbUseGpu;
private Button nanodet;
private Button yolov5s;
private Button yolov4tiny;
private boolean useGPU = false;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_welcome);
tbUseGpu = findViewById(R.id.tb_use_gpu);
tbUseGpu.setOnCheckedChangeListener(new CompoundButton.OnCheckedChangeListener() {
@Override
public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
useGPU = isChecked;
MainActivity.USE_GPU = useGPU;
if (useGPU) {
AlertDialog.Builder builder = new AlertDialog.Builder(WelcomeActivity.this);
builder.setTitle("Warning");
builder.setMessage("It may not work well in GPU mode, or errors may occur.");
builder.setCancelable(true);
builder.setPositiveButton("OK", null);
AlertDialog dialog = builder.create();
dialog.show();
}
}
});
nanodet = findViewById(R.id.btn_start_detect0);
nanodet.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
MainActivity.USE_MODEL = MainActivity.NANODET;
Intent intent = new Intent(WelcomeActivity.this, MainActivity.class);
WelcomeActivity.this.startActivity(intent);
}
});
yolov5s = findViewById(R.id.btn_start_detect1);
yolov5s.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
MainActivity.USE_MODEL = MainActivity.YOLOV5S;
Intent intent = new Intent(WelcomeActivity.this, MainActivity.class);
WelcomeActivity.this.startActivity(intent);
}
});
yolov4tiny = findViewById(R.id.btn_start_detect2);
yolov4tiny.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
MainActivity.USE_MODEL = MainActivity.YOLOV4_TINY;
Intent intent = new Intent(WelcomeActivity.this, MainActivity.class);
WelcomeActivity.this.startActivity(intent);
}
});
}
}
package com.rangi.nanodet;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
public class YOLOv4 {
static {
System.loadLibrary("yolov5"); // 存放在yolov5.so中
}
public static native void init(AssetManager manager, boolean useGPU);
public static native Box[] detect(Bitmap bitmap, double threshold, double nms_threshold);
}
package com.rangi.nanodet;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
public class YOLOv5 {
static {
System.loadLibrary("yolov5");
}
public static native void init(AssetManager manager, boolean useGPU);
public static native Box[] detect(Bitmap bitmap, double threshold, double nms_threshold);
}
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillType="evenOdd"
android:pathData="M32,64C32,64 38.39,52.99 44.13,50.95C51.37,48.37 70.14,49.57 70.14,49.57L108.26,87.69L108,109.01L75.97,107.97L32,64Z"
android:strokeWidth="1"
android:strokeColor="#00000000">
<aapt:attr name="android:fillColor">
<gradient
android:endX="78.5885"
android:endY="90.9159"
android:startX="48.7653"
android:startY="61.0927"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M66.94,46.02L66.94,46.02C72.44,50.07 76,56.61 76,64L32,64C32,56.61 35.56,50.11 40.98,46.06L36.18,41.19C35.45,40.45 35.45,39.3 36.18,38.56C36.91,37.81 38.05,37.81 38.78,38.56L44.25,44.05C47.18,42.57 50.48,41.71 54,41.71C57.48,41.71 60.78,42.57 63.68,44.05L69.11,38.56C69.84,37.81 70.98,37.81 71.71,38.56C72.44,39.3 72.44,40.45 71.71,41.19L66.94,46.02ZM62.94,56.92C64.08,56.92 65,56.01 65,54.88C65,53.76 64.08,52.85 62.94,52.85C61.8,52.85 60.88,53.76 60.88,54.88C60.88,56.01 61.8,56.92 62.94,56.92ZM45.06,56.92C46.2,56.92 47.13,56.01 47.13,54.88C47.13,53.76 46.2,52.85 45.06,52.85C43.92,52.85 43,53.76 43,54.88C43,56.01 43.92,56.92 45.06,56.92Z"
android:strokeWidth="1"
android:strokeColor="#00000000" />
</vector>
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
<item android:drawable="@drawable/gpu" android:state_checked="true" />
<item android:drawable="@drawable/cpu" android:state_checked="false" />
</selector>
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>