使用 KITTI数据集训练YOLOX
1. 下载KITTI数据集后,首先将数据集转换为COCO数据集格式。
kitti_vis.py
import os
from pathlib import Path
import numpy as np
import cv2
def anno_vis(img, anno_list):
    """Draw label boxes and class names on ``img`` and show it in a window.

    Args:
        img: Image array accepted by the OpenCV drawing functions; it is
            drawn on in place.
        anno_list: Iterable of whitespace-split label rows. Field 0 is drawn
            as the text label and fields 4..7 are used as two opposite box
            corners ``(x1, y1, x2, y2)``.

    Raises:
        SystemExit: When the key code returned by ``cv2.waitKey`` is 27 (Esc).
    """
    red = (0, 0, 255)  # OpenCV colour tuples are in BGR order
    for anno in anno_list:
        if len(anno) < 8:
            # Blank or truncated rows carry no box; skip instead of crashing.
            continue
        points = np.array(anno[4:8], dtype=np.float32)
        top_left = (int(points[0]), int(points[1]))
        bottom_right = (int(points[2]), int(points[3]))
        cv2.rectangle(img, top_left, bottom_right, red, 2)
        cv2.putText(img, anno[0], top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, red, 2)
    cv2.imshow('img', img)
    ret = cv2.waitKey(0)
    if ret == 27:
        # The bare exit() helper is injected by the site module and is not
        # guaranteed to exist; raising SystemExit works everywhere.
        raise SystemExit(0)
if __name__ == '__main__':
    # Preview the first five images (by sorted file name) together with the
    # boxes read from the label file that shares the image's stem.
    img_root = Path(r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI-train_test-Image\training\image_2')
    label_root = Path(r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI-train_test-Label\training\label_2')
    # os.listdir returns entries in arbitrary order; sort so that "the first
    # five" is the same set on every run.
    for img_name in sorted(os.listdir(img_root))[:5]:
        img = cv2.imread(str(img_root / img_name))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip unreadable image: ', img_name)
            continue
        label_path = label_root / Path(img_name).with_suffix('.txt')
        with open(label_path) as f:
            rows = [line.split() for line in f.read().strip().splitlines()]
        anno_vis(img, rows)
kitti_split.py
'''
用于将KITTI数据集的7000多张训练集分为:前4000张为训练集,4000-6000张为验证集,剩余为测试集
运行命令:
python ./tools/kitti_split.py --source_img_path ./KITTI_origin/training/image_2 --source_label_path ./KITTI_origin/training/label_2/
--dst_img_path ./KITTI_YOLOX/img --dst_label_path ./KITTI_YOLOX/label
# img_root = Path(r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI-train_test-Image\training\image_2')
# label_root = Path(r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI-train_test-Label\training\label_2')
'''
import os
import argparse
from pathlib import Path
import shutil
from tqdm import tqdm
from loguru import logger
def make_parser():
    """Build the command-line parser for the KITTI split script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--source_img_path``,
        ``--source_label_path``, ``--dst_img_path`` and ``--dst_label_path``;
        every option is optional and carries a default path.
    """
    # (flag, default value, help text) for each option, in registration order.
    option_table = (
        ('--source_img_path',
         r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI-train_test-Image\training\image_2',
         "Specify original kitti img path"),
        ('--source_label_path',
         r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI-train_test-Label\training\label_2',
         "Specify original kitti label path"),
        ('--dst_img_path',
         r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\img',
         "Specify splited kitti img path"),
        ('--dst_label_path',
         r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\label',
         "Specify splited kitti label path"),
    )
    cli = argparse.ArgumentParser("")
    for flag, fallback, description in option_table:
        cli.add_argument(flag, default=fallback, help=description)
    return cli
def check_dir(dir):
    """Create directory ``dir`` (including parents) when it is not there yet.

    Nothing happens, and nothing is logged, when the directory already exists.

    Args:
        dir: Directory path as a string or ``Path``. The parameter name is
            kept for existing callers even though it shadows the builtin.
    """
    target = Path(dir)
    if not target.is_dir():
        target.mkdir(parents=True, exist_ok=True)
        logger.info(f'Created {dir}')
def split_name_for(index, train_end=4000, val_end=6000):
    """Return the split an image index belongs to.

    Args:
        index: Integer parsed from the image file stem.
        train_end: Indices below this value go to ``'train'``.
        val_end: Indices from ``train_end`` up to (not including) this value
            go to ``'val'``.

    Returns:
        str: ``'train'``, ``'val'`` or ``'test'``.
    """
    if index < train_end:
        return 'train'
    if index < val_end:
        return 'val'
    return 'test'


if __name__ == '__main__':
    args = make_parser().parse_args()
    img_root = Path(args.source_img_path)
    label_root = Path(args.source_label_path)

    # One destination directory per split, for images and for labels.
    split_names = ('train', 'val', 'test')
    dst_img_roots = {name: Path(args.dst_img_path) / name for name in split_names}
    dst_label_roots = {name: Path(args.dst_label_path) / name for name in split_names}
    for directory in (*dst_img_roots.values(), *dst_label_roots.values()):
        check_dir(directory)

    for img_name in tqdm(os.listdir(img_root)):
        try:
            index = int(Path(img_name).stem)
        except ValueError:
            # Stray files whose stem is not a number cannot be assigned to a
            # split; skip them instead of aborting the whole run.
            logger.warning('Skipping {}: file name is not a numeric index', img_name)
            continue
        split = split_name_for(index)
        label_name = Path(img_name).with_suffix('.txt')
        shutil.copyfile(img_root / img_name, dst_img_roots[split] / img_name)
        shutil.copyfile(label_root / label_name, dst_label_roots[split] / label_name)
kitti2coco.py
'''
KITTI标注转COCO标注
运行命令:
(1)训练集:python tools/kitti2coco.py --img_path ./KITTI_YOLOX/img/train --label_path ./KITTI_YOLOX/label/train --dst_json ./train.json
(2)验证集:python tools/kitti2coco.py --img_path ./KITTI_YOLOX/img/val --label_path ./KITTI_YOLOX/label/val --dst_json ./val.json
(3)测试集:python tools/kitti2coco.py --img_path ./KITTI_YOLOX/img/test --label_path ./KITTI_YOLOX/label/test --dst_json ./test.json
'''
import os
import json
import argparse
from pathlib import Path
import cv2
from tqdm import tqdm
# parser.add_argument('--dst_img_path', default=r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\img',
# help="Specify splited kitti img path")
# parser.add_argument('--dst_label_path', default=r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\label',
# help="Specify splited kitti label path")
def make_parser():
    """Build the command-line parser for the KITTI-to-COCO converter.

    The defaults point at the ``val`` split; pass ``--img_path``,
    ``--label_path`` and ``--dst_json`` explicitly to convert the ``train``
    or ``test`` split instead.

    Returns:
        argparse.ArgumentParser: Parser with three optional string options.
    """
    # (flag, default value, help text) for each option, in registration order.
    option_table = (
        ('--img_path',
         r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\img\val',
         'Specify img path'),
        ('--label_path',
         r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\label\val',
         'Specify label path'),
        ('--dst_json',
         r'D:\BaiduNetdiskDownload\CV\KITTI\val.json',
         'Specify generated json file name'),
    )
    cli = argparse.ArgumentParser("Kitti to COCO format")
    for flag, fallback, description in option_table:
        cli.add_argument(flag, type=str, default=fallback, help=description)
    return cli
def kitti_bbox_to_coco(anno):
    """Convert the box fields of one label row to ``[x, y, width, height]``.

    Args:
        anno: Whitespace-split label row; fields 4..7 are read as the two
            box corners ``x1, y1, x2, y2``.

    Returns:
        list[float]: ``[x1, y1, x2 - x1, y2 - y1]``.
    """
    x1, y1, x2, y2 = (float(value) for value in anno[4:8])
    return [x1, y1, x2 - x1, y2 - y1]


if __name__ == '__main__':
    args = make_parser().parse_args()
    img_root = Path(args.img_path)
    label_root = Path(args.label_path)

    # Category id -> class name; rows with any other class name are dropped.
    category_dict = {
        1: 'Car',
        2: 'Van',
        3: 'Pedestrian',
        4: 'Person_sitting',
        5: 'Truck',
        6: 'Cyclist',
        7: 'Tram',
    }
    category_name2id_dict = {name: cat_id for cat_id, name in category_dict.items()}

    json_images_list = []
    json_annotations_list = []
    img_id = 0
    anno_id = 0
    # os.listdir order is arbitrary; sorting keeps image ids stable across runs.
    for img_name in tqdm(sorted(os.listdir(img_root))):
        img = cv2.imread(str(img_root / img_name))
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            tqdm.write('skip unreadable image: %s' % img_name)
            continue
        img_height, img_width = img.shape[:2]
        json_images_list.append({
            'license': None,
            'file_name': img_name,
            'coco_url': None,
            'height': img_height,
            'width': img_width,
            'date_captured': None,
            'flickr_url': None,
            'id': img_id,
        })

        label_name = Path(img_name).with_suffix('.txt')
        with open(label_root / label_name) as f:
            anno_list = [line.split() for line in f.read().strip().splitlines()]
        for anno in anno_list:
            # Skip blank/truncated rows and classes outside category_dict.
            if len(anno) < 8 or anno[0] not in category_name2id_dict:
                continue
            bbox = kitti_bbox_to_coco(anno)
            json_annotations_list.append({
                'segmentation': None,
                'area': bbox[2] * bbox[3],
                'iscrowd': 0,
                'image_id': img_id,
                'bbox': bbox,
                'category_id': category_name2id_dict[anno[0]],
                'id': anno_id,
            })
            anno_id += 1
        img_id += 1

    json_categories_list = [
        {'supercategory': None, 'id': cat_id, 'name': name}
        for cat_id, name in category_dict.items()
    ]
    json_dict = {
        'images': json_images_list,
        'annotations': json_annotations_list,
        'categories': json_categories_list,
    }
    with open(args.dst_json, "w") as f:
        json.dump(json_dict, f)
COCO_vis.py
'''
验证转换后的json格式标注的准确性。
运行命令:python tools/COCO_vis.py --img_root ./KITTI_YOLOX/img/train --label_file ./KITTI_YOLOX/train.json
'''
import argparse
from pathlib import Path
import numpy as np
import cv2
from pycocotools.coco import COCO
# parser.add_argument('--img_path', type=str, default=r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\img\val',
# help='Specify img path')
# parser.add_argument('--label_path', type=str, default=r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\label\val',
# help='Specify label path')
# parser.add_argument('--dst_json', type=str, default=r'D:\BaiduNetdiskDownload\CV\KITTI\val.json',
# help='Specify generated json file name')
def make_parser():
    """Build the command-line parser for the COCO annotation viewer.

    Returns:
        argparse.ArgumentParser: Parser with the optional string options
        ``--img_root`` and ``--label_file``, each with a default path.
    """
    cli = argparse.ArgumentParser("")
    cli.add_argument(
        '--img_root',
        type=str,
        default=r'D:\BaiduNetdiskDownload\CV\KITTI\KITTI_YOLOX\img\train',
        help='Specify img path',
    )
    cli.add_argument(
        '--label_file',
        type=str,
        default=r'D:\BaiduNetdiskDownload\CV\KITTI\train.json',
        help='Specify COCO format label file',
    )
    return cli
def clip_bbox(bbox, img_width, img_height):
    """Turn an ``[x, y, w, h]`` box into corners clipped to the image.

    The right and bottom edges are measured from the already-clamped origin,
    which is the arithmetic this script has always used.

    Args:
        bbox: Sequence whose first four items are ``x, y, w, h``.
        img_width: Upper bound for the right edge.
        img_height: Upper bound for the bottom edge.

    Returns:
        tuple: ``(x1, y1, x2, y2)``.
    """
    x1 = max(0, bbox[0])
    y1 = max(0, bbox[1])
    x2 = min(img_width, x1 + max(0, bbox[2]))
    y2 = min(img_height, y1 + max(0, bbox[3]))
    return x1, y1, x2, y2


if __name__ == '__main__':
    args = make_parser().parse_args()
    img_root = Path(args.img_root)
    coco = COCO(args.label_file)
    label_id2name = {
        item['id']: item['name'] for item in coco.loadCats(coco.getCatIds())
    }

    for img_id in coco.getImgIds():
        img_info = coco.loadImgs(img_id)[0]
        img_path = str(img_root / img_info['file_name'])
        print('img name: ', img_path)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print('skip unreadable image: ', img_path)
            continue
        img_width = img_info["width"]
        img_height = img_info["height"]

        anno_ids = coco.getAnnIds(imgIds=[img_id], iscrowd=False)
        for annotation in coco.loadAnns(anno_ids):
            x1, y1, x2, y2 = clip_bbox(annotation["bbox"], img_width, img_height)
            label = label_id2name[annotation['category_id']]
            cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)
            cv2.putText(img, label, (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (128, 255, 255))

        cv2.imshow('img', img)
        ret = cv2.waitKey(0)
        if ret == 27:
            # Key code 27 is Esc. The bare exit() helper comes from the site
            # module and may be absent, so raise SystemExit directly.
            raise SystemExit(0)
2. 按照训练COCO数据集的指令训练KITTI即可
python -m yolox.tools.train -n yolox-s -d 1 -b 32 --fp16
或者:
python -m yolox.tools.train -f exps/default/yolox_s.py -d 1 -b 32 --fp16
(yolox) xuefei@f123:/mnt/d/work/study/detect/7$
(yolox) xuefei@f123:/mnt/d/work/study/detect/7$ python -m yolox.tools.train -f exps/kitti_car_detection/yolox_s.py -d 1 -b 16 --fp16
2024-02-05 23:08:04 | INFO | yolox.core.trainer:130 - args: Namespace(batch_size=16, cache=False, ckpt=None, devices=1, dist_backend='nccl', dist_url=None, exp_file='exps/kitti_car_detection/yolox_s.py', experiment_name='yolox_s', fp16=True, logger='tensorboard', machine_rank=0, name=None, num_machines=1, occupy=False, opts=[], resume=False, start_epoch=None)
2024-02-05 23:08:04 | INFO | yolox.core.trainer:131 - exp value:
╒═══════════════════╤═══════════════════════════════════════════════════════════════╕
│ keys │ values │
╞═══════════════════╪═══════════════════════════════════════════════════════════════╡
│ seed │ None │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ output_dir │ './YOLOX_outputs' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ print_interval │ 10 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ eval_interval │ 10 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ num_classes │ 7 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ depth │ 0.33 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ width │ 0.5 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ act │ 'silu' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ data_num_workers │ 16 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ input_size │ (256, 832) │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ multiscale_range │ 5 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ data_dir │ '/mnt/d/BaiduNetdiskDownload/CV/KITTI/KITTI_YOLOX/img/' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ train_ann │ '/mnt/d/BaiduNetdiskDownload/CV/KITTI/KITTI_YOLOX/train.json' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ val_ann │ '/mnt/d/BaiduNetdiskDownload/CV/KITTI/KITTI_YOLOX/val.json' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ test_ann │ '/mnt/d/BaiduNetdiskDownload/CV/KITTI/KITTI_YOLOX/test.json' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ mosaic_prob │ 1.0 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ mixup_prob │ 1.0 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ hsv_prob │ 1.0 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ flip_prob │ 0.5 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ degrees │ 10.0 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ translate │ 0.1 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ mosaic_scale │ (0.1, 2) │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ enable_mixup │ True │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ mixup_scale │ (0.5, 1.5) │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ shear │ 2.0 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ warmup_epochs │ 5 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ max_epoch │ 300 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ warmup_lr │ 0 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ min_lr_ratio │ 0.05 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ basic_lr_per_img │ 0.00015625 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ scheduler │ 'yoloxwarmcos' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ no_aug_epochs │ 80 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ ema │ True │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ weight_decay │ 0.0005 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ momentum │ 0.9 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ save_history_ckpt │ True │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ exp_name │ 'yolox_s' │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ test_size │ (256, 832) │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ test_conf │ 0.01 │
├───────────────────┼───────────────────────────────────────────────────────────────┤
│ nmsthre │ 0.65 │
╘═══════════════════╧═══════════════════════════════════════════════════════════════╛
2024-02-05 23:08:05 | INFO | yolox.core.trainer:137 - Model Summary: Params: 8.94M, Gflops: 13.92
2024-02-05 23:08:07 | INFO | yolox.data.datasets.kitti:64 - loading annotations into memory...
2024-02-05 23:08:07 | INFO | yolox.data.datasets.kitti:64 - Done (t=0.05s)
2024-02-05 23:08:07 | INFO | pycocotools.coco:86 - creating index...
2024-02-05 23:08:07 | INFO | pycocotools.coco:86 - index created!
2024-02-05 23:08:08 | INFO | yolox.core.trainer:155 - init prefetcher, this might take one minute or less...
2024-02-05 23:08:17 | INFO | yolox.data.datasets.kitti:64 - loading annotations into memory...
2024-02-05 23:08:17 | INFO | yolox.data.datasets.kitti:64 - Done (t=0.05s)
2024-02-05 23:08:17 | INFO | pycocotools.coco:86 - creating index...
2024-02-05 23:08:17 | INFO | pycocotools.coco:86 - index created!
2024-02-05 23:08:17 | INFO | yolox.core.trainer:191 - Training start...
2024-02-05 23:08:17 | INFO | yolox.core.trainer:192 -
YOLOX(
(backbone): YOLOPAFPN(
(backbone): CSPDarknet(
(stem): Focus(
(conv): BaseConv(
(conv): Conv2d(12, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(dark2): Sequential(
(0): BaseConv(
(conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
)
(dark3): Sequential(
(0): BaseConv(
(conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(1): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(2): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
)
(dark4): Sequential(
(0): BaseConv(
(conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(1): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(2): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
)
(dark5): Sequential(
(0): BaseConv(
(conv): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): SPPBottleneck(
(conv1): BaseConv(
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): ModuleList(
(0): MaxPool2d(kernel_size=5, stride=1, padding=2, dilation=1, ceil_mode=False)
(1): MaxPool2d(kernel_size=9, stride=1, padding=4, dilation=1, ceil_mode=False)
(2): MaxPool2d(kernel_size=13, stride=1, padding=6, dilation=1, ceil_mode=False)
)
(conv2): BaseConv(
(conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(2): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
)
)
(upsample): Upsample(scale_factor=2.0, mode=nearest)
(lateral_conv0): BaseConv(
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(C3_p4): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
(reduce_conv1): BaseConv(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(C3_p3): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(64, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
(bu_conv2): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(C3_n3): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
(bu_conv1): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(C3_n4): CSPLayer(
(conv1): BaseConv(
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv3): BaseConv(
(conv): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(512, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(m): Sequential(
(0): Bottleneck(
(conv1): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(conv2): BaseConv(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(256, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
)
)
(head): YOLOXHead(
(cls_convs): ModuleList(
(0): Sequential(
(0): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(1): Sequential(
(0): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(2): Sequential(
(0): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
(reg_convs): ModuleList(
(0): Sequential(
(0): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(1): Sequential(
(0): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(2): Sequential(
(0): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
)
(cls_preds): ModuleList(
(0): Conv2d(128, 7, kernel_size=(1, 1), stride=(1, 1))
(1): Conv2d(128, 7, kernel_size=(1, 1), stride=(1, 1))
(2): Conv2d(128, 7, kernel_size=(1, 1), stride=(1, 1))
)
(reg_preds): ModuleList(
(0): Conv2d(128, 4, kernel_size=(1, 1), stride=(1, 1))
(1): Conv2d(128, 4, kernel_size=(1, 1), stride=(1, 1))
(2): Conv2d(128, 4, kernel_size=(1, 1), stride=(1, 1))
)
(obj_preds): ModuleList(
(0): Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1))
(1): Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1))
(2): Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1))
)
(stems): ModuleList(
(0): BaseConv(
(conv): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(1): BaseConv(
(conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
(2): BaseConv(
(conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn): BatchNorm2d(128, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
(act): SiLU(inplace=True)
)
)
(l1_loss): L1Loss()
(bcewithlog_loss): BCEWithLogitsLoss()
(iou_loss): IOUloss()
)
)
2024-02-05 23:08:17 | INFO | yolox.core.trainer:203 - ---> start train epoch1
2024-02-05 23:08:22 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 10/250, mem: 2730Mb, iter_time: 0.523s, data_time: 0.001s, total_loss: 17.5, iou_loss: 4.7, l1_loss: 3.0, conf_loss: 8.8, cls_loss: 1.0, lr: 1.600e-07, size: 256, ETA: 10:54:05
2024-02-05 23:08:27 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 20/250, mem: 2730Mb, iter_time: 0.478s, data_time: 0.223s, total_loss: 13.0, iou_loss: 4.7, l1_loss: 2.3, conf_loss: 5.1, cls_loss: 1.0, lr: 6.400e-07, size: 96, ETA: 10:25:36
2024-02-05 23:08:37 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 30/250, mem: 4257Mb, iter_time: 0.950s, data_time: 0.001s, total_loss: 22.2, iou_loss: 4.7, l1_loss: 3.0, conf_loss: 13.5, cls_loss: 1.0, lr: 1.440e-06, size: 416, ETA: 13:32:40
2024-02-05 23:08:43 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 40/250, mem: 4259Mb, iter_time: 0.676s, data_time: 0.001s, total_loss: 21.0, iou_loss: 4.7, l1_loss: 3.0, conf_loss: 12.3, cls_loss: 1.0, lr: 2.560e-06, size: 416, ETA: 13:40:40
2024-02-05 23:08:45 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 50/250, mem: 4259Mb, iter_time: 0.210s, data_time: 0.001s, total_loss: 13.2, iou_loss: 4.8, l1_loss: 2.6, conf_loss: 5.0, cls_loss: 0.8, lr: 4.000e-06, size: 96, ETA: 11:48:55
2024-02-05 23:08:52 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 60/250, mem: 4259Mb, iter_time: 0.646s, data_time: 0.001s, total_loss: 18.1, iou_loss: 4.7, l1_loss: 2.6, conf_loss: 9.8, cls_loss: 1.0, lr: 5.760e-06, size: 320, ETA: 12:05:09
2024-02-05 23:08:59 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 70/250, mem: 4279Mb, iter_time: 0.714s, data_time: 0.027s, total_loss: 20.1, iou_loss: 4.7, l1_loss: 2.7, conf_loss: 11.7, cls_loss: 1.0, lr: 7.840e-06, size: 416, ETA: 12:28:52
2024-02-05 23:09:04 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 80/250, mem: 4279Mb, iter_time: 0.478s, data_time: 0.047s, total_loss: 14.9, iou_loss: 4.7, l1_loss: 2.8, conf_loss: 6.4, cls_loss: 1.1, lr: 1.024e-05, size: 224, ETA: 12:09:45
2024-02-05 23:09:06 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 90/250, mem: 4279Mb, iter_time: 0.184s, data_time: 0.001s, total_loss: 12.9, iou_loss: 4.7, l1_loss: 2.2, conf_loss: 5.1, cls_loss: 0.9, lr: 1.296e-05, size: 96, ETA: 11:14:07
2024-02-05 23:09:15 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 100/250, mem: 4279Mb, iter_time: 0.949s, data_time: 0.259s, total_loss: 20.1, iou_loss: 4.7, l1_loss: 2.7, conf_loss: 11.7, cls_loss: 1.1, lr: 1.600e-05, size: 352, ETA: 12:05:03
2024-02-05 23:09:18 | INFO | yolox.core.trainer:261 - epoch: 1/300, iter: 110/250, mem: 4279Mb, iter_time: 0.248s, data_time: 0.001s, total_loss: 13.7, iou_loss: 4.7, l1_loss: 2.6, conf_loss: 5.4, cls_loss: 1.0, lr: 1.936e-05, size: 128, ETA: 11:27:11