  • 페이스북 인공지능 연구소(FAIR)에서 개발한 객체 탐지 및 세그먼테이션 프레임워크
  • 페이스북에서 개발한 DensePose, Mask R-CNN 등을 Detectron2에서 제공
  • 손쉽게 다양한 사물들을 탐지하고 세그먼테이션하여, 객체의 유형, 크기, 위치 등을 자동으로 얻을 수 있음

Detectron2 설치

  • Tutorial: https://detectron2.readthedocs.io/tutorials/install.html
  • Detectron2: https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html
# Pin PyYAML, then install the CUDA 10.2 builds of torch 1.9.0 / torchvision 0.10.0
# so that they match the prebuilt Detectron2 wheel index (cu102/torch1.9) used on the last line.
!pip install pyyaml==5.3.1
!pip install torch==1.9.0+cu102 torchvision==0.10.0+cu102 -f https://download.pytorch.org/whl/torch_stable.html
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html
import torch, torchvision
# Print the installed torch version and whether a CUDA device is available.
print(torch.__version__, torch.cuda.is_available())
1.9.0+cu102 True

import detectron2
from detectron2.utils.logger import setup_logger

import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog 

사전 학습 모델

  • input.jpg: http://images.cocodataset.org/val2017/000000439715.jpg
# Download the COCO val2017 sample image quietly (-q) and save it as input.jpg.
!wget http://images.cocodataset.org/val2017/000000439715.jpg -q -O input.jpg
# Read the image with OpenCV (channel order is BGR).
im = cv2.imread('./input.jpg')
  • Config 파일: COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
# Start from Detectron2's default config and overlay the Mask R-CNN (R50-FPN, 3x) model-zoo config.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')) # config of the model trained with a ResNet backbone
# The model-zoo checkpoint URL below is left disabled; the note says it ended in
# NotImplementedError, so the weights are read from a local Google Drive copy instead.
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')
# NotImplementedError
cfg.MODEL.WEIGHTS = '/content/drive/MyDrive/Colab Notebooks/deep_learning/model/COCO-InstanceSegmentation mask_rcnn_R_50_FPN_3x /model_final_f10217.pkl'
# Build the predictor from the config and run it on the loaded image.
predictor = DefaultPredictor(cfg)
outputs = predictor(im)
tensor([17,  0,  0,  0,  0,  0,  0,  0, 25,  0, 25, 25,  0,  0, 24],
       device='cuda:0')
Boxes(tensor([[126.6035, 244.8977, 459.8291, 480.0000],
        [251.1083, 157.8127, 338.9731, 413.6379],
        [114.8496, 268.6864, 148.2352, 398.8111],
        [  0.8217, 281.0327,  78.6072, 478.4210],
        [ 49.3954, 274.1229,  80.1545, 342.9808],
        [561.2248, 271.5816, 596.2755, 385.2552],
        [385.9072, 270.3125, 413.7130, 304.0397],
        [515.9295, 278.3744, 562.2792, 389.3802],
        [335.2409, 251.9167, 414.7491, 275.9375],
        [350.9300, 269.2060, 386.0984, 297.9081],
        [331.6292, 230.9996, 393.2759, 257.2009],
        [510.7349, 263.2656, 570.9865, 295.9194],
        [409.0841, 271.8646, 460.5582, 356.8722],
        [506.8767, 283.3257, 529.9403, 324.0392],
        [594.5663, 283.4820, 609.0577, 311.4124]], device='cuda:0'))

# Draw the predicted instances on the image and show the result in the notebook.
v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2) # OpenCV images are BGR; flip the channels to RGB for Visualizer
out = v.draw_instance_predictions(outputs['instances'].to('cpu'))
cv2_imshow(out.get_image()[:, :, ::-1]) # flip the drawn RGB image back to BGR for cv2_imshow
Out [8]:


커스텀 데이터셋 학습

데이터셋 준비

  • Balloon 데이터셋: https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
# Download the balloon dataset archive and unpack it into the working directory.
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
!unzip balloon_dataset.zip
from detectron2.structures import BoxMode

def get_balloon_dicts(img_dir):
    """Convert the VIA annotations found in ``img_dir`` to Detectron2 dataset dicts.

    Reads ``via_region_data.json`` from ``img_dir`` and builds one record per
    annotated image.

    Args:
        img_dir: Directory holding the images and ``via_region_data.json``.

    Returns:
        A list of dicts with the keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``. Each annotation holds an XYXY absolute
        ``bbox``, its ``bbox_mode``, a single-polygon ``segmentation`` and
        ``category_id`` 0 (the dataset has one class).

    Raises:
        FileNotFoundError: If an image listed in the JSON file cannot be read.
    """
    json_file = os.path.join(img_dir, 'via_region_data.json')
    with open(json_file) as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v['filename'])
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError('cannot read image: ' + filename)
        height, width = image.shape[:2]

        annos = v['regions']
        # 'regions' is a dict keyed by region index in this file; accept a plain
        # list as well so that list-style exports load too.
        regions = annos.values() if isinstance(annos, dict) else annos

        objs = []
        for region in regions:
            shape = region['shape_attributes']
            px = shape['all_points_x']
            py = shape['all_points_y']
            # Shift each vertex by 0.5 and flatten to [x0, y0, x1, y1, ...].
            poly = [coord + 0.5 for xy in zip(px, py) for coord in xy]
            objs.append({
                'bbox': [np.min(px), np.min(py), np.max(px), np.max(py)],
                'bbox_mode': BoxMode.XYXY_ABS,
                'segmentation': [poly],
                'category_id': 0,
            })

        dataset_dicts.append({
            'file_name': filename,
            'image_id': idx,
            'height': height,
            'width': width,
            'annotations': objs,
        })
    return dataset_dicts

# Register both splits lazily. `d=d` binds the current split name to each lambda,
# avoiding the late-binding closure pitfall.
for d in ['train', 'val']:
    DatasetCatalog.register('balloon_'+d, lambda d=d:get_balloon_dicts('balloon/'+d))
    # Name the dataset's only class so that visualizations can label instances.
    MetadataCatalog.get('balloon_'+d).set(thing_classes=['balloon'])

balloon_metadata = MetadataCatalog.get('balloon_train')
# Sanity-check the loader by drawing the annotations of two random training images.
dataset_dicts = get_balloon_dicts('balloon/train')
for d in random.sample(dataset_dicts, 2):
    img = cv2.imread(d['file_name']) # loaded as BGR
    # Visualizer expects an RGB image, so flip the channel order before drawing.
    v = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5)
    out = v.draw_dataset_dict(d)
    # get_image() returns RGB; flip back to BGR for cv2_imshow.
    cv2_imshow(out.get_image()[:, :, ::-1])
Out [11]:




  • Config 파일: COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml
from detectron2.engine import DefaultTrainer

# Fine-tune the COCO-pretrained Mask R-CNN (R50-FPN, 3x) on the balloon dataset.
cfg = get_cfg()
# Without merging the model-zoo config, the default architecture does not match the
# Mask R-CNN checkpoint loaded below.
cfg.merge_from_file(model_zoo.get_config_file('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml'))
cfg.DATASETS.TRAIN = ('balloon_train', ) # must be a tuple
cfg.DATASETS.TEST = () # drop the COCO val set that the merged config refers to
# The model-zoo checkpoint URL below is left disabled; the note says it ended in
# NotImplementedError, so the weights are read from a local Google Drive copy instead.
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')
# NotImplementedError
cfg.MODEL.WEIGHTS = '/content/drive/MyDrive/Colab Notebooks/deep_learning/model/COCO-InstanceSegmentation mask_rcnn_R_50_FPN_3x /model_final_f10217.pkl'
# Schedule values follow the official Detectron2 Colab tutorial; the merged 3x
# schedule is sized for full COCO training. Tune as needed.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 # learning rate
cfg.SOLVER.MAX_ITER = 300
cfg.SOLVER.STEPS = [] # no learning-rate decay for this short run
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 # the dataset has a single class (balloon)

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
# Start from cfg.MODEL.WEIGHTS rather than resuming from a checkpoint in OUTPUT_DIR.
trainer.resume_or_load(resume=False)
# Training writes model_final.pth to OUTPUT_DIR, which the inference step loads.
trainer.train()
# Load the TensorBoard notebook extension and open it on the "output" log directory.
%load_ext tensorboard
%tensorboard --logdir output
<IPython.core.display.Javascript object>

추론 및 평가

# Point the config at model_final.pth under OUTPUT_DIR and build a predictor from it.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, 'model_final.pth')
predictor = DefaultPredictor(cfg)
from detectron2.utils.visualizer import ColorMode

# Run the predictor on three random validation images and draw its predictions.
dataset_dicts = get_balloon_dicts('balloon/val')
for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d['file_name']) # loaded as BGR
    outputs = predictor(img)
    # Visualizer expects RGB, so flip the channels. IMAGE_BW keeps only the
    # detected regions in colour and renders the rest in grayscale.
    v = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5,
                   instance_mode=ColorMode.IMAGE_BW)
    out = v.draw_instance_predictions(outputs['instances'].to('cpu'))
    # get_image() returns RGB; flip back to BGR for cv2_imshow.
    cv2_imshow(out.get_image()[:, :, ::-1])
Out [15]:




다른 타입 적용

  • Config 파일: COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
# Keypoint detection with the COCO-pretrained Keypoint R-CNN (R50-FPN, 3x).
cfg = get_cfg()
# The keypoint config must be merged: the bare default config has an empty
# DATASETS.TRAIN (so TRAIN[0] below would raise IndexError) and does not match
# the keypoint checkpoint.
cfg.merge_from_file(model_zoo.get_config_file('COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml'))
# The model-zoo checkpoint URL below is left disabled; the note says it ended in
# NotImplementedError, so the weights are read from a local Google Drive copy instead.
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml')
# NotImplementedError
cfg.MODEL.WEIGHTS = '/content/drive/MyDrive/Colab Notebooks/deep_learning/model/COCO-Keypoints keypoint_rcnn_R_50_FPN_3x /model_final_a6e10b.pkl'
predictor = DefaultPredictor(cfg)
outputs = predictor(img)
# Visualizer expects RGB; img was read by OpenCV as BGR.
v = Visualizer(img[:,:,::-1],MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),scale=0.5)
out = v.draw_instance_predictions(outputs['instances'].to('cpu'))
# get_image() returns RGB; flip back to BGR for cv2_imshow.
cv2_imshow(out.get_image()[:, :, ::-1])
Out [16]:


  • Config 파일: COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
# Panoptic segmentation with the COCO-pretrained Panoptic FPN (R101, 3x).
cfg = get_cfg()
# The panoptic config must be merged: the bare default config has an empty
# DATASETS.TRAIN (so TRAIN[0] below would raise IndexError) and does not match
# the panoptic checkpoint.
cfg.merge_from_file(model_zoo.get_config_file('COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml'))
# The model-zoo checkpoint URL below is left disabled; the note says it ended in
# NotImplementedError, so the weights are read from a local Google Drive copy instead.
# cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url('COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml')
# NotImplementedError
cfg.MODEL.WEIGHTS = '/content/drive/MyDrive/Colab Notebooks/deep_learning/model/COCO-PanopticSegmentation panoptic_fpn_R_101_3x /model_final_cafdb1.pkl'
predictor = DefaultPredictor(cfg)
# The 'panoptic_seg' output is a (segmentation map, per-segment info) pair.
panoptic_seg,segments_info = predictor(img)['panoptic_seg']
# Visualizer expects RGB; img was read by OpenCV as BGR.
v = Visualizer(img[:,:,::-1],MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),scale=0.5)
out = v.draw_panoptic_seg_predictions(panoptic_seg.to('cpu'),segments_info)
# get_image() returns RGB; flip back to BGR for cv2_imshow.
cv2_imshow(out.get_image()[:, :, ::-1])
Out [17]:


비디오 파일 적용

  • https://www.youtube.com/watch?v=ll8TgCZ0plk
from IPython.display import YouTubeVideo, display
video = YouTubeVideo('ll8TgCZ0plk',width=600)
!ffmpeg -i '/content/drive/MyDrive/Colab Notebooks/deep_learning/video.mp4' -t 00:00:06 -c:v copy video-clip.mp4
  • detectron2 github: https://github.com/facebookresearch/detectron2
  • detectron2 config file: detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
  • model weights: detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl
# Clone the Detectron2 repository to get demo/demo.py and the config files.
!git clone https://github.com/facebookresearch/detectron2
# Run the demo script on the clipped video with the Panoptic FPN config, a 0.6
# confidence threshold and the locally stored weights; output goes to video-output.mkv.
%run detectron2/demo/demo.py \
--config-file detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml \
--video-input video-clip.mp4 \
--confidence-threshold 0.6 \
--output video-output.mkv \
--opts MODEL.WEIGHTS '/content/drive/MyDrive/Colab Notebooks/deep_learning/model/COCO-PanopticSegmentation panoptic_fpn_R_101_3x /model_final_cafdb1.pkl'
# Alternative (disabled): load the weights through the detectron2:// model-zoo URI instead.
# --opts MODEL.WEIGHTS detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl
from google.colab import files
# Download the rendered demo video from the Colab VM to the local machine.
files.download('video-output.mkv')
  • 이 포스트는 SeSAC 인공지능 자연어처리, 컴퓨터비전 기술을 활용한 응용 SW 개발자 양성 과정 - 심선조 강사님의 강의를 정리한 내용입니다.
