Labelboxのアウトプットが独自フォーマットになり、COCO形式で出力できなくなったうえ、公式リポジトリの変換ツールも使えないので作った。正直、いろいろなサイトのコードを参考にして作ったのでオリジナリティはない。
なお、for文でLabelboxのデータにアクセスするとたまにtimeoutになるので、元の画像とアノテーション画像をローカルに保存してから実行する。
あと、LabelboxはRLE形式では出力せず、どんなときもpolygon形式で出力していたので、それに合わせた。
import json
import io
import logging
import datetime as dt
import os

import numpy as np
from skimage import measure
from PIL import Image
from pycocotools import mask
from tqdm import tqdm


def main():
    """Convert the Labelbox export ``input.json`` into COCO ``output.json``.

    Every entry of the export is passed to ``convert_data``; the accumulated
    COCO dictionary is then written out as indented JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open('input.json', encoding='utf-8') as f:
        jsn = json.load(f)

    coco = make_coco_metadata("train", "John Dow")

    for data in tqdm(jsn):
        convert_data(coco, data['ID'], data['External ID'],
                     data['Labeled Data'], data['Label']['objects'])

    with open('output.json', 'w') as f:
        json.dump(coco, f, indent=4)


def convert_data(coco, id, file_name, image_url, labels):
    """Append one image and its annotations to ``coco`` in place.

    Args:
        coco: COCO dictionary as produced by ``make_coco_metadata``.
        id: Identifier stored as the COCO image id.
        file_name: Local path of the image; also stored as ``file_name``.
        image_url: URL stored as both ``flickr_url`` and ``coco_url``.
        labels: List of label dicts; each must have a ``'title'`` key.
    """
    image = {
        "id": id,
        "file_name": file_name,
        "license": None,
        "flickr_url": image_url,
        "coco_url": image_url,
        "date_captured": None,
    }

    file_path = file_name
    # Use a context manager so the file handle is released immediately.
    with Image.open(file_path) as img:
        image['width'], image['height'] = img.size

    coco['images'].append(image)

    # Nothing to annotate when the image has no labels.
    if not labels:
        return

    for label_data in labels:
        title = label_data['title']

        # Look the category up afresh for every label. Carrying the id over
        # from the previous label would attach a new title to the old
        # category and never register the new one.
        category_id = next(
            (c['id'] for c in coco['categories'] if c['name'] == title),
            None,
        )

        if category_id is None:
            category_id = len(coco['categories']) + 1
            category = {
                # Adjust here if the data has a real supercategory.
                'supercategory': title,
                'id': category_id,
                'name': title,
            }
            coco['categories'].append(category)

        # NOTE(review): the mask is read from `file_name`, the same path used
        # for the source image above, so every label of an image gets the same
        # mask. If per-label annotation masks are stored separately, this path
        # has to point at them - confirm against the local file layout.
        # convert("L") drops the alpha channel and yields a 2-D array.
        with Image.open(file_name) as mask_image:
            binary_mask = np.array(mask_image.convert("L"))

        create_annotation_info(coco=coco,
                               image_id=id,
                               category_id=category_id,
                               binary_mask=binary_mask)


def create_annotation_info(coco, image_id, category_id, binary_mask,
                           tolerance=2):
    """Build a COCO annotation from a mask and append it to ``coco``.

    Args:
        coco: COCO dictionary; ``coco['annotations']`` is appended to.
        image_id: Id of the image the annotation belongs to.
        category_id: Id of the annotation's category.
        binary_mask: 2-D array; non-zero pixels are treated as foreground
            for the area and bounding box.
        tolerance: Accepted for interface compatibility but not forwarded;
            polygons are built with ``get_polygons``' own default so that
            existing output does not change.
    """
    # Collapse to strict 0/1 before RLE encoding so that a mask with more
    # than two grey levels still yields a meaningful area and bounding box.
    # Two-valued masks (0/1 or 0/255) encode exactly as before.
    foreground = (np.asarray(binary_mask) > 0).astype(np.uint8)
    binary_mask_encoded = mask.encode(np.asfortranarray(foreground))

    area = mask.area(binary_mask_encoded)
    bounding_box = mask.toBbox(binary_mask_encoded)
    polygons = get_polygons(binary_mask)

    # Labelbox's own COCO export always used iscrowd: 0, so do the same.
    annotation = {
        "id": len(coco['annotations']) + 1,
        "image_id": image_id,
        "category_id": category_id,
        "iscrowd": 0,
        "area": area.tolist(),
        "bbox": bounding_box.tolist(),
        "segmentation": polygons,
    }

    coco['annotations'].append(annotation)


def get_polygons(binary_mask, tolerance=5):
    """Return the mask outline as a list of flat ``[x0, y0, x1, y1, ...]`` lists.

    Args:
        binary_mask: 2-D mask array.
        tolerance: Tolerance passed to ``measure.approximate_polygon``.

    Returns:
        One flat coordinate list per contour that keeps at least three
        points after simplification.
    """
    polygons = []
    # Pad the mask to close contours of shapes which start and end at an edge.
    padded_binary_mask = np.pad(
        binary_mask, pad_width=1, mode='constant', constant_values=0)
    contours = measure.find_contours(padded_binary_mask, 0.5)

    for contour in contours:
        # Undo the padding offset one contour at a time: the contours have
        # different lengths, so they cannot be stacked into a single array.
        contour = contour - 1
        contour = close_contour(contour)
        contour = measure.approximate_polygon(contour, tolerance=tolerance)
        if len(contour) < 3:
            continue
        # find_contours yields (row, col); COCO wants (x, y).
        contour = np.flip(contour, axis=1)
        segmentation = contour.ravel().tolist()
        # After padding and subtracting 1 we may get -0.5 points.
        segmentation = [0 if i < 0 else i for i in segmentation]
        polygons.append(segmentation)

    return polygons


def close_contour(contour):
    """Return ``contour`` with its first point appended if it is not closed."""
    if not np.array_equal(contour[0], contour[-1]):
        contour = np.vstack((contour, contour[0]))
    return contour


def make_coco_metadata(project_name, created_by):
    """Return an empty COCO dictionary with the ``info`` section filled in.

    Args:
        project_name: Stored as ``info.description``.
        created_by: Stored as ``info.contributor``.
    """
    # Read the clock once so `year` and `date_created` always agree.
    now = dt.datetime.now(dt.timezone.utc)
    return {
        'info': {
            'year': now.year,
            'version': None,
            'description': project_name,
            'contributor': created_by,
            'url': 'labelbox.com',
            'date_created': now.isoformat(),
        },
        'images': [],
        'annotations': [],
        'licenses': [],
        'categories': [],
    }


if __name__ == '__main__':
    main()
参考:
Labelbox/coco_exporter.py at master · Labelbox/Labelbox · GitHub
pycococreator/pycococreatortools.py at master · waspinator/pycococreator · GitHub