Labelboxのアウトプットが独自フォーマットになり、COCO形式で出力できなくなったうえ、公式リポジトリの変換ツールも使えないので作った。正直いろんなサイトのやつを見て作ったのでオリジナリティはない。
なお、for文でlabelboxのデータにアクセスするとたまにtimeoutになるので元の画像とアノテーション画像をローカルに保存してからやる。
あとLabelboxはRLE形式では出力せずにどんなときもpolygon形式で出力していたのでそれに合わせた。
import json
import io
import logging
import datetime as dt
import os
import numpy as np
from skimage import measure
from PIL import Image
from pycocotools import mask
from tqdm import tqdm
def main(input_path='input.json', output_path='output.json',
         project_name="train", created_by="John Dow"):
    """Convert a Labelbox JSON export into a COCO-format JSON file.

    Args:
        input_path: Path of the Labelbox export to read.
        output_path: Path of the COCO JSON file to write.
        project_name: Stored as the COCO ``info.description``.
        created_by: Stored as the COCO ``info.contributor``.

    Each export row must provide ``'ID'``, ``'External ID'`` and
    ``'Labeled Data'``; the label objects are read from
    ``row['Label']['objects']``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding when reading or writing it.
    with open(input_path, encoding='utf-8') as f:
        rows = json.load(f)

    coco = make_coco_metadata(project_name, created_by)
    for row in tqdm(rows):
        label = row.get('Label')
        # Rows without an 'objects' list (presumably skipped/unlabeled
        # items -- verify against the export) are registered as images
        # with no annotations instead of aborting the whole conversion.
        objects = label.get('objects', []) if isinstance(label, dict) else []
        convert_data(coco, row['ID'], row['External ID'],
                     row['Labeled Data'], objects)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(coco, f, indent=4)
def convert_data(coco, id, file_name, image_url, labels):
    """Add one exported image and its labels to the COCO structure.

    The image is appended to ``coco['images']``.  For every label object the
    matching category is looked up by name (and created when missing), then
    an annotation is appended via ``create_annotation_info``.

    Args:
        coco: COCO dict with ``'images'``, ``'categories'`` and
            ``'annotations'`` lists; mutated in place.
        id: Identifier stored as the image ``id`` and the annotations'
            ``image_id``.  (The name shadows the builtin but is kept so
            keyword callers keep working.)
        file_name: Local path of the image file; also stored as the COCO
            ``file_name``.
        image_url: Stored as both ``flickr_url`` and ``coco_url``.
        labels: Iterable of label objects, each with a ``'title'`` key.
            An empty or ``None`` value registers the image only.
    """
    image = {
        "id": id,
        "file_name": file_name,
        "license": None,
        "flickr_url": image_url,
        "coco_url": image_url,
        "date_captured": None,
    }
    # Context manager so the file handle is released immediately.
    with Image.open(file_name) as img:
        image['width'], image['height'] = img.size
    coco['images'].append(image)

    if not labels:
        return

    for label_data in labels:
        title = label_data['title']
        # Resolve the category for *this* label.  The lookup result must not
        # carry over from the previous label, otherwise a label with a new
        # title would reuse the previous id and never get its own category.
        category_id = next(
            (c['id'] for c in coco['categories'] if c['name'] == title),
            None,
        )
        if category_id is None:
            category_id = len(coco['categories']) + 1
            coco['categories'].append({
                'supercategory': title,
                'id': category_id,
                'name': title,
            })

        # NOTE(review): the mask is read from ``file_name``, i.e. the same
        # file used for the image size above, and is identical for every
        # label.  Presumably a per-label annotation image is intended --
        # confirm before relying on the produced masks.
        with Image.open(file_name) as mask_img:
            binary_mask = np.array(mask_img.convert("L"))
        create_annotation_info(coco=coco, image_id=id,
                               category_id=category_id,
                               binary_mask=binary_mask)
def create_annotation_info(coco, image_id, category_id, binary_mask, tolerance=2):
    """Build a COCO annotation from a mask and append it to ``coco``.

    Args:
        coco: COCO dict; the new entry is appended to ``coco['annotations']``.
        image_id: Stored as the annotation's ``image_id``.
        category_id: Stored as the annotation's ``category_id``.
        binary_mask: 2-D array describing the annotated region.
        tolerance: Accepted but not used by this function; ``get_polygons``
            is called without it.

    The annotation ``id`` is the current number of annotations plus one.
    Area and bounding box come from the RLE encoding of the mask, the
    segmentation from ``get_polygons``.
    """
    # Convert to a Fortran-ordered uint8 array before RLE encoding.
    fortran_mask = np.asfortranarray(binary_mask.astype(np.uint8))
    rle = mask.encode(fortran_mask)

    coco['annotations'].append({
        "id": len(coco['annotations']) + 1,
        "image_id": image_id,
        "category_id": category_id,
        "iscrowd": 0,
        "area": mask.area(rle).tolist(),
        "bbox": mask.toBbox(rle).tolist(),
        "segmentation": get_polygons(binary_mask),
    })
def get_polygons(binary_mask, tolerance=5):
    """Trace the outlines of a mask as COCO polygon segmentations.

    Args:
        binary_mask: 2-D array; contours are extracted at level 0.5.
        tolerance: Maximum distance passed to
            ``measure.approximate_polygon`` when simplifying each contour.
            Defaults to 5, the value previously hard-coded here.

    Returns:
        A list of polygons, each a flat list of coordinates with the two
        columns of every contour point swapped relative to what
        ``find_contours`` returns.  Negative coordinates are clamped to 0.
        Contours that simplify to fewer than three points are dropped.
    """
    polygons = []
    # Pad with a one-pixel zero border so regions touching the image edge
    # still produce closed contours.
    padded_binary_mask = np.pad(
        binary_mask, pad_width=1, mode='constant', constant_values=0)

    for contour in measure.find_contours(padded_binary_mask, 0.5):
        # Undo the padding offset per contour.  Subtracting from the whole
        # list at once fails when contours have different lengths, because
        # NumPy cannot build an array from a ragged sequence.
        contour = close_contour(contour - 1)
        contour = measure.approximate_polygon(contour, tolerance=tolerance)
        if len(contour) < 3:
            continue
        # Swap the two coordinate columns before flattening.
        contour = np.flip(contour, axis=1)
        flat = contour.ravel().tolist()
        # Removing the padding offset can yield slightly negative values.
        polygons.append([0 if value < 0 else value for value in flat])

    return polygons
def close_contour(contour):
    """Return ``contour`` with its first point appended if it is not closed.

    A contour whose first and last points are already equal is returned
    unchanged (the same object); otherwise a new array with the first point
    stacked at the end is returned.
    """
    first_point, last_point = contour[0], contour[-1]
    if np.array_equal(first_point, last_point):
        return contour
    return np.vstack((contour, first_point))
def make_coco_metadata(project_name, created_by):
    """Return an empty COCO dataset skeleton with its ``info`` section filled.

    Args:
        project_name: Stored as ``info.description``.
        created_by: Stored as ``info.contributor``.

    Returns:
        A dict with ``'info'`` plus fresh, empty ``'images'``,
        ``'annotations'``, ``'licenses'`` and ``'categories'`` lists.
    """
    # Read the clock once so ``year`` and ``date_created`` can never
    # disagree (e.g. when called across a year boundary).
    now = dt.datetime.now(dt.timezone.utc)
    return {
        'info': {
            'year': now.year,
            'version': None,
            'description': project_name,
            'contributor': created_by,
            'url': 'labelbox.com',
            'date_created': now.isoformat(),
        },
        'images': [],
        'annotations': [],
        'licenses': [],
        'categories': [],
    }
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
参考:
Labelbox/coco_exporter.py at master · Labelbox/Labelbox · GitHub
pycococreator/pycococreatortools.py at master · waspinator/pycococreator · GitHub