把COCO数据集的json标注转变成VOC数据集xml格式的标注；json数据标注转xml数据标注；把coco数据集json格式转变单张图片对应的xml格式

本文主要介绍如何把COCO数据集的json标注转变成VOC数据集xml格式的标注（json数据标注转xml数据标注），即把coco数据集的json格式转变成单张图片对应的xml格式，希望对大家解决编程问题提供一定的参考价值，需要的开发者们跟着小编一起来学习吧！

主要是以目标检测为例进行的

COCO数据集json格式样本

{"info": {"description": "COCO 2017 Dataset","url": "http://cocodataset.org","version": "1.0","year": 2017,"contributor": "COCO Consortium","date_created": "2017/09/01"},"licenses": [{"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/","id": 1,"name": "Attribution-NonCommercial-ShareAlike License"}],
"images": [{"license": 4,"file_name": "000000397133.jpg","coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg","height": 427,"width": 640,"date_captured": "2013-11-14 17:02:52","flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg","id": 397133},{"license": 1,"file_name": "000000037777.jpg","coco_url": "http://images.cocodataset.org/val2017/000000037777.jpg","height": 230,"width": 352,"date_captured": "2013-11-14 20:55:31","flickr_url": "http://farm9.staticflickr.com/8429/7839199426_f6d48aa585_z.jpg","id": 37777}],
"annotations": [{"segmentation":[[510.66,423.01,511.72,420.03,510.45,416.0,510.34,413.02,510.77,410.26,510.77,407.5,510.34,405.16,511.51,402.83,511.41,400.49,510.24,398.16,509.39,397.31,504.61,399.22,502.17,399.64,500.89,401.66,500.47,402.08,499.09,401.87,495.79,401.98,490.59,401.77,488.79,401.77,485.39,398.58,483.9,397.31,481.56,396.35,478.48,395.93,476.68,396.03,475.4,396.77,473.92,398.79,473.28,399.96,473.49,401.87,474.56,403.47,473.07,405.59,473.39,407.71,476.68,409.41,479.23,409.73,481.56,410.69,480.4,411.85,481.35,414.93,479.86,418.65,477.32,420.03,476.04,422.58,479.02,422.58,480.29,423.01,483.79,419.93,486.66,416.21,490.06,415.57,492.18,416.85,491.65,420.24,492.82,422.9,493.56,424.39,496.43,424.6,498.02,423.01,498.13,421.31,497.07,420.03,497.07,415.15,496.33,414.51,501.1,411.96,502.06,411.32,503.02,415.04,503.33,418.12,501.1,420.24,498.98,421.63,500.47,424.39,505.03,423.32,506.2,421.31,507.69,419.5,506.31,423.32,510.03,423.01,510.45,423.01]],"area": 702.1057499999998,"iscrowd": 0,"image_id": 289343,"bbox": [473.07,395.93,38.65,28.67],"category_id": 18,"id": 1768}],"categories": [{"supercategory": "person","id": 1,"name": "person"},{"supercategory": "vehicle","id": 2,"name": "bicycle"},{"supercategory": "vehicle","id": 3,"name": "car"},{"supercategory": "vehicle","id": 4,"name": "motorcycle"},{"supercategory": "vehicle","id": 5,"name": "airplane"},{"supercategory": "vehicle","id": 6,"name": "bus"},{"supercategory": "vehicle","id": 7,"name": "train"},{"supercategory": "vehicle","id": 8,"name": "truck"},{"supercategory": "vehicle","id": 9,"name": "boat"},{"supercategory": "outdoor","id": 10,"name": "traffic light"},{"supercategory": "outdoor","id": 11,"name": "fire hydrant"},{"supercategory": "outdoor","id": 13,"name": "stop sign"},{"supercategory": "outdoor","id": 14,"name": "parking meter"},{"supercategory": "outdoor","id": 15,"name": "bench"},{"supercategory": "animal","id": 16,"name": "bird"},{"supercategory": "animal","id": 17,"name": 
"cat"},{"supercategory": "animal","id": 18,"name": "dog"},{"supercategory": "animal","id": 19,"name": "horse"},{"supercategory": "animal","id": 20,"name": "sheep"},{"supercategory": "animal","id": 21,"name": "cow"},{"supercategory": "animal","id": 22,"name": "elephant"},{"supercategory": "animal","id": 23,"name": "bear"},{"supercategory": "animal","id": 24,"name": "zebra"},{"supercategory": "animal","id": 25,"name": "giraffe"},{"supercategory": "accessory","id": 27,"name": "backpack"},{"supercategory": "accessory","id": 28,"name": "umbrella"},{"supercategory": "accessory","id": 31,"name": "handbag"},{"supercategory": "accessory","id": 32,"name": "tie"},{"supercategory": "accessory","id": 33,"name": "suitcase"},{"supercategory": "sports","id": 34,"name": "frisbee"},{"supercategory": "sports","id": 35,"name": "skis"},{"supercategory": "sports","id": 36,"name": "snowboard"},{"supercategory": "sports","id": 37,"name": "sports ball"},{"supercategory": "sports","id": 38,"name": "kite"},{"supercategory": "sports","id": 39,"name": "baseball bat"},{"supercategory": "sports","id": 40,"name": "baseball glove"},{"supercategory": "sports","id": 41,"name": "skateboard"},{"supercategory": "sports","id": 42,"name": "surfboard"},{"supercategory": "sports","id": 43,"name": "tennis racket"},{"supercategory": "kitchen","id": 44,"name": "bottle"},{"supercategory": "kitchen","id": 46,"name": "wine glass"},{"supercategory": "kitchen","id": 47,"name": "cup"},{"supercategory": "kitchen","id": 48,"name": "fork"},{"supercategory": "kitchen","id": 49,"name": "knife"},{"supercategory": "kitchen","id": 50,"name": "spoon"},{"supercategory": "kitchen","id": 51,"name": "bowl"},{"supercategory": "food","id": 52,"name": "banana"},{"supercategory": "food","id": 53,"name": "apple"},{"supercategory": "food","id": 54,"name": "sandwich"},{"supercategory": "food","id": 55,"name": "orange"},{"supercategory": "food","id": 56,"name": "broccoli"},{"supercategory": "food","id": 57,"name": 
"carrot"},{"supercategory": "food","id": 58,"name": "hot dog"},{"supercategory": "food","id": 59,"name": "pizza"},{"supercategory": "food","id": 60,"name": "donut"},{"supercategory": "food","id": 61,"name": "cake"},{"supercategory": "furniture","id": 62,"name": "chair"},{"supercategory": "furniture","id": 63,"name": "couch"},{"supercategory": "furniture","id": 64,"name": "potted plant"},{"supercategory": "furniture","id": 65,"name": "bed"},{"supercategory": "furniture","id": 67,"name": "dining table"},{"supercategory": "furniture","id": 70,"name": "toilet"},{"supercategory": "electronic","id": 72,"name": "tv"},{"supercategory": "electronic","id": 73,"name": "laptop"},{"supercategory": "electronic","id": 74,"name": "mouse"},{"supercategory": "electronic","id": 75,"name": "remote"},{"supercategory": "electronic","id": 76,"name": "keyboard"},{"supercategory": "electronic","id": 77,"name": "cell phone"},{"supercategory": "appliance","id": 78,"name": "microwave"},{"supercategory": "appliance","id": 79,"name": "oven"},{"supercategory": "appliance","id": 80,"name": "toaster"},{"supercategory": "appliance","id": 81,"name": "sink"},{"supercategory": "appliance","id": 82,"name": "refrigerator"},{"supercategory": "indoor","id": 84,"name": "book"},{"supercategory": "indoor","id": 85,"name": "clock"},{"supercategory": "indoor","id": 86,"name": "vase"},{"supercategory": "indoor","id": 87,"name": "scissors"},{"supercategory": "indoor","id": 88,"name": "teddy bear"},{"supercategory": "indoor","id": 89,"name": "hair drier"},{"supercategory": "indoor","id": 90,"name": "toothbrush"}]}

其中
info:可以不关注
images：主要包含一张图片的公共信息，如宽高，图片名，图片id

file_name:图片名称
height：高
width：宽
id：图片的id。在images中是唯一的

annotations：主要包含图像中每一个对象的信息，如标出的对象的边框box，标出对象的类别如人，狗，猫对应的id

image_id:图片id对应上面images中的id，但是这个不是唯一的。因为一张图中可能会标出多个对象。
bbox：是标注对象的边框信息[xmin,ymin,width,height]
category_id:对象类别id，如person对应的id为1

categories：主要是对象类别的信息，如类别名称、类别id（COCO数据集共有80个类别，但类别id并不连续，取值范围为1~90），我们只关注id和name就行

id:类别id，唯一
name:类别名称

VOC数据集的xml格式样本

<?xml version='1.0' encoding='utf-8'?>
<annotation><folder>JPEGImages</folder><filename>PartA_00000.jpg</filename><path>/home/robot11/py-faster-rcnn/data/VOCdevkit2007/VOC2007/JPEGImages/00000.jpg</path><source><database>Unknown</database></source><size><width>1070</width><height>594</height><depth>3</depth></size><segmented>0</segmented><object><name>head</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin>64</xmin><ymin>222</ymin><xmax>107</xmax><ymax>271</ymax></bndbox></object>
</annotation>

只要把json转换成上面格式就行。实现代码如下

import os
import json
import cv2
from lxml import etree
import xml.etree.cElementTree as ET
import time
import pandas as pd
from tqdm import tqdm
from xml.dom.minidom import Document
# Input: COCO "instances" annotation file. Output: directory that receives one
# VOC-style XML file per image.
anno = "instances_val2017.json"
xmldir = "train/"

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default (locale) encoding.
with open(anno, 'r', encoding='utf-8') as load_f:
    f = json.load(load_f)

# One row per annotated object (columns come from the JSON "annotations" list).
df_anno = pd.DataFrame(f['annotations'])
# One dict per image, taken from the JSON "images" list.
imgs = f['images']
# Category id -> category name; filled in by createCate().
cata = {}
def createCate():
    """Fill the module-level ``cata`` dict with ``category id -> category name``.

    The entries are read from the ``categories`` list of the already loaded
    COCO JSON (module-level ``f``).
    """
    for item in f['categories']:
        cata[item['id']] = item['name']


def _add_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    element.appendChild(doc.createTextNode(str(text)))
    return element


def _add_object(doc, annotation, cate_name, bbox):
    """Append one VOC ``<object>`` element built from a COCO bbox.

    ``bbox`` is indexed as ``[xmin, ymin, width, height]``; the VOC corners are
    written as integers (``int()`` truncates toward zero).
    """
    obj = doc.createElement('object')
    annotation.appendChild(obj)
    _add_text_element(doc, obj, 'name', cate_name)
    _add_text_element(doc, obj, 'pose', 'Unspecified')
    _add_text_element(doc, obj, 'truncated', '0')
    _add_text_element(doc, obj, 'difficult', '0')

    bndbox = doc.createElement('bndbox')
    obj.appendChild(bndbox)
    _add_text_element(doc, bndbox, 'xmin', int(bbox[0]))
    _add_text_element(doc, bndbox, 'ymin', int(bbox[1]))
    _add_text_element(doc, bndbox, 'xmax', int(bbox[0] + bbox[2]))
    _add_text_element(doc, bndbox, 'ymax', int(bbox[1] + bbox[3]))


def json2xml():
    """Write one VOC-style XML annotation file per image into ``xmldir``.

    Every entry of the module-level ``imgs`` list produces a file named after
    the image (extension replaced by ``.xml``) containing the image file name,
    its width/height and one ``<object>`` per annotation of that image.
    Images without annotations still get a file (with no ``<object>``).

    Raises:
        KeyError: if an annotation refers to a category id missing from
            ``cata`` (``createCate()`` must have been called first).
    """
    # The output directory may not exist yet; open() would fail otherwise.
    os.makedirs(xmldir, exist_ok=True)

    # Index the annotations by image once, instead of scanning the whole
    # DataFrame for every image. Row order within an image is preserved.
    objects_by_image = {}
    if len(df_anno):
        columns = zip(df_anno["image_id"], df_anno["bbox"], df_anno["category_id"])
        for image_id, bbox, category_id in columns:
            objects_by_image.setdefault(image_id, []).append((bbox, category_id))

    for im in imgs:
        filename = im['file_name']

        doc = Document()
        annotation = doc.createElement('annotation')
        doc.appendChild(annotation)
        _add_text_element(doc, annotation, 'filename', filename)

        size = doc.createElement('size')
        annotation.appendChild(size)
        _add_text_element(doc, size, 'width', im['width'])
        _add_text_element(doc, size, 'height', im['height'])

        for bbox, category_id in objects_by_image.get(im['id'], ()):
            _add_object(doc, annotation, cata[category_id], bbox)

        # splitext swaps only the real extension, whatever it is; a plain
        # str.replace('.jpg', '.xml') would leave e.g. '.png' names untouched.
        xml_name = os.path.splitext(filename)[0] + '.xml'
        xmlpath = os.path.join(xmldir, xml_name)
        # The XML declaration names no encoding, so parsers assume UTF-8.
        with open(xmlpath, "w", encoding="utf-8") as xml_file:
            xml_file.write(doc.toprettyxml(indent="  "))


createCate()
json2xml()

去掉COCO数据集中的不需要的检测对象只保留自己想要的对象

import os
import json
import cv2
from lxml import etree
import xml.etree.cElementTree as ET
import time
import pandas as pd
from tqdm import tqdm
from xml.dom.minidom import Document
# Input: COCO "instances" annotation file. Output: directory that receives one
# VOC-style XML file per image that contains at least one wanted object.
anno = "instances_val2017.json"
xml_dir = "test/"
# Optional: archive an existing output directory under a timestamped name and
# start from an empty one.
# dttm = time.strftime("%Y%m%d%H%M%S", time.localtime())
# if os.path.exists(xml_dir):
#     os.rename(xml_dir,xml_dir+dttm)
# os.mkdir(xml_dir)
import json

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default (locale) encoding.
with open(anno, 'r', encoding='utf-8') as load_f:
    f = json.load(load_f)

# One row per annotated object (columns come from the JSON "annotations" list).
df_anno = pd.DataFrame(f['annotations'])
# One dict per image, taken from the JSON "images" list.
imgs = f['images']
# Category id -> category name; filled in by createCate().
cata={}
# Category names to keep; objects of any other category are dropped.
nameList=[ 'bench',  'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',  'banana', 'apple',
'sandwich', 'orange', 'broccoli','carrot', 'hot dog', 'pizza', 'donut', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'book', 'clock', 'vase', 'scissors', 'hair drier',  'toothbrush']
# Optional per-category / per-image counters (disabled).
#nameNum={ 'bench':0,  'backpack':0, 'umbrella':0, 'handbag':0, 'tie':0, 'suitcase':0, 'frisbee':0, 'skis':0, 'snowboard':0, 'sports ball':0, 'kite':0, 'baseball bat':0, 'baseball glove':0,
#          'skateboard':0, 'surfboard':0, 'tennis racket':0, 'bottle':0, 'wine glass':0, 'cup':0, 'fork':0, 'knife':0, 'spoon':0, 'bowl':0,  'banana':0, 'apple':0,
#'sandwich':0, 'orange':0, 'broccoli':0,'carrot':0, 'hot dog':0, 'pizza':0, 'donut':0, 'laptop':0, 'mouse':0, 'remote':0, 'keyboard':0, 'cell phone':0, 'book':0, 'clock':0, 'vase':0, 'scissors':0, 'hair drier':0,  'toothbrush':0}
#imageSum=0
# Set by json2xml(): 1 when the image being processed has a kept object.
flag=0
def createCate():
    """Fill the module-level ``cata`` dict with ``category id -> category name``.

    The entries are read from the ``categories`` list of the already loaded
    COCO JSON (module-level ``f``).
    """
    for item in f['categories']:
        cata[item['id']] = item['name']


def _add_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    parent.appendChild(element)
    element.appendChild(doc.createTextNode(str(text)))
    return element


def _add_object(doc, annotation, cate_name, bbox):
    """Append one VOC ``<object>`` element built from a COCO bbox.

    ``bbox`` is indexed as ``[xmin, ymin, width, height]``; the VOC corners are
    written as integers (``int()`` truncates toward zero).
    """
    obj = doc.createElement('object')
    annotation.appendChild(obj)
    _add_text_element(doc, obj, 'name', cate_name)
    _add_text_element(doc, obj, 'pose', 'Unspecified')
    _add_text_element(doc, obj, 'truncated', '0')
    _add_text_element(doc, obj, 'difficult', '0')

    bndbox = doc.createElement('bndbox')
    obj.appendChild(bndbox)
    _add_text_element(doc, bndbox, 'xmin', int(bbox[0]))
    _add_text_element(doc, bndbox, 'ymin', int(bbox[1]))
    _add_text_element(doc, bndbox, 'xmax', int(bbox[0] + bbox[2]))
    _add_text_element(doc, bndbox, 'ymax', int(bbox[1] + bbox[3]))


def json2xml():
    """Write VOC-style XML files for images that contain wanted objects.

    For every entry of the module-level ``imgs`` list, only annotations whose
    category name is in ``nameList`` are kept; each skipped annotation is
    reported on stdout. An XML file (named after the image, extension replaced
    by ``.xml``) is written into ``xml_dir`` only when at least one object was
    kept for that image.

    Side effects:
        Updates the module-level ``flag`` (0/1 for the image processed last).

    Raises:
        KeyError: if an annotation refers to a category id missing from
            ``cata`` (``createCate()`` must have been called first).
    """
    # ``flag`` stays a module-level variable so existing readers keep working.
    global flag
    #global imageSum

    # The output directory may not exist yet; open() would fail otherwise.
    os.makedirs(xml_dir, exist_ok=True)

    # Set membership is O(1); the list would be scanned for every object.
    wanted = set(nameList)

    # Index the annotations by image once, instead of scanning the whole
    # DataFrame for every image. Row order within an image is preserved.
    objects_by_image = {}
    if len(df_anno):
        columns = zip(df_anno["image_id"], df_anno["bbox"], df_anno["category_id"])
        for image_id, bbox, category_id in columns:
            objects_by_image.setdefault(image_id, []).append((bbox, category_id))

    for im in imgs:
        #imageSum = imageSum+1
        flag = 0
        filename = im['file_name']

        doc = Document()
        annotation = doc.createElement('annotation')
        doc.appendChild(annotation)
        _add_text_element(doc, annotation, 'filename', filename)

        size = doc.createElement('size')
        annotation.appendChild(size)
        _add_text_element(doc, size, 'width', im['width'])
        _add_text_element(doc, size, 'height', im['height'])

        for bbox, category_id in objects_by_image.get(im['id'], ()):
            cate_name = cata[category_id]
            if cate_name not in wanted:
                print(cate_name+",don`t in namelis")
                continue
            flag = 1
            #nameNum[cate_name]=nameNum[cate_name]+1
            _add_object(doc, annotation, cate_name, bbox)

        if flag == 1:
            # splitext swaps only the real extension, whatever it is; a plain
            # str.replace('.jpg', '.xml') would leave e.g. '.png' names untouched.
            xml_name = os.path.splitext(filename)[0] + '.xml'
            xml_path = os.path.join(xml_dir, xml_name)
            # The XML declaration names no encoding, so parsers assume UTF-8.
            with open(xml_path, "w", encoding="utf-8") as xml_file:
                xml_file.write(doc.toprettyxml(indent="  "))


createCate()
json2xml()
#print('imagenum:',imageSum)
#print(nameNum)

这篇关于把COCO数据集的json标注转变成VOC数据集xml格式的标注；json数据标注转xml数据标注；把coco数据集json格式转变单张图片对应的xml格式的文章就介绍到这儿，希望我们推荐的文章对程序员们有所帮助！