# Source code for mira.datasets.voc

"""VOC parsing tools"""

import os
import typing
from xml.etree import ElementTree
import logging

from tqdm import tqdm

from .. import core

# Module-level logger, named after this module.
log = logging.getLogger(__name__)  # pylint: disable=invalid-name

# Scene-level XML paths. Each entry is a list of nested tag names to walk
# from the root element (e.g. ["size", "width"] is <size><width>). They are
# written here as ":"-separated keys, the same form map_xml_to_metadata uses
# for its output keys, and split back into path lists.
VOC_SCENE_METADATA_MAP = [
    key.split(":")
    for key in (
        "folder",
        "filename",
        "size:width",
        "size:height",
        "source:database",
        "source:annotation",
        "source:image",
        "segmented",
    )
]

# Single-tag paths for annotation-level metadata, in the same list-of-paths
# shape as the scene map above.
VOC_ANNOTATION_METADATA_MAP = [
    key.split(":") for key in ("pose", "truncated", "difficult")
]


def map_xml_to_metadata(
    paths: typing.List[typing.List[str]], root: ElementTree.Element
):
    """Collect the text of selected XML elements into a flat dict.

    Args:
        paths: Tag paths to look up. Each path is a list of nested tag
            names that is walked from ``root`` one ``find`` at a time.
        root: The element the paths are resolved against.

    Returns:
        A dict keyed by the path joined with ``":"`` (for example
        ``"size:width"``) whose values are the ``text`` of the element
        found. Paths that cannot be fully resolved are logged at INFO
        level and left out of the result.
    """
    found = {}
    for segments in paths:
        label = ":".join(segments)
        node: typing.Optional[ElementTree.Element] = root
        for tag in segments:
            # Narrowing for the type checker: the walk stops as soon as a
            # lookup fails, so node is never None when we descend.
            assert node is not None
            node = node.find(tag)
            if node is None:
                log.info("Missing annotation metadata: %s", label)
                break
        if node is not None:
            found[label] = node.text
    return found


def load_voc(
    filepaths: typing.List[str],
    categories: core.Categories,
    image_dir: typing.Optional[str] = None,
) -> core.SceneCollection:
    """Read scenes from VOC XML annotation files.

    Args:
        filepaths: A list of VOC files to read
        categories: The annotation configuration to use. Each object's
            ``<name>`` text is looked up in it to get the category.
        image_dir: Folder in which to look for images. If omitted, the
            folder is resolved separately for every XML file: the
            directory containing the XML file, joined with the file's
            ``<folder>`` value when that element is present and non-empty.

    Returns:
        A new scene collection, one scene per VOC file

    Raises:
        KeyError: If an annotation file has no ``<filename>`` element.
    """
    scenes = []
    for filepath in tqdm(filepaths, desc="Loading VOC annotation files."):
        root = ElementTree.parse(filepath).getroot()

        # Get the scene level metadata
        scene_metadata = map_xml_to_metadata(paths=VOC_SCENE_METADATA_MAP, root=root)

        # Resolve the image folder into a per-file local. Assigning back to
        # ``image_dir`` would make the first file's folder stick for every
        # later file in ``filepaths``.
        if image_dir is None:
            xml_dir = os.path.dirname(filepath)
            # ``.get``: map_xml_to_metadata omits keys for missing elements,
            # and an empty <folder/> yields None as its text.
            folder = scene_metadata.get("folder")
            if folder is None:
                scene_image_dir = xml_dir
            else:
                scene_image_dir = os.path.join(xml_dir, folder)
        else:
            scene_image_dir = image_dir
        image_path = os.path.join(scene_image_dir, scene_metadata["filename"])

        annotations = []
        for obj in root.findall("object"):
            category = categories[obj.find("name").text]  # type: ignore
            bndbox = obj.find("bndbox")
            # Coordinates may be written as floats (e.g. "12.0"); parse as
            # float first, then truncate to int.
            xmin, ymin, xmax, ymax = [
                int(float(bndbox.find(k).text))  # type: ignore
                for k in ["xmin", "ymin", "xmax", "ymax"]
            ]
            annotations.append(
                core.Annotation(x1=xmin, y1=ymin, x2=xmax, y2=ymax, category=category)
            )
        scenes.append(
            core.Scene(
                categories=categories,
                annotations=annotations,
                image=image_path,
            )
        )
    return core.SceneCollection(scenes=scenes, categories=categories)