# Source code for mira.datasets.voc

"""VOC parsing tools"""

import os
import typing
from xml.etree import ElementTree
import logging

from tqdm import tqdm

from .. import core

# Module-level logger, named after this module.
log = logging.getLogger(__name__)  # pylint: disable=invalid-name

# Scene-level fields to pull out of a VOC annotation document. Every entry is
# a chain of tag names that map_xml_to_metadata follows from the XML root,
# one nested child lookup per tag.
VOC_SCENE_METADATA_MAP: typing.List[typing.List[str]] = [
    # Where the image lives.
    ["folder"],
    ["filename"],
    # Image dimensions, nested under <size>.
    ["size", "width"],
    ["size", "height"],
    # Provenance, nested under <source>.
    ["source", "database"],
    ["source", "annotation"],
    ["source", "image"],
    ["segmented"],
]

# Tag chains for annotation-level fields, in the same format as above.
# NOTE(review): not referenced in the visible part of this file.
VOC_ANNOTATION_METADATA_MAP: typing.List[typing.List[str]] = [
    ["pose"],
    ["truncated"],
    ["difficult"],
]


def map_xml_to_metadata(
    paths: typing.List[typing.List[str]], root: ElementTree.Element
):
    """Collect the text of nested XML elements into a flat dictionary.

    Args:
        paths: Tag chains to follow from ``root``. Each chain is walked
            one child lookup (``Element.find``) at a time.
        root: Element at which every lookup starts.

    Returns:
        A dict whose keys are the tags of each chain joined with ``":"``
        and whose values are the ``text`` of the element reached (which
        is ``None`` for an element without text). Chains that hit a
        missing element are logged at INFO level and left out.
    """
    collected = {}
    for tags in paths:
        label = ":".join(tags)
        node: typing.Optional[ElementTree.Element] = root
        for tag in tags:
            node = node.find(tag)
            if node is None:
                log.info("Missing annotation metadata: %s", label)
                break
        else:
            # Only reached when the whole chain resolved (no break above).
            collected[label] = node.text
    return collected


def load_voc(
    filepaths: typing.List[str],
    categories: core.Categories,
    image_dir: typing.Optional[str] = None,
) -> core.SceneCollection:
    """Build a scene collection from VOC XML annotation files.

    Args:
        filepaths: A list of VOC files to read
        categories: The annotation configuration to use.
        image_dir: Folder in which to look for images. When omitted, the
            folder is resolved separately for every XML file: the
            directory containing that XML file, joined with the
            ``folder`` value from the XML when one is present.

    Returns:
        A new scene collection, one scene per VOC file

    Raises:
        KeyError: If an annotation file has no ``filename`` element.
    """
    scenes = []
    for filepath in tqdm(filepaths, desc="Loading VOC annotation files."):
        root = ElementTree.parse(filepath).getroot()

        # Get the scene level metadata
        scene_metadata = map_xml_to_metadata(paths=VOC_SCENE_METADATA_MAP, root=root)

        # Resolve the image directory per file. The argument itself must not
        # be overwritten, otherwise the directory derived from the first XML
        # file would be reused for every file that follows.
        if image_dir is not None:
            scene_image_dir = image_dir
        else:
            scene_image_dir = os.path.dirname(filepath)
            # .get(): a missing <folder> element is absent from the metadata,
            # while an empty one is present with a value of None.
            folder = scene_metadata.get("folder")
            if folder is not None:
                scene_image_dir = os.path.join(scene_image_dir, folder)

        image_path = os.path.join(scene_image_dir, scene_metadata["filename"])

        annotations = []
        for obj in root.findall("object"):
            category = categories[obj.find("name").text]  # type: ignore
            bndbox = obj.find("bndbox")
            # Coordinates may be written as floats; int() truncates them
            # toward zero.
            xmin, ymin, xmax, ymax = [
                int(float(bndbox.find(k).text))  # type: ignore
                for k in ["xmin", "ymin", "xmax", "ymax"]
            ]
            annotations.append(
                core.Annotation(x1=xmin, y1=ymin, x2=xmax, y2=ymax, category=category)
            )
        scenes.append(
            core.Scene(
                categories=categories,
                annotations=annotations,
                image=image_path,
            )
        )
    return core.SceneCollection(scenes=scenes, categories=categories)