Source code for mira.metrics

# pylint: disable=invalid-name
"""Metrics for object detection tasks."""
import typing

import numpy as np

from .core.scene import SceneCollection
from .core.annotation import Annotation
from .core.utils import compute_iou


# pylint: disable=unsubscriptable-object
def precision_recall_curve(
    true_collection: SceneCollection,
    pred_collection: SceneCollection,
    iou_threshold: float = 0.5,
) -> typing.Dict[str, np.ndarray]:
    """Compute the precision-recall curve for each of the classes.

    Args:
        true_collection: The true scene collection
        pred_collection: The predicted scene collection
        iou_threshold: The threshold for detection

    Returns:
        A dict with category names as keys and arrays of shape (Ni, 3)
        as values, where each row holds the precision, recall, and score
        for one of the predicted boxes for that category.
    """
    assert (
        true_collection.categories == pred_collection.categories
    ), "Annotation configurations must match"
    categories = true_collection.categories
    assert len(true_collection.scenes) == len(
        pred_collection.scenes
    ), "Must have same scenes in each collection"
    # The ith entry in tfs is a list of lists, each of length three,
    # which are the change in the number of true positives and
    # false positives, along with the score at which the change
    # occurred for the ith class.
    tfs: typing.List[typing.List[typing.List[int]]] = [
        [[], [], []] for c in range(len(categories))
    ]
    # The ith entry in pos is the number of true boxes
    # for the ith class.
    pos = [0 for c in range(len(categories))]
    for true, pred in zip(true_collection, pred_collection):
        pred_bboxes = pred.bboxes()
        true_bboxes = true.bboxes()
        pred_scores = pred.scores()
        assert all(
            s is not None for s in pred_scores
        ), "All annotations must have a score."
        for classIdx in range(len(categories)):
            pred_bboxes_cur = pred_bboxes[pred_bboxes[:, 4] == classIdx]
            true_bboxes_cur = true_bboxes[true_bboxes[:, 4] == classIdx]
            pred_scores_cur = pred_scores[pred_bboxes[:, 4] == classIdx]
            nPredicted = len(pred_bboxes_cur)
            nTrue = len(true_bboxes_cur)
            pos[classIdx] += nTrue
            if nPredicted == 0:
                # We have no new information to add if there were no
                # predicted boxes.
                continue
            if nTrue == 0:
                # All of the predictions are false positives.
                for score in pred_scores_cur:
                    tfs[classIdx][0].append(0)
                    tfs[classIdx][1].append(1)
                    tfs[classIdx][2].append(score)
                continue
            # Sort the predicted boxes by decreasing confidence.
            pred_bboxes_cur = pred_bboxes_cur[(-pred_scores_cur).argsort()]
            pred_scores_cur = pred_scores_cur[(-pred_scores_cur).argsort()]
            # (n, m): detection status of the ith prediction against
            # the jth true box.
            det = (
                compute_iou(
                    pred_bboxes_cur[:, :4],
                    true_bboxes_cur[:, :4],
                )
                > iou_threshold
            )
            fp_prev = 0
            tp_prev = 0
            for i in range(nPredicted):
                tp_cur = det[: i + 1].max(axis=0).sum()
                fp_cur = (i + 1) - det[: i + 1].max(axis=1).sum()
                tp_delta = tp_cur - tp_prev
                fp_delta = fp_cur - fp_prev
                assert tp_delta >= 0
                assert fp_delta >= 0
                assert tp_cur <= nTrue
                assert fp_cur <= nPredicted
                tp_prev = tp_cur
                fp_prev = fp_cur
                tfs[classIdx][0].append(tp_delta)
                tfs[classIdx][1].append(fp_delta)
                tfs[classIdx][2].append(pred_scores_cur[i])
    prs = [None for n in range(len(categories))]
    for classIdx, tfs_cur, pos_cur in zip(range(len(categories)), tfs, pos):
        # If we had no detections AND there were no true boxes,
        # precision and recall are not defined.
        tfs_cur_arr = np.array(tfs_cur).T
        tfs_cur_arr = tfs_cur_arr[(-tfs_cur_arr[:, 2]).argsort()]
        tp = tfs_cur_arr[:, 0].cumsum()
        fp = tfs_cur_arr[:, 1].cumsum()
        scores = tfs_cur_arr[:, 2]
        precisions = tp / (tp + fp)
        recalls = tp / pos_cur
        prs[classIdx] = np.vstack([precisions, recalls, scores]).T  # type: ignore
    return dict(zip([c.name for c in categories], prs))  # type: ignore
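
A minimal usage sketch (not part of the module source), assuming `true_scenes` and `pred_scenes` are hypothetical, index-aligned SceneCollection objects with identical categories and scored predicted annotations:

# Hypothetical example; `true_scenes` and `pred_scenes` are assumed
# pre-built, index-aligned SceneCollections with matching categories.
from mira import metrics

curves = metrics.precision_recall_curve(true_scenes, pred_scenes, iou_threshold=0.5)
for name, pr in curves.items():
    # Each row of `pr` is (precision, recall, score) for one prediction,
    # ordered by decreasing score.
    precision, recall, score = pr[-1]
    print(f"{name}: precision={precision:.2f}, recall={recall:.2f} at score={score:.2f}")
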

def mIOU(
    true_collection: SceneCollection, pred_collection: SceneCollection, threshold=0.5
) -> typing.Dict[str, float]:
    """Compute mIOU for two scene collections."""
    categories = true_collection.categories
    intersection = np.zeros(len(categories), dtype="int32")
    union = np.zeros(len(categories), dtype="int32")
    for true, pred in zip(true_collection, pred_collection):
        true_segmap, pred_segmap = [
            s.segmentation_map(binary=True, threshold=threshold).astype(bool)
            for s in [true, pred]
        ]
        intersection += (pred_segmap & true_segmap).sum(axis=(1, 2))
        union += (pred_segmap | true_segmap).sum(axis=(1, 2))
    return dict(zip([c.name for c in categories], intersection / union))
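
A similar sketch for mIOU (hypothetical collection names again; it relies on each scene being renderable as a binary segmentation map, as the function itself does):

# Hypothetical example; assumes the same `true_scenes` / `pred_scenes`
# pair as above.
from mira import metrics

ious = metrics.mIOU(true_scenes, pred_scenes, threshold=0.5)
for name, iou in ious.items():
    print(f"{name}: IoU={iou:.3f}")
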

def mAP(
    true_collection: SceneCollection,
    pred_collection: SceneCollection,
    iou_threshold: float = 0.5,
) -> typing.Dict[str, float]:
    """Compute mAP (mean average precision) for a pair of scene collections.

    Args:
        true_collection: The true scene collection
        pred_collection: The predicted scene collection
        iou_threshold: The threshold for detection

    Returns:
        Average precision for each class, keyed by category name.
    """
    prs = precision_recall_curve(true_collection, pred_collection, iou_threshold)
    aps = {}
    for className, prs_cur in prs.items():
        ps = prs_cur[:, 0]
        rs = prs_cur[:, 1].astype("float32")
        pi = np.zeros(11)
        # If rs is None, there were no detections and no true boxes.
        # If it is all NaNs, there were detections but no true boxes.
        if rs is None or np.isnan(rs).sum() == rs.shape[0]:
            aps[className] = np.nan
            continue
        for i, r in enumerate(np.linspace(0, 1, 11)):
            # From section 4.2 of the VOC paper: the precision at each
            # recall level r is interpolated by taking the maximum
            # precision measured for a method for which the
            # corresponding recall exceeds r.
            pc = ps[rs >= r]
            if len(pc) > 0:  # pylint: disable=len-as-condition
                pi[i] = pc.max()
        aps[className] = pi.mean()
    return aps
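
A sketch of computing per-class average precision and collapsing it to a single mean (hypothetical collection names as above):

# Hypothetical example; `true_scenes` / `pred_scenes` as above.
import numpy as np

from mira import metrics

aps = metrics.mAP(true_scenes, pred_scenes, iou_threshold=0.5)
for name, ap in aps.items():
    print(f"{name}: AP={ap:.3f}")
# Classes with no true boxes come back as NaN, so skip them when averaging.
mean_ap = float(np.nanmean(list(aps.values())))
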

def crop_error_examples(
    true_collection: SceneCollection,
    pred_collection: SceneCollection,
    threshold=0.3,
    iou_threshold=0.1,
) -> typing.List[typing.Dict[str, typing.List[Annotation]]]:
    """Get crops of true positives, false negatives, and false positives.

    Args:
        true_collection: A collection of the ground truth scenes.
        pred_collection: A collection of the predicted scenes.
        threshold: The score threshold for selecting annotations from
            predicted scenes.
        iou_threshold: The IoU threshold for counting a box as a true
            positive.

    Returns:
        A list of dicts with "tps", "fps", and "fns" keys, with the same
        length as the input collections. The values in each dict are
        crops from the original image.
    """
    examples = []
    for true_scene, pred_scene in zip(true_collection, pred_collection):
        # Keep only predictions above the score threshold.
        pred_scene = pred_scene.assign(
            annotations=[
                a
                for a in pred_scene.annotations
                if a.score is None or a.score > threshold
            ]
        )
        boxes_true = true_scene.bboxes()[:, :4]
        boxes_pred = pred_scene.bboxes()[:, :4]
        iou = compute_iou(boxes_pred, boxes_true)
        examples.append(
            {
                # True boxes whose best-matching prediction exceeds the
                # IoU threshold, carrying over that prediction's score.
                "tps": [
                    ann.assign(score=pred_scene.annotations[predIdx].score)
                    for ann, iou, predIdx in zip(
                        true_scene.annotations, iou.max(axis=0), iou.argmax(axis=0)
                    )
                    if iou > iou_threshold
                ]
                if (pred_scene.annotations and true_scene.annotations)
                else [],
                # Predicted boxes that fail to overlap any true box.
                "fps": [
                    ann
                    for ann, iou in zip(
                        pred_scene.annotations,
                        iou.max(axis=1)
                        if true_scene.annotations
                        else [-1] * len(pred_scene.annotations),
                    )
                    if iou < iou_threshold
                ]
                if len(pred_scene.annotations) > 0
                else [],
                # True boxes that no prediction sufficiently overlaps.
                "fns": [
                    ann
                    for ann, iou in zip(
                        true_scene.annotations,
                        iou.max(axis=0)
                        if pred_scene.annotations
                        else [-1] * len(true_scene.annotations),
                    )
                    if iou < iou_threshold
                ]
                if true_scene.annotations
                else [],
            }
        )
    return examples
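
A sketch of reviewing the per-scene error groups (hypothetical names again; the mistakes are simply counted here rather than rendered):

# Hypothetical example; `true_scenes` / `pred_scenes` as above.
from mira import metrics

examples = metrics.crop_error_examples(
    true_scenes, pred_scenes, threshold=0.3, iou_threshold=0.1
)
for scene_idx, groups in enumerate(examples):
    print(
        f"scene {scene_idx}: "
        f"{len(groups['tps'])} true positives, "
        f"{len(groups['fps'])} false positives, "
        f"{len(groups['fns'])} false negatives"
    )
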

def classification_metrics(
    true_collection: SceneCollection, pred_collection: SceneCollection
):
    """Compute precision/recall/f1 for each class."""
    true = true_collection.onehot()
    pred = pred_collection.onehot(binary=False).argmax(axis=1)
    metrics = {}
    for cIdx, category in enumerate(true_collection.categories):
        pos = true[:, cIdx] == 1
        prd = pred == cIdx
        tps = (pos & prd).sum()
        fps = (~pos & prd).sum()
        fns = (pos & ~prd).sum()
        precision = tps / (tps + fps)
        recall = tps / (tps + fns)
        metrics[category.name] = {
            "precision": precision,
            "recall": recall,
            "f1": 2 * (precision * recall) / (precision + recall),
        }
    return metrics
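
And a sketch for the classification case, assuming the same hypothetical collections hold single-label classification scenes:

# Hypothetical example; `true_scenes` / `pred_scenes` as above.
from mira import metrics

report = metrics.classification_metrics(true_scenes, pred_scenes)
for name, scores in report.items():
    print(
        f"{name}: precision={scores['precision']:.2f}, "
        f"recall={scores['recall']:.2f}, f1={scores['f1']:.2f}"
    )
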