# pylint: disable=invalid-name
"""Metrics for object detection tasks."""
import typing
import numpy as np
from .core.scene import SceneCollection
from .core.annotation import Annotation
from .core.utils import compute_iou
# pylint: disable=unsubscriptable-object
def precision_recall_curve(
true_collection: SceneCollection,
pred_collection: SceneCollection,
iou_threshold: float = 0.5,
) -> typing.Dict[str, np.ndarray]:
"""Compute the precision-recall curve for each of the
classes.
Args:
true_collection: The true scene collection
pred_collection: The predicted scene collection
iou_threshold: The threshold for detection
Returns:
A dict with category names as keys and array of shape (Ni, 3)
which is the precision, recall, and score for each of the
predicted boxes for the category.
"""
assert (
true_collection.categories == pred_collection.categories
), "Annotation configurations must match"
categories = true_collection.categories
assert len(true_collection.scenes) == len(
pred_collection.scenes
), "Must have same scenes in each collection"
    # The ith entry in tfs is a list of three lists: the changes in the
    # number of true positives, the changes in the number of false
    # positives, and the scores at which each change occurred, all for
    # the ith class.
    tfs: typing.List[typing.List[typing.List[int]]] = [
        [[], [], []] for _ in range(len(categories))
    ]
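    # For example, tfs[0] == [[1, 0], [0, 1], [0.9, 0.8]] would mean that,
    # for class 0, the prediction scored 0.9 added one true positive and
    # the prediction scored 0.8 added one false positive (illustrative
    # values only).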
    # The ith entry in pos is the number of true boxes
    # for the ith class.
    pos = [0 for _ in range(len(categories))]
for true, pred in zip(true_collection, pred_collection):
pred_bboxes = pred.bboxes()
true_bboxes = true.bboxes()
pred_scores = pred.scores()
assert all(
s is not None for s in pred_scores
), "All annotations must have a score."
for classIdx in range(len(categories)):
pred_bboxes_cur = pred_bboxes[pred_bboxes[:, 4] == classIdx]
true_bboxes_cur = true_bboxes[true_bboxes[:, 4] == classIdx]
pred_scores_cur = pred_scores[pred_bboxes[:, 4] == classIdx]
nPredicted = len(pred_bboxes_cur)
nTrue = len(true_bboxes_cur)
pos[classIdx] += nTrue
if nPredicted == 0:
# We have no new information to add if there were no
# predicted boxes.
continue
if nTrue == 0:
# All of them are false positives
for score in pred_scores_cur:
tfs[classIdx][0].append(0)
tfs[classIdx][1].append(1)
tfs[classIdx][2].append(score)
continue
            # Sort the predicted boxes by decreasing confidence.
            order = (-pred_scores_cur).argsort()
            pred_bboxes_cur = pred_bboxes_cur[order]
            pred_scores_cur = pred_scores_cur[order]
            # det[i, j] is True when the IoU between predicted box i and
            # true box j exceeds the threshold.
det = (
compute_iou(
pred_bboxes_cur[:, :4],
true_bboxes_cur[:, :4],
)
> iou_threshold
)
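            # For example, with 2 predictions and 3 true boxes, det might be
            # [[True, False, False],
            #  [False, False, True]]
            # i.e., prediction 0 detects true box 0 and prediction 1 detects
            # true box 2 (illustrative values only).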
fp_prev = 0
tp_prev = 0
            for i in range(nPredicted):
                # Number of true boxes detected by the top i + 1 predictions.
                tp_cur = det[: i + 1].max(axis=0).sum()
                # Predictions among the top i + 1 that detect no true box.
                fp_cur = (i + 1) - det[: i + 1].max(axis=1).sum()
tp_delta = tp_cur - tp_prev
fp_delta = fp_cur - fp_prev
assert tp_delta >= 0
assert fp_delta >= 0
assert tp_cur <= nTrue
assert fp_cur <= nPredicted
tp_prev = tp_cur
fp_prev = fp_cur
tfs[classIdx][0].append(tp_delta)
tfs[classIdx][1].append(fp_delta)
tfs[classIdx][2].append(pred_scores_cur[i])
    prs = [None] * len(categories)
    for classIdx, (tfs_cur, pos_cur) in enumerate(zip(tfs, pos)):
        # Sort the accumulated (tp_delta, fp_delta, score) triples by
        # decreasing score and take running sums. If there were
        # predictions but no true boxes, recall is 0 / 0 = NaN; mAP()
        # handles that case downstream.
tfs_cur_arr = np.array(tfs_cur).T
tfs_cur_arr = tfs_cur_arr[(-tfs_cur_arr[:, 2]).argsort()]
tp = tfs_cur_arr[:, 0].cumsum()
fp = tfs_cur_arr[:, 1].cumsum()
scores = tfs_cur_arr[:, 2]
precisions = tp / (tp + fp)
recalls = tp / pos_cur
prs[classIdx] = np.vstack([precisions, recalls, scores]).T # type: ignore
return dict(zip([c.name for c in categories], prs)) # type: ignore
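
# How the accumulation above turns into a curve, as a minimal sketch with
# made-up (tp_delta, fp_delta, score) triples (plain numpy, no
# SceneCollection required; illustrative values only):
#
#     triples = np.array([
#         [1, 0, 0.9],  # top-scoring prediction matched a true box
#         [0, 1, 0.8],  # next prediction was a false positive
#         [1, 0, 0.6],  # another true positive
#     ])
#     n_true = 3  # total number of true boxes for the class
#     tp, fp = triples[:, 0].cumsum(), triples[:, 1].cumsum()
#     precision = tp / (tp + fp)  # [1.0, 0.5, 0.667]
#     recall = tp / n_true        # [0.333, 0.333, 0.667]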


def mIOU(
    true_collection: SceneCollection,
    pred_collection: SceneCollection,
    threshold: float = 0.5,
) -> typing.Dict[str, float]:
"""Compute mIOU for two scene collections"""
categories = true_collection.categories
intersection = np.zeros(len(categories), dtype="int32")
union = np.zeros(len(categories), dtype="int32")
for true, pred in zip(true_collection, pred_collection):
true_segmap, pred_segmap = [
s.segmentation_map(binary=True, threshold=threshold).astype(bool)
for s in [true, pred]
]
intersection += (pred_segmap & true_segmap).sum(axis=(1, 2))
union += (pred_segmap | true_segmap).sum(axis=(1, 2))
    # Classes that appear in no scene yield 0 / 0 = NaN (with a warning).
    return dict(zip([c.name for c in categories], intersection / union))
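
# The per-class IoU above is |A ∩ B| / |A ∪ B| over boolean masks, with
# counts summed across scenes before dividing. A minimal sketch for a
# single class with made-up 2x2 masks (illustrative values only):
#
#     a = np.array([[True, True], [False, False]])
#     b = np.array([[True, False], [True, False]])
#     iou = (a & b).sum() / (a | b).sum()  # 1 / 3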


def mAP(
    true_collection: SceneCollection,
    pred_collection: SceneCollection,
    iou_threshold: float = 0.5,
) -> typing.Dict[str, float]:
"""Compute mAP (mean average precision) for
a pair of scene collections.
Args:
true_collection: The true scene collection
pred_collection: The predicted scene collection
iou_threshold: The threshold for detection
Returns:
mAP class scores
"""
prs = precision_recall_curve(true_collection, pred_collection, iou_threshold)
aps = {}
for className, prs_cur in prs.items():
        ps = prs_cur[:, 0]
        rs = prs_cur[:, 1].astype("float32")
        # If rs is empty, there were no detections and no true boxes.
        # If it is all NaNs, there were detections but no true boxes
        # (recall was 0 / 0). Either way, AP is undefined for this class.
        if rs.size == 0 or np.isnan(rs).all():
            aps[className] = np.nan
            continue
        pi = np.zeros(11)
        for i, r in enumerate(np.linspace(0, 1, 11)):
            # From section 4.2 of the VOC paper: the precision at each
            # recall level r is interpolated by taking the maximum
            # precision measured at any recall that exceeds r.
            pc = ps[rs >= r]
            if pc.size > 0:
                pi[i] = pc.max()
aps[className] = pi.mean()
return aps
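
# The 11-point interpolation above, as a minimal sketch on a toy curve
# (illustrative values only):
#
#     ps = np.array([1.0, 0.5, 0.667])      # precisions
#     rs = np.array([0.333, 0.333, 0.667])  # recalls
#     pi = np.zeros(11)
#     for i, r in enumerate(np.linspace(0, 1, 11)):
#         pc = ps[rs >= r]
#         if pc.size > 0:
#             pi[i] = pc.max()
#     ap = pi.mean()  # 1.0 at r in {0, 0.1, 0.2, 0.3}, 0.667 at
#                     # {0.4, 0.5, 0.6}, 0 above => roughly 0.545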


def crop_error_examples(
    true_collection: SceneCollection,
    pred_collection: SceneCollection,
    threshold: float = 0.3,
    iou_threshold: float = 0.1,
) -> typing.List[typing.Dict[str, typing.List[Annotation]]]:
"""Get crops of true positives, false negatives, and false positives.
Args:
true_collection: A collection of the ground truth scenes.
pred_collection: A collection of the predicted scenes.
threshold: The score threshold for selecting annotations from predicted scenes.
iou_threhsold: The IoU threshold for counting a box as a true positive.
Returns:
A list of dicts with "tps", "fps", and "fns"
with the same length of the input collections. The values in each dict
are crops from the original image.
"""
examples = []
for true_scene, pred_scene in zip(true_collection, pred_collection):
pred_scene = pred_scene.assign(
annotations=[
a
for a in pred_scene.annotations
if a.score is None or a.score > threshold
]
)
        boxes_true = true_scene.bboxes()[:, :4]
        boxes_pred = pred_scene.bboxes()[:, :4]
        # iou[i, j] is the IoU between predicted box i and true box j.
        iou = compute_iou(boxes_pred, boxes_true)
        examples.append(
            {
                "tps": [
                    ann.assign(score=pred_scene.annotations[predIdx].score)
                    for ann, max_iou, predIdx in zip(
                        true_scene.annotations, iou.max(axis=0), iou.argmax(axis=0)
                    )
                    if max_iou > iou_threshold
                ]
                if (pred_scene.annotations and true_scene.annotations)
                else [],
                "fps": [
                    ann
                    for ann, max_iou in zip(
                        pred_scene.annotations,
                        iou.max(axis=1)
                        if true_scene.annotations
                        else [-1] * len(pred_scene.annotations),
                    )
                    if max_iou < iou_threshold
                ]
                if pred_scene.annotations
                else [],
                "fns": [
                    ann
                    for ann, max_iou in zip(
                        true_scene.annotations,
                        iou.max(axis=0)
                        if pred_scene.annotations
                        else [-1] * len(true_scene.annotations),
                    )
                    if max_iou < iou_threshold
                ]
                if true_scene.annotations
                else [],
            }
        )
return examples
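
# The tps/fps/fns split above reduces to row and column maxima of the IoU
# matrix. A minimal sketch with a made-up 2x3 matrix (rows are predictions,
# columns are true boxes; illustrative values only):
#
#     iou = np.array([
#         [0.6, 0.0, 0.0],
#         [0.0, 0.05, 0.0],
#     ])
#     iou_threshold = 0.1
#     tp_true_idxs = np.nonzero(iou.max(axis=0) > iou_threshold)[0]  # [0]
#     fp_pred_idxs = np.nonzero(iou.max(axis=1) < iou_threshold)[0]  # [1]
#     fn_true_idxs = np.nonzero(iou.max(axis=0) < iou_threshold)[0]  # [1, 2]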


def classification_metrics(
    true_collection: SceneCollection, pred_collection: SceneCollection
) -> typing.Dict[str, typing.Dict[str, float]]:
"""Compute precision/recall/f1 for each class."""
    true = true_collection.onehot()
    # Take the highest-scoring class for each predicted scene.
    pred = pred_collection.onehot(binary=False).argmax(axis=1)
metrics = {}
for cIdx, category in enumerate(true_collection.categories):
pos = true[:, cIdx] == 1
prd = pred == cIdx
tps = (pos & prd).sum()
fps = (~pos & prd).sum()
fns = (pos & ~prd).sum()
        # Classes with no predictions (or no true examples) divide by
        # zero here and yield NaN with a numpy warning.
        precision = tps / (tps + fps)
        recall = tps / (tps + fns)
metrics[category.name] = {
"precision": precision,
"recall": recall,
"f1": 2 * (precision * recall) / (precision + recall),
}
return metrics
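
# The per-class arithmetic above, as a minimal sketch for one class
# (illustrative values only):
#
#     pos = np.array([True, True, False, False])  # true label == class
#     prd = np.array([True, False, True, False])  # predicted == class
#     tps = (pos & prd).sum()   # 1
#     fps = (~pos & prd).sum()  # 1
#     fns = (pos & ~prd).sum()  # 1
#     precision = tps / (tps + fps)  # 0.5
#     recall = tps / (tps + fns)     # 0.5
#     f1 = 2 * precision * recall / (precision + recall)  # 0.5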