-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathshow_detections.py
More file actions
144 lines (123 loc) · 5.34 KB
/
show_detections.py
File metadata and controls
144 lines (123 loc) · 5.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import numpy as np
import cv2
from PIL import Image
import math
def normalize_bbox(bbox, img_w, img_h):
    """
    Coerce a bounding box in any of the supported formats into integer
    (x, y, w, h) pixel coordinates clamped to the image:
      - [x, y, w, h]
      - [x1, y1, x2, y2]   (detected heuristically)
      - {"x":..., "y":..., "w":..., "h":...}
      - {"x1":..., "y1":..., "x2":..., "y2":...}
      - {"xmin":..., "ymin":..., "xmax":..., "ymax":...}
    Coordinates may be absolute pixels or normalized floats in [0, 1].
    """
    # --- stage 1: extract raw (left, top, width, height) -------------------
    if isinstance(bbox, dict):
        if all(k in bbox for k in ("x", "y", "w", "h")):
            left, top = bbox["x"], bbox["y"]
            width, height = bbox["w"], bbox["h"]
        elif all(k in bbox for k in ("x1", "y1", "x2", "y2")):
            left, top = bbox["x1"], bbox["y1"]
            width = bbox["x2"] - left
            height = bbox["y2"] - top
        elif all(k in bbox for k in ("xmin", "ymin", "xmax", "ymax")):
            left, top = bbox["xmin"], bbox["ymin"]
            width = bbox["xmax"] - left
            height = bbox["ymax"] - top
        else:
            raise ValueError(f"Unsupported bbox dict keys: {list(bbox.keys())}")
    elif isinstance(bbox, (list, tuple)) and len(bbox) == 4:
        a, b, c, d = bbox
        # Heuristic: positive c,d larger than 1 pixel AND strictly beyond
        # (a, b) reads like a corner pair (x1, y1, x2, y2) in pixels.
        looks_like_corners = (
            c > 0 and d > 0 and (c > 1.0 or d > 1.0) and c > a and d > b
        )
        if looks_like_corners:
            left, top, width, height = a, b, c - a, d - b
        else:
            left, top, width, height = a, b, c, d
    else:
        raise TypeError(f"Unsupported bbox type: {type(bbox)}")

    # --- stage 2: scale normalized values up to pixels ---------------------
    def _to_pixels(value, extent):
        # Values inside [0, 1] are treated as fractions of the image extent.
        return value * extent if 0.0 <= float(value) <= 1.0 else float(value)

    left = _to_pixels(left, img_w)
    top = _to_pixels(top, img_h)
    width = _to_pixels(width, img_w)
    height = _to_pixels(height, img_h)

    # --- stage 3: clamp into image bounds, minimum 1x1 box -----------------
    left = max(0, min(int(round(left)), img_w - 1))
    top = max(0, min(int(round(top)), img_h - 1))
    width = max(1, min(int(math.ceil(width)), img_w - left))
    height = max(1, min(int(math.ceil(height)), img_h - top))
    return left, top, width, height
def to_bgr_ndarray(img):
    """
    Coerce common image containers into a 3-channel BGR np.ndarray.

    Accepts a PIL.Image, an np.ndarray (grayscale, 3-channel, or 4-channel
    BGRA), or encoded bytes (e.g. JPEG). Raises ValueError for None or
    undecodable bytes, TypeError for anything else.
    """
    if img is None:
        raise ValueError("to_bgr_ndarray: image is None")

    # PIL image: force RGB, then swap channel order for OpenCV.
    if isinstance(img, Image.Image):
        rgb = np.array(img.convert("RGB"))
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Encoded bytes (JPEG/PNG/...): let OpenCV decode straight to BGR.
    if isinstance(img, (bytes, bytearray)):
        decoded = cv2.imdecode(np.frombuffer(img, dtype=np.uint8), cv2.IMREAD_COLOR)
        if decoded is None:
            raise ValueError("to_bgr_ndarray: failed to decode bytes")
        return decoded

    # Raw array: dispatch on dimensionality / channel count.
    if isinstance(img, np.ndarray):
        if img.ndim == 2:
            # Single-channel -> replicate into BGR.
            return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        if img.ndim == 3 and img.shape[2] == 3:
            # Assumed already BGR (OpenCV convention); if the source is
            # known to be RGB, convert before calling this helper.
            return img
        if img.ndim == 3 and img.shape[2] == 4:
            return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    raise TypeError(f"to_bgr_ndarray: unsupported image type {type(img)}")
def show_detections(image, detections, window_name="Detections"):
    """
    Render detection boxes and labels onto `image` and display the result
    in an OpenCV window (non-blocking: waitKey(1)).

    detections: iterable of dicts with keys "bbox" (any format accepted by
    normalize_bbox), "displayName" (or fallback "class"), and "confidence".
    """
    canvas = to_bgr_ndarray(image).copy()
    canvas_h, canvas_w = canvas.shape[:2]
    green = (0, 255, 0)
    for item in detections:
        left, top, box_w, box_h = normalize_bbox(item["bbox"], canvas_w, canvas_h)
        name = item.get("displayName", item.get("class", "obj"))
        score = float(item.get("confidence", 0.0))
        cv2.rectangle(canvas, (left, top), (left + box_w, top + box_h), green, 2)
        # Keep the caption on-screen even for boxes near the top edge.
        caption_origin = (left, max(top - 10, 20))
        cv2.putText(canvas, f"{name} {score:.2f}", caption_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
    cv2.imshow(window_name, canvas)
    cv2.waitKey(1)
def render_detections_data_url(image, detections, max_width=960, quality=80):
    """
    Draw bounding boxes and labels on a copy of `image`, optionally downscale
    it so its width is at most `max_width`, and return the result as a
    'data:image/jpeg;base64,...' URL string.

    Args:
        image: anything accepted by to_bgr_ndarray (PIL.Image, ndarray, bytes).
        detections: iterable of dicts with keys "bbox" (any normalize_bbox
            format), "displayName" (or fallback "class"), and "confidence".
        max_width: frames wider than this are shrunk to cap payload size.
        quality: JPEG quality (0-100) passed to cv2.imencode.

    Returns:
        The data-URL string, or None if JPEG encoding fails.
    """
    # Local import: only this function needs base64, keeps module load light.
    # (Removed an unused `from io import BytesIO` that served no purpose.)
    import base64

    vis = to_bgr_ndarray(image).copy()
    ih, iw = vis.shape[:2]
    for det in detections:
        x, y, w, h = normalize_bbox(det["bbox"], iw, ih)
        label = det.get("displayName", det.get("class", "obj"))
        conf = float(det.get("confidence", 0.0))
        cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(vis, f"{label} {conf:.2f}", (x, max(y - 10, 20)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    # Optional downscale for bandwidth.
    if iw > max_width:
        scale = max_width / float(iw)
        vis = cv2.resize(vis, (int(iw * scale), int(ih * scale)),
                         interpolation=cv2.INTER_AREA)
    # Encode JPEG; failure is signaled to the caller via None, not raised.
    encode_params = [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)]
    ok, buf = cv2.imencode('.jpg', vis, encode_params)
    if not ok:
        return None
    b64 = base64.b64encode(buf.tobytes()).decode('ascii')
    return f"data:image/jpeg;base64,{b64}"