# USAGE
# python multi_object_tracking_fast.py --prototxt mobilenet_ssd/MobileNetSSD_deploy.prototxt \
# --model mobilenet_ssd/MobileNetSSD_deploy.caffemodel --video race.mp4

# import the necessary packages
from imutils.video import FPS
import multiprocessing
import numpy as np
import argparse
import imutils
import dlib
import cv2
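
# each tracked object gets its own daemon process plus a dedicated pair of
# queues: the main process pushes the latest RGB frame onto every input
# queue, and each tracker process pushes its updated bounding box onto its
# output queue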
def start_tracker(box, label, rgb, inputQueue, outputQueue):
    # construct a dlib rectangle object from the bounding box
    # coordinates and then start the correlation tracker
    t = dlib.correlation_tracker()
    rect = dlib.rectangle(int(box[0]), int(box[1]), int(box[2]), int(box[3]))
    t.start_track(rgb, rect)

    # loop indefinitely -- this function will be called as a daemon
    # process so we don't need to worry about joining it
    while True:
        # attempt to grab the next frame from the input queue
        rgb = inputQueue.get()

        # if there was an entry in our queue, process it
        if rgb is not None:
            # update the tracker and grab the position of the tracked
            # object
            t.update(rgb)
            pos = t.get_position()

            # unpack the position object
            startX = int(pos.left())
            startY = int(pos.top())
            endX = int(pos.right())
            endY = int(pos.bottom())

            # add the label + bounding box coordinates to the output
            # queue
            outputQueue.put((label, (startX, startY, endX, endY)))
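
# note: dlib's correlation_tracker.update() also returns a confidence score
# (the peak-to-side-lobe ratio); a variant of start_tracker could inspect it
# and stop publishing boxes once the score suggests the track has been lost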

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
    help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
    help="path to Caffe pre-trained model")
ap.add_argument("-v", "--video", required=True,
    help="path to input video file")
ap.add_argument("-o", "--output", type=str,
    help="path to optional output video file")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# initialize our list of queues -- both input queue and output queue
# for *every* object that we will be tracking
inputQueues = []
outputQueues = []

# initialize the list of class labels MobileNet SSD was trained to
# detect
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
    "sofa", "train", "tvmonitor"]

def main():
    # load our serialized model from disk
    print("[INFO] loading model...")
    net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

    # initialize the video stream and output video writer
    print("[INFO] starting video stream...")
    vs = cv2.VideoCapture(args["video"])
    writer = None

    # start the frames per second throughput estimator
    fps = FPS().start()
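
    # FPS from imutils simply counts calls to update() between start() and
    # stop(), so the figure reported at the end is overall pipeline
    # throughput, not the video's native frame rate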

    # loop over frames from the video file stream
    while True:
        # grab the next frame from the video file
        (grabbed, frame) = vs.read()

        # check to see if we have reached the end of the video file
        if frame is None:
            break

        # resize the frame for faster processing and then convert the
        # frame from BGR to RGB ordering (dlib needs RGB ordering)
        frame = imutils.resize(frame, width=600)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # if we are supposed to be writing a video to disk, initialize
        # the writer
        if args["output"] is not None and writer is None:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(args["output"], fourcc, 30,
                (frame.shape[1], frame.shape[0]), True)

        # if our list of queues is empty then we know we have yet to
        # create our first object tracker
        if len(inputQueues) == 0:
            # grab the frame dimensions and convert the frame to a blob
            (h, w) = frame.shape[:2]
            blob = cv2.dnn.blobFromImage(frame, 0.007843, (w, h), 127.5)

            # pass the blob through the network and obtain the detections
            # and predictions
            net.setInput(blob)
            detections = net.forward()
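
            # the SSD returns a 4-D array of shape (1, 1, N, 7); each of the
            # N rows holds [batchId, classId, confidence, startX, startY,
            # endX, endY] with the box coordinates normalized to [0, 1]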

            # loop over the detections
            for i in np.arange(0, detections.shape[2]):
                # extract the confidence (i.e., probability) associated
                # with the prediction
                confidence = detections[0, 0, i, 2]

                # filter out weak detections by requiring a minimum
                # confidence
                if confidence > args["confidence"]:
                    # extract the index of the class label from the
                    # detections list
                    idx = int(detections[0, 0, i, 1])
                    label = CLASSES[idx]

                    # if the class label is not a car, ignore it
                    if CLASSES[idx] != "car":
                        continue

                    # compute the (x, y)-coordinates of the bounding box
                    # for the object
                    box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                    (startX, startY, endX, endY) = box.astype("int")
                    bb = (startX, startY, endX, endY)

                    # create two brand new input and output queues,
                    # respectively
                    iq = multiprocessing.Queue()
                    oq = multiprocessing.Queue()
                    inputQueues.append(iq)
                    outputQueues.append(oq)

                    # spawn a daemon process for a new object tracker
                    p = multiprocessing.Process(
                        target=start_tracker,
                        args=(bb, label, rgb, iq, oq))
                    p.daemon = True
                    p.start()

                    # grab the corresponding class label for the detection
                    # and draw the bounding box
                    cv2.rectangle(frame, (startX, startY), (endX, endY),
                        (0, 255, 0), 2)
                    cv2.putText(frame, label, (startX, startY - 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)
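
            # note: detection runs only on this very first frame; cars that
            # enter the scene later are never picked up, which is the main
            # trade-off of this fast, detect-once variant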

        # otherwise, we've already performed detection so let's track
        # multiple objects
        else:
            # loop over each of our input queues and add the input RGB
            # frame to it, enabling us to update each of the respective
            # object trackers running in separate processes
            for iq in inputQueues:
                iq.put(rgb)

            # loop over each of the output queues
            for oq in outputQueues:
                # grab the updated bounding box coordinates for the
                # object -- the .get method is a blocking operation so
                # this will pause our execution until the respective
                # process finishes the tracking update
                (label, (startX, startY, endX, endY)) = oq.get()

                # draw the bounding box from the correlation object
                # tracker
                cv2.rectangle(frame, (startX, startY), (endX, endY),
                    (0, 255, 0), 2)
                cv2.putText(frame, label, (startX, startY - 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)

        # check to see if we should write the frame to disk
        if writer is not None:
            writer.write(frame)

        # show the output frame
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

        # update the FPS counter
        fps.update()

    # stop the timer and display FPS information
    fps.stop()
    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

    # check to see if we need to release the video writer pointer
    if writer is not None:
        writer.release()

    # do a bit of cleanup
    cv2.destroyAllWindows()
    vs.release()

if __name__ == "__main__":
    main()
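
# note: the __main__ guard matters here -- on platforms where
# multiprocessing re-imports this module to start the tracker processes
# (e.g. the "spawn" start method on Windows), it prevents main() from
# running again inside each child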