-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmotion.py
More file actions
308 lines (251 loc) · 10.6 KB
/
motion.py
File metadata and controls
308 lines (251 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
import numpy as np
from skimage.transform import pyramid_gaussian
from skimage.filters import sobel_h, sobel_v, gaussian
from skimage.feature import corner_harris, corner_peaks
def lucas_kanade(img1, img2, keypoints, window_size=5):
""" Estimate flow vector at each keypoint using Lucas-Kanade method.
Args:
img1 - Grayscale image of the current frame. Flow vectors are computed
with respect to this frame.
img2 - Grayscale image of the next frame.
keypoints - Keypoints to track. Numpy array of shape (N, 2).
window_size - Window size to determine the neighborhood of each keypoint.
A window is centered around the current keypoint location.
You may assume that window_size is always an odd number.
Returns:
flow_vectors - Estimated flow vectors for keypoints. flow_vectors[i] is
the flow vector for keypoint[i]. Numpy array of shape (N, 2).
Hints:
- You may use np.linalg.inv to compute inverse matrix
"""
assert window_size % 2 == 1, "window_size must be an odd number"
flow_vectors = []
w = window_size // 2
# Compute partial derivatives
Iy, Ix = np.gradient(img1)
It = img2 - img1
# For each [y, x] in keypoints, estimate flow vector [vy, vx]
# using Lucas-Kanade method and append it to flow_vectors.
for y, x in keypoints:
# Keypoints can be loacated between integer pixels (subpixel locations).
# For simplicity, we round the keypoint coordinates to nearest integer.
# In order to achieve more accurate results, image brightness at subpixel
# locations can be computed using bilinear interpolation.
y = int(round(y)); x = int(round(x))
### YOUR CODE HERE
### The addition of np.eye in 'd' vector is to prevent singular matrix error
### during matrix inversion. This error occurs when you run this code on videos
### other than the one given in assignment. You can skip it for assignment.
A = np.c_[Iy[y-w:y+w+1, x-w:x+w+1].flatten(), Ix[y-w:y+w+1, x-w:x+w+1].flatten()]
b = -It[y-w:y+w+1, x-w:x+w+1].flatten()
d = np.dot(np.linalg.inv(A.T.dot(A) + np.eye(A.shape[1])*1e-6), A.T.dot(b))
flow_vectors.append(d.flatten())
### END YOUR CODE
flow_vectors = np.array(flow_vectors)
return flow_vectors
def iterative_lucas_kanade(img1, img2, keypoints,
window_size=9,
num_iters=5,
g=None):
""" Estimate flow vector at each keypoint using iterative Lucas-Kanade method.
Args:
img1 - Grayscale image of the current frame. Flow vectors are computed
with respect to this frame.
img2 - Grayscale image of the next frame.
keypoints - Keypoints to track. Numpy array of shape (N, 2).
window_size - Window size to determine the neighborhood of each keypoint.
A window is centered around the current keypoint location.
You may assume that window_size is always an odd number.
num_iters - Number of iterations to update flow vector.
g - Flow vector guessed from previous pyramid level.
Returns:
flow_vectors - Estimated flow vectors for keypoints. flow_vectors[i] is
the flow vector for keypoint[i]. Numpy array of shape (N, 2).
"""
assert window_size % 2 == 1, "window_size must be an odd number"
# Initialize g as zero vector if not provided
if g is None:
g = np.zeros(keypoints.shape)
flow_vectors = []
w = window_size // 2
maxy = img2.shape[0]
maxx = img2.shape[1]
# Compute spatial gradients
Iy, Ix = np.gradient(img1)
for y, x, gy, gx in np.hstack((keypoints, g)):
v = np.zeros(2) # Initialize flow vector as zero vector
y1 = int(round(y)); x1 = int(round(x))
# TODO: Compute inverse of G at point (x1, y1)
### YOUR CODE HERE
### This takes care of some corner cases when features are present
### at the boundary of the image. Again, not required for assignment purposes.
###--------------###
if y1-w<=1:
y1 = 1+w
if x1-w<=1:
x1 = 1+w
if y1+w>=maxy:
y1 = maxy-w-1
if x1+w>=maxx:
x1 = maxx-w-1
###---------------###
A = np.c_[Iy[y1-w:y1+w+1, x1-w:x1+w+1].flatten(), Ix[y1-w:y1+w+1, x1-w:x1+w+1].flatten()]
G = A.T.dot(A)
invG = np.linalg.inv(G + np.eye(A.shape[1])*1e-6)
### END YOUR CODE
# iteratively update flow vector
for k in range(num_iters):
vy, vx = v
# Refined position of the point in the next frame
y2 = int(round(y+gy+vy)); x2 = int(round(x+gx+vx))
# TODO: Compute bk and vk = inv(G) x bk
### YOUR CODE HERE
if y2-w<=1:
y2 = 1+w
if x2-w<=1:
x2 = 1+w
if y2+w>=maxy:
y2 = maxy-w-1
if x2+w>=maxx:
x2 = maxx-w-1
# Ik = img1[y1,x1] - img2[y2,x2]
# bk = np.array([np.sum(Ik*A[:,1]), np.sum(Ik*A[:,0])])
Ik = (img1[y1-w:y1+w+1,x1-w:x1+w+1] - img2[y2-w:y2+w+1,x2-w:x2+w+1]).flatten()
bk = np.array([Ik.dot(A[:,0]), Ik.dot(A[:,1])])
vk = invG.dot(bk)
### END YOUR CODE
# Update flow vector by vk
v += vk
vy, vx = v
flow_vectors.append([vy, vx])
return np.array(flow_vectors)
def pyramid_lucas_kanade(img1, img2, keypoints,
window_size=9, num_iters=7,
level=4, scale=2):
""" Pyramidal Lucas Kanade method
Args:
img1 - same as lucas_kanade
img2 - same as lucas_kanade
keypoints - same as lucas_kanade
window_size - same as lucas_kanade
num_iters - number of iterations to run iterative LK method
level - Max level in image pyramid. Original image is at level 0 of
the pyramid.
scale - scaling factor of image pyramid.
Returns:
d - final flow vectors
"""
# Build image pyramids of img1 and img2
pyramid1 = tuple(pyramid_gaussian(img1, max_layer=level, downscale=scale))
pyramid2 = tuple(pyramid_gaussian(img2, max_layer=level, downscale=scale))
# Initialize pyramidal guess
# H,W = keypoints.shape
g = np.zeros(keypoints.shape)
for L in range(level, -1, -1):
### YOUR CODE HERE
Il = pyramid1[L]
Jl = pyramid2[L]
pl = keypoints/(scale**L)
d = iterative_lucas_kanade(Il, Jl, pl, window_size=window_size, num_iters=num_iters, g=g)
### Do not scale in the last iteration
if L!=0:
g = scale * (g + d)
### END YOUR CODE
d = g + d
return d
def compute_error(patch1, patch2):
""" Compute MSE between patch1 and patch2
- Normalize patch1 and patch2
- Compute mean square error between patch1 and patch2
Args:
patch1 - Grayscale image patch of shape (patch_size, patch_size)
patch2 - Grayscale image patch of shape (patch_size, patch_size)
Returns:
error - Number representing mismatch between patch1 and patch2
"""
# assert patch1.shape == patch2.shape, 'Differnt patch shapes'
error = 3.1
if patch1.shape != patch2.shape:
return error
error = 0
### YOUR CODE HERE
patch1 = (patch1 - np.mean(patch1))/np.std(patch1)
patch2 = (patch2 - np.mean(patch2))/np.std(patch2)
error = np.mean(np.square(patch2 - patch1))
### END YOUR CODE
return error
def track_features(frames, keypoints,
error_thresh=1.5,
optflow_fn=pyramid_lucas_kanade,
exclude_border=5,
**kwargs):
""" Track keypoints over multiple frames
Args:
frames - List of grayscale images with the same shape.
keypoints - Keypoints in frames[0] to start tracking. Numpy array of
shape (N, 2).
error_thresh - Threshold to determine lost tracks.
optflow_fn(img1, img2, keypoints, **kwargs) - Optical flow function.
kwargs - keyword arguments for optflow_fn.
Returns:
trajs - A list containing tracked keypoints in each frame. trajs[i]
is a numpy array of keypoints in frames[i]. The shape of trajs[i]
is (Ni, 2), where Ni is number of tracked points in frames[i].
"""
kp_curr = keypoints
trajs = [kp_curr]
patch_size = 3 # Take 3x3 patches to compute error
w = patch_size // 2 # patch_size//2 around a pixel
for i in range(len(frames) - 1):
I = frames[i]
J = frames[i+1]
flow_vectors = optflow_fn(I, J, kp_curr, **kwargs)
kp_next = kp_curr + flow_vectors
new_keypoints = []
for yi, xi, yj, xj in np.hstack((kp_curr, kp_next)):
# Declare a keypoint to be 'lost' IF:
# 1. the keypoint falls outside the image J
# 2. the error between points in I and J is larger than threshold
yi = int(round(yi)); xi = int(round(xi))
yj = int(round(yj)); xj = int(round(xj))
# Point falls outside the image
if yj > J.shape[0]-exclude_border-1 or yj < exclude_border or\
xj > J.shape[1]-exclude_border-1 or xj < exclude_border:
continue
# Compute error between patches in image I and J
patchI = I[yi-w:yi+w+1, xi-w:xi+w+1]
patchJ = J[yj-w:yj+w+1, xj-w:xj+w+1]
error = compute_error(patchI, patchJ)
if error > error_thresh:
continue
new_keypoints.append([yj, xj])
kp_curr = np.array(new_keypoints)
trajs.append(kp_curr)
return trajs
def IoU(bbox1, bbox2):
""" Compute IoU of two bounding boxes
Args:
bbox1 - 4-tuple (x, y, w, h) where (x, y) is the top left corner of
the bounding box, and (w, h) are width and height of the box.
bbox2 - 4-tuple (x, y, w, h) where (x, y) is the top left corner of
the bounding box, and (w, h) are width and height of the box.
Returns:
score - IoU score
"""
x1, y1, w1, h1 = bbox1
x2, y2, w2, h2 = bbox2
score = 0
### YOUR CODE HERE
### IoU is given by intersection area of the two boxes divided by
### Area of union of the two boxes
a1 = w1*h1
a2 = w2*h2
xmin = max(x1,x2)
xmax = min(x1+w1,x2+w2)
ymin = max(y1,y2)
ymax = min(y1+h1,y2+h2)
intersection_area = max(0,xmax-xmin)*max(0,ymax-ymin)
union_area = a1+a2-intersection_area
score = intersection_area/union_area
### END YOUR CODE
return score