Skip to content

Commit cecc2c0

Browse files
author
Guillermo Arana
committed
first commit
0 parents  commit cecc2c0

9 files changed

Lines changed: 787 additions & 0 deletions

File tree

escuchar.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# escuchar.py -- capture one phrase from the default microphone and print
# the text returned by the Google Speech Recognition web service.
import speech_recognition as sr

r = sr.Recognizer()
with sr.Microphone() as source:  # use the default microphone as the audio source
    audio = r.listen(source)  # listen for the first phrase and extract it into audio data

try:
    # recognize speech using Google Speech Recognition.
    # NOTE: SpeechRecognition 3.x removed Recognizer.recognize(); the
    # Google backend is now called via recognize_google().
    print("You said " + r.recognize_google(audio))
except sr.UnknownValueError:  # speech is unintelligible
    print("Could not understand audio")
except sr.RequestError:  # recognition service unreachable or rejected the request
    print("Could not request results from the speech recognition service")

opencv.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python
2+
3+
import cv
4+
5+
class Target:
    """Webcam motion tracker built on the legacy OpenCV 1.x ``cv`` API.

    Captures frames from the default camera, maintains a running-average
    background model, thresholds the per-frame difference into blobs,
    draws a red box around each blob, and paints a bullseye at the
    averaged center of all blob corners.
    """

    def __init__(self):
        # Open the default camera (index 0) and create the display window.
        self.capture = cv.CaptureFromCAM(0)
        cv.NamedWindow("Target", 1)

    def run(self):
        """Run the capture/track/display loop until ESC is pressed."""
        # Capture first frame to get size
        frame = cv.QueryFrame(self.capture)
        # NOTE(review): frame_size is computed but never used below.
        frame_size = cv.GetSize(frame)
        color_image = cv.CreateImage(cv.GetSize(frame), 8, 3)
        grey_image = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_8U, 1)
        # 32-bit float image so RunningAvg can accumulate fractional values.
        moving_average = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_32F, 3)

        first = True

        while True:
            # NOTE(review): these two values are computed but never used.
            closest_to_left = cv.GetSize(frame)[0]
            closest_to_right = cv.GetSize(frame)[1]

            color_image = cv.QueryFrame(self.capture)

            # Smooth to get rid of false positives
            cv.Smooth(color_image, color_image, cv.CV_GAUSSIAN, 3, 0)

            if first:
                # First frame: seed the background model and scratch images.
                difference = cv.CloneImage(color_image)
                temp = cv.CloneImage(color_image)
                cv.ConvertScale(color_image, moving_average, 1.0, 0.0)
                first = False
            else:
                # Blend the new frame into the background model (alpha = 0.02).
                cv.RunningAvg(color_image, moving_average, 0.020, None)

            # Convert the scale of the moving average.
            cv.ConvertScale(moving_average, temp, 1.0, 0.0)

            # Minus the current frame from the moving average.
            cv.AbsDiff(color_image, temp, difference)

            # Convert the image to grayscale.
            cv.CvtColor(difference, grey_image, cv.CV_RGB2GRAY)

            # Convert the image to black and white.
            cv.Threshold(grey_image, grey_image, 70, 255, cv.CV_THRESH_BINARY)

            # Dilate and erode to get people blobs
            cv.Dilate(grey_image, grey_image, None, 18)
            cv.Erode(grey_image, grey_image, None, 10)

            storage = cv.CreateMemStorage(0)
            contour = cv.FindContours(grey_image, storage, cv.CV_RETR_CCOMP, cv.CV_CHAIN_APPROX_SIMPLE)
            points = []

            # Walk the contour chain, boxing each blob and collecting its
            # bounding-rectangle corners.
            while contour:
                bound_rect = cv.BoundingRect(list(contour))
                contour = contour.h_next()

                pt1 = (bound_rect[0], bound_rect[1])
                pt2 = (bound_rect[0] + bound_rect[2], bound_rect[1] + bound_rect[3])
                points.append(pt1)
                points.append(pt2)
                cv.Rectangle(color_image, pt1, pt2, cv.CV_RGB(255,0,0), 1)

            if len(points):
                # Pairwise-average all corners into one center point
                # (Python 2 builtin ``reduce``), then draw a bullseye there.
                center_point = reduce(lambda a, b: ((a[0] + b[0]) / 2, (a[1] + b[1]) / 2), points)
                cv.Circle(color_image, center_point, 40, cv.CV_RGB(255, 255, 255), 1)
                cv.Circle(color_image, center_point, 30, cv.CV_RGB(255, 100, 0), 1)
                cv.Circle(color_image, center_point, 20, cv.CV_RGB(255, 255, 255), 1)
                cv.Circle(color_image, center_point, 10, cv.CV_RGB(255, 100, 0), 1)

            cv.ShowImage("Target", color_image)

            # Listen for ESC key
            c = cv.WaitKey(7) % 0x100
            if c == 27:
                break
81+
82+
# Script entry point: build a Target tracker and start its capture loop.
if __name__ == "__main__":
    Target().run()

sound-spectrum-wave.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/usr/bin/env python
2+
# Written by Yu-Jie Lin
3+
# Public Domain
4+
#
5+
# Deps: PyAudio, NumPy, and Matplotlib
6+
# Blog: http://blog.yjl.im/2012/11/frequency-spectrum-of-sound-using.html
7+
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
import matplotlib.animation as animation
11+
import struct
12+
import wave
13+
14+
15+
TITLE = ''   # plot window title (empty = none)
FPS = 25.0   # animation frames per second

nFFT = 512            # FFT size: samples per channel per transform
BUF_SIZE = 4 * nFFT   # frames read per buffer
SAMPLE_SIZE = 2       # bytes per sample (16-bit PCM)
CHANNELS = 2          # stereo input expected
RATE = 44100          # sample rate in Hz
23+
24+
def animate(i, line, wf, MAX_y):
    """FuncAnimation callback: read audio from wave file *wf* up to the
    timestamp of animation frame *i*, FFT both channels, and update the
    y-data of the plot artist *line*.

    MAX_y scales signed 16-bit samples into -1..1.  Returns a 1-tuple of
    the updated artist, as blitting requires.
    """

    # Number of whole nFFT-sized chunks between the current file position
    # and where playback should be at frame i (Python 2 integer division).
    N = (int((i + 1) * RATE / FPS) - wf.tell()) / nFFT
    if not N:
        # Already caught up; nothing new to draw this frame.
        return line,
    N *= nFFT
    data = wf.readframes(N)
    # Progress report: percent done, frame index, position / total frames.
    print '{:5.1f}% - V: {:5,d} - A: {:10,d} / {:10,d}'.format(
      100.0 * wf.tell() / wf.getnframes(), i, wf.tell(), wf.getnframes())

    # Unpack data, LRLRLR...
    y = np.array(struct.unpack("%dh" % (len(data) / SAMPLE_SIZE), data)) / MAX_y
    y_L = y[::2]
    y_R = y[1::2]

    Y_L = np.fft.fft(y_L, nFFT)
    Y_R = np.fft.fft(y_R, nFFT)

    # Sewing FFT of two channels together, DC part uses right channel's
    # (nFFT/2 - 1 bins from the left channel + nFFT/2 bins from the right
    # = nFFT - 1 points, matching the plotted x range).
    Y = abs(np.hstack((Y_L[-nFFT/2:-1], Y_R[:nFFT/2])))

    line.set_ydata(Y)
    return line,
47+
48+
49+
def init(line):
    """Blank the spectrum trace; used as FuncAnimation's init_func."""
    blank = np.zeros(nFFT - 1)  # one zero per plotted frequency bin
    line.set_ydata(blank)
    return (line,)
54+
55+
56+
def main():
    """Render the frequency spectrum of ``temp.wav`` to ``temp.mp4``."""

    fig = plt.figure()

    # Frequency range
    x_f = 1.0 * np.arange(-nFFT / 2 + 1, nFFT / 2) / nFFT * RATE
    ax = fig.add_subplot(111, title=TITLE, xlim=(x_f[0], x_f[-1]),
                         ylim=(0, 2 * np.pi * nFFT**2 / RATE))
    # symlog keeps small magnitudes visible while compressing large peaks.
    ax.set_yscale('symlog', linthreshy=nFFT**0.5)

    line, = ax.plot(x_f, np.zeros(nFFT - 1))

    # Change x tick labels for left channel
    def change_xlabel(evt):
        # Strip the Unicode minus sign so the mirrored (left-channel) half
        # of the axis shows plain numbers; runs once, then disconnects.
        labels = [label.get_text().replace(u'\u2212', '')
                  for label in ax.get_xticklabels()]
        ax.set_xticklabels(labels)
        fig.canvas.mpl_disconnect(drawid)
    drawid = fig.canvas.mpl_connect('draw_event', change_xlabel)

    # Largest magnitude of a signed 16-bit sample, used for normalization.
    MAX_y = 2.0**(SAMPLE_SIZE * 8 - 1)
    wf = wave.open('temp.wav', 'rb')
    # Input file must match the constants the FFT/unpack code assumes.
    assert wf.getnchannels() == CHANNELS
    assert wf.getsampwidth() == SAMPLE_SIZE
    assert wf.getframerate() == RATE
    frames = wf.getnframes()

    # One animation frame per 1/FPS seconds of audio.
    ani = animation.FuncAnimation(fig, animate, int(frames / RATE * FPS),
        init_func=lambda: init(line), fargs=(line, wf, MAX_y),
        interval=1000.0/FPS, blit=True)
    ani.save('temp.mp4', fps=FPS)

    wf.close()
89+
90+
91+
# Script entry point.
if __name__ == '__main__':
    main()

sound-spectrum.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python
2+
# Written by Yu-Jie Lin
3+
# Public Domain
4+
#
5+
# Deps: PyAudio, NumPy, and Matplotlib
6+
# Blog: http://blog.yjl.im/2012/11/frequency-spectrum-of-sound-using.html
7+
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
import matplotlib.animation as animation
11+
import pyaudio
12+
import struct
13+
import wave
14+
15+
16+
SAVE = 0.0   # seconds of audio to record and save; 0 = live display only
TITLE = ''   # plot window title (empty = none)
FPS = 25.0   # animation frames per second

nFFT = 512                # FFT size: samples per channel per transform
BUF_SIZE = 4 * nFFT       # PyAudio frames per buffer
FORMAT = pyaudio.paInt16  # 16-bit samples; see MAX_y normalization in main()
CHANNELS = 2              # stereo capture
RATE = 44100              # sample rate in Hz
25+
26+
27+
def animate(i, line, stream, wf, MAX_y):
    """FuncAnimation callback: pull fresh samples from PyAudio *stream*,
    FFT both channels, and update the y-data of the plot artist *line*.

    When SAVE is truthy the raw frames are also appended to wave file
    *wf*.  MAX_y scales signed 16-bit samples into -1..1.  Returns a
    1-tuple of the updated artist, as blitting requires.
    """

    # Read n*nFFT frames from stream, n > 0
    N = max(stream.get_read_available() / nFFT, 1) * nFFT
    data = stream.read(N)
    if SAVE:
        wf.writeframes(data)

    # Unpack data, LRLRLR...
    y = np.array(struct.unpack("%dh" % (N * CHANNELS), data)) / MAX_y
    y_L = y[::2]
    y_R = y[1::2]

    Y_L = np.fft.fft(y_L, nFFT)
    Y_R = np.fft.fft(y_R, nFFT)

    # Sewing FFT of two channels together, DC part uses right channel's
    # (nFFT/2 - 1 bins from left + nFFT/2 bins from right = nFFT - 1
    # points, matching the plotted x range).
    Y = abs(np.hstack((Y_L[-nFFT/2:-1], Y_R[:nFFT/2])))

    line.set_ydata(Y)
    return line,
48+
49+
50+
def init(line):
    """Blank the spectrum trace; used as FuncAnimation's init_func."""
    blank = np.zeros(nFFT - 1)  # one zero per plotted frequency bin
    line.set_ydata(blank)
    return (line,)
55+
56+
57+
def main():
    """Show a live audio spectrum, or record SAVE seconds to disk.

    With SAVE > 0, audio is written to ``temp.wav`` and the animation to
    ``temp.mp4``; otherwise the spectrum is displayed interactively.
    """

    fig = plt.figure()

    # Frequency range
    x_f = 1.0 * np.arange(-nFFT / 2 + 1, nFFT / 2) / nFFT * RATE
    ax = fig.add_subplot(111, title=TITLE, xlim=(x_f[0], x_f[-1]),
                         ylim=(0, 2 * np.pi * nFFT**2 / RATE))
    # symlog keeps small magnitudes visible while compressing large peaks.
    ax.set_yscale('symlog', linthreshy=nFFT**0.5)

    line, = ax.plot(x_f, np.zeros(nFFT - 1))

    # Change x tick labels for left channel
    def change_xlabel(evt):
        # Strip the Unicode minus sign so the mirrored (left-channel) half
        # of the axis shows plain numbers; runs once, then disconnects.
        labels = [label.get_text().replace(u'\u2212', '')
                  for label in ax.get_xticklabels()]
        ax.set_xticklabels(labels)
        fig.canvas.mpl_disconnect(drawid)
    drawid = fig.canvas.mpl_connect('draw_event', change_xlabel)

    p = pyaudio.PyAudio()
    # Used for normalizing signal. If use paFloat32, then it's already -1..1.
    # Because of saving wave, paInt16 will be easier.
    MAX_y = 2.0**(p.get_sample_size(FORMAT) * 8 - 1)

    frames = None  # None makes FuncAnimation run indefinitely
    wf = None
    if SAVE:
        frames = int(FPS * SAVE)
        wf = wave.open('temp.wav', 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=BUF_SIZE)

    ani = animation.FuncAnimation(fig, animate, frames,
        init_func=lambda: init(line), fargs=(line, stream, wf, MAX_y),
        interval=1000.0/FPS, blit=True)

    if SAVE:
        ani.save('temp.mp4', fps=FPS)
    else:
        plt.show()

    # Release the audio device before exiting.
    stream.stop_stream()
    stream.close()
    p.terminate()

    if SAVE:
        wf.close()
112+
113+
114+
# Script entry point.
if __name__ == '__main__':
    main()

speech.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Speak a Spanish greeting aloud via the pyttsx text-to-speech engine.
import pyttsx

tts = pyttsx.init()
# Configure the engine before queueing the utterance.
for prop, value in [('rate', 70), ('voice', 'spanish-latin-am')]:
    tts.setProperty(prop, value)
tts.say("Hola, como estas?")
tts.runAndWait()

speech.py~

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# NOTE(review): this is an editor backup file (trailing '~') with content
# identical to speech.py; consider removing it from version control.
# Speak a Spanish greeting aloud via the pyttsx text-to-speech engine.
import pyttsx
engine = pyttsx.init()
engine.setProperty('rate', 70)  # slow the speech rate down
engine.setProperty('voice', 'spanish-latin-am')  # Spanish (Latin America) voice
engine.say("Hola, como estas?")
engine.runAndWait()  # block until the queued utterance finishes

0 commit comments

Comments
 (0)