Skip to content

Commit cecc2c0

Browse files
author
Guillermo Arana
committed
first commit
0 parents  commit cecc2c0

9 files changed

Lines changed: 787 additions & 0 deletions

File tree

escuchar.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# escuchar.py -- capture one phrase from the default microphone and print
# the text returned by the Google Speech Recognition web service.
import speech_recognition as sr

r = sr.Recognizer()
with sr.Microphone() as source:  # use the default microphone as the audio source
    audio = r.listen(source)  # listen for the first phrase and extract it into audio data

try:
    # recognize speech using Google Speech Recognition.
    # NOTE: SpeechRecognition 3.x removed Recognizer.recognize(); the
    # Google backend is now called via recognize_google().
    print("You said " + r.recognize_google(audio))
except sr.UnknownValueError:  # speech is unintelligible
    print("Could not understand audio")
except sr.RequestError:  # recognition service unreachable or rejected the request
    print("Could not request results from the speech recognition service")

opencv.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python
2+
3+
import cv
4+
5+
class Target:
    """Webcam motion tracker built on the legacy OpenCV 1.x ``cv`` API.

    Captures frames from the default camera, maintains a running-average
    background model, thresholds the per-frame difference into blobs,
    draws a red box around each blob, and paints a bullseye at the
    averaged center of all blob corners.
    """

    def __init__(self):
        # Open the default camera (index 0) and create the display window.
        self.capture = cv.CaptureFromCAM(0)
        cv.NamedWindow("Target", 1)

    def run(self):
        """Run the capture/track/display loop until ESC is pressed."""
        # Capture first frame to get size
        frame = cv.QueryFrame(self.capture)
        # NOTE(review): frame_size is computed but never used below.
        frame_size = cv.GetSize(frame)
        color_image = cv.CreateImage(cv.GetSize(frame), 8, 3)
        grey_image = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_8U, 1)
        # 32-bit float image so RunningAvg can accumulate fractional values.
        moving_average = cv.CreateImage(cv.GetSize(frame), cv.IPL_DEPTH_32F, 3)

        first = True

        while True:
            # NOTE(review): these two values are computed but never used.
            closest_to_left = cv.GetSize(frame)[0]
            closest_to_right = cv.GetSize(frame)[1]

            color_image = cv.QueryFrame(self.capture)

            # Smooth to get rid of false positives
            cv.Smooth(color_image, color_image, cv.CV_GAUSSIAN, 3, 0)

            if first:
                # First frame: seed the background model and scratch images.
                difference = cv.CloneImage(color_image)
                temp = cv.CloneImage(color_image)
                cv.ConvertScale(color_image, moving_average, 1.0, 0.0)
                first = False
            else:
                # Blend the new frame into the background model (alpha = 0.02).
                cv.RunningAvg(color_image, moving_average, 0.020, None)

            # Convert the scale of the moving average.
            cv.ConvertScale(moving_average, temp, 1.0, 0.0)

            # Minus the current frame from the moving average.
            cv.AbsDiff(color_image, temp, difference)

            # Convert the image to grayscale.
            cv.CvtColor(difference, grey_image, cv.CV_RGB2GRAY)

            # Convert the image to black and white.
            cv.Threshold(grey_image, grey_image, 70, 255, cv.CV_THRESH_BINARY)

            # Dilate and erode to get people blobs
            cv.Dilate(grey_image, grey_image, None, 18)
            cv.Erode(grey_image, grey_image, None, 10)

            storage = cv.CreateMemStorage(0)
            contour = cv.FindContours(grey_image, storage, cv.CV_RETR_CCOMP, cv.CV_CHAIN_APPROX_SIMPLE)
            points = []

            # Walk the contour chain, boxing each blob and collecting its
            # bounding-rectangle corners.
            while contour:
                bound_rect = cv.BoundingRect(list(contour))
                contour = contour.h_next()

                pt1 = (bound_rect[0], bound_rect[1])
                pt2 = (bound_rect[0] + bound_rect[2], bound_rect[1] + bound_rect[3])
                points.append(pt1)
                points.append(pt2)
                cv.Rectangle(color_image, pt1, pt2, cv.CV_RGB(255,0,0), 1)

            if len(points):
                # Pairwise-average all corners into one center point
                # (Python 2 builtin ``reduce``), then draw a bullseye there.
                center_point = reduce(lambda a, b: ((a[0] + b[0]) / 2, (a[1] + b[1]) / 2), points)
                cv.Circle(color_image, center_point, 40, cv.CV_RGB(255, 255, 255), 1)
                cv.Circle(color_image, center_point, 30, cv.CV_RGB(255, 100, 0), 1)
                cv.Circle(color_image, center_point, 20, cv.CV_RGB(255, 255, 255), 1)
                cv.Circle(color_image, center_point, 10, cv.CV_RGB(255, 100, 0), 1)

            cv.ShowImage("Target", color_image)

            # Listen for ESC key
            c = cv.WaitKey(7) % 0x100
            if c == 27:
                break
81+
82+
# Script entry point: build a Target tracker and start its capture loop.
if __name__ == "__main__":
    Target().run()

sound-spectrum-wave.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/usr/bin/env python
2+
# Written by Yu-Jie Lin
3+
# Public Domain
4+
#
5+
# Deps: PyAudio, NumPy, and Matplotlib
6+
# Blog: http://blog.yjl.im/2012/11/frequency-spectrum-of-sound-using.html
7+
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
import matplotlib.animation as animation
11+
import struct
12+
import wave
13+
14+
15+
TITLE = ''   # plot window title (empty = none)
FPS = 25.0   # animation frames per second

nFFT = 512            # FFT size: samples per channel per transform
BUF_SIZE = 4 * nFFT   # frames read per buffer
SAMPLE_SIZE = 2       # bytes per sample (16-bit PCM)
CHANNELS = 2          # stereo input expected
RATE = 44100          # sample rate in Hz
23+
24+
def animate(i, line, wf, MAX_y):
    """FuncAnimation callback: read audio from wave file *wf* up to the
    timestamp of animation frame *i*, FFT both channels, and update the
    y-data of the plot artist *line*.

    MAX_y scales signed 16-bit samples into -1..1.  Returns a 1-tuple of
    the updated artist, as blitting requires.
    """

    # Number of whole nFFT-sized chunks between the current file position
    # and where playback should be at frame i (Python 2 integer division).
    N = (int((i + 1) * RATE / FPS) - wf.tell()) / nFFT
    if not N:
        # Already caught up; nothing new to draw this frame.
        return line,
    N *= nFFT
    data = wf.readframes(N)
    # Progress report: percent done, frame index, position / total frames.
    print '{:5.1f}% - V: {:5,d} - A: {:10,d} / {:10,d}'.format(
      100.0 * wf.tell() / wf.getnframes(), i, wf.tell(), wf.getnframes())

    # Unpack data, LRLRLR...
    y = np.array(struct.unpack("%dh" % (len(data) / SAMPLE_SIZE), data)) / MAX_y
    y_L = y[::2]
    y_R = y[1::2]

    Y_L = np.fft.fft(y_L, nFFT)
    Y_R = np.fft.fft(y_R, nFFT)

    # Sewing FFT of two channels together, DC part uses right channel's
    # (nFFT/2 - 1 bins from the left channel + nFFT/2 bins from the right
    # = nFFT - 1 points, matching the plotted x range).
    Y = abs(np.hstack((Y_L[-nFFT/2:-1], Y_R[:nFFT/2])))

    line.set_ydata(Y)
    return line,
47+
48+
49+
def init(line):
    """Blank the spectrum trace; used as FuncAnimation's init_func."""
    blank = np.zeros(nFFT - 1)  # one zero per plotted frequency bin
    line.set_ydata(blank)
    return (line,)
54+
55+
56+
def main():
    """Render the frequency spectrum of ``temp.wav`` to ``temp.mp4``."""

    fig = plt.figure()

    # Frequency range
    x_f = 1.0 * np.arange(-nFFT / 2 + 1, nFFT / 2) / nFFT * RATE
    ax = fig.add_subplot(111, title=TITLE, xlim=(x_f[0], x_f[-1]),
                         ylim=(0, 2 * np.pi * nFFT**2 / RATE))
    # symlog keeps small magnitudes visible while compressing large peaks.
    ax.set_yscale('symlog', linthreshy=nFFT**0.5)

    line, = ax.plot(x_f, np.zeros(nFFT - 1))

    # Change x tick labels for left channel
    def change_xlabel(evt):
        # Strip the Unicode minus sign so the mirrored (left-channel) half
        # of the axis shows plain numbers; runs once, then disconnects.
        labels = [label.get_text().replace(u'\u2212', '')
                  for label in ax.get_xticklabels()]
        ax.set_xticklabels(labels)
        fig.canvas.mpl_disconnect(drawid)
    drawid = fig.canvas.mpl_connect('draw_event', change_xlabel)

    # Largest magnitude of a signed 16-bit sample, used for normalization.
    MAX_y = 2.0**(SAMPLE_SIZE * 8 - 1)
    wf = wave.open('temp.wav', 'rb')
    # Input file must match the constants the FFT/unpack code assumes.
    assert wf.getnchannels() == CHANNELS
    assert wf.getsampwidth() == SAMPLE_SIZE
    assert wf.getframerate() == RATE
    frames = wf.getnframes()

    # One animation frame per 1/FPS seconds of audio.
    ani = animation.FuncAnimation(fig, animate, int(frames / RATE * FPS),
        init_func=lambda: init(line), fargs=(line, wf, MAX_y),
        interval=1000.0/FPS, blit=True)
    ani.save('temp.mp4', fps=FPS)

    wf.close()
89+
90+
91+
# Script entry point.
if __name__ == '__main__':
    main()

sound-spectrum.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python
2+
# Written by Yu-Jie Lin
3+
# Public Domain
4+
#
5+
# Deps: PyAudio, NumPy, and Matplotlib
6+
# Blog: http://blog.yjl.im/2012/11/frequency-spectrum-of-sound-using.html
7+
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
import matplotlib.animation as animation
11+
import pyaudio
12+
import struct
13+
import wave
14+
15+
16+
SAVE = 0.0   # seconds of audio to record and save; 0 = live display only
TITLE = ''   # plot window title (empty = none)
FPS = 25.0   # animation frames per second

nFFT = 512                # FFT size: samples per channel per transform
BUF_SIZE = 4 * nFFT       # PyAudio frames per buffer
FORMAT = pyaudio.paInt16  # 16-bit samples; see MAX_y normalization in main()
CHANNELS = 2              # stereo capture
RATE = 44100              # sample rate in Hz
25+
26+
27+
def animate(i, line, stream, wf, MAX_y):
    """FuncAnimation callback: pull fresh samples from PyAudio *stream*,
    FFT both channels, and update the y-data of the plot artist *line*.

    When SAVE is truthy the raw frames are also appended to wave file
    *wf*.  MAX_y scales signed 16-bit samples into -1..1.  Returns a
    1-tuple of the updated artist, as blitting requires.
    """

    # Read n*nFFT frames from stream, n > 0
    N = max(stream.get_read_available() / nFFT, 1) * nFFT
    data = stream.read(N)
    if SAVE:
        wf.writeframes(data)

    # Unpack data, LRLRLR...
    y = np.array(struct.unpack("%dh" % (N * CHANNELS), data)) / MAX_y
    y_L = y[::2]
    y_R = y[1::2]

    Y_L = np.fft.fft(y_L, nFFT)
    Y_R = np.fft.fft(y_R, nFFT)

    # Sewing FFT of two channels together, DC part uses right channel's
    # (nFFT/2 - 1 bins from left + nFFT/2 bins from right = nFFT - 1
    # points, matching the plotted x range).
    Y = abs(np.hstack((Y_L[-nFFT/2:-1], Y_R[:nFFT/2])))

    line.set_ydata(Y)
    return line,
48+
49+
50+
def init(line):
    """Blank the spectrum trace; used as FuncAnimation's init_func."""
    blank = np.zeros(nFFT - 1)  # one zero per plotted frequency bin
    line.set_ydata(blank)
    return (line,)
55+
56+
57+
def main():
    """Show a live audio spectrum, or record SAVE seconds to disk.

    With SAVE > 0, audio is written to ``temp.wav`` and the animation to
    ``temp.mp4``; otherwise the spectrum is displayed interactively.
    """

    fig = plt.figure()

    # Frequency range
    x_f = 1.0 * np.arange(-nFFT / 2 + 1, nFFT / 2) / nFFT * RATE
    ax = fig.add_subplot(111, title=TITLE, xlim=(x_f[0], x_f[-1]),
                         ylim=(0, 2 * np.pi * nFFT**2 / RATE))
    # symlog keeps small magnitudes visible while compressing large peaks.
    ax.set_yscale('symlog', linthreshy=nFFT**0.5)

    line, = ax.plot(x_f, np.zeros(nFFT - 1))

    # Change x tick labels for left channel
    def change_xlabel(evt):
        # Strip the Unicode minus sign so the mirrored (left-channel) half
        # of the axis shows plain numbers; runs once, then disconnects.
        labels = [label.get_text().replace(u'\u2212', '')
                  for label in ax.get_xticklabels()]
        ax.set_xticklabels(labels)
        fig.canvas.mpl_disconnect(drawid)
    drawid = fig.canvas.mpl_connect('draw_event', change_xlabel)

    p = pyaudio.PyAudio()
    # Used for normalizing signal. If use paFloat32, then it's already -1..1.
    # Because of saving wave, paInt16 will be easier.
    MAX_y = 2.0**(p.get_sample_size(FORMAT) * 8 - 1)

    frames = None  # None makes FuncAnimation run indefinitely
    wf = None
    if SAVE:
        frames = int(FPS * SAVE)
        wf = wave.open('temp.wav', 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(p.get_sample_size(FORMAT))
        wf.setframerate(RATE)

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=BUF_SIZE)

    ani = animation.FuncAnimation(fig, animate, frames,
        init_func=lambda: init(line), fargs=(line, stream, wf, MAX_y),
        interval=1000.0/FPS, blit=True)

    if SAVE:
        ani.save('temp.mp4', fps=FPS)
    else:
        plt.show()

    # Release the audio device before exiting.
    stream.stop_stream()
    stream.close()
    p.terminate()

    if SAVE:
        wf.close()
112+
113+
114+
# Script entry point.
if __name__ == '__main__':
    main()

speech.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Speak a Spanish greeting aloud via the pyttsx text-to-speech engine.
import pyttsx

tts = pyttsx.init()
# Configure the engine before queueing the utterance.
for prop, value in [('rate', 70), ('voice', 'spanish-latin-am')]:
    tts.setProperty(prop, value)
tts.say("Hola, como estas?")
tts.runAndWait()

speech.py~

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# NOTE(review): this is an editor backup file (trailing '~') with content
# identical to speech.py; consider removing it from version control.
# Speak a Spanish greeting aloud via the pyttsx text-to-speech engine.
import pyttsx
engine = pyttsx.init()
engine.setProperty('rate', 70)  # slow the speech rate down
engine.setProperty('voice', 'spanish-latin-am')  # Spanish (Latin America) voice
engine.say("Hola, como estas?")
engine.runAndWait()  # block until the queued utterance finishes

0 commit comments

Comments
 (0)