Skip to content

Commit e70fd7f

Browse files
committed
update baiduyuyin
1 parent 96d1a0a commit e70fd7f

19 files changed

Lines changed: 465 additions & 0 deletions

File tree

python_baiduyuyin/1.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#! /usr/bin/env python3
2+
3+
import baidu_oauth
4+
import uuid
5+
import base64
6+
import json
7+
import urllib.request
8+
import sys
9+
# Endpoints: the speech recognition server and the OAuth token issuer.
asr_server = 'http://vop.baidu.com/server_api'
baidu_oauth_url = 'https://openapi.baidu.com/oauth/2.0/token/'

# Application credentials handed to the token request
# (the 'xxx' values are presumably placeholders to be filled in).
client_id = 'xxx'
client_secret = 'xxx'

# The token is requested once, when this module is loaded.
access_token = baidu_oauth.get_baidu_access_token(
    baidu_oauth_url, client_id, client_secret)

# Device id sent as "cuid": the 48-bit value of uuid.getnode() rendered
# as 12 lowercase, zero-padded hex digits.
mac_address = '%012x' % uuid.getnode()
15+
def baidu_asr(speech_file, timeout=None):
    """Send an audio file to the speech recognition server and return its reply.

    The file is read whole, base64-encoded and posted as a JSON document to
    ``asr_server`` together with the module-level ``access_token`` and
    ``mac_address``.

    Args:
        speech_file: Path of the audio file to upload. The request declares
            it as format ``wav``, rate 8000, one channel, language ``zh``.
        timeout: Optional socket timeout in seconds for the HTTP request.
            When ``None`` (the default) no timeout argument is passed to
            ``urlopen``, which matches the previous behaviour.

    Returns:
        The server's JSON reply decoded into Python objects.

    Raises:
        OSError: If the file cannot be read or the request fails
            (``urllib.error.URLError`` is a subclass).
        ValueError: If the reply is not valid UTF-8 JSON.
    """
    with open(speech_file, 'rb') as audio:
        speech_data = audio.read()

    payload = {
        'format': 'wav',
        'rate': 8000,
        'channel': 1,
        'cuid': mac_address,
        'token': access_token,
        'lan': 'zh',
        'speech': base64.b64encode(speech_data).decode('utf-8'),
        # 'len' is the size of the raw audio, not of its base64 form.
        'len': len(speech_data),
    }
    body = json.dumps(payload).encode('utf-8')

    # urllib derives Content-Length from the bytes passed as ``data``,
    # so only the content type has to be set by hand.
    request = urllib.request.Request(
        url=asr_server,
        data=body,
        headers={'Content-Type': 'application/json'},
    )

    extra = {} if timeout is None else {'timeout': timeout}
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(request, **extra) as response:
        result_str = response.read().decode('utf-8')

    return json.loads(result_str)
32+
33+
# Command-line entry: recognise the audio file named by the first argument.
if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit('usage: %s SPEECH_FILE' % sys.argv[0])
json_resp = baidu_asr(sys.argv[1])
print(json_resp)

python_baiduyuyin/1.py~

Whitespace-only changes.

python_baiduyuyin/2.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#encoding=utf-8
2+
3+
import wave
4+
import urllib, urllib2, pycurl
5+
import base64
6+
import json
7+
## get access token by api key & secret key
8+
9+
def get_token(api_key=None, secret_key=None):
    """Fetch an OAuth access token using the client-credentials grant.

    Credentials are not embedded in the source. They are taken from the
    arguments or, when omitted, from the ``BAIDU_API_KEY`` and
    ``BAIDU_SECRET_KEY`` environment variables.

    Args:
        api_key: API key (sent as ``client_id``); defaults to the
            ``BAIDU_API_KEY`` environment variable.
        secret_key: Secret key (sent as ``client_secret``); defaults to the
            ``BAIDU_SECRET_KEY`` environment variable.

    Returns:
        The ``access_token`` field of the JSON reply.

    Raises:
        ValueError: If either credential is missing.
        KeyError: If the reply contains no ``access_token`` field.
    """
    import os
    try:  # Python 2 module layout
        from urllib import urlencode
        from urllib2 import urlopen
    except ImportError:  # Python 3 module layout
        from urllib.parse import urlencode
        from urllib.request import urlopen

    apiKey = api_key or os.environ.get("BAIDU_API_KEY")
    secretKey = secret_key or os.environ.get("BAIDU_SECRET_KEY")
    if not apiKey or not secretKey:
        raise ValueError(
            "Baidu credentials missing: pass api_key/secret_key or set "
            "BAIDU_API_KEY and BAIDU_SECRET_KEY")

    # urlencode escapes the values; a list of pairs keeps the order stable.
    query = urlencode([
        ("grant_type", "client_credentials"),
        ("client_id", apiKey),
        ("client_secret", secretKey),
    ])
    auth_url = "https://openapi.baidu.com/oauth/2.0/token?" + query

    res = urlopen(auth_url)
    try:
        json_data = res.read()
    finally:
        res.close()
    return json.loads(json_data.decode("utf-8"))['access_token']
18+
19+
def dump_res(buf):
    """Print one piece of response data to standard output.

    Args:
        buf: The data to print.
    """
    # Parenthesised single-argument form works as a Python 2 print
    # statement and as the Python 3 print function alike.
    print(buf)
21+
22+
23+
## post audio to server
24+
def use_cloud(token, wav_path='vad_0.wav', cuid="xxxxxxxxxx"):
    """Post the raw frames of a WAV file to the speech recognition server.

    The reply is not returned; curl passes it to ``dump_res``.

    Args:
        token: Access token appended to the request URL.
        wav_path: WAV file whose frames are uploaded (default ``vad_0.wav``).
        cuid: Client identifier appended to the request URL; the original
            author used a phone MAC address here.
    """
    fp = wave.open(wav_path, 'rb')
    try:
        audio_data = fp.readframes(fp.getnframes())
    finally:
        fp.close()
    # Measure the payload rather than assuming two bytes per frame, which
    # only holds for 16-bit mono audio.
    f_len = len(audio_data)

    srv_url = 'http://vop.baidu.com/server_api' + '?cuid=' + cuid + '&token=' + token
    # NOTE(review): the declared rate is fixed at 8000 regardless of the
    # file's own frame rate -- confirm the input is always 8 kHz.
    http_header = [
        'Content-Type: audio/pcm; rate=8000',
        'Content-Length: %d' % f_len,
    ]

    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, str(srv_url))   # curl doesn't support unicode
        c.setopt(c.HTTPHEADER, http_header)  # must be list, not dict
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 30)
        c.setopt(c.TIMEOUT, 30)
        c.setopt(c.WRITEFUNCTION, dump_res)
        c.setopt(c.POSTFIELDS, audio_data)
        c.setopt(c.POSTFIELDSIZE, f_len)
        c.perform()  # no return value; output goes to WRITEFUNCTION
    finally:
        c.close()
48+
49+
if __name__ == "__main__":
    # Obtain an access token and upload the sample recording with it.
    use_cloud(get_token())

python_baiduyuyin/2.py~

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#encoding=utf-8
2+
3+
import wave
4+
import urllib, urllib2, pycurl
5+
import base64
6+
import json
7+
## get access token by api key & secret key
8+
9+
def get_token(api_key=None, secret_key=None):
    """Fetch an OAuth access token using the client-credentials grant.

    Credentials are taken from the arguments or, when omitted, from the
    ``BAIDU_API_KEY`` and ``BAIDU_SECRET_KEY`` environment variables, so
    no placeholder has to be edited in the source.

    Args:
        api_key: API key (sent as ``client_id``); defaults to the
            ``BAIDU_API_KEY`` environment variable.
        secret_key: Secret key (sent as ``client_secret``); defaults to the
            ``BAIDU_SECRET_KEY`` environment variable.

    Returns:
        The ``access_token`` field of the JSON reply.

    Raises:
        ValueError: If either credential is missing.
        KeyError: If the reply contains no ``access_token`` field.
    """
    import os
    try:  # Python 2 module layout
        from urllib import urlencode
        from urllib2 import urlopen
    except ImportError:  # Python 3 module layout
        from urllib.parse import urlencode
        from urllib.request import urlopen

    apiKey = api_key or os.environ.get("BAIDU_API_KEY")
    secretKey = secret_key or os.environ.get("BAIDU_SECRET_KEY")
    if not apiKey or not secretKey:
        raise ValueError(
            "Baidu credentials missing: pass api_key/secret_key or set "
            "BAIDU_API_KEY and BAIDU_SECRET_KEY")

    # urlencode escapes the values; a list of pairs keeps the order stable.
    query = urlencode([
        ("grant_type", "client_credentials"),
        ("client_id", apiKey),
        ("client_secret", secretKey),
    ])
    auth_url = "https://openapi.baidu.com/oauth/2.0/token?" + query

    res = urlopen(auth_url)
    try:
        json_data = res.read()
    finally:
        res.close()
    return json.loads(json_data.decode("utf-8"))['access_token']
18+
19+
def dump_res(buf):
    """Print one piece of response data to standard output.

    Args:
        buf: The data to print.
    """
    # Parenthesised single-argument form works as a Python 2 print
    # statement and as the Python 3 print function alike.
    print(buf)
21+
22+
23+
## post audio to server
24+
def use_cloud(token, wav_path='vad_0.wav', cuid="xxxxxxxxxx"):
    """Post the raw frames of a WAV file to the speech recognition server.

    The reply is not returned; curl passes it to ``dump_res``.

    Args:
        token: Access token appended to the request URL.
        wav_path: WAV file whose frames are uploaded (default ``vad_0.wav``).
        cuid: Client identifier appended to the request URL; the original
            author used a phone MAC address here.
    """
    fp = wave.open(wav_path, 'rb')
    try:
        audio_data = fp.readframes(fp.getnframes())
    finally:
        fp.close()
    # Measure the payload rather than assuming two bytes per frame, which
    # only holds for 16-bit mono audio.
    f_len = len(audio_data)

    srv_url = 'http://vop.baidu.com/server_api' + '?cuid=' + cuid + '&token=' + token
    # NOTE(review): the declared rate is fixed at 8000 regardless of the
    # file's own frame rate -- confirm the input is always 8 kHz.
    http_header = [
        'Content-Type: audio/pcm; rate=8000',
        'Content-Length: %d' % f_len,
    ]

    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, str(srv_url))   # curl doesn't support unicode
        c.setopt(c.HTTPHEADER, http_header)  # must be list, not dict
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 30)
        c.setopt(c.TIMEOUT, 30)
        c.setopt(c.WRITEFUNCTION, dump_res)
        c.setopt(c.POSTFIELDS, audio_data)
        c.setopt(c.POSTFIELDSIZE, f_len)
        c.perform()  # no return value; output goes to WRITEFUNCTION
    finally:
        c.close()
48+
49+
if __name__ == "__main__":
    # Obtain an access token and upload the sample recording with it.
    use_cloud(get_token())
156 KB
Binary file not shown.
156 KB
Binary file not shown.

python_baiduyuyin/3.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#coding:utf-8
2+
import wave #需要导入python的wave module,函数用法参http://docs.python.org/library/wave.html
3+
import struct #struct module的用法参见http://docs.python.org/library/struct.html
4+
from math import sin, pi,pow
5+
6+
MAX_AMPLITUDE = 32767  # peak sample value; sets the volume of the sine wave
# Sample rate. Human hearing spans roughly 20 Hz to 20 kHz, so by the
# Nyquist criterion 44100 Hz (more than twice 20 kHz) is enough; this is
# also the usual CD sample rate.
SAMPLE_RATE = 44100
DURATION_SEC = 10  # length of the generated tone, in seconds
SAMPLE_LEN = SAMPLE_RATE * DURATION_SEC  # number of frames to write
filename = 'output.wav'  # output file, relative to the working directory

# Single pre-formatted argument: prints the same text under Python 2 and 3.
print("Creating sound file: %s" % filename)
print("Sample rate: %s" % SAMPLE_RATE)
print("Duration (sec): %s" % DURATION_SEC)
print("# samples: %s" % SAMPLE_LEN)

wavefile = wave.open(filename, 'w')
try:
    # Header: 2 channels, 2 bytes per sample, SAMPLE_RATE, uncompressed.
    wavefile.setparams((2, 2, SAMPLE_RATE, 0, 'NONE', 'not compressed'))
    samples = []  # packed 16-bit values, left and right interleaved
    for i in range(SAMPLE_LEN):
        t = float(i) / SAMPLE_RATE  # time of this sample in seconds
        # Amplitude of a 256 Hz sine at time t. struct needs an integer,
        # so the float is truncated explicitly.
        sample = int(MAX_AMPLITUDE * sin(t * 256 * 2 * pi))
        # '<h' = little-endian signed 16-bit, the byte order WAV data uses
        # whatever the host's native order is.
        packed_sample = struct.pack('<h', sample)
        samples.append(packed_sample)  # channel 1
        # Channel 2 carries the same value; packing a different frequency
        # here would produce a two-tone harmony.
        samples.append(packed_sample)
    sample_str = b''.join(samples)  # one bytes object holding every frame
    wavefile.writeframes(sample_str)
finally:
    # Close even when generation fails so the file handle is not leaked.
    wavefile.close()
print("Done writing file.")

python_baiduyuyin/3.py~

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import wave #需要导入python的wave module,函数用法参http://docs.python.org/library/wave.html
2+
import struct #struct module的用法参见http://docs.python.org/library/struct.html
3+
from math import sin, pi, pow
4+
5+
MAX_AMPLITUDE = 32767  # peak sample value; sets the volume of the sine wave
# Sample rate. Human hearing spans roughly 20 Hz to 20 kHz, so by the
# Nyquist criterion 44100 Hz (more than twice 20 kHz) is enough; this is
# also the usual CD sample rate.
SAMPLE_RATE = 44100
DURATION_SEC = 3  # length of the generated tone, in seconds
SAMPLE_LEN = SAMPLE_RATE * DURATION_SEC  # number of frames to write
# Relative path instead of one user's Desktop so the script runs anywhere.
filename = 'output.wav'

# Single pre-formatted argument: prints the same text under Python 2 and 3.
print("Creating sound file: %s" % filename)
print("Sample rate: %s" % SAMPLE_RATE)
print("Duration (sec): %s" % DURATION_SEC)
print("# samples: %s" % SAMPLE_LEN)

wavefile = wave.open(filename, 'w')
try:
    # Header: 2 channels, 2 bytes per sample, SAMPLE_RATE, uncompressed.
    wavefile.setparams((2, 2, SAMPLE_RATE, 0, 'NONE', 'not compressed'))
    samples = []  # packed 16-bit values, left and right interleaved
    for i in range(SAMPLE_LEN):
        t = float(i) / SAMPLE_RATE  # time of this sample in seconds
        # Amplitude of a 256 Hz sine at time t. struct needs an integer,
        # so the float is truncated explicitly.
        sample = int(MAX_AMPLITUDE * sin(t * 256 * 2 * pi))
        # '<h' = little-endian signed 16-bit, the byte order WAV data uses
        # whatever the host's native order is.
        packed_sample = struct.pack('<h', sample)
        samples.append(packed_sample)  # channel 1
        # Channel 2 carries the same value; packing a different frequency
        # here would produce a two-tone harmony.
        samples.append(packed_sample)
    sample_str = b''.join(samples)  # one bytes object holding every frame
    wavefile.writeframes(sample_str)
finally:
    # Close even when generation fails so the file handle is not leaked.
    wavefile.close()
print("Done writing file.")

python_baiduyuyin/4.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
from sys import byteorder
2+
from array import array
3+
from struct import pack
4+
5+
import pyaudio
6+
import wave
7+
8+
THRESHOLD = 500
9+
CHUNK_SIZE = 1024
10+
FORMAT = pyaudio.paInt16
11+
RATE = 44100
12+
13+
def is_silent(snd_data):
    """Return True if the chunk's largest sample is below THRESHOLD.

    Args:
        snd_data: Non-empty sequence of signed samples.

    Returns:
        ``True`` when ``max(snd_data) < THRESHOLD``, otherwise ``False``.
    """
    # NOTE(review): only the largest (positive) value is compared; a chunk
    # that is loud solely on the negative side would count as silent.
    return max(snd_data) < THRESHOLD
16+
17+
def normalize(snd_data):
    """Scale the samples so that the loudest one has magnitude 16384.

    Args:
        snd_data: Iterable of signed 16-bit samples.

    Returns:
        A new ``array('h')`` with every sample multiplied by
        ``16384 / max(abs(sample))`` and truncated to an integer. Empty or
        all-zero input is returned as an unscaled copy, since there is no
        peak to scale against.
    """
    MAXIMUM = 16384
    # ``or [0]`` covers empty input, where max() would raise ValueError.
    peak = max([abs(i) for i in snd_data] or [0])
    if peak == 0:
        # Pure silence: avoid dividing by zero.
        return array('h', snd_data)

    times = float(MAXIMUM) / peak
    return array('h', [int(i * times) for i in snd_data])
26+
27+
def trim(snd_data):
    """Trim the quiet samples at the start and end of the recording.

    Args:
        snd_data: Iterable of signed 16-bit samples; it is not modified.

    Returns:
        A new ``array('h')`` running from the first to the last sample
        whose absolute value exceeds THRESHOLD, inclusive. It is empty when
        no sample exceeds the threshold.
    """
    samples = array('h', snd_data)
    loud = [idx for idx, value in enumerate(samples) if abs(value) > THRESHOLD]
    if not loud:
        return array('h')
    # One slice replaces the append-per-sample and double-reverse passes.
    return samples[loud[0]:loud[-1] + 1]
50+
51+
def add_silence(snd_data, seconds):
    """Add silence of length 'seconds' to the start and end of 'snd_data'.

    Args:
        snd_data: Signed 16-bit samples to pad.
        seconds: Padding length in seconds (float); it is converted to
            ``int(seconds * RATE)`` zero samples per side.

    Returns:
        A new ``array('h')``: padding, then the samples, then padding.
    """
    # Array repetition builds the zeros without xrange, which does not
    # exist on Python 3; a negative count yields an empty array.
    padding = array('h', [0]) * int(seconds * RATE)
    r = array('h', padding)
    r.extend(snd_data)
    r.extend(padding)
    return r
57+
58+
def record():
    """
    Record a word or words from the microphone and
    return the data as an array of signed shorts.

    Normalizes the audio, trims silence from the
    start and end, and pads with 0.5 seconds of
    blank sound to make sure VLC et al can play
    it without getting chopped off.

    Returns:
        A ``(sample_width, samples)`` tuple: the width reported by
        ``get_sample_size(FORMAT)`` and the processed ``array('h')``.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True, output=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False

            r = array('h')

            while 1:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)

                silent = is_silent(snd_data)

                # num_silent is never reset, so it counts every silent
                # chunk seen after sound first started.
                if silent and snd_started:
                    num_silent += 1
                elif not silent and not snd_started:
                    snd_started = True

                if snd_started and num_silent > 30:
                    break

            sample_width = p.get_sample_size(FORMAT)
        finally:
            # Release the stream even when reading raises.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r
104+
105+
def record_to_file(path):
    """Record from the microphone and write the result to 'path' as WAV.

    Args:
        path: Destination of the mono WAV file.
    """
    sample_width, data = record()
    # A count-prefixed format ('<Nh') packs the same little-endian shorts
    # without building a format string len(data) characters long.
    data = pack('<%dh' % len(data), *data)

    wf = wave.open(path, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(data)
    finally:
        # Close the file even if a header value or the write is rejected.
        wf.close()
116+
117+
if __name__ == '__main__':
    # Prompt the speaker, record to a file, then report where it went.
    output_path = 'demo.wav'
    print("please speak a word into the microphone")
    record_to_file(output_path)
    print("done - result written to demo.wav")

python_baiduyuyin/4.py~

Whitespace-only changes.

0 commit comments

Comments
 (0)