220220< span class ="kn "> import</ span > < span class ="nn "> logging</ span >
221221< span class ="kn "> from</ span > < span class ="nn "> queue</ span > < span class ="kn "> import</ span > < span class ="n "> Queue</ span >
222222< span class ="kn "> from</ span > < span class ="nn "> threading</ span > < span class ="kn "> import</ span > < span class ="n "> Thread</ span >
223- < span class ="kn "> from</ span > < span class ="nn "> typing</ span > < span class ="kn "> import</ span > < span class ="n "> Any</ span >
223+ < span class ="kn "> from</ span > < span class ="nn "> typing</ span > < span class ="kn "> import</ span > < span class ="n "> Any</ span > < span class =" p " > , </ span > < span class =" n " > Union </ span >
224224
225225< span class ="kn "> import</ span > < span class ="nn "> numpy</ span > < span class ="k "> as</ span > < span class ="nn "> np</ span > < span class ="c1 "> # type: ignore</ span >
226226< span class ="kn "> from</ span > < span class ="nn "> google.cloud</ span > < span class ="kn "> import</ span > < span class ="n "> speech</ span > < span class ="c1 "> # type: ignore</ span >
@@ -236,33 +236,49 @@ <h1>
236236 < span class ="sd "> """Transforms speech into text using Google's ASR.</ span >
237237
238238< span class ="sd "> Args:</ span >
239- < span class ="sd "> cred_path (str): path to google credentials</ span >
240- < span class ="sd "> language (str): language of input audio</ span >
239+ < span class ="sd "> language (str): The language of given audio as a</ span >
240+ < span class ="sd "> [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)</ span >
241+ < span class ="sd "> language tag. Example: "en-US"</ span >
242+ < span class ="sd "> credentials (Union[None, str, dict]): Dictionary of Google API credentials</ span >
243+ < span class ="sd "> or path to credentials. if set to None</ span >
244+ < span class ="sd "> credentials will be pulled from the</ span >
245+ < span class ="sd "> environment variable:</ span >
246+ < span class ="sd "> GOOGLE_APPLICATION_CREDENTIALS</ span >
241247< span class ="sd "> sample_rate (int): sample rate of the input audio (Hz)</ span >
242248< span class ="sd "> **kwargs (optional): additional keyword arguments</ span >
243249< span class ="sd "> """</ span >
244250
245251 < span class ="k "> def</ span > < span class ="fm "> __init__</ span > < span class ="p "> (</ span >
246252 < span class ="bp "> self</ span > < span class ="p "> ,</ span >
247- < span class ="n "> cred_path </ span > < span class ="p "> ,</ span >
248- < span class ="n "> language </ span > < span class ="o "> =</ span > < span class ="s2 " > "en-US" </ span > < span class ="p "> ,</ span >
249- < span class ="n "> sample_rate</ span > < span class ="o "> =</ span > < span class ="mi "> 16000</ span > < span class ="p "> ,</ span >
253+ < span class ="n "> language </ span > < span class =" p " > : </ span > < span class =" nb " > str </ span > < span class ="p "> ,</ span >
254+ < span class ="n "> credentials </ span > < span class ="p " > : </ span > < span class =" n " > Union </ span > < span class =" p " > [ </ span > < span class =" kc " > None </ span > < span class =" p " > , </ span > < span class =" nb " > str </ span > < span class =" p " > , </ span > < span class =" nb " > dict </ span > < span class =" p " > ] </ span > < span class =" o "> =</ span > < span class ="kc " > None </ span > < span class ="p "> ,</ span >
255+ < span class ="n "> sample_rate</ span > < span class ="p " > : </ span > < span class =" nb " > int </ span > < span class =" o "> =</ span > < span class ="mi "> 16000</ span > < span class ="p "> ,</ span >
250256 < span class ="o "> **</ span > < span class ="n "> kwargs</ span > < span class ="p "> ,</ span >
251257 < span class ="p "> )</ span > < span class ="o "> -></ span > < span class ="kc "> None</ span > < span class ="p "> :</ span >
252-
253- < span class ="n "> credentials</ span > < span class ="o "> =</ span > < span class ="n "> service_account</ span > < span class ="o "> .</ span > < span class ="n "> Credentials</ span > < span class ="o "> .</ span > < span class ="n "> from_service_account_file</ span > < span class ="p "> (</ span > < span class ="n "> cred_path</ span > < span class ="p "> )</ span >
254- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _client</ span > < span class ="o "> =</ span > < span class ="n "> speech</ span > < span class ="o "> .</ span > < span class ="n "> SpeechClient</ span > < span class ="p "> (</ span > < span class ="n "> credentials</ span > < span class ="o "> =</ span > < span class ="n "> credentials</ span > < span class ="p "> ,</ span > < span class ="o "> **</ span > < span class ="n "> kwargs</ span > < span class ="p "> )</ span >
258+ < span class ="k "> if</ span > < span class ="n "> credentials</ span > < span class ="p "> :</ span >
259+ < span class ="k "> if</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> credentials</ span > < span class ="p "> ,</ span > < span class ="nb "> str</ span > < span class ="p "> ):</ span >
260+ < span class ="n "> credentials</ span > < span class ="o "> =</ span > < span class ="n "> service_account</ span > < span class ="o "> .</ span > < span class ="n "> Credentials</ span > < span class ="o "> .</ span > < span class ="n "> from_service_account_file</ span > < span class ="p "> (</ span >
261+ < span class ="n "> credentials</ span >
262+ < span class ="p "> )</ span >
263+ < span class ="k "> elif</ span > < span class ="nb "> isinstance</ span > < span class ="p "> (</ span > < span class ="n "> credentials</ span > < span class ="p "> ,</ span > < span class ="nb "> dict</ span > < span class ="p "> ):</ span >
264+ < span class ="n "> credentials</ span > < span class ="o "> =</ span > < span class ="n "> service_account</ span > < span class ="o "> .</ span > < span class ="n "> Credentials</ span > < span class ="o "> .</ span > < span class ="n "> from_service_account_info</ span > < span class ="p "> (</ span >
265+ < span class ="n "> credentials</ span >
266+ < span class ="p "> )</ span >
267+ < span class ="k "> else</ span > < span class ="p "> :</ span >
268+ < span class ="k "> raise</ span > < span class ="ne "> ValueError</ span > < span class ="p "> (</ span >
269+ < span class ="s2 "> "Invalid Credentials: Only dict, str, or None accepted"</ span >
270+ < span class ="p "> )</ span >
271+
272+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _client</ span > < span class ="o "> =</ span > < span class ="n "> speech</ span > < span class ="o "> .</ span > < span class ="n "> SpeechClient</ span > < span class ="p "> (</ span > < span class ="n "> credentials</ span > < span class ="o "> =</ span > < span class ="n "> credentials</ span > < span class ="p "> )</ span >
255273 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _config</ span > < span class ="o "> =</ span > < span class ="n "> speech</ span > < span class ="o "> .</ span > < span class ="n "> StreamingRecognitionConfig</ span > < span class ="p "> (</ span >
256274 < span class ="n "> config</ span > < span class ="o "> =</ span > < span class ="n "> speech</ span > < span class ="o "> .</ span > < span class ="n "> RecognitionConfig</ span > < span class ="p "> (</ span >
257275 < span class ="n "> encoding</ span > < span class ="o "> =</ span > < span class ="n "> speech</ span > < span class ="o "> .</ span > < span class ="n "> RecognitionConfig</ span > < span class ="o "> .</ span > < span class ="n "> AudioEncoding</ span > < span class ="o "> .</ span > < span class ="n "> LINEAR16</ span > < span class ="p "> ,</ span >
258276 < span class ="n "> sample_rate_hertz</ span > < span class ="o "> =</ span > < span class ="n "> sample_rate</ span > < span class ="p "> ,</ span >
259277 < span class ="n "> language_code</ span > < span class ="o "> =</ span > < span class ="n "> language</ span > < span class ="p "> ,</ span >
260278 < span class ="n "> enable_automatic_punctuation</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span > < span class ="p "> ,</ span >
261279 < span class ="p "> ),</ span >
262- < span class ="n "> interim_results</ span > < span class ="o "> =</ span > < span class ="kc "> False </ span > < span class ="p "> ,</ span >
280+ < span class ="n "> interim_results</ span > < span class ="o "> =</ span > < span class ="kc "> True </ span > < span class ="p "> ,</ span >
263281 < span class ="p "> )</ span >
264- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _sample_rate</ span > < span class ="o "> =</ span > < span class ="n "> sample_rate</ span >
265- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _language</ span > < span class ="o "> =</ span > < span class ="n "> language</ span >
266282 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _queue</ span > < span class ="p "> :</ span > < span class ="n "> Queue</ span > < span class ="o "> =</ span > < span class ="n "> Queue</ span > < span class ="p "> ()</ span >
267283 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _thread</ span > < span class ="p "> :</ span > < span class ="n "> Any</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span >
268284
273289< span class ="sd "> context (SpeechContext): current state of the speech pipeline</ span >
274290< span class ="sd "> frame (np.ndarray): numpy array of PCM-16 audio.</ span >
275291
276- < span class ="sd "> Returns:</ span >
292+ < span class ="sd "> Returns: None </ span >
277293
278294< span class ="sd "> """</ span >
279295 < span class ="k "> if</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _thread</ span > < span class ="ow "> is</ span > < span class ="kc "> None</ span > < span class ="ow "> and</ span > < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> is_active</ span > < span class ="p "> :</ span >
@@ -291,17 +307,21 @@ <h1>
291307 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _thread</ span > < span class ="o "> .</ span > < span class ="n "> start</ span > < span class ="p "> ()</ span >
292308
293309 < span class ="k "> def</ span > < span class ="nf "> _receive</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ,</ span > < span class ="n "> context</ span > < span class ="p "> ):</ span >
294-
295310 < span class ="k "> for</ span > < span class ="n "> response</ span > < span class ="ow "> in</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _client</ span > < span class ="o "> .</ span > < span class ="n "> streaming_recognize</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _config</ span > < span class ="p "> ,</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _drain</ span > < span class ="p "> ()):</ span >
296311 < span class ="k "> for</ span > < span class ="n "> result</ span > < span class ="ow "> in</ span > < span class ="n "> response</ span > < span class ="o "> .</ span > < span class ="n "> results</ span > < span class ="p "> [:</ span > < span class ="mi "> 1</ span > < span class ="p "> ]:</ span >
297312 < span class ="k "> for</ span > < span class ="n "> alternative</ span > < span class ="ow "> in</ span > < span class ="n "> result</ span > < span class ="o "> .</ span > < span class ="n "> alternatives</ span > < span class ="p "> [:</ span > < span class ="mi "> 1</ span > < span class ="p "> ]:</ span >
298313 < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> transcript</ span > < span class ="o "> =</ span > < span class ="n "> alternative</ span > < span class ="o "> .</ span > < span class ="n "> transcript</ span >
299314 < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> confidence</ span > < span class ="o "> =</ span > < span class ="n "> alternative</ span > < span class ="o "> .</ span > < span class ="n "> confidence</ span >
315+ < span class ="k "> if</ span > < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> transcript</ span > < span class ="p "> :</ span >
316+ < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> event</ span > < span class ="p "> (</ span > < span class ="s2 "> "partial_recognize"</ span > < span class ="p "> )</ span >
300317
301318 < span class ="k "> if</ span > < span class ="n "> result</ span > < span class ="o "> .</ span > < span class ="n "> is_final</ span > < span class ="p "> :</ span >
302319 < span class ="k "> if</ span > < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> transcript</ span > < span class ="p "> :</ span >
303320 < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> event</ span > < span class ="p "> (</ span > < span class ="s2 "> "recognize"</ span > < span class ="p "> )</ span >
304321 < span class ="n "> _LOG</ span > < span class ="o "> .</ span > < span class ="n "> debug</ span > < span class ="p "> (</ span > < span class ="s2 "> "recognize event"</ span > < span class ="p "> )</ span >
322+ < span class ="k "> else</ span > < span class ="p "> :</ span >
323+ < span class ="n "> context</ span > < span class ="o "> .</ span > < span class ="n "> event</ span > < span class ="p "> (</ span > < span class ="s2 "> "timeout"</ span > < span class ="p "> )</ span >
324+ < span class ="n "> _LOG</ span > < span class ="o "> .</ span > < span class ="n "> debug</ span > < span class ="p "> (</ span > < span class ="s2 "> "timeout event"</ span > < span class ="p "> )</ span >
305325
306326 < span class ="k "> def</ span > < span class ="nf "> _drain</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> ):</ span >
307327 < span class ="k "> while</ span > < span class ="n "> data</ span > < span class ="o "> :=</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _queue</ span > < span class ="o "> .</ span > < span class ="n "> get</ span > < span class ="p "> ():</ span >
317337
318338< div class ="viewcode-block " id ="GoogleSpeechRecognizer.reset "> < a class ="viewcode-back " href ="../../../../spokestack.asr.html#spokestack.asr.google.speech_recognizer.GoogleSpeechRecognizer.reset "> [docs]</ a > < span class ="k "> def</ span > < span class ="nf "> reset</ span > < span class ="p "> (</ span > < span class ="bp "> self</ span > < span class ="p "> )</ span > < span class ="o "> -></ span > < span class ="kc "> None</ span > < span class ="p "> :</ span >
319339 < span class ="sd "> """ resets recognizer """</ span >
320- < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _queue</ span > < span class ="o "> .</ span > < span class ="n "> empty</ span > < span class ="p "> ()</ span >
321340 < span class ="k "> if</ span > < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _thread</ span > < span class ="p "> :</ span >
341+ < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _queue</ span > < span class ="o "> .</ span > < span class ="n "> put</ span > < span class ="p "> (</ span > < span class ="kc "> None</ span > < span class ="p "> )</ span >
322342 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _thread</ span > < span class ="o "> .</ span > < span class ="n "> join</ span > < span class ="p "> ()</ span >
323343 < span class ="bp "> self</ span > < span class ="o "> .</ span > < span class ="n "> _thread</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > </ div >
324344
0 commit comments