@@ -2936,7 +2936,7 @@ struct whisper_full_params whisper_full_default_params(enum whisper_sampling_str
29362936 /* .language =*/ " en" ,
29372937
29382938 /* .suppress_blank =*/ true ,
2939- /* .suppress_non_speech_tokens =*/ true ,
2939+ /* .suppress_non_speech_tokens =*/ false ,
29402940
29412941 /* .temperature =*/ 0 .0f ,
29422942 /* .max_initial_ts =*/ 1 .0f ,
@@ -3078,8 +3078,7 @@ static int whisper_wrap_segment(struct whisper_context & ctx, int max_len, bool
30783078 return res;
30793079}
30803080
3081- static const std::vector<std::string> non_speech_tokens
3082- {
3081+ static const std::vector<std::string> non_speech_tokens = {
30833082 " \" " , " #" , " (" , " )" , " *" , " +" , " /" , " :" , " ;" , " <" , " =" , " >" , " @" , " [" , " \\ " , " ]" , " ^" ,
30843083 " _" , " `" , " {" , " |" , " }" , " ~" , " 「" , " 」" , " 『" , " 』" , " <<" , " >>" , " <<<" , " >>>" , " --" ,
30853084 " ---" , " -(" , " -[" , " ('" , " (\" " , " ((" , " ))" , " (((" , " )))" , " [[" , " ]]" , " {{" , " }}" , " ♪♪" ,
@@ -3149,26 +3148,21 @@ static void whisper_process_logits(
31493148
31503149 // suppress non-speech tokens
31513150 // ref: https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/tokenizer.py#L224-L253
3152- if (params.suppress_non_speech_tokens )
3153- {
3154- for (const std::string &token : non_speech_tokens)
3155- {
3156- std::string suppress_tokens[] = {token, " " + token};
3157- for (const std::string &suppress_token : suppress_tokens)
3158- {
3159- if (vocab.token_to_id .find (suppress_token) != vocab.token_to_id .end ())
3160- {
3151+ if (params.suppress_non_speech_tokens ) {
3152+ for (const std::string & token : non_speech_tokens) {
3153+ const std::string suppress_tokens[] = {token, " " + token};
3154+ for (const std::string & suppress_token : suppress_tokens) {
3155+ if (vocab.token_to_id .find (suppress_token) != vocab.token_to_id .end ()) {
31613156 logits[vocab.token_to_id .at (suppress_token)] = -INFINITY;
31623157 }
31633158 }
31643159 }
3160+
31653161 // allow hyphens "-" and single quotes "'" between words, but not at the beginning of a word
3166- if (vocab.token_to_id .find (" -" ) != vocab.token_to_id .end ())
3167- {
3162+ if (vocab.token_to_id .find (" -" ) != vocab.token_to_id .end ()) {
31683163 logits[vocab.token_to_id .at (" -" )] = -INFINITY;
31693164 }
3170- if (vocab.token_to_id .find (" '" ) != vocab.token_to_id .end ())
3171- {
3165+ if (vocab.token_to_id .find (" '" ) != vocab.token_to_id .end ()) {
31723166 logits[vocab.token_to_id .at (" '" )] = -INFINITY;
31733167 }
31743168 }
0 commit comments