python调用百度语音识别实现大音频文件语音识别功能 - 行业资讯

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持的声道数为1)和采样率(值为8000);格式转换完成后,再用ffmpeg将音频切成百度支持的时长(30秒和60秒2种,本程序用的是30秒)。
　　

　　　　　　utf - 8编码: 　　进口json 　　导入的时间　　进口base64 　　从公司进口rtysdb 　　进口urllib2 　　进口的要求　　进口操作系统　　进口uuid 　　从公司进口db_config 　　　　　　类BaiduRest: 　　def __init__(自我、cu_id api_key api_secert): 　　自我。token_url=" https://openapi.baidu.com/oauth/2.0/token& # 63; grant_type=client_credentials& client_id=% s& client_secret=% s” 　　自我。getvoice_url=" http://tsn.baidu.com/text2audio& # 63;特克斯=% s&局域网=zh& cuid=% s& ctp=1,托托=% s” 　　自我。upvoice_url=' http://vop.baidu.com/server_api ' 　　　　自我。cu_id=cu_id 　　自我。get_token (api_key api_secert) 　　返回　　　　def get_token(自我,api_key api_secert): 　　token_url=自我。token_url % (api_key api_secert) 　　r_str=urllib2.urlopen (token_url) .read () 　　token_data=https://www.yisu.com/zixun/json.loads (r_str) 　　自我。token_str=token_data (“access_token”) 　　还真　　　　#语音合成　　def text2audio(自我、文本文件名): 　　get_url=自我。getvoice_url % (urllib2.quote(文本),自我。cu_id self.token_str) 　　voice_data=https://www.yisu.com/zixun/urllib2.urlopen (get_url) .read () 　　voice_fp=open (filename, wb +) 　　voice_fp.write (voice_data) 　　voice_fp.close () 　　还真　　　　# #语音识别　　def audio2text(自我,文件名): 　　data=https://www.yisu.com/zixun/{} 　　数据(“格式”)=' wav ' 　　数据(“率”)=8000 　　数据(“通道”)=1 　　[' cuid ']=self.cu_id数据　　数据(“令牌”)=self.token_str 　　　　wav_fp=open (filename, rb) 　　voice_data=https://www.yisu.com/zixun/wav_fp.read () 　　数据[‘兰’]=len (voice_data) 　　#数据(“演讲”)=base64.b64encode (voice_data) .decode (“utf - 8”) 　　数据(“演讲”)=base64.b64encode (voice_data)。替换(' \ n ',”) 　　# post_data=https://www.yisu.com/zixun/json.dumps(数据) 　　结果=requests.post(自我。upvoice_url json=数据,标题={“内容类型”:“application/json”}) 　　data_result=result.json () 　　如果(data_result err_msg”==俺晒Α！?: 　　返回data_result['结果'][0] 　　其他: 　　返回假　　　　　　　　def test_voice (voice_file): 　　api_key=" vossGHIgEETS6IMRxBDeahv8 " 　　api_secert=" 3 c1fe6a6312f41fa21fa2c394dad5510” 　　bdr=BaiduRest (“0-57-7B-9F-1F-A1 api_key api_secert) 　　　　#生成　　#开始=time.time () 　　# bdr。text2audio(“你好啊”,“out.wav”) 　　#使用=time.time()——开始　　#打印使用　　　　#识别　　#开始=time.time () 　　结果=bdr.audio2text (voice_file) 　　
#结果=bdr.audio2text (“weather.pcm”) 　　#使用=time.time()——开始　　返回结果　　　　def get_master_audio (check_status=癱ut_status”): 　　如果check_status==癱ut_status”: 　　sql="选择id、url、time_long专家从ocenter_recognition状态=0” 　　elif check_status==癴inished_status”: 　　sql="选择id、url、time_long专家从ocenter_recognition finished_status=0 " 　　其他: 　　返回假　　data=https://www.yisu.com/zixun/rtysdb.select_data (sql,“更多”) 　　如果数据: 　　返回数据　　其他: 　　返回假　　　　　　def go_recognize (master_id): 　　section_path=db_config.SYS_PATH 　　sql="选择id、清除、url、状态从ocenter_section掉=% d和状态=0订单id asc限制10“% (master_id) 　　#打印sql 　　记录=rtysdb.select_data (sql,“更多”) 　　#打印记录　　如果没有记录: 　　返回假　　矩形的记录: 　　#打印section_path +‘/? rec [1] 　　voice_file=section_path +‘/? rec [2] 　　如果不是os.path.exists (voice_file): 　　继续　　结果=test_voice (voice_file) 　　打印结果　　退出(0) 　　如果结果: 　　# rtysdb.update_by_pk (ocenter_section, rec[0],{“内容”,因此,“状态”:1}) 　　sql="更新ocenter_section设置内容=? s”,地位=% d id=% d”%(因此,rec[0]) #打印sql 　　rtysdb.do_exec_sql (sql) 　　parent_content=rtysdb。select_data(“选择id,内容从ocenter_recognition id=% d % (rec [1])) 　　#打印parent_content 　　如果parent_content: 　　new_content=parent_content[1] +结果　　update_content_sql="更新ocenter_recognition设置内容=' % s ' id=% d”% (new_content rec [1]) 　　rtysdb.do_exec_sql (update_content_sql) 　　其他: 　　rtysdb。do_exec_sql(“更新ocenter_section设置状态=' % d ' id=% d %(因此,rec [0])) 　　time . 
sleep (5) 　　其他: 　　rtysdb。do_exec_sql(“更新ocenter_recognition finished_status=1, id=% d % (master_id)) 　　#对百度语音识别不了的音频文件进行转换　　def ffmpeg_convert (): 　　section_path=db_config.SYS_PATH 　　#打印section_path 　　used_audio=get_master_audio (“cut_status”) 　　#打印used_audio 　　如果used_audio: 　　在used_audio音频: 　　audio_path=section_path +‘/?音频[1] 　　new_audio=uuid.uuid1 () 　　command_line=" ffmpeg - " + audio_path +“ar 8000 - ac 1 - f wav”+ section_path +“/上传/转换/convert_”+ str (new_audio) +“wav”; 　　#打印command_line 　　os.popen (command_line) 　　如果os.path。存在(section_path +“/上传/转换/convert_”+ str (new_audio) +“wav”): 　　convert_name="上传/转换/convert_”+ str (new_audio) +“wav” 　　ffmpeg_cut (convert_name音频[3],音频[0]) 　　sql="更新ocenter_recognition设置状态=1,convert_name=' % s ' id=% d % (convert_name、音频[0]) 　　rtysdb.do_exec_sql (sql) 　　#将大音频文件切成碎片　　def ffmpeg_cut (convert_name原浆master_id): 　　section_path=db_config.SYS_PATH 　　如果sharps> 0: 　　因为我在范围(0,专家): 　　timeArray=time.localtime(我* 30) 　　h=imeArray strftime (“% H”) 　　h=int (h) - 8所示　　h=" 0 " + str (h) 　　女士=time.strftime (" % M: % S”, timeArray) 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null