开发者

Python 识别录音并转为文字的实现

开发者 https://www.devze.com 2022-12-12 12:12 出处:网络 作者: leader_ww
目录程式功能: 用 UI 界面,点击界面上的“开始识别”来录音(调用百度云语音接口),并自动将结果显示在界面的文本框中
目录

    程式功能: 用 UI 界面,点击界面上的“开始识别”来录音(调用百度云语音接口),并自动将结果显示在界面的文本框中

    Time: 2022/03/06

    Author: Xiaohong

    功能:Python 录音并调用百度云语音接口进行识别,将语音转为文字

    项目的文件结构方式:

    1. PyQt5 UI 文件:  My_Audio_Record_cloud.ui

    2. PyQt5 UI 文件转换生成的 PY 文件:  My_Audio_Record_cloud_Ui.py

    3. PyQt5 UI 文件对应的 Class 文件:  My_Audio_Record_cloud_class.py

    4. 通用的消息显示 文件(在My_Audio_Record_cloud_class.py 中被调用):  FangMessage.py

     本例为实验性质的示例,不能直接用于实际产品,支持的语音录入长度也较短

    主程序界面如下:

    Python 识别录音并转为文字的实现

    主程序 My_Audio_Record_cloud_class.py:

    # -*- coding: utf-8 -*-
    '''
    程式功能: 用 UI 界面,点击界面上的“开始识别”来录音,并自动将结果显示在界面的文本框中
    Time: 2022/03/06
    Author: Xiaohong
    '''
    import wave  # standard-library module; no pip install is needed
    import My_Audio_Record_cloud_Ui as my_audio_record_cloud
    from pyaudio import PyAudio, paInt16  # 直接用pip安装的pyaudio不支持3.7
     
    # 若安装失败的话,下载对应的whl 文件  https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio
     
    from PyQt5 import QtGui, QtCore, QtWidgets
     
    from PyQt5.QtWidgets import (
        QApplication,
        QMainWindow,
        QDialog,
        QSplashScreen,
        QToolButton,
        QToolTip,
        QWidget,
        QMessageBox,
        QAction,
        QFileDialog,
    )
     
    # from PyQt5.QtWidgets import (
    #     QApplication,
    #     QWidget,
    # )
     
    import sys, os, json, pycurl, urllib
    import urllib.request
    from FangMessage import FangMessage
     
     
    class Audio_record_cloud_class(QMainWindow, my_audio_record_cloud.Ui_MainWindow):
        """Main window that records a short clip and shows its transcription.

        Clicking ``pushButton`` records audio from the default input device,
        saves it as a WAV file in the current working directory, uploads the raw
        PCM frames to the Baidu speech endpoint and writes the first recognition
        candidate into ``textBrowser``.

        NOTE(review): recording and both HTTP requests run synchronously inside
        the button's slot, so the window cannot repaint until they finish.
        """

        def __init__(self, parent=None):
            """Build the UI, connect the start button and set audio defaults.

            Args:
                parent: Optional parent widget, forwarded to ``QMainWindow``.
            """
            # Forward ``parent``; it used to be accepted but silently dropped.
            super().__init__(parent)
            self.child = my_audio_record_cloud.Ui_MainWindow()
            self.child.setupUi(self)
            self.file_name = ""
            self.child.pushButton.clicked.connect(self.my_start)
            # Capture / WAV parameters.
            self.framerate = 8000  # samples per second
            self.NUM_SAMPLES = 2000  # frames fetched per stream.read() call
            self.channels = 1
            self.sampwidth = 2  # bytes per sample (paInt16)
            # Recording length in seconds.
            self.TIME = 5
            # Playback chunk size; no method of this class reads it.
            self.chunk = 1024

        def init_file_name(self):
            """Choose the default recording path and remember it.

            Returns:
                ``<cwd>/test<timestamp>.wav``; the same value is stored in
                ``self.file_name``.
            """
            file_name = 'test' + self.get_timeseq() + '.wav'
            self.file_name = os.path.join(os.getcwd(), file_name)
            return self.file_name

        def get_timeseq(self):
            """Return the current local time formatted as ``YYYY-MM-DD-HH-MM-SS``."""
            import time

            return time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())

        def Start_record(self):
            """Record ``self.TIME`` seconds of audio and save it to a new WAV file.

            The destination is chosen by ``init_file_name``. A completion message
            is shown through ``FangMessage`` once the file has been written.
            """
            self.init_file_name()
            # Derive the number of reads from the actual rate and chunk size.
            # The previous ``count <= TIME * 4`` loop assumed four chunks per
            # second and read one chunk too many.
            total_chunks = max(
                1, int(self.TIME * self.framerate) // self.NUM_SAMPLES
            )
            frames = []
            pa = PyAudio()
            try:
                stream = pa.open(
                    format=paInt16,
                    channels=self.channels,
                    rate=self.framerate,
                    input=True,
                    frames_per_buffer=self.NUM_SAMPLES,
                )
                try:
                    for _ in range(total_chunks):
                        frames.append(stream.read(self.NUM_SAMPLES))
                        print("..")
                finally:
                    stream.stop_stream()
                    stream.close()
            finally:
                # Release the audio backend even if opening or reading failed.
                pa.terminate()

            self.save_wave_file(self.file_name, frames)
            FangMessage1 = FangMessage()
            FangMessage1.runY('完成', '已完成录音', 'OK')

        def save_wave_file(self, filename, data):
            """Write PCM chunks to ``filename`` as a WAV file.

            Args:
                filename: Destination path.
                data: Iterable of ``bytes`` objects holding raw PCM frames.
            """
            # The context manager closes the file even if a write fails.
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.channels)
                wf.setsampwidth(self.sampwidth)
                wf.setframerate(self.framerate)
                wf.writeframes(b''.join(data))

        def dump_res(self, buf):
            """Print a complete service reply and display the recognised text.

            Args:
                buf: The full JSON response body (``bytes`` or ``str``).
            """
            print(buf)
            try:
                reply = json.loads(buf)
            except ValueError:
                # Not JSON (for example an HTML error page): show it verbatim
                # instead of raising inside a Qt slot.
                reply = {'err_msg': buf}

            candidates = reply.get('result') if isinstance(reply, dict) else None
            if candidates:
                text = candidates[0]
            else:
                # Replies without 'result' used to raise KeyError.
                # NOTE(review): 'err_msg' is the error field named in Baidu's
                # API documentation - confirm against the current version.
                detail = reply.get('err_msg', reply) if isinstance(reply, dict) else reply
                text = '识别失败: {}'.format(detail)
            self.child.textBrowser.setText(text)
            print(text)

        def get_token(self):
            """Request an OAuth access token for the configured application keys.

            Returns:
                The ``access_token`` value from the token endpoint's JSON reply.

            Raises:
                urllib.error.URLError: If the request fails or times out.
                KeyError: If the reply carries no ``access_token``.
            """
            from urllib.parse import urlencode

            # Placeholders: replace with the keys of your own Baidu application.
            apiKey = "XXXXXXXXXXXXXXXXXXXXXXX"
            secretKey = "YYYYYYYYYYYYYYYYYYYYYYYYY"

            query = urlencode(
                {
                    'grant_type': 'client_credentials',
                    'client_id': apiKey,
                    'client_secret': secretKey,
                }
            )
            auth_url = "https://aip.baidubce.com/oauth/2.0/token?" + query

            # Same 80 s limit as the upload request, so a dead connection
            # cannot freeze the UI indefinitely.
            with urllib.request.urlopen(auth_url, timeout=80) as res:
                json_data = res.read()
            return json.loads(json_data)['access_token']

        def use_cloud(self, token):
            """Upload the recorded WAV's PCM data and display the transcription.

            Args:
                token: Access token as returned by ``get_token``.
            """
            from urllib.parse import urlencode

            with wave.open(self.file_name, 'rb') as fp:
                framerate = fp.getframerate()
                print('sampwidth:', fp.getsampwidth())
                print('framerate:', framerate)
                print('channels:', fp.getnchannels())
                audio_data = fp.readframes(fp.getnframes())
            # Use the real payload size rather than assuming 2 bytes per frame.
            f_len = len(audio_data)

            cuid = "4d36e972-e325-11ce-bfc1-08002be10318"
            # The access token is deliberately not printed: it is a credential.
            srv_url = 'http://vop.baidu.com/server_api?' + urlencode(
                {'cuid': cuid, 'token': token}
            )
            http_header = [
                'Content-Type:audio/pcm;rate=%d' % framerate,
                'Content-Length:%d' % f_len,
            ]

            # libcurl may deliver the body in several pieces, so collect them
            # and parse only the complete reply. list.append returns None,
            # which pycurl treats as "all bytes consumed".
            chunks = []
            c = pycurl.Curl()
            try:
                c.setopt(pycurl.URL, str(srv_url))
                c.setopt(c.HTTPHEADER, http_header)
                c.setopt(c.POST, 1)
                c.setopt(c.CONNECTTIMEOUT, 80)
                c.setopt(c.TIMEOUT, 80)
                c.setopt(c.WRITEFUNCTION, chunks.append)
                c.setopt(c.POSTFIELDS, audio_data)
                c.setopt(c.POSTFIELDSIZE, f_len)
                c.perform()
            finally:
                c.close()

            self.dump_res(b''.join(chunks))

        def my_start(self):
            """Slot for the start button: record, fetch a token, upload, display."""
            print('OK')
            self.Start_record()
            self.use_cloud(self.get_token())
     
     
    if __name__ == "__main__":
        # Script entry point: create the Qt application, show the main window
        # and hand the event loop's exit status back to the interpreter.
        application = QApplication(sys.argv)
        main_window = Audio_record_cloud_class()
        main_window.show()
        exit_status = application.exec_()
        sys.exit(exit_status)

    Ui 转化py文件如下:My_Audio_Record_cloud_Ui.py

    # -*- coding: utf-8 -*-
     
    # Form implementation generated from reading ui file 'd:\vscode_2020\My_Audio\My_Audio\My_Audio_Record_cloud.ui'
    #
    # Created by: PyQt5 UI code generator 5.15.0
    #
    # WARNING: Any manual changes made to this file will be lost when pyuic5 is
    # run again.  Do not edit this file unless you know what you are doing.
     
     
    from PyQt5 import QtCore, QtGui, QtWidgets
     
     
    class Ui_MainWindow(object):
        """Widget layout for the speech-recognition window (pyuic5 output).

        The statements are kept as pyuic5 generated them; the only code changes
        restore two method/attribute names (``setGeometry`` on ``label_2`` and
        ``self.statusbar``) that had stray URL text spliced into them.
        """

        def setupUi(self, MainWindow):
            """Create the child widgets on ``MainWindow`` and wire the close button.

            Args:
                MainWindow: The main-window object that receives the central
                    widget, menu bar and status bar.
            """
            MainWindow.setObjectName("MainWindow")
            MainWindow.resize(558, 525)
            self.centralwidget = QtWidgets.QWidget(MainWindow)
            self.centralwidget.setObjectName("centralwidget")
            # Text area that displays the recognition result.
            self.textBrowser = QtWidgets.QTextBrowser(self.centralwidget)
            self.textBrowser.setGeometry(QtCore.QRect(30, 50, 501, 351))
            self.textBrowser.setObjectName("textBrowser")
            # "Start recognition" button.
            self.pushButton = QtWidgets.QPushButton(self.centralwidget)
            self.pushButton.setGeometry(QtCore.QRect(40, 420, 75, 23))
            self.pushButton.setObjectName("pushButton")
            self.label = QtWidgets.QLabel(self.centralwidget)
            self.label.setGeometry(QtCore.QRect(40, 460, 491, 16))
            self.label.setObjectName("label")
            self.label_2 = QtWidgets.QLabel(self.centralwidget)
            self.label_2.setGeometry(QtCore.QRect(30, 30, 161, 16))
            self.label_2.setObjectName("label_2")
            # Title label with a larger bold font.
            self.label_3 = QtWidgets.QLabel(self.centralwidget)
            self.label_3.setGeometry(QtCore.QRect(180, 10, 111, 31))
            font = QtGui.QFont()
            font.setFamily("Agency FB")
            font.setPointSize(18)
            font.setBold(True)
            font.setWeight(75)
            self.label_3.setFont(font)
            self.label_3.setObjectName("label_3")
            self.label_4 = QtWidgets.QLabel(self.centralwidget)
            self.label_4.setGeometry(QtCore.QRect(480, 20, 54, 12))
            self.label_4.setObjectName("label_4")
            # "Exit" button; connected to MainWindow.close below.
            self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
            self.pushButton_2.setGeometry(QtCore.QRect(450, 420, 75, 23))
            self.pushButton_2.setObjectName("pushButton_2")
            MainWindow.setCentralWidget(self.centralwidget)
            self.menubar = QtWidgets.QMenuBar(MainWindow)
            self.menubar.setGeometry(QtCore.QRect(0, 0, 558, 23))
            self.menubar.setObjectName("menubar")
            MainWindow.setMenuBar(self.menubar)
            self.statusbar = QtWidgets.QStatusBar(MainWindow)
            self.statusbar.setObjectName("statusbar")
            MainWindow.setStatusBar(self.statusbar)

            self.retranslateUi(MainWindow)
            self.pushButton_2.clicked.connect(MainWindow.close)
            QtCore.QMetaObject.connectSlotsByName(MainWindow)

        def retranslateUi(self, MainWindow):
            """Set the window title and the visible text of every widget.

            Args:
                MainWindow: The window whose title is set.
            """
            _translate = QtCore.QCoreApplication.translate
            MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
            self.pushButton.setText(_translate("MainWindow", "开始识别"))
            self.label.setText(_translate("MainWindow", "说明:点击“开始识别”按钮来录音,并通过百度语音的功能,自动将结果显示在文本框中"))
            self.label_2.setText(_translate("MainWindow", "语音识别的结果:"))
            self.label_3.setText(_translate("MainWindow", "语音识别"))
            self.label_4.setText(_translate("MainWindow", "v20220306"))
            self.pushButton_2.setText(_translate("MainWindow", "结束"))

    到此,这篇关于 Python 识别录音并转为文字的实现的文章就介绍到这了。更多相关 Python 识别录音转为文字的内容,请搜索我们以前的文章或继续浏览下面的相关文章,希望大家以后多多支持我们!

    0

    精彩评论

    暂无评论...
    验证码 换一张
    取 消

    关注公众号