麦克风获取
# Open an input (capture) stream on the PyAudio instance using the
# recording parameters defined in the settings snippet.
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
设置参数
# Recording parameters shared by the capture and recognition snippets.
FORMAT = pyaudio.paInt16  # sample format handed to audio.open(format=...)
CHANNELS = 1  # channel count handed to audio.open(channels=...)
RATE = 16000  # sampling rate handed to audio.open(rate=...)
CHUNK = 1024  # frames per buffer, and frames requested per stream.read()
RECORD_SECONDS = 5  # recording length used to size the read loop
WAVE_OUTPUT_FILENAME = "output.wav"  # file path passed to model.generate(input=...)
读取数据块
# Collect RECORD_SECONDS worth of audio, CHUNK frames at a time.
# RATE / CHUNK is the number of reads per second; int() truncates any
# partial final chunk.
frames = []
for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
阿里语音识别模型加载
# Local directories holding the model files for each pipeline stage.
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# Build one FunASR pipeline: the main recognition model plus the VAD,
# punctuation and speaker models, all running on the CPU.
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
阿里语音识别
# Run recognition on the recorded WAV file.
# NOTE(review): batch_size_s and hotword are passed straight through to
# FunASR; confirm their exact meaning against the FunASR documentation.
res = model.generate(
    input=WAVE_OUTPUT_FILENAME,
    batch_size_s=16000,
    hotword='魔搭',
)
整体代码
import os
import threading
import wave

import keyboard
import pyaudio
from funasr import AutoModel
# --- Audio capture settings -------------------------------------------------
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
WAVE_OUTPUT_FILENAME = "./wav_data/output.wav"

# --- Shared recording state -------------------------------------------------
audio = pyaudio.PyAudio()
frames = []           # raw chunks captured during the current recording
stream = None         # open input stream while recording, otherwise None
recording = False     # polled by the recorder thread to know when to stop
record_thread = None  # background thread running record_audio()

# --- Speech recognition model -----------------------------------------------
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
print("加载模型完成!!!")


def start_recording():
    """Open the microphone stream and start capturing in the background.

    Does nothing if a recording is already in progress.
    """
    global stream, recording, record_thread
    if recording:
        return
    print("开始录音...")
    # Drop audio from any previous recording so takes are not concatenated.
    frames.clear()
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )
    # Only flag the recording as active once the stream opened successfully,
    # otherwise a failed open would leave the flag stuck on True.
    recording = True
    record_thread = threading.Thread(target=record_audio)
    record_thread.start()


def stop_recording():
    """Stop capturing, save the audio to disk and run speech recognition.

    Does nothing if no recording is in progress.
    """
    global stream, recording, record_thread
    if not recording:
        return
    print("录音结束.")
    recording = False
    # Wait for the recorder thread to leave stream.read() before the stream
    # is closed underneath it.
    if record_thread is not None:
        record_thread.join()
        record_thread = None
    stream.stop_stream()
    stream.close()
    stream = None
    save_audio()
    # The PyAudio instance is deliberately NOT terminated here so that the
    # start hotkey keeps working for further recordings.
    print("开始识别!!!")
    res = model.generate(
        input=WAVE_OUTPUT_FILENAME,
        batch_size_s=16000,
        hotword='魔搭',
    )
    print("识别结束!!!")
    print("识别结果:", res)


def record_audio():
    """Read chunks from the open stream until recording is switched off."""
    while recording:
        data = stream.read(CHUNK)
        frames.append(data)


def save_audio():
    """Write the captured chunks to WAVE_OUTPUT_FILENAME as a WAV file."""
    out_dir = os.path.dirname(WAVE_OUTPUT_FILENAME)
    if out_dir:
        # wave.open does not create missing parent directories.
        os.makedirs(out_dir, exist_ok=True)
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


keyboard.add_hotkey('ctrl', start_recording)
keyboard.add_hotkey('alt', stop_recording)
print("按 Ctrl 开始录音,按 Alt 结束录音")
try:
    keyboard.wait()
finally:
    # Shut down cleanly on exit (e.g. KeyboardInterrupt): stop any active
    # capture, then release the audio backend exactly once.
    recording = False
    if record_thread is not None:
        record_thread.join()
    if stream is not None:
        stream.close()
    audio.terminate()