文章目录
- MFCC特征
- 代码分享
MFCC特征
MFCC(Mel-Frequency Cepstral Coefficients,梅尔频率倒谱系数)是模拟人耳对声音频率的感知方式、对音频信号进行处理后得到的特征,广泛用于语音识别和音频处理。
代码分享
import os
import librosa
import pywt
import matplotlib.pyplot as plt
import numpy as np
import cv2
from pathlib import Path
from tqdm import tqdm # 需要安装 tqdm 库
from skimage.transform import resize#数据处理 绘制图形
def _render_mfcc_image(audio_path, image_path, target_duration, n_mfcc):
    """Render the MFCC matrix of one audio file as an axis-free PNG.

    Args:
        audio_path: Path of the audio file to read.
        image_path: Destination path of the PNG image.
        target_duration: Clip length in seconds that the signal is padded
            or truncated to before feature extraction.
        n_mfcc: Number of Mel bands and of MFCC coefficients to compute.

    Raises:
        Exception: Whatever librosa or matplotlib raise for an unreadable
            file or an unwritable destination; the caller handles it.
    """
    # Imported explicitly: a bare ``import librosa`` does not expose the
    # ``display`` submodule on librosa releases before 0.10.
    import librosa.display

    # sr=None keeps the file's native sampling rate instead of resampling.
    samples, sr = librosa.load(audio_path, sr=None)
    # Force a fixed length so every image covers the same time span.
    samples = librosa.util.fix_length(
        samples, size=int(round(target_duration * sr))
    )

    # MFCCs are derived from the log-power Mel spectrogram.
    mel_power = librosa.feature.melspectrogram(y=samples, sr=sr, n_mels=n_mfcc)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    mfcc = librosa.feature.mfcc(S=mel_db, n_mfcc=n_mfcc)

    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(mfcc, ax=ax)
        # Strip every decoration so the image contains only the features.
        ax.axis("off")
        ax.set_xticks([])
        ax.set_yticks([])
        fig.savefig(image_path, dpi=100, pad_inches=0, bbox_inches="tight")
    finally:
        # Always release the figure; otherwise each failed file would leave
        # an open figure behind and memory would grow over a long batch.
        plt.close(fig)


def process_audio_files(main_folder, *, target_duration=20, n_mfcc=24):
    """Convert every ``.wav`` file under *main_folder* into an MFCC image.

    The folder is searched recursively. Images are written below
    ``<main_folder>/MFCC``, mirroring the sub-directory layout of the audio
    files, one ``<stem>.png`` per input file. A file that fails is reported
    on stdout and skipped; the remaining files are still processed.

    Args:
        main_folder: Root directory that contains the audio files.
        target_duration: Clip length in seconds each signal is padded or
            truncated to. Defaults to 20.
        n_mfcc: Number of Mel bands and MFCC coefficients. Defaults to 24.

    Returns:
        None.
    """
    root = Path(main_folder)
    output_base = root / "MFCC"
    output_base.mkdir(parents=True, exist_ok=True)

    # Sorted so that runs are deterministic and the progress is reproducible.
    wav_files = sorted(root.rglob("*.wav"))
    if not wav_files:
        print("未找到任何音频文件!")
        return

    with tqdm(total=len(wav_files), desc="处理进度") as pbar:
        for file_path in wav_files:
            try:
                # Recreate the input's relative directory under the output root.
                out_dir = output_base / file_path.relative_to(root).parent
                out_dir.mkdir(parents=True, exist_ok=True)
                _render_mfcc_image(
                    file_path,
                    out_dir / f"{file_path.stem}.png",
                    target_duration,
                    n_mfcc,
                )
            except Exception as e:
                # Best-effort batch: one bad file must not abort the run.
                print(f"错误处理 {file_path}: {str(e)}")
            finally:
                # Advance the bar whether the file succeeded or failed.
                pbar.update(1)


if __name__ == "__main__":
    # Dependencies: pip install librosa matplotlib numpy tqdm
    # Placeholder value; replace it with the folder that holds the audio files.
    main_folder = "你的文件路径"
    process_audio_files(main_folder)