|
|
| """
|
| 音频处理模块 - 加载、保存和处理音频文件
|
| """
|
| import numpy as np
|
| import librosa
|
| import soundfile as sf
|
| from typing import Tuple, Optional
|
|
|
|
|
| def load_audio(path: str, sr: int = 16000) -> np.ndarray:
|
| """
|
| 加载音频文件并重采样
|
|
|
| Args:
|
| path: 音频文件路径
|
| sr: 目标采样率 (默认 16000)
|
|
|
| Returns:
|
| np.ndarray: 音频数据 (float32, 单声道)
|
| """
|
| audio, orig_sr = librosa.load(path, sr=None, mono=True)
|
|
|
| if orig_sr != sr:
|
| audio = librosa.resample(audio, orig_sr=orig_sr, target_sr=sr)
|
|
|
| return audio.astype(np.float32)
|
|
|
|
|
| def save_audio(path: str, audio: np.ndarray, sr: int = 48000):
|
| """
|
| 保存音频到文件
|
|
|
| Args:
|
| path: 输出文件路径
|
| audio: 音频数据
|
| sr: 采样率 (默认 48000)
|
| """
|
|
|
| audio = np.clip(audio, -1.0, 1.0)
|
| sf.write(path, audio, sr)
|
|
|
|
|
| def soft_clip(
|
| audio: np.ndarray,
|
| threshold: float = 0.9,
|
| ceiling: float = 0.99,
|
| ) -> np.ndarray:
|
| """
|
| 使用平滑软削波抑制峰值,尽量保留主体响度。
|
|
|
| Args:
|
| audio: 输入音频
|
| threshold: 开始压缩的阈值
|
| ceiling: 软削波上限
|
|
|
| Returns:
|
| np.ndarray: 处理后的音频
|
| """
|
| audio = np.asarray(audio, dtype=np.float32)
|
|
|
| if threshold <= 0:
|
| raise ValueError("threshold 必须大于 0")
|
| if ceiling <= threshold:
|
| raise ValueError("ceiling 必须大于 threshold")
|
|
|
| result = audio.copy()
|
| abs_audio = np.abs(result)
|
| mask = abs_audio > threshold
|
| if not np.any(mask):
|
| return result
|
|
|
| overshoot = (abs_audio[mask] - threshold) / (ceiling - threshold + 1e-8)
|
| compressed = threshold + (ceiling - threshold) * np.tanh(overshoot)
|
| result[mask] = np.sign(result[mask]) * compressed
|
| return result.astype(np.float32, copy=False)
|
|
|
|
|
| def soft_clip_array(
|
| audio: np.ndarray,
|
| threshold: float = 0.9,
|
| ceiling: float = 0.99,
|
| ) -> np.ndarray:
|
| """软削波数组版本,支持单声道/多声道。"""
|
| return soft_clip(audio, threshold=threshold, ceiling=ceiling)
|
|
|
|
|
| def get_audio_info(path: str) -> dict:
|
| """
|
| 获取音频文件信息
|
|
|
| Args:
|
| path: 音频文件路径
|
|
|
| Returns:
|
| dict: 音频信息
|
| """
|
| info = sf.info(path)
|
| return {
|
| "duration": info.duration,
|
| "sample_rate": info.samplerate,
|
| "channels": info.channels,
|
| "format": info.format
|
| }
|
|
|
|
|
| def normalize_audio(audio: np.ndarray, target_db: float = -20.0) -> np.ndarray:
|
| """
|
| 音频响度归一化
|
|
|
| Args:
|
| audio: 输入音频
|
| target_db: 目标响度 (dB)
|
|
|
| Returns:
|
| np.ndarray: 归一化后的音频
|
| """
|
| rms = np.sqrt(np.mean(audio ** 2))
|
| if rms > 0:
|
| target_rms = 10 ** (target_db / 20)
|
| audio = audio * (target_rms / rms)
|
| return np.clip(audio, -1.0, 1.0)
|
|
|
|
|
| def trim_silence(audio: np.ndarray, sr: int = 16000,
|
| top_db: int = 30) -> np.ndarray:
|
| """
|
| 去除音频首尾静音
|
|
|
| Args:
|
| audio: 输入音频
|
| sr: 采样率
|
| top_db: 静音阈值 (dB)
|
|
|
| Returns:
|
| np.ndarray: 去除静音后的音频
|
| """
|
| trimmed, _ = librosa.effects.trim(audio, top_db=top_db)
|
| return trimmed
|
|
|