Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
| 2 |
import io
|
| 3 |
import traceback
|
| 4 |
import numpy as np
|
| 5 |
-
import scipy.io.wavfile as wavfile
|
| 6 |
from fastapi import FastAPI, HTTPException
|
| 7 |
from fastapi.responses import Response
|
| 8 |
from pydantic import BaseModel
|
|
@@ -42,7 +42,7 @@ async def synthesize(request: TTSRequest):
|
|
| 42 |
# 2. Синтез
|
| 43 |
wav, duration = tts.synthesize(request.text, voice_style=style, lang=request.lang)
|
| 44 |
|
| 45 |
-
# 3. Конвертация в numpy (если модель вернула тензор PyTorch)
|
| 46 |
if hasattr(wav, 'cpu'):
|
| 47 |
wav = wav.cpu().numpy()
|
| 48 |
elif hasattr(wav, 'numpy'):
|
|
@@ -54,7 +54,6 @@ async def synthesize(request: TTSRequest):
|
|
| 54 |
wav = wav.squeeze()
|
| 55 |
|
| 56 |
# 5. Нормализация и конвертация в int16 (стандарт для WAV)
|
| 57 |
-
# Сначала приводим к float32 для безопасной нормализации
|
| 58 |
wav = wav.astype(np.float32)
|
| 59 |
max_val = np.max(np.abs(wav))
|
| 60 |
if max_val > 1.0:
|
|
@@ -66,18 +65,22 @@ async def synthesize(request: TTSRequest):
|
|
| 66 |
# 6. Получаем sample rate
|
| 67 |
sample_rate = getattr(tts, 'sample_rate', 24000)
|
| 68 |
|
| 69 |
-
# 7. Записываем в память через scipy
|
| 70 |
out = io.BytesIO()
|
| 71 |
wavfile.write(out, sample_rate, wav_int16)
|
| 72 |
audio_bytes = out.getvalue()
|
| 73 |
|
| 74 |
-
# 8. В
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
return Response(
|
| 76 |
content=audio_bytes,
|
| 77 |
media_type='audio/wav',
|
| 78 |
headers={
|
| 79 |
"Content-Disposition": "attachment; filename=speech.wav",
|
| 80 |
-
"X-Audio-Duration": str(round(
|
| 81 |
}
|
| 82 |
)
|
| 83 |
|
|
|
|
| 2 |
import io
|
| 3 |
import traceback
|
| 4 |
import numpy as np
|
| 5 |
+
import scipy.io.wavfile as wavfile
|
| 6 |
from fastapi import FastAPI, HTTPException
|
| 7 |
from fastapi.responses import Response
|
| 8 |
from pydantic import BaseModel
|
|
|
|
| 42 |
# 2. Синтез
|
| 43 |
wav, duration = tts.synthesize(request.text, voice_style=style, lang=request.lang)
|
| 44 |
|
| 45 |
+
# 3. Конвертация аудио в numpy (если модель вернула тензор PyTorch)
|
| 46 |
if hasattr(wav, 'cpu'):
|
| 47 |
wav = wav.cpu().numpy()
|
| 48 |
elif hasattr(wav, 'numpy'):
|
|
|
|
| 54 |
wav = wav.squeeze()
|
| 55 |
|
| 56 |
# 5. Нормализация и конвертация в int16 (стандарт для WAV)
|
|
|
|
| 57 |
wav = wav.astype(np.float32)
|
| 58 |
max_val = np.max(np.abs(wav))
|
| 59 |
if max_val > 1.0:
|
|
|
|
| 65 |
# 6. Получаем sample rate
|
| 66 |
sample_rate = getattr(tts, 'sample_rate', 24000)
|
| 67 |
|
| 68 |
+
# 7. Записываем в память через scipy
|
| 69 |
out = io.BytesIO()
|
| 70 |
wavfile.write(out, sample_rate, wav_int16)
|
| 71 |
audio_bytes = out.getvalue()
|
| 72 |
|
| 73 |
+
# 8. ИСПРАВЛЕНИЕ: Превращаем duration из numpy массива в обычный float
|
| 74 |
+
# .item() безопасно извлекает скалярное значение из numpy array
|
| 75 |
+
duration_float = float(np.asarray(duration).item())
|
| 76 |
+
|
| 77 |
+
# 9. Возвращаем аудио
|
| 78 |
return Response(
|
| 79 |
content=audio_bytes,
|
| 80 |
media_type='audio/wav',
|
| 81 |
headers={
|
| 82 |
"Content-Disposition": "attachment; filename=speech.wav",
|
| 83 |
+
"X-Audio-Duration": str(round(duration_float, 2))
|
| 84 |
}
|
| 85 |
)
|
| 86 |
|