Spaces:
Running
Running
File size: 3,764 Bytes
6ddd7e4 3f3d89a b5b5082 d76158a a8f789a 7890f41 d76158a 7890f41 d76158a 3c03d8e 7b8a54e ad0617c d76158a b5b5082 72db052 d2078f6 b5b5082 d2078f6 b5b5082 7890f41 d76158a b5b5082 d76158a 58068be d76158a b5b5082 d76158a b5b5082 d76158a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import spaces
import os
import sys
import time
import json
import numpy as np
import gradio as gr
import soundfile as sf
from datetime import datetime
sys.path.append("neutts-air")
from neuttsair.neutts import NeuTTSAir
# === Base paths ===
# Everything is resolved against the current working directory, so the app
# has to be started from the directory that contains "neutts-air".
BASE_PATH = os.getcwd()
HISTORY_PATH = os.path.join(BASE_PATH, "history")
SAMPLES_PATH = os.path.join(BASE_PATH, "neutts-air", "samples")

# Values the UI is pre-filled with.
DEFAULT_REF_PATH = os.path.join(SAMPLES_PATH, "dave.wav")
DEFAULT_REF_TEXT = "So I'm live on radio..."
DEFAULT_GEN_TEXT = "My name is Dave, and um, I'm from London."

# Generated audio and its metadata are written here; create it up front.
os.makedirs(HISTORY_PATH, exist_ok=True)

# === Initialise NeuTTS-Air on CPU ===
# Built once at import time and shared by every request; both the backbone
# and the codec are pinned to the CPU.
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air",
    codec_repo="neuphonic/neucodec",
    backbone_device="cpu",
    codec_device="cpu",
)
# === Main processing function ===
def infer(ref_text, ref_audio_path, gen_text):
    """Synthesise ``gen_text`` in the reference voice and archive the result.

    The reference clip is encoded with ``tts.encode_reference``, the speech is
    generated with ``tts.infer``, and the waveform plus a JSON metadata file
    are written into ``HISTORY_PATH`` under a timestamp-based name.

    Args:
        ref_text: Transcript of the reference audio.
        ref_audio_path: Filesystem path of the reference audio clip.
        gen_text: Text to be spoken.

    Returns:
        A 3-tuple ``((sample_rate, waveform), status, status)``. The click
        handler is wired to three output slots (the audio player followed by
        the status textbox listed twice), so the audio must travel as a single
        ``(sample_rate, data)`` pair and the status message is repeated.

    Raises:
        gr.Error: If no reference audio was supplied or ``gen_text`` is blank.
    """
    # Fail early with a readable message instead of crashing deep inside the
    # model or in os.path.basename(None) when the audio input was cleared.
    if not ref_audio_path:
        raise gr.Error("Vui lòng chọn file âm thanh tham chiếu.")
    if not gen_text or not gen_text.strip():
        raise gr.Error("Vui lòng nhập văn bản cần tạo.")

    gr.Info("Bắt đầu xử lý (CPU mode)...")

    # Encode the reference clip, then generate the new audio.
    ref_codes = tts.encode_reference(ref_audio_path)
    wav = tts.infer(gen_text, ref_codes, ref_text)
    # NOTE(review): sample rate handed to soundfile and Gradio; presumably the
    # model's native output rate - confirm against the NeuTTS-Air docs.
    sr = 24000

    # === Save into the history folder ===
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    fname = f"tts_{timestamp}.wav"
    fpath = os.path.join(HISTORY_PATH, fname)
    sf.write(fpath, wav, sr)

    # Metadata sits next to the audio so the history view can be rebuilt.
    meta = {
        "timestamp": timestamp,
        "ref_text": ref_text,
        "gen_text": gen_text,
        "ref_audio": os.path.basename(ref_audio_path),
        "output_audio": fname,
    }
    with open(os.path.join(HISTORY_PATH, f"{timestamp}.json"), "w", encoding="utf-8") as f:
        json.dump(meta, f, ensure_ascii=False, indent=2)

    gr.Info(f"Đã lưu file: {fpath}")
    status = f"Lưu thành công: {fname}"
    # Previously returned `sr, wav, status`, which fed the bare sample rate to
    # the audio component and the waveform to the status textbox.
    return (sr, wav), status, status
# === History reload function ===
def load_history():
    """Render the saved generations as an HTML list.

    Every ``*.json`` file in ``HISTORY_PATH`` is read as a metadata record with
    the keys ``timestamp``, ``gen_text`` and ``output_audio``. Records whose
    audio file no longer exists are left out, and so are metadata files that
    cannot be parsed or lack a required key, so a single damaged file does not
    take down the whole history view.

    Returns:
        An HTML string listing the entries in reverse filename order, or the
        plain message ``"Chưa có lịch sử nào."`` when nothing is available.
    """
    # Local import keeps the block self-contained; the stored text comes from
    # user input and must be escaped before it is embedded in markup.
    from html import escape

    entries = []
    for name in sorted(os.listdir(HISTORY_PATH)):
        if not name.endswith(".json"):
            continue
        try:
            with open(os.path.join(HISTORY_PATH, name), "r", encoding="utf-8") as f:
                data = json.load(f)
            wav_path = os.path.join(HISTORY_PATH, data["output_audio"])
            entry = (data["timestamp"], data["gen_text"], wav_path)
        except (ValueError, KeyError, TypeError):
            # Undecodable/invalid JSON, a missing key, or an unexpected
            # top-level type: skip this record and keep the rest.
            continue
        if os.path.exists(wav_path):
            entries.append(entry)

    if not entries:
        return "Chưa có lịch sử nào."

    # NOTE(review): the "file/<path>" link scheme is kept as it was; verify it
    # matches the file-serving route of the installed Gradio version.
    rows = [
        f"<li><b>{escape(str(stamp))}</b>: {escape(str(text))} - "
        f"<a href='file/{escape(path)}' download='{escape(os.path.basename(path))}'>Tải</a></li>"
        for stamp, text, path in reversed(entries)
    ]
    return "<h4>Lịch sử đã tạo:</h4><ul>" + "".join(rows) + "</ul>"
# === Gradio interface ===
# NOTE(review): the source lost its indentation, so which components belong to
# the gr.Row() is an assumption (the two reference inputs) - confirm.
with gr.Blocks(title="NeuTTS-Air☁️ CPU Mode + Auto History") as demo:
    gr.Markdown("## 🌀 NeuTTS-Air (CPU Mode) — Lưu tự động & tải lại lịch sử")

    # Inputs: the reference voice (transcript + clip) and the text to speak.
    with gr.Row():
        ref_text = gr.Textbox(value=DEFAULT_REF_TEXT, label="Reference Text")
        ref_audio = gr.Audio(value=DEFAULT_REF_PATH, label="Reference Audio", type="filepath")
    gen_text = gr.Textbox(value=DEFAULT_GEN_TEXT, label="Text to Generate")

    # Outputs: the synthesised audio and a status line.
    output_audio = gr.Audio(label="Generated Speech", type="numpy")
    info_text = gr.Textbox(label="Kết quả / Trạng thái")

    gen_btn = gr.Button("🎤 Generate & Save")
    # Three output slots are declared (the status box appears twice), so the
    # handler has to return exactly three values.
    gen_btn.click(
        fn=infer,
        inputs=[ref_text, ref_audio, gen_text],
        outputs=[output_audio, info_text, info_text],
    )

    # History panel with a manual refresh button.
    hist_html = gr.HTML()
    reload_btn = gr.Button("🔄 Tải lại lịch sử")
    reload_btn.click(fn=load_history, outputs=hist_html)

    # Populate the history as soon as the page is opened.
    demo.load(fn=load_history, outputs=hist_html)
if __name__ == "__main__":
    # Script entry point: announce start-up, then serve the UI.
    print("Chạy nền NeuTTS-Air CPU Mode (có lưu lịch sử)...")
    # Directories passed to Gradio as allowed_paths: the bundled samples and
    # the generated history.
    served_dirs = [SAMPLES_PATH, HISTORY_PATH]
    demo.launch(inbrowser=True, allowed_paths=served_dirs)
|