# neutts-air
#
# Running
#
# File size: 3,764 Bytes

import spaces
import os
import sys
import time
import json
import numpy as np
import gradio as gr
import soundfile as sf
from datetime import datetime

sys.path.append("neutts-air")
from neuttsair.neutts import NeuTTSAir

# === Base paths ===
# Everything is resolved relative to the directory the process was started in.
BASE_PATH = os.getcwd()
HISTORY_PATH = os.path.join(BASE_PATH, "history")
SAMPLES_PATH = os.path.join(BASE_PATH, "neutts-air", "samples")

# Create the history folder up front; a no-op if it already exists.
os.makedirs(HISTORY_PATH, exist_ok=True)

# === Default values (reference clip, its text, and the text to generate) ===
DEFAULT_REF_PATH = os.path.join(SAMPLES_PATH, "dave.wav")
DEFAULT_REF_TEXT = "So I'm live on radio..."
DEFAULT_GEN_TEXT = "My name is Dave, and um, I'm from London."

# === Initialise NeuTTS-Air with both backbone and codec on the CPU ===
tts = NeuTTSAir(
    backbone_repo="neuphonic/neutts-air",
    codec_repo="neuphonic/neucodec",
    backbone_device="cpu",
    codec_device="cpu",
)

# === Main processing function ===
def infer(ref_text, ref_audio_path, gen_text):
    """Synthesise ``gen_text`` from a reference clip and archive the result.

    The reference audio is encoded with ``tts.encode_reference``, speech is
    generated with ``tts.infer``, and the output is written to
    ``HISTORY_PATH`` as ``tts_<timestamp>.wav`` next to a
    ``<timestamp>.json`` metadata file describing the request.

    Args:
        ref_text: Text passed to ``tts.infer`` together with the reference
            codes.
        ref_audio_path: Filesystem path of the reference audio. Gradio passes
            ``None`` when the audio input is empty.
        gen_text: Text to synthesise.

    Returns:
        A ``(sr, wav, status)`` tuple: the sample rate used when writing the
        file, the generated waveform as returned by ``tts.infer``, and a
        human-readable status message.

    Raises:
        gr.Error: If no reference audio was supplied or ``gen_text`` is blank.
    """
    # Fail early with a message the UI can display instead of an opaque
    # TypeError from os.path.basename(None) or from inside the encoder.
    if not ref_audio_path:
        raise gr.Error("Vui lòng cung cấp file âm thanh tham chiếu.")
    if not gen_text or not gen_text.strip():
        raise gr.Error("Vui lòng nhập văn bản cần tạo.")

    gr.Info("Bắt đầu xử lý (CPU mode)...")

    # Encode the reference clip.
    ref_codes = tts.encode_reference(ref_audio_path)

    # Generate the new audio.
    wav = tts.infer(gen_text, ref_codes, ref_text)
    sr = 24000  # hard-coded rate used for both the WAV file and the return value

    # === Save into the history folder ===
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    fname = f"tts_{timestamp}.wav"
    fpath = os.path.join(HISTORY_PATH, fname)

    sf.write(fpath, wav, sr)

    # Metadata that load_history() reads back to build the listing.
    meta = {
        "timestamp": timestamp,
        "ref_text": ref_text,
        "gen_text": gen_text,
        "ref_audio": os.path.basename(ref_audio_path),
        "output_audio": fname,
    }

    meta_path = os.path.join(HISTORY_PATH, f"{timestamp}.json")
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(meta, f, ensure_ascii=False, indent=2)

    gr.Info(f"Đã lưu file: {fpath}")
    return sr, wav, f"Lưu thành công: {fname}"

# === Reload history ===
def load_history():
    """Build an HTML listing of previously generated clips, newest first.

    Every ``*.json`` file in ``HISTORY_PATH`` is read as a metadata record;
    a record is listed only if the WAV file it names still exists in the
    same folder. Metadata files that cannot be read, are not valid JSON, or
    lack the expected keys are skipped so that one bad file does not break
    the whole listing.

    Returns:
        An HTML string with one ``<li>`` per clip (timestamp, generated text
        and a download link), or a plain message when there is no history.
    """
    # Local import keeps this block self-contained.
    from html import escape

    entries = []
    for name in sorted(os.listdir(HISTORY_PATH)):
        if not name.endswith(".json"):
            continue
        try:
            with open(os.path.join(HISTORY_PATH, name), "r", encoding="utf-8") as f:
                data = json.load(f)
            stamp = data["timestamp"]
            text = data["gen_text"]
            wav_path = os.path.join(HISTORY_PATH, data["output_audio"])
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable, malformed or incomplete metadata: skip this entry.
            continue
        if os.path.exists(wav_path):
            entries.append((stamp, text, wav_path))

    if not entries:
        return "Chưa có lịch sử nào."

    # Escape everything interpolated into the markup: the generated text is
    # user input, and paths may contain quotes that would break attributes.
    rows = [
        "<li><b>{t}</b>: {text} - <a href='file/{path}' download='{rel}'>Tải</a></li>".format(
            t=escape(str(stamp)),
            text=escape(str(text)),
            path=escape(wav_path),
            rel=escape(os.path.basename(wav_path)),
        )
        for stamp, text, wav_path in reversed(entries)
    ]
    return "<h4>Lịch sử đã tạo:</h4><ul>" + "".join(rows) + "</ul>"

# === Gradio interface ===
def _infer_for_ui(ref_text_value, ref_audio_path, gen_text_value):
    """Adapt ``infer``'s flat ``(sr, wav, status)`` result to the two UI outputs.

    The numpy-typed Audio output takes a single ``(sample_rate, data)`` tuple,
    so the first two values are packed together and the status message is
    routed to the textbox.
    """
    sr, wav, status = infer(ref_text_value, ref_audio_path, gen_text_value)
    return (sr, wav), status


with gr.Blocks(title="NeuTTS-Air☁️ CPU Mode + Auto History") as demo:
    gr.Markdown("## 🌀 NeuTTS-Air (CPU Mode) — Lưu tự động & tải lại lịch sử")

    with gr.Row():
        ref_text = gr.Textbox(label="Reference Text", value=DEFAULT_REF_TEXT)
        ref_audio = gr.Audio(type="filepath", label="Reference Audio", value=DEFAULT_REF_PATH)
        gen_text = gr.Textbox(label="Text to Generate", value=DEFAULT_GEN_TEXT)

    output_audio = gr.Audio(type="numpy", label="Generated Speech")
    info_text = gr.Textbox(label="Kết quả / Trạng thái")

    gen_btn = gr.Button("🎤 Generate & Save")
    # One return value per output component: (sr, wav) -> audio, status -> textbox.
    # Previously three values were spread over [audio, textbox, textbox], which
    # sent the bare sample rate to the Audio component.
    gen_btn.click(
        _infer_for_ui,
        inputs=[ref_text, ref_audio, gen_text],
        outputs=[output_audio, info_text],
    )

    hist_html = gr.HTML()
    reload_btn = gr.Button("🔄 Tải lại lịch sử")
    reload_btn.click(load_history, outputs=hist_html)

    # Populate the history as soon as the page is opened.
    demo.load(load_history, outputs=hist_html)

if __name__ == "__main__":
    print("Chạy nền NeuTTS-Air CPU Mode (có lưu lịch sử)...")
    # allowed_paths lets Gradio serve files from the samples and history folders.
    demo.launch(allowed_paths=[SAMPLES_PATH, HISTORY_PATH], inbrowser=True)