Spaces:
Paused
Paused
add audio file input
Browse files
app.py
CHANGED
|
@@ -129,12 +129,14 @@ def transcribe_audio(audio_input):
|
|
| 129 |
|
| 130 |
# ------------------- Main Processing Function -------------------
|
| 131 |
@spaces.GPU # Decorate with ZeroGPU to run on GPU when processing
|
| 132 |
-
def classify_intent(mode,
|
| 133 |
-
# Determine input based on
|
| 134 |
-
if mode == "Microphone" and
|
| 135 |
-
transcription = transcribe_audio(
|
| 136 |
elif mode == "Text" and text_input:
|
| 137 |
transcription = text_input
|
|
|
|
|
|
|
| 138 |
else:
|
| 139 |
return "請提供語音或文字輸入", "", None
|
| 140 |
|
|
@@ -151,28 +153,31 @@ def classify_intent(mode, audio_input, text_input, model_choice):
|
|
| 151 |
# ------------------- Gradio Blocks Interface Setup -------------------
|
| 152 |
with gr.Blocks() as demo:
|
| 153 |
gr.Markdown("## 🍽️ 餐廳訂位意圖識別")
|
| 154 |
-
gr.Markdown("
|
| 155 |
|
| 156 |
with gr.Row():
|
| 157 |
-
# Input Mode Selector
|
| 158 |
-
mode = gr.Radio(choices=["Microphone", "Text"], label="選擇輸入模式", value="Microphone")
|
| 159 |
|
| 160 |
with gr.Row():
|
| 161 |
-
#
|
| 162 |
-
|
| 163 |
-
audio_input = gr.Audio(sources=["microphone"], type="numpy", label="語音輸入 (點擊錄音)")
|
| 164 |
text_input = gr.Textbox(lines=2, placeholder="請輸入文字", label="文字輸入")
|
|
|
|
| 165 |
|
| 166 |
-
# Initially, only the microphone input is visible.
|
| 167 |
text_input.visible = False
|
|
|
|
| 168 |
|
| 169 |
-
# Change event for mode selection to toggle visibility.
|
| 170 |
def update_visibility(selected_mode):
|
| 171 |
if selected_mode == "Microphone":
|
| 172 |
-
return gr.update(visible=True), gr.update(visible=False)
|
| 173 |
-
|
| 174 |
-
return gr.update(visible=False), gr.update(visible=True)
|
| 175 |
-
|
|
|
|
|
|
|
| 176 |
|
| 177 |
with gr.Row():
|
| 178 |
model_dropdown = gr.Dropdown(choices=list(available_models.keys()),
|
|
@@ -188,9 +193,9 @@ with gr.Blocks() as demo:
|
|
| 188 |
with gr.Row():
|
| 189 |
tts_output = gr.Audio(type="numpy", label="TTS 語音輸出")
|
| 190 |
|
| 191 |
-
# Button event triggers the classification.
|
| 192 |
classify_btn.click(fn=classify_intent,
|
| 193 |
-
inputs=[mode,
|
| 194 |
outputs=[transcription_output, classification_output, tts_output])
|
| 195 |
|
| 196 |
demo.launch()
|
|
|
|
| 129 |
|
| 130 |
# ------------------- Main Processing Function -------------------
|
| 131 |
@spaces.GPU # Decorate with ZeroGPU to run on GPU when processing
|
| 132 |
+
def classify_intent(mode, mic_audio, text_input, file_audio, model_choice):
|
| 133 |
+
# Determine input based on mode.
|
| 134 |
+
if mode == "Microphone" and mic_audio is not None:
|
| 135 |
+
transcription = transcribe_audio(mic_audio)
|
| 136 |
elif mode == "Text" and text_input:
|
| 137 |
transcription = text_input
|
| 138 |
+
elif mode == "File" and file_audio is not None:
|
| 139 |
+
transcription = transcribe_audio(file_audio)
|
| 140 |
else:
|
| 141 |
return "請提供語音或文字輸入", "", None
|
| 142 |
|
|
|
|
| 153 |
# ------------------- Gradio Blocks Interface Setup -------------------
|
| 154 |
with gr.Blocks() as demo:
|
| 155 |
gr.Markdown("## 🍽️ 餐廳訂位意圖識別")
|
| 156 |
+
gr.Markdown("錄音、上傳語音檔案或輸入文字,自動判斷是否具有訂位意圖。")
|
| 157 |
|
| 158 |
with gr.Row():
|
| 159 |
+
# Input Mode Selector with three options.
|
| 160 |
+
mode = gr.Radio(choices=["Microphone", "Text", "File"], label="選擇輸入模式", value="Microphone")
|
| 161 |
|
| 162 |
with gr.Row():
|
| 163 |
+
# Three input components: one for microphone, one for file upload, and one for text.
|
| 164 |
+
mic_audio = gr.Audio(sources=["microphone"], type="numpy", label="語音輸入 (點擊錄音)")
|
|
|
|
| 165 |
text_input = gr.Textbox(lines=2, placeholder="請輸入文字", label="文字輸入")
|
| 166 |
+
file_audio = gr.Audio(sources=["upload"], type="numpy", label="上傳語音檔案")
|
| 167 |
|
| 168 |
+
# Initially, only the microphone input is visible; hide text and file inputs.
|
| 169 |
text_input.visible = False
|
| 170 |
+
file_audio.visible = False
|
| 171 |
|
| 172 |
+
# Change event for mode selection to toggle visibility of the three inputs.
|
| 173 |
def update_visibility(selected_mode):
|
| 174 |
if selected_mode == "Microphone":
|
| 175 |
+
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
|
| 176 |
+
elif selected_mode == "Text":
|
| 177 |
+
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
| 178 |
+
else: # File
|
| 179 |
+
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
|
| 180 |
+
mode.change(fn=update_visibility, inputs=mode, outputs=[mic_audio, text_input, file_audio])
|
| 181 |
|
| 182 |
with gr.Row():
|
| 183 |
model_dropdown = gr.Dropdown(choices=list(available_models.keys()),
|
|
|
|
| 193 |
with gr.Row():
|
| 194 |
tts_output = gr.Audio(type="numpy", label="TTS 語音輸出")
|
| 195 |
|
| 196 |
+
# Button event triggers the classification. Note that we now pass five inputs: mode, microphone audio, text, uploaded file audio, and model choice.
|
| 197 |
classify_btn.click(fn=classify_intent,
|
| 198 |
+
inputs=[mode, mic_audio, text_input, file_audio, model_dropdown],
|
| 199 |
outputs=[transcription_output, classification_output, tts_output])
|
| 200 |
|
| 201 |
demo.launch()
|