Spaces:

RSHVR
/

Command_RTC

Sleeping

App Files Community

RSHVR committed on Mar 30

Commit

eb62218

verified ·

1 Parent(s): 3562379

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -57

app.py CHANGED Viewed

@@ -2,11 +2,14 @@ import os
 import gradio as gr
 from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
-# Import your modules
-import stt
-import tts
 import cohereAPI
 # Environment variables
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 system_message = "You respond concisely, in about 15 words or less"
@@ -14,17 +17,21 @@ system_message = "You respond concisely, in about 15 words or less"
 # Initialize conversation history
 conversation_history = []
-async def response(audio_file_path):
     global conversation_history
-    # Convert speech to text
-    user_message = await stt.transcribe_audio(audio_file_path)
-    # Add user message to chat history
-    yield AdditionalOutputs({"transcript": user_message, "role": "user"})
     # Send text to Cohere API
-    response_text, updated_history = await cohereAPI.send_message(
         system_message,
         user_message,
         conversation_history,
@@ -34,63 +41,29 @@ async def response(audio_file_path):
     # Update conversation history
     conversation_history = updated_history
-    # Generate speech from text
-    _, (sample_rate, speech_array) = await tts.generate_speech(
-        response_text,
-        voice_preset="random"
-    )
-    # Add assistant message to chat history
-    yield AdditionalOutputs({"transcript": response_text, "role": "assistant"})
-    # Return audio response
-    yield (sample_rate, speech_array)
-# Create FastRTC stream with ReplyOnPause
 stream = Stream(
-    handler=ReplyOnPause(response),
     modality="audio",
     mode="send-receive",
-    additional_outputs=[
-        {"name": "transcript", "type": "text"},
-        {"name": "role", "type": "text"}
-    ]
 )
-# Create Gradio interface that uses the FastRTC stream
-with gr.Blocks(title="Voice Chat Assistant with ReplyOnPause") as demo:
-    gr.Markdown("# Voice Chat Assistant")
-    gr.Markdown("Speak and pause to trigger a response.")
-    chatbot = gr.Chatbot(label="Conversation")
-    # Mount the FastRTC UI
-    stream_ui = stream.ui(label="Speak")
-    # Handle additional outputs from FastRTC to update the chatbot
-    def update_chat(transcript, role, history):
-        if transcript and role:
-            if role == "user":
-                history.append((transcript, None))
-            elif role == "assistant":
-                if history and history[-1][1] is None:
-                    history[-1] = (history[-1][0], transcript)
-                else:
-                    history.append((None, transcript))
-        return history
-    stream_ui.change(
-        update_chat,
-        inputs=[stream_ui.output_components[0], stream_ui.output_components[1], chatbot],
-        outputs=[chatbot]
-    )
-    clear_btn = gr.Button("Clear Conversation")
-    clear_btn.click(lambda: [], outputs=[chatbot])
-# Launch the app
 if __name__ == "__main__":
-    demo.queue().launch(
         server_name="0.0.0.0",
         share=False,
         show_error=True

 import gradio as gr
 from fastrtc import Stream, ReplyOnPause, AdditionalOutputs
+# Import your custom models
+from tts import tortoise_tts, TortoiseOptions
+from stt import whisper_stt
 import cohereAPI
+# Import HumAware-VAD
+from humaware_vad import HumAwareVADModel
 # Environment variables
 COHERE_API_KEY = os.getenv("COHERE_API_KEY")
 system_message = "You respond concisely, in about 15 words or less"
 # Initialize conversation history
 conversation_history = []
+# Initialize the HumAware-VAD model
+vad_model = HumAwareVADModel()
+# Create a handler function that uses both your custom models
+def response(audio):
     global conversation_history
+    # Convert speech to text using your Whisper model
+    user_message = whisper_stt.stt(audio)
+    # Yield the transcription
+    yield AdditionalOutputs(user_message)
     # Send text to Cohere API
+    response_text, updated_history = cohereAPI.send_message(
         system_message,
         user_message,
         conversation_history,
     # Update conversation history
     conversation_history = updated_history
+    # Print the response for logging
+    print(f"Assistant: {response_text}")
+    # Use your TTS model to generate audio
+    tts_options = TortoiseOptions(voice_preset="random")
+    # Stream the audio response in chunks
+    for chunk in tortoise_tts.stream_tts_sync(response_text, tts_options):
+        yield chunk
+# Create the FastRTC stream with HumAware-VAD for better pause detection
 stream = Stream(
+    handler=ReplyOnPause(response, model=vad_model),  # Use HumAware-VAD model
     modality="audio",
     mode="send-receive",
+    additional_outputs=[gr.Textbox(label="Transcription")],
+    additional_outputs_handler=lambda old, new: new if old is None else f"{old}\nUser: {new}"
 )
+# Launch the Gradio UI
 if __name__ == "__main__":
+    # Update your requirements.txt to include humaware-vad
+    stream.ui.launch(
         server_name="0.0.0.0",
         share=False,
         show_error=True