shaun3141 committed on
Commit 1264f26 · 1 Parent(s): c41a0cd

Add configurable augmentation settings in UI and persistent logging


- Add UI controls for speed augmentation (min/max/count) and SpecAugment parameters
- Make augmentation settings configurable before training starts
- Implement persistent logging that survives space restarts
- Add log download functionality in training UI
- Logs are saved to a persistent location and can be downloaded
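
As a usage sketch (not part of the commit itself): the expanded signature of run_whisper_training_progress introduced here (see the whisper_trainer.py diff below) can also be called directly, e.g. for a headless run. The argument names come from the diff; the values are purely illustrative.

    from training.whisper_trainer import run_whisper_training_progress

    # Illustrative values only; the UI sliders expose the same parameters.
    message, metrics = run_whisper_training_progress(
        epochs=3,
        batch_size=4,
        learning_rate=3e-5,
        speed_aug_enabled=True,
        speed_factor_min=0.9,
        speed_factor_max=1.1,
        speed_factor_count=3,
        specaug_enabled=True,
        specaug_time_mask=27,
        specaug_freq_mask=10,
        specaug_time_warp=True,
        specaug_warp_param=40,
    )
    print(message)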

Files changed (3)
  1. training/whisper_trainer.py +89 -22
  2. ui/interface.py +71 -4
  3. utils/logging.py +116 -0
training/whisper_trainer.py CHANGED
@@ -27,6 +27,7 @@ from training.augmentation import (
     get_deterministic_speed_factor_from_id,
     expand_dataset_with_speed_augmentation,
 )
+from utils.logging import PersistentLogger, get_latest_log_file, get_log_directory

 # Disable dataset caching to save disk space
 disable_caching()
@@ -199,7 +200,19 @@ def get_cache_key(dataset_name: str, model_name: str, split: str, seed: int) ->
     return hashlib.md5(cache_string.encode()).hexdigest()


-def prepare_whisper_dataset(dataset, processor, dataset_name: str = None, model_name: str = None, split: str = None, use_cache: bool = True):
+def prepare_whisper_dataset(
+    dataset,
+    processor,
+    dataset_name: str = None,
+    model_name: str = None,
+    split: str = None,
+    use_cache: bool = True,
+    specaug_enabled: bool = True,
+    specaug_time_mask: int = 27,
+    specaug_freq_mask: int = 10,
+    specaug_time_warp: bool = True,
+    specaug_warp_param: int = 40,
+):
     """
     Prepare dataset for Whisper training using Hugging Face Datasets.
     Supports caching to avoid reprocessing.
@@ -300,10 +313,16 @@ def prepare_whisper_dataset(dataset, processor, dataset_name: str = None, model_
             feat = feat[0:1]

         # Apply spectrogram augmentations (SpecAugment, time warping) to features (only during training)
-        if is_training:
+        if is_training and specaug_enabled:
             # feat is [1, n_mels, seq_len], remove batch dim for augmentation: [n_mels, seq_len]
             feat_2d = feat[0]  # Remove batch dimension
-            feat_2d = apply_spectrogram_augmentations(feat_2d, apply_time_warp=True)
+            feat_2d = apply_spectrogram_augmentations(
+                feat_2d,
+                time_mask_param=specaug_time_mask,
+                freq_mask_param=specaug_freq_mask,
+                apply_time_warp=specaug_time_warp,
+                warp_param=specaug_warp_param,
+            )
             # Add batch dimension back: [1, n_mels, seq_len]
             feat = np.expand_dims(feat_2d, axis=0)

@@ -400,15 +419,35 @@ def prepare_whisper_dataset(dataset, processor, dataset_name: str = None, model_
     return dataset


-def run_whisper_training_progress(epochs: int, batch_size: int, learning_rate: float, progress=None) -> Tuple[str, Optional[Dict[str, Any]]]:
+def run_whisper_training_progress(
+    epochs: int,
+    batch_size: int,
+    learning_rate: float,
+    speed_aug_enabled: bool = True,
+    speed_factor_min: float = 0.9,
+    speed_factor_max: float = 1.1,
+    speed_factor_count: int = 3,
+    specaug_enabled: bool = True,
+    specaug_time_mask: int = 27,
+    specaug_freq_mask: int = 10,
+    specaug_time_warp: bool = True,
+    specaug_warp_param: int = 40,
+    progress=None
+) -> Tuple[str, Optional[Dict[str, Any]]]:
     """
     Run Whisper training with progress tracking using HuggingFace transformers.
     Full integration with HuggingFace training features.
     """
+    # Set up persistent logging
+    logger = PersistentLogger("whisper_training")
+
     try:
         if progress:
             progress(0, desc="Preparing Whisper training...")

+        print(f"📝 Training logs will be saved to: {get_log_directory()}")
+        print(f"📝 Latest log file: {get_latest_log_file('whisper_training')}")
+
         # Check prerequisites
         if not os.path.exists(ENTITIES_PATH):
             raise FileNotFoundError(
@@ -442,22 +481,42 @@ def run_whisper_training_progress(epochs: int, batch_size: int, learning_rate: f
         train_full = train_full.cast_column("audio", Audio(sampling_rate=TARGET_SR))

         # Expand dataset with speed augmentation (proactive augmentation)
-        # Creates 3 versions of each sample: 0.9x, 1.0x, 1.1x speed
-        if progress:
-            progress(0.12, desc="Expanding dataset with speed augmentation...")
-
-        print("\n" + "=" * 70)
-        print("EXPANDING DATASET WITH SPEED AUGMENTATION")
-        print("=" * 70)
-        train_full = expand_dataset_with_speed_augmentation(
-            train_full,
-            speed_factors=[0.9, 1.0, 1.1],
-            id_column="id",
-            audio_column="audio",
-            transcription_column="transcription",
-            target_sr=TARGET_SR,
-        )
-        print("=" * 70 + "\n")
+        # Creates multiple versions of each sample based on speed factors
+        if speed_aug_enabled:
+            if progress:
+                progress(0.12, desc="Expanding dataset with speed augmentation...")
+
+            # Generate speed factors from min/max/count
+            if speed_factor_count == 1:
+                speed_factors = [1.0]
+            elif speed_factor_count == 2:
+                speed_factors = [speed_factor_min, speed_factor_max]
+            else:
+                # Generate evenly spaced factors including min, max, and intermediate values
+                speed_factors = [
+                    speed_factor_min + (speed_factor_max - speed_factor_min) * i / (speed_factor_count - 1)
+                    for i in range(speed_factor_count)
+                ]
+                # Ensure 1.0 is included if it's within range
+                if speed_factor_min <= 1.0 <= speed_factor_max:
+                    speed_factors.append(1.0)
+                speed_factors = sorted(set(speed_factors))  # Remove duplicates and sort
+
+            print("\n" + "=" * 70)
+            print("EXPANDING DATASET WITH SPEED AUGMENTATION")
+            print("=" * 70)
+            print(f"Speed factors: {speed_factors}")
+            train_full = expand_dataset_with_speed_augmentation(
+                train_full,
+                speed_factors=speed_factors,
+                id_column="id",
+                audio_column="audio",
+                transcription_column="transcription",
+                target_sr=TARGET_SR,
+            )
+            print("=" * 70 + "\n")
+        else:
+            print("⚠ Speed augmentation disabled - using original dataset size")

         # Create train/val split AFTER expansion
         if progress:
@@ -524,7 +583,12 @@ def run_whisper_training_progress(epochs: int, batch_size: int, learning_rate: f
             dataset_name=HF_DATASET_NAME,
             model_name=WHISPER_MODEL_NAME,
             split="train",
-            use_cache=True
+            use_cache=True,
+            specaug_enabled=specaug_enabled,
+            specaug_time_mask=specaug_time_mask,
+            specaug_freq_mask=specaug_freq_mask,
+            specaug_time_warp=specaug_time_warp,
+            specaug_warp_param=specaug_warp_param,
         )

         if progress:
@@ -536,7 +600,8 @@ def run_whisper_training_progress(epochs: int, batch_size: int, learning_rate: f
             dataset_name=HF_DATASET_NAME,
             model_name=WHISPER_MODEL_NAME,
             split="val",
-            use_cache=True
+            use_cache=True,
+            specaug_enabled=False,  # No augmentation for validation
         )

         # Training arguments
@@ -649,12 +714,14 @@ def run_whisper_training_progress(epochs: int, batch_size: int, learning_rate: f
         The model is now ready for inference!
         """

+        logger.close()
         return success_msg, final_metrics

     except Exception as e:
         import traceback
         error_msg = f"❌ Error during Whisper training: {str(e)}\n\n{traceback.format_exc()}"
         print(error_msg)
+        logger.close()
         if progress:
             progress(1.0, desc="Error!")
         return error_msg, None
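
For a quick sense of what the new min/max/count controls produce, the factor-generation logic from the hunk above can be pulled out as a standalone helper. The name generate_speed_factors is hypothetical and the body mirrors the diff as reconstructed here, so treat it as a sketch rather than the exact module code:

    def generate_speed_factors(fmin: float, fmax: float, count: int) -> list:
        """Evenly spaced speed factors between fmin and fmax, folding in 1.0 when count >= 3 and 1.0 is in range."""
        if count == 1:
            factors = [1.0]
        elif count == 2:
            factors = [fmin, fmax]
        else:
            factors = [fmin + (fmax - fmin) * i / (count - 1) for i in range(count)]
            if fmin <= 1.0 <= fmax:
                factors.append(1.0)
        return sorted(set(factors))  # Remove duplicates and sort

    print(generate_speed_factors(0.9, 1.1, 3))  # [0.9, 1.0, 1.1]
    print(generate_speed_factors(0.8, 1.2, 4))  # roughly [0.8, 0.93, 1.0, 1.07, 1.2], i.e. five variants

Note that with count >= 3 the number of variants can exceed the requested count by one whenever 1.0 falls strictly between two of the evenly spaced factors.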
ui/interface.py CHANGED
@@ -1,6 +1,7 @@
 """Gradio UI interface for Caribbean Voices OWSM platform."""
 import gradio as gr
 import time
+import os
 from pathlib import Path
 from datetime import datetime

@@ -12,6 +13,7 @@ from training.whisper_trainer import run_whisper_training_progress
 from models.inference import transcribe_audio, run_inference_owsm
 from models.loader import get_available_models
 from data.loader import load_data_from_hf_dataset
+from utils.logging import get_latest_log_file, get_all_log_files, get_log_directory


 def create_interface():
@@ -218,19 +220,84 @@ def create_interface():

         with gr.Row():
             with gr.Column():
+                gr.Markdown("#### Training Hyperparameters")
                 whisper_train_epochs = gr.Slider(1, 10, value=3, step=1, label="Epochs")
                 whisper_train_batch_size = gr.Slider(1, 32, value=4, step=1, label="Batch Size")
                 whisper_train_lr = gr.Slider(1e-6, 1e-3, value=3e-5, step=1e-6, label="Learning Rate")
-                whisper_train_btn = gr.Button("Start Whisper Training", variant="primary")
+
+                gr.Markdown("#### Speed Augmentation")
+                gr.Markdown("Speed factors for dataset expansion (creates multiple versions of each sample)")
+                speed_aug_enabled = gr.Checkbox(value=True, label="Enable Speed Augmentation")
+                speed_factor_min = gr.Slider(0.8, 1.0, value=0.9, step=0.05, label="Min Speed Factor")
+                speed_factor_max = gr.Slider(1.0, 1.2, value=1.1, step=0.05, label="Max Speed Factor")
+                speed_factor_count = gr.Slider(2, 5, value=3, step=1, label="Number of Speed Variants")
+
+                gr.Markdown("#### SpecAugment Parameters")
+                gr.Markdown("Spectrogram augmentation settings (applied during training)")
+                specaug_enabled = gr.Checkbox(value=True, label="Enable SpecAugment")
+                specaug_time_mask = gr.Slider(0, 50, value=27, step=1, label="Time Mask Parameter")
+                specaug_freq_mask = gr.Slider(0, 20, value=10, step=1, label="Frequency Mask Parameter")
+                specaug_time_warp = gr.Checkbox(value=True, label="Enable Time Warping")
+                specaug_warp_param = gr.Slider(0, 80, value=40, step=5, label="Time Warp Parameter")
+
+                whisper_train_btn = gr.Button("Start Whisper Training", variant="primary", size="lg")

             with gr.Column():
                 whisper_train_output = gr.Markdown()
                 whisper_train_metrics = gr.JSON(label="Training Metrics")
+
+                gr.Markdown("#### Training Logs")
+                log_info = gr.Markdown(f"Log directory: `{get_log_directory()}`")
+                latest_log_file = gr.File(
+                    label="Download Latest Training Log",
+                    visible=False
+                )
+
+                def update_log_download():
+                    latest = get_latest_log_file("whisper_training")
+                    if latest and os.path.exists(latest):
+                        return gr.File(value=latest, visible=True)
+                    return gr.File(visible=False)
+
+                refresh_log_btn = gr.Button("🔄 Refresh Logs", variant="secondary", size="sm")
+                refresh_log_btn.click(
+                    fn=update_log_download,
+                    outputs=[latest_log_file]
+                )
+
+        def run_training_with_log_refresh(
+            epochs, batch_size, lr,
+            speed_aug_enabled, speed_factor_min, speed_factor_max, speed_factor_count,
+            specaug_enabled, specaug_time_mask, specaug_freq_mask, specaug_time_warp, specaug_warp_param,
+            progress=gr.Progress()
+        ):
+            """Run training and refresh log download after completion."""
+            result = run_whisper_training_progress(
+                epochs, batch_size, lr,
+                speed_aug_enabled, speed_factor_min, speed_factor_max, speed_factor_count,
+                specaug_enabled, specaug_time_mask, specaug_freq_mask, specaug_time_warp, specaug_warp_param,
+                progress
+            )
+            latest_log = update_log_download()
+            return result[0], result[1], latest_log

         whisper_train_btn.click(
-            fn=run_whisper_training_progress,
-            inputs=[whisper_train_epochs, whisper_train_batch_size, whisper_train_lr],
-            outputs=[whisper_train_output, whisper_train_metrics]
+            fn=run_training_with_log_refresh,
+            inputs=[
+                whisper_train_epochs,
+                whisper_train_batch_size,
+                whisper_train_lr,
+                speed_aug_enabled,
+                speed_factor_min,
+                speed_factor_max,
+                speed_factor_count,
+                specaug_enabled,
+                specaug_time_mask,
+                specaug_freq_mask,
+                specaug_time_warp,
+                specaug_warp_param,
+            ],
+            outputs=[whisper_train_output, whisper_train_metrics, latest_log_file]
         )

     # Tab 5: Inference
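
The log-download widgets above rely on a common Gradio pattern: the callback returns a component instance (here gr.File(...)) and Gradio applies it as an in-place update, which is how the download link stays hidden until a log file actually exists (this works in Gradio 4.x; older 3.x versions used gr.update / gr.File.update instead). A minimal, self-contained sketch of just that pattern, using a hypothetical path:

    import os
    import gradio as gr

    LOG_PATH = "/tmp/example.log"  # hypothetical file, for illustration only

    def refresh_download():
        # Reveal the File component only when the file exists on disk.
        if os.path.exists(LOG_PATH):
            return gr.File(value=LOG_PATH, visible=True)
        return gr.File(visible=False)

    with gr.Blocks() as demo:
        download = gr.File(label="Download log", visible=False)
        gr.Button("Refresh").click(fn=refresh_download, outputs=[download])

    # demo.launch()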
utils/logging.py ADDED
@@ -0,0 +1,116 @@
+"""
+Persistent logging utility for HuggingFace Spaces.
+Logs are written to files that persist across space restarts.
+"""
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+# Try to use persistent storage if available
+# HF Spaces may have /tmp or other persistent locations
+PERSISTENT_LOG_DIR = None
+
+# Try common persistent locations
+for log_dir in [
+    "/tmp/logs",  # Common temp location (may persist)
+    "/persistent/logs",  # Some HF Spaces have this
+    os.path.join(os.path.expanduser("~"), ".cache", "caribbean-voices", "logs"),  # User cache
+]:
+    try:
+        Path(log_dir).mkdir(parents=True, exist_ok=True)
+        # Test write
+        test_file = os.path.join(log_dir, ".test_write")
+        with open(test_file, "w") as f:
+            f.write("test")
+        os.remove(test_file)
+        PERSISTENT_LOG_DIR = log_dir
+        break
+    except (PermissionError, OSError):
+        continue
+
+# Fallback to current directory if no persistent location found
+if PERSISTENT_LOG_DIR is None:
+    PERSISTENT_LOG_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs")
+    Path(PERSISTENT_LOG_DIR).mkdir(parents=True, exist_ok=True)
+
+print(f"📝 Log directory: {PERSISTENT_LOG_DIR}")
+
+
+class TeeOutput:
+    """Tee output to both stdout and a file."""
+
+    def __init__(self, file_handle, original_stdout):
+        self.file = file_handle
+        self.stdout = original_stdout
+
+    def write(self, message):
+        self.stdout.write(message)
+        self.file.write(message)
+        self.file.flush()
+
+    def flush(self):
+        self.stdout.flush()
+        self.file.flush()
+
+
+class PersistentLogger:
+    """Logger that redirects stdout/stderr to both console and persistent log files."""
+
+    def __init__(self, log_name: str = "training"):
+        self.log_name = log_name
+        self.log_file = os.path.join(PERSISTENT_LOG_DIR, f"{log_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
+        self.log_handle = open(self.log_file, "a", buffering=1)  # Line buffered
+
+        # Save original stdout/stderr
+        self.original_stdout = sys.stdout
+        self.original_stderr = sys.stderr
+
+        # Create tee outputs
+        self.tee_stdout = TeeOutput(self.log_handle, self.original_stdout)
+        self.tee_stderr = TeeOutput(self.log_handle, self.original_stderr)
+
+        # Redirect stdout/stderr
+        sys.stdout = self.tee_stdout
+        sys.stderr = self.tee_stderr
+
+        print(f"📝 Logging to: {self.log_file}")
+
+    def close(self):
+        """Close the log file and restore original stdout/stderr."""
+        # Restore original stdout/stderr
+        sys.stdout = self.original_stdout
+        sys.stderr = self.original_stderr
+
+        # Close log file
+        if self.log_handle:
+            self.log_handle.close()
+            self.log_handle = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+
+def get_latest_log_file(log_name: str = "training") -> Optional[str]:
+    """Get the path to the latest log file for a given log name."""
+    log_files = list(Path(PERSISTENT_LOG_DIR).glob(f"{log_name}_*.log"))
+    if not log_files:
+        return None
+    # Sort by modification time, return most recent
+    latest = max(log_files, key=lambda p: p.stat().st_mtime)
+    return str(latest)
+
+
+def get_all_log_files(log_name: str = "training") -> list:
+    """Get all log files for a given log name, sorted by modification time (newest first)."""
+    log_files = list(Path(PERSISTENT_LOG_DIR).glob(f"{log_name}_*.log"))
+    return sorted(log_files, key=lambda p: p.stat().st_mtime, reverse=True)
+
+
+def get_log_directory() -> str:
+    """Get the log directory path."""
+    return PERSISTENT_LOG_DIR
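
Because PersistentLogger implements __enter__/__exit__, it can also be used as a context manager, which restores stdout/stderr even if the wrapped code raises. A short usage sketch (the log name is arbitrary):

    from utils.logging import PersistentLogger, get_latest_log_file

    with PersistentLogger("whisper_training"):
        # Everything printed in this block is mirrored to the console and to the log file.
        print("starting a training run...")

    # After the block exits, stdout/stderr are restored and the file handle is closed.
    print("latest log:", get_latest_log_file("whisper_training"))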