import gradio as gr
import pandas as pd
import numpy as np
import re
from playwright.sync_api import sync_playwright
import time
import os
import subprocess
import sys
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from windrose import WindroseAxes
from datetime import datetime

# Install Playwright browsers on startup
def install_playwright_browsers():
    try:
        if not os.path.exists('/home/user/.cache/ms-playwright'):
            print("Installing Playwright browsers...")
            subprocess.run(
                [sys.executable, "-m", "playwright", "install", "chromium"],
                check=True,
                capture_output=True,
                text=True
            )
            print("Playwright browsers installed successfully")
    except Exception as e:
        print(f"Error installing browsers: {e}")

# Install browsers when the module loads
install_playwright_browsers()

def scrape_weather_data(site_id, hours=720):
    """Scrape weather data from weather.gov timeseries"""
    url = f"https://www.weather.gov/wrh/timeseries?site={site_id}&hours={hours}&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=full&font=12&plot="
    
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-dev-shm-usage']
            )
            
            context = browser.new_context(
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
            )
            
            page = context.new_page()
            response = page.goto(url, timeout=60000)
            print(f"Response status: {response.status}")

            # Wait for table to load with longer timeout
            page.wait_for_selector('table', timeout=45000)
            time.sleep(8)  # Give more time for dynamic content

            print("Extracting data...")
            content = page.evaluate('''() => {
                const getTextContent = () => {
                    const rows = [];
                    const tables = document.getElementsByTagName('table');
                    console.log(`Found ${tables.length} tables`);

                    for (const table of tables) {
                        const text = table.textContent;
                        // Look for Date/Time or just Date as header
                        if (text.includes('Date/Time') || text.includes('Date')) {
                            const headerRow = Array.from(table.querySelectorAll('th'))
                                .map(th => th.textContent.trim());

                            console.log('Headers:', headerRow);

                            const dataRows = Array.from(table.querySelectorAll('tbody tr'))
                                .map(row => Array.from(row.querySelectorAll('td'))
                                    .map(td => td.textContent.trim()));

                            console.log(`Found ${dataRows.length} data rows`);

                            if (dataRows.length > 0) {
                                return {headers: headerRow, rows: dataRows};
                            }
                        }
                    }
                    return null;
                };

                return getTextContent();
            }''')
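            # Expected result shape (illustrative values, assuming extraction succeeded):
            #   {'headers': ['Date/Time', 'Temp', ...], 'rows': [['Dec 25, 3:53 pm', '28', ...], ...]}
            # or None if no matching table was found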

            print(f"Found {len(content['rows'] if content else [])} rows of data")
            browser.close()

            if not content or not content.get('rows'):
                raise ValueError(f"No data found for station {site_id}. Station may not exist or may not be reporting.")

            return content

    except Exception as e:
        error_msg = f"Error scraping data for {site_id}: {str(e)}"
        print(error_msg)
        raise ValueError(error_msg)

def parse_date(date_str):
    """Parse a 'Mon DD, H:MM am/pm' string into a datetime.

    The current year is appended because the table omits it, so data spanning
    a New Year boundary will stamp older rows with the wrong year.
    """
    try:
        current_year = datetime.now().year
        return pd.to_datetime(f"{date_str}, {current_year}", format="%b %d, %I:%M %p, %Y")
    except (ValueError, TypeError):
        return pd.NaT

def parse_weather_data(data):
    """Parse the weather data into a pandas DataFrame"""
    if not data or 'rows' not in data:
        raise ValueError("No valid weather data found")

    df = pd.DataFrame(data['rows'])

    # Different stations have different numbers of columns
    # Standard columns we expect (in order)
    expected_columns = ['datetime', 'temp', 'dew_point', 'humidity', 'wind_chill',
                       'wind_dir', 'wind_speed', 'snow_depth', 'snowfall_3hr',
                       'snowfall_6hr', 'snowfall_24hr', 'swe']

    # Get actual number of columns
    num_cols = len(df.columns)
    print(f"Data has {num_cols} columns")

    # Assign column names based on actual number of columns
    if num_cols <= len(expected_columns):
        df.columns = expected_columns[:num_cols]
        # Add missing columns with NaN values
        for col in expected_columns[num_cols:]:
            df[col] = np.nan
    else:
        # If more columns than expected, just use the first 12
        df = df.iloc[:, :12]
        df.columns = expected_columns
    
    numeric_cols = ['temp', 'dew_point', 'humidity', 'wind_chill', 'snow_depth',
                   'snowfall_3hr', 'snowfall_6hr', 'snowfall_24hr', 'swe']
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    
    def parse_wind(x):
        if pd.isna(x): return np.nan, np.nan
        match = re.search(r'(\d+)G(\d+)', str(x))
        if match:
            return float(match.group(1)), float(match.group(2))
        try:
            return float(x), np.nan
        except (ValueError, TypeError):
            return np.nan, np.nan
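    # e.g. parse_wind('10G20') -> (10.0, 20.0) for "10 mph gusting to 20";
    # parse_wind('7') -> (7.0, nan) when no gust is reported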
    
    wind_data = df['wind_speed'].apply(parse_wind)
    df['wind_speed'] = wind_data.apply(lambda x: x[0])
    df['wind_gust'] = wind_data.apply(lambda x: x[1])
    
    def parse_direction(direction):
        direction_map = {
            'N': 0, 'NNE': 22.5, 'NE': 45, 'ENE': 67.5,
            'E': 90, 'ESE': 112.5, 'SE': 135, 'SSE': 157.5,
            'S': 180, 'SSW': 202.5, 'SW': 225, 'WSW': 247.5,
            'W': 270, 'WNW': 292.5, 'NW': 315, 'NNW': 337.5
        }
        return direction_map.get(direction, np.nan)
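    # e.g. parse_direction('NNE') -> 22.5; calm, variable, or missing directions -> NaN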
    
    df['wind_dir_deg'] = df['wind_dir'].apply(parse_direction)
    
    df['datetime'] = df['datetime'].apply(parse_date)
    df['date'] = df['datetime'].dt.date
    
    return df

def calculate_total_new_snow(df):
    """
    Calculate total new snow by:
    1. Using ONLY the 3-hour snowfall amounts
    2. Using 9 AM as the daily reset point
    """
    # Sort by datetime to ensure correct calculation
    df = df.sort_values('datetime')

    # Create a copy of the dataframe with ONLY datetime and 3-hour snowfall
    snow_df = df[['datetime', 'snowfall_3hr']].copy()

    # Create a day group that starts at 9 AM instead of midnight
    snow_df['day_group'] = snow_df['datetime'].apply(
        lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date()
    )
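
    # Worked example of the 9 AM reset (hypothetical timestamps):
    #   Jan 5 08:00 -> grouped with Jan 4 (hour < 9, still the previous snow day)
    #   Jan 5 09:00 -> grouped with Jan 5 (hour >= 9, starts a new snow day)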

    # Calculate daily snow totals
    daily_totals = snow_df.groupby('day_group').apply(process_daily_snow)

    return daily_totals.sum()

def calculate_new_snow_from_depth(df):
    """
    Calculate new snow from changes in snow depth.
    Often more reliable than the 3-hour reports, which can be missing or inconsistent.
    Uses 9 AM as the daily reset point.
    """
    # Sort by datetime
    df = df.sort_values('datetime').copy()

    # Create day groups starting at 9 AM
    df['day_group'] = df['datetime'].apply(
        lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date()
    )

    daily_snow_change = []

    for day, group in df.groupby('day_group'):
        group = group.sort_values('datetime')
        valid_depths = group['snow_depth'].dropna()

        if len(valid_depths) > 0:
            # Get max depth increase for the day
            max_depth = valid_depths.max()
            min_depth = valid_depths.min()

            # New snow is the increase in depth (ignore decreases from settling/melting)
            new_snow = max(0, max_depth - min_depth)
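            # e.g. depths 30" -> 33" -> 32" in one day: max 33 - min 30 = 3" new snow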
            daily_snow_change.append(new_snow)

    return sum(daily_snow_change)

def calculate_daily_snow_from_depth(df):
    """Calculate daily new snow from depth changes, returns a dataframe"""
    df = df.sort_values('datetime').copy()

    df['day_group'] = df['datetime'].apply(
        lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date()
    )

    daily_data = []

    for day, group in df.groupby('day_group'):
        group = group.sort_values('datetime')
        valid_depths = group['snow_depth'].dropna()

        if len(valid_depths) > 0:
            max_depth = valid_depths.max()
            min_depth = valid_depths.min()
            new_snow = max(0, max_depth - min_depth)
            daily_data.append({'date': day, 'new_snow_from_depth': new_snow})

    return pd.DataFrame(daily_data)

def process_daily_snow(group):
    """Sum up ONLY the 3-hour snowfall amounts for each day period"""
    # Sort by time to ensure proper sequence
    group = group.sort_values('datetime')

    # Sum only the valid 3-hour amounts, treating NaN as 0
    valid_amounts = group['snowfall_3hr'].fillna(0)
    daily_total = valid_amounts.sum()

    return daily_total

def create_plots(df):
    """Create all weather plots including SWE estimates"""
    # Create figure with adjusted height and spacing
    fig = plt.figure(figsize=(20, 24))
    
    # Calculate height ratios for different plots
    height_ratios = [1, 1, 1, 1, 1]  # Equal height for all plots
    gs = GridSpec(5, 1, figure=fig, height_ratios=height_ratios)
    gs.update(hspace=0.4)  # Increase vertical spacing between plots
    
    # Temperature plot
    ax1 = fig.add_subplot(gs[0])
    ax1.plot(df['datetime'], df['temp'], label='Temperature', color='red')
    ax1.plot(df['datetime'], df['wind_chill'], label='Wind Chill', color='blue')
    ax1.set_title('Temperature and Wind Chill Over Time', pad=20)
    ax1.set_xlabel('Date')
    ax1.set_ylabel('Temperature (°F)')
    ax1.legend()
    ax1.grid(True)
    ax1.tick_params(axis='x', rotation=45)
    
    # Wind speed plot
    ax2 = fig.add_subplot(gs[1])
    ax2.plot(df['datetime'], df['wind_speed'], label='Wind Speed', color='blue')
    ax2.plot(df['datetime'], df['wind_gust'], label='Wind Gust', color='orange')
    ax2.set_title('Wind Speed and Gusts Over Time', pad=20)
    ax2.set_xlabel('Date')
    ax2.set_ylabel('Wind Speed (mph)')
    ax2.legend()
    ax2.grid(True)
    ax2.tick_params(axis='x', rotation=45)
    
    # Snow depth plot
    ax3 = fig.add_subplot(gs[2])
    ax3.plot(df['datetime'], df['snow_depth'], color='blue', label='Snow Depth')
    ax3.set_title('Snow Depth Over Time', pad=20)
    ax3.set_xlabel('Date')
    ax3.set_ylabel('Snow Depth (inches)')
    ax3.grid(True)
    ax3.tick_params(axis='x', rotation=45)
    
    # Daily new snow comparison plot - showing both methods
    ax4 = fig.add_subplot(gs[3])

    # Calculate from 3-hour reports
    snow_df = df[['datetime', 'snowfall_3hr']].copy()
    snow_df['day_group'] = snow_df['datetime'].apply(
        lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date()
    )
    daily_snow_3hr = snow_df.groupby('day_group').apply(process_daily_snow).reset_index()
    daily_snow_3hr.columns = ['date', 'new_snow_3hr']

    # Calculate from depth changes
    daily_snow_depth = calculate_daily_snow_from_depth(df)

    # Merge the two calculations
    daily_combined = pd.merge(daily_snow_3hr, daily_snow_depth, on='date', how='outer').fillna(0)

    # Create grouped bar chart
    x = range(len(daily_combined))
    width = 0.35

    bars1 = ax4.bar([i - width/2 for i in x], daily_combined['new_snow_3hr'],
                     width, label='3-Hour Reports', color='skyblue', alpha=0.8)
    bars2 = ax4.bar([i + width/2 for i in x], daily_combined['new_snow_from_depth'],
                     width, label='Snow Depth Change', color='darkblue', alpha=0.8)

    ax4.set_title('Daily New Snow - Two Methods (9 AM Reset)', pad=20, fontsize=12, fontweight='bold')
    ax4.set_xlabel('Date')
    ax4.set_ylabel('New Snow (inches)')
    ax4.set_xticks(x)
    ax4.set_xticklabels([str(d) for d in daily_combined['date']], rotation=45, ha='right')
    ax4.legend()
    ax4.grid(True, axis='y', linestyle='--', alpha=0.7)

    # Add value labels on bars
    for bars in [bars1, bars2]:
        for bar in bars:
            height = bar.get_height()
            if height > 0:
                ax4.text(bar.get_x() + bar.get_width()/2., height,
                        f'{height:.1f}"', ha='center', va='bottom', fontsize=8)
    
    # SWE bar plot
    ax5 = fig.add_subplot(gs[4])
    daily_swe = df.groupby('date')['swe'].mean()
    ax5.bar(daily_swe.index, daily_swe.values, color='lightblue')
    ax5.set_title('Snow/Water Equivalent', pad=20)
    ax5.set_xlabel('Date')
    ax5.set_ylabel('SWE (inches)')
    ax5.tick_params(axis='x', rotation=45)
    
    # Adjust layout
    plt.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95)
    
    # Create separate wind rose figure
    fig_rose = plt.figure(figsize=(10, 10))
    ax_rose = WindroseAxes.from_ax(fig=fig_rose)
    create_wind_rose(df, ax_rose)
    fig_rose.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95)
    
    return fig, fig_rose

def create_wind_rose(df, ax):
    """Create a wind rose plot"""
    if not isinstance(ax, WindroseAxes):
        ax = WindroseAxes.from_ax(ax=ax)
    # Drop rows where either value is missing; separate dropna() calls could
    # misalign direction and speed when NaNs fall on different rows
    wind_df = df[['wind_dir_deg', 'wind_speed']].dropna()
    ax.bar(wind_df['wind_dir_deg'], wind_df['wind_speed'],
           bins=np.arange(0, 40, 5), normed=True, opening=0.8, edgecolor='white')
    ax.set_legend(title='Wind Speed (mph)')
    ax.set_title('Wind Rose')

def analyze_weather_data(site_id, hours):
    """Analyze weather data and create visualizations"""
    try:
        print(f"Scraping data for {site_id}...")
        raw_data = scrape_weather_data(site_id, hours)
        if not raw_data:
            return "Error: Could not retrieve weather data.", None, None
        
        print("Parsing data...")
        df = parse_weather_data(raw_data)

        # Calculate total new snow using both methods
        total_new_snow_3hr = calculate_total_new_snow(df)
        total_new_snow_depth = calculate_new_snow_from_depth(df)
        # weather.gov tables list the newest observation first, so row 0 is current
        current_swe = df['swe'].iloc[0]

        print("Calculating statistics...")
        stats = {
            'Temperature Range': f"{df['temp'].min():.1f}°F to {df['temp'].max():.1f}°F",
            'Average Temperature': f"{df['temp'].mean():.1f}°F",
            'Max Wind Speed': f"{df['wind_speed'].max():.1f} mph",
            'Max Wind Gust': f"{df['wind_gust'].max():.1f} mph",
            'Average Humidity': f"{df['humidity'].mean():.1f}%",
            'Current Snow Depth': f"{df['snow_depth'].iloc[0]:.1f} inches",
            'Total New Snow (3-hr reports)': f"{total_new_snow_3hr:.1f} inches",
            'Total New Snow (depth change)': f"{total_new_snow_depth:.1f} inches",
            'Current Snow/Water Equivalent': f"{current_swe:.2f} inches"
        }
        
        html_output = "<div style='font-size: 16px; line-height: 1.5;'>"
        html_output += f"<p><strong>Weather Station:</strong> {site_id}</p>"
        html_output += f"<p><strong>Data Range:</strong> {df['datetime'].min().strftime('%Y-%m-%d %H:%M')} to {df['datetime'].max().strftime('%Y-%m-%d %H:%M')}</p>"
        for key, value in stats.items():
            html_output += f"<p><strong>{key}:</strong> {value}</p>"
        html_output += "</div>"
        
        print("Creating plots...")
        main_plots, wind_rose = create_plots(df)
        
        return html_output, main_plots, wind_rose
        
    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        return f"Error analyzing data: {str(e)}", None, None

# Create Gradio interface
with gr.Blocks(title="Weather Station Data Analyzer") as demo:
    gr.Markdown("# Weather Station Data Analyzer")
    gr.Markdown("""
    Select a weather station and number of hours to analyze.

    **New Snow Calculation Methods:**
    - **3-Hour Reports**: Sum of reported 3-hour snowfall amounts
    - **Depth Change**: Calculated from snow depth increases (generally more accurate)
    """)

    with gr.Row():
        site_id = gr.Dropdown(
            label="Weather Station",
            choices=[
                ("Yellowstone Club - Timberline", "YCTIM"),
                ("Yellowstone Club - Andesite", "YCAND"),
                ("Yellowstone Club - American Spirit", "YCAMS"),
                ("Yellowstone Club - Base", "YCBAS"),
                ("Yellowstone Club - Great Bear", "YCGBR"),
                ("Bozeman Airport", "KBZN"),
                ("Salt Lake City", "KSLC")
            ],
            value="YCTIM"
        )
        hours = gr.Number(
            label="Hours of Data",
            value=720,
            minimum=1,
            maximum=1440
        )
    
    analyze_btn = gr.Button("Fetch and Analyze Weather Data")
    
    with gr.Row():
        stats_output = gr.HTML(label="Statistics")
    
    with gr.Row():
        weather_plots = gr.Plot(label="Weather Plots")
        wind_rose = gr.Plot(label="Wind Rose")
    
    analyze_btn.click(
        fn=analyze_weather_data,
        inputs=[site_id, hours],
        outputs=[stats_output, weather_plots, wind_rose]
    )

if __name__ == "__main__":
    demo.launch()