import gradio as gr import pandas as pd import numpy as np import re from playwright.sync_api import sync_playwright import time import os import subprocess import sys import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec from windrose import WindroseAxes from datetime import datetime # Install Playwright browsers on startup def install_playwright_browsers(): try: if not os.path.exists('/home/user/.cache/ms-playwright'): print("Installing Playwright browsers...") subprocess.run( [sys.executable, "-m", "playwright", "install", "chromium"], check=True, capture_output=True, text=True ) print("Playwright browsers installed successfully") except Exception as e: print(f"Error installing browsers: {e}") # Install browsers when the module loads install_playwright_browsers() def scrape_weather_data(site_id, hours=720): """Scrape weather data from weather.gov timeseries""" url = f"https://www.weather.gov/wrh/timeseries?site={site_id}&hours={hours}&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=full&font=12&plot=" try: with sync_playwright() as p: browser = p.chromium.launch( headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'] ) context = browser.new_context( user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' ) page = context.new_page() response = page.goto(url, timeout=60000) print(f"Response status: {response.status}") # Wait for table to load with longer timeout page.wait_for_selector('table', timeout=45000) time.sleep(8) # Give more time for dynamic content print("Extracting data...") content = page.evaluate('''() => { const getTextContent = () => { const rows = []; const tables = document.getElementsByTagName('table'); console.log(`Found ${tables.length} tables`); for (const table of tables) { const text = table.textContent; // Look for Date/Time or just Date as header if (text.includes('Date/Time') || text.includes('Date')) { const headerRow = Array.from(table.querySelectorAll('th')) .map(th => th.textContent.trim()); console.log('Headers:', headerRow); const dataRows = Array.from(table.querySelectorAll('tbody tr')) .map(row => Array.from(row.querySelectorAll('td')) .map(td => td.textContent.trim())); console.log(`Found ${dataRows.length} data rows`); if (dataRows.length > 0) { return {headers: headerRow, rows: dataRows}; } } } return null; }; return getTextContent(); }''') print(f"Found {len(content['rows'] if content else [])} rows of data") browser.close() if not content or not content.get('rows'): raise ValueError(f"No data found for station {site_id}. Station may not exist or may not be reporting.") return content except Exception as e: error_msg = f"Error scraping data for {site_id}: {str(e)}" print(error_msg) raise ValueError(error_msg) def parse_date(date_str): """Parse date string to datetime""" try: current_year = datetime.now().year return pd.to_datetime(f"{date_str}, {current_year}", format="%b %d, %I:%M %p, %Y") except: return pd.NaT def parse_weather_data(data): """Parse the weather data into a pandas DataFrame""" if not data or 'rows' not in data: raise ValueError("No valid weather data found") df = pd.DataFrame(data['rows']) # Different stations have different numbers of columns # Standard columns we expect (in order) expected_columns = ['datetime', 'temp', 'dew_point', 'humidity', 'wind_chill', 'wind_dir', 'wind_speed', 'snow_depth', 'snowfall_3hr', 'snowfall_6hr', 'snowfall_24hr', 'swe'] # Get actual number of columns num_cols = len(df.columns) print(f"Data has {num_cols} columns") # Assign column names based on actual number of columns if num_cols <= len(expected_columns): df.columns = expected_columns[:num_cols] # Add missing columns with NaN values for col in expected_columns[num_cols:]: df[col] = np.nan else: # If more columns than expected, just use the first 12 df = df.iloc[:, :12] df.columns = expected_columns numeric_cols = ['temp', 'dew_point', 'humidity', 'wind_chill', 'snow_depth', 'snowfall_3hr', 'snowfall_6hr', 'snowfall_24hr', 'swe'] for col in numeric_cols: df[col] = pd.to_numeric(df[col], errors='coerce') def parse_wind(x): if pd.isna(x): return np.nan, np.nan match = re.search(r'(\d+)G(\d+)', str(x)) if match: return float(match.group(1)), float(match.group(2)) try: return float(x), np.nan except: return np.nan, np.nan wind_data = df['wind_speed'].apply(parse_wind) df['wind_speed'] = wind_data.apply(lambda x: x[0]) df['wind_gust'] = wind_data.apply(lambda x: x[1]) def parse_direction(direction): direction_map = { 'N': 0, 'NNE': 22.5, 'NE': 45, 'ENE': 67.5, 'E': 90, 'ESE': 112.5, 'SE': 135, 'SSE': 157.5, 'S': 180, 'SSW': 202.5, 'SW': 225, 'WSW': 247.5, 'W': 270, 'WNW': 292.5, 'NW': 315, 'NNW': 337.5 } return direction_map.get(direction, np.nan) df['wind_dir_deg'] = df['wind_dir'].apply(parse_direction) df['datetime'] = df['datetime'].apply(parse_date) df['date'] = df['datetime'].dt.date return df def calculate_total_new_snow(df): """ Calculate total new snow by: 1. Using ONLY the 3-hour snowfall amounts 2. Using 9 AM as the daily reset point """ # Sort by datetime to ensure correct calculation df = df.sort_values('datetime') # Create a copy of the dataframe with ONLY datetime and 3-hour snowfall snow_df = df[['datetime', 'snowfall_3hr']].copy() # Create a day group that starts at 9 AM instead of midnight snow_df['day_group'] = snow_df['datetime'].apply( lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date() ) # Calculate daily snow totals daily_totals = snow_df.groupby('day_group').apply(process_daily_snow) return daily_totals.sum() def calculate_new_snow_from_depth(df): """ Calculate new snow from changes in snow depth. More accurate than 3-hour readings which can be unreliable. Uses 9 AM as the daily reset point. """ # Sort by datetime df = df.sort_values('datetime').copy() # Create day groups starting at 9 AM df['day_group'] = df['datetime'].apply( lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date() ) daily_snow_change = [] for day, group in df.groupby('day_group'): group = group.sort_values('datetime') valid_depths = group['snow_depth'].dropna() if len(valid_depths) > 0: # Get max depth increase for the day max_depth = valid_depths.max() min_depth = valid_depths.min() # New snow is the increase in depth (ignore decreases from settling/melting) new_snow = max(0, max_depth - min_depth) daily_snow_change.append(new_snow) return sum(daily_snow_change) def calculate_daily_snow_from_depth(df): """Calculate daily new snow from depth changes, returns a dataframe""" df = df.sort_values('datetime').copy() df['day_group'] = df['datetime'].apply( lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date() ) daily_data = [] for day, group in df.groupby('day_group'): group = group.sort_values('datetime') valid_depths = group['snow_depth'].dropna() if len(valid_depths) > 0: max_depth = valid_depths.max() min_depth = valid_depths.min() new_snow = max(0, max_depth - min_depth) daily_data.append({'date': day, 'new_snow_from_depth': new_snow}) return pd.DataFrame(daily_data) def process_daily_snow(group): """Sum up ONLY the 3-hour snowfall amounts for each day period""" # Sort by time to ensure proper sequence group = group.sort_values('datetime') # Sum only the valid 3-hour amounts, treating NaN as 0 valid_amounts = group['snowfall_3hr'].fillna(0) daily_total = valid_amounts.sum() return daily_total def create_plots(df): """Create all weather plots including SWE estimates""" # Create figure with adjusted height and spacing fig = plt.figure(figsize=(20, 24)) # Calculate height ratios for different plots height_ratios = [1, 1, 1, 1, 1] # Equal height for all plots gs = GridSpec(5, 1, figure=fig, height_ratios=height_ratios) gs.update(hspace=0.4) # Increase vertical spacing between plots # Temperature plot ax1 = fig.add_subplot(gs[0]) ax1.plot(df['datetime'], df['temp'], label='Temperature', color='red') ax1.plot(df['datetime'], df['wind_chill'], label='Wind Chill', color='blue') ax1.set_title('Temperature and Wind Chill Over Time', pad=20) ax1.set_xlabel('Date') ax1.set_ylabel('Temperature (°F)') ax1.legend() ax1.grid(True) ax1.tick_params(axis='x', rotation=45) # Wind speed plot ax2 = fig.add_subplot(gs[1]) ax2.plot(df['datetime'], df['wind_speed'], label='Wind Speed', color='blue') ax2.plot(df['datetime'], df['wind_gust'], label='Wind Gust', color='orange') ax2.set_title('Wind Speed and Gusts Over Time', pad=20) ax2.set_xlabel('Date') ax2.set_ylabel('Wind Speed (mph)') ax2.legend() ax2.grid(True) ax2.tick_params(axis='x', rotation=45) # Snow depth plot ax3 = fig.add_subplot(gs[2]) ax3.plot(df['datetime'], df['snow_depth'], color='blue', label='Snow Depth') ax3.set_title('Snow Depth Over Time', pad=20) ax3.set_xlabel('Date') ax3.set_ylabel('Snow Depth (inches)') ax3.grid(True) ax3.tick_params(axis='x', rotation=45) # Daily new snow comparison plot - showing both methods ax4 = fig.add_subplot(gs[3]) # Calculate from 3-hour reports snow_df = df[['datetime', 'snowfall_3hr']].copy() snow_df['day_group'] = snow_df['datetime'].apply( lambda x: x.date() if x.hour >= 9 else (x - pd.Timedelta(days=1)).date() ) daily_snow_3hr = snow_df.groupby('day_group').apply(process_daily_snow).reset_index() daily_snow_3hr.columns = ['date', 'new_snow_3hr'] # Calculate from depth changes daily_snow_depth = calculate_daily_snow_from_depth(df) # Merge the two calculations daily_combined = pd.merge(daily_snow_3hr, daily_snow_depth, on='date', how='outer').fillna(0) # Create grouped bar chart x = range(len(daily_combined)) width = 0.35 bars1 = ax4.bar([i - width/2 for i in x], daily_combined['new_snow_3hr'], width, label='3-Hour Reports', color='skyblue', alpha=0.8) bars2 = ax4.bar([i + width/2 for i in x], daily_combined['new_snow_from_depth'], width, label='Snow Depth Change', color='darkblue', alpha=0.8) ax4.set_title('Daily New Snow - Two Methods (9 AM Reset)', pad=20, fontsize=12, fontweight='bold') ax4.set_xlabel('Date') ax4.set_ylabel('New Snow (inches)') ax4.set_xticks(x) ax4.set_xticklabels([str(d) for d in daily_combined['date']], rotation=45, ha='right') ax4.legend() ax4.grid(True, axis='y', linestyle='--', alpha=0.7) # Add value labels on bars for bars in [bars1, bars2]: for bar in bars: height = bar.get_height() if height > 0: ax4.text(bar.get_x() + bar.get_width()/2., height, f'{height:.1f}"', ha='center', va='bottom', fontsize=8) # SWE bar plot ax5 = fig.add_subplot(gs[4]) daily_swe = df.groupby('date')['swe'].mean() ax5.bar(daily_swe.index, daily_swe.values, color='lightblue') ax5.set_title('Snow/Water Equivalent', pad=20) ax5.set_xlabel('Date') ax5.set_ylabel('SWE (inches)') ax5.tick_params(axis='x', rotation=45) # Adjust layout plt.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95) # Create separate wind rose figure fig_rose = plt.figure(figsize=(10, 10)) ax_rose = WindroseAxes.from_ax(fig=fig_rose) create_wind_rose(df, ax_rose) fig_rose.subplots_adjust(top=0.95, bottom=0.05, left=0.1, right=0.95) return fig, fig_rose def create_wind_rose(df, ax): """Create a wind rose plot""" if not isinstance(ax, WindroseAxes): ax = WindroseAxes.from_ax(ax=ax) ax.bar(df['wind_dir_deg'].dropna(), df['wind_speed'].dropna(), bins=np.arange(0, 40, 5), normed=True, opening=0.8, edgecolor='white') ax.set_legend(title='Wind Speed (mph)') ax.set_title('Wind Rose') def analyze_weather_data(site_id, hours): """Analyze weather data and create visualizations""" try: print(f"Scraping data for {site_id}...") raw_data = scrape_weather_data(site_id, hours) if not raw_data: return "Error: Could not retrieve weather data.", None, None print("Parsing data...") df = parse_weather_data(raw_data) # Calculate total new snow using both methods total_new_snow_3hr = calculate_total_new_snow(df) total_new_snow_depth = calculate_new_snow_from_depth(df) current_swe = df['swe'].iloc[0] # Get most recent SWE measurement print("Calculating statistics...") stats = { 'Temperature Range': f"{df['temp'].min():.1f}°F to {df['temp'].max():.1f}°F", 'Average Temperature': f"{df['temp'].mean():.1f}°F", 'Max Wind Speed': f"{df['wind_speed'].max():.1f} mph", 'Max Wind Gust': f"{df['wind_gust'].max():.1f} mph", 'Average Humidity': f"{df['humidity'].mean():.1f}%", 'Current Snow Depth': f"{df['snow_depth'].iloc[0]:.1f} inches", 'Total New Snow (3-hr reports)': f"{total_new_snow_3hr:.1f} inches", 'Total New Snow (depth change)': f"{total_new_snow_depth:.1f} inches", 'Current Snow/Water Equivalent': f"{current_swe:.2f} inches" } html_output = "
" html_output += f"

Weather Station: {site_id}

" html_output += f"

Data Range: {df['datetime'].min().strftime('%Y-%m-%d %H:%M')} to {df['datetime'].max().strftime('%Y-%m-%d %H:%M')}

" for key, value in stats.items(): html_output += f"

{key}: {value}

" html_output += "
" print("Creating plots...") main_plots, wind_rose = create_plots(df) return html_output, main_plots, wind_rose except Exception as e: print(f"Error in analysis: {str(e)}") return f"Error analyzing data: {str(e)}", None, None # Create Gradio interface with gr.Blocks(title="Weather Station Data Analyzer") as demo: gr.Markdown("# Weather Station Data Analyzer") gr.Markdown(""" Select a weather station and number of hours to analyze. **New Snow Calculation Methods:** - **3-Hour Reports**: Sum of reported 3-hour snowfall amounts - **Depth Change**: Calculated from snow depth increases (generally more accurate) """) with gr.Row(): site_id = gr.Dropdown( label="Weather Station", choices=[ ("Yellowstone Club - Timberline", "YCTIM"), ("Yellowstone Club - Andesite", "YCAND"), ("Yellowstone Club - American Spirit", "YCAMS"), ("Yellowstone Club - Base", "YCBAS"), ("Bozeman Airport", "KBZN"), ("Salt Lake City", "KSLC") ], value="YCTIM", info="Note: Great Bear not available (no weather.gov station ID)" ) hours = gr.Number( label="Hours of Data", value=720, minimum=1, maximum=1440 ) analyze_btn = gr.Button("Fetch and Analyze Weather Data") with gr.Row(): stats_output = gr.HTML(label="Statistics") with gr.Row(): weather_plots = gr.Plot(label="Weather Plots") wind_rose = gr.Plot(label="Wind Rose") analyze_btn.click( fn=analyze_weather_data, inputs=[site_id, hours], outputs=[stats_output, weather_plots, wind_rose] ) if __name__ == "__main__": demo.launch()