Spaces:

nakas
/

TimberLine_Snow_History

Sleeping

App Files Community

nakas committed on Feb 10

Commit

6cc6c47

verified ·

1 Parent(s): 130973c

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -125

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ import subprocess
 import sys
 import os
-# Install Playwright browsers if they don't exist
 def install_playwright_browsers():
     try:
         if not os.path.exists('/home/user/.cache/ms-playwright'):
@@ -23,23 +22,14 @@ def install_playwright_browsers():
     except Exception as e:
         print(f"Error installing browsers: {e}")
-# Install browsers when the module loads
 install_playwright_browsers()
 def scrape_weather_data(site_id="YCTIM", hours=720):
-    """
-    Scrape weather data from weather.gov timeseries
-    Args:
-        site_id (str): The weather station ID
-        hours (int): Number of hours of data to retrieve
-    Returns:
-        dict: Dictionary containing parsed weather data and statistics
-    """
     url = f"https://www.weather.gov/wrh/timeseries?site={site_id}&hours={hours}&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=full&font=12&plot="
     try:
         with sync_playwright() as p:
-            # Launch browser in headless mode with reduced arguments for compatibility
             browser = p.chromium.launch(
                 headless=True,
                 args=['--no-sandbox', '--disable-dev-shm-usage']
@@ -49,77 +39,51 @@ def scrape_weather_data(site_id="YCTIM", hours=720):
             )
             page = context.new_page()
-            # Navigate to the page and wait for initial load
-            page.goto(url)
-            # Wait for the main container to load
-            page.wait_for_selector('div.container-fluid', timeout=30000)
-            # Additional wait for dynamic content
-            time.sleep(10)
-            # Get all text content
             content = page.content()
-            # Extract data using a more robust selector
-            data = page.evaluate('''() => {
-                const tables = Array.from(document.querySelectorAll('table'));
-                for (const table of tables) {
-                    if (table.textContent.includes('Date/Time')) {
-                        const headers = Array.from(table.querySelectorAll('th')).map(th => th.textContent.trim());
-                        const rows = Array.from(table.querySelectorAll('tbody tr')).map(row => {
-                            return Array.from(row.querySelectorAll('td')).map(td => td.textContent.trim());
-                        });
-                        return {headers, rows};
-                    }
-                }
-                return null;
             }''')
-            if not data:
-                # Take a screenshot for debugging
-                page.screenshot(path="error_screenshot.png")
-                raise Exception("Could not find weather data table. The page might not have loaded correctly.")
-            # Close browser
             browser.close()
-            # Process the data
-            headers = [h.replace('\n', ' ').strip() for h in data['headers']]
-            df = pd.DataFrame(data['rows'], columns=headers)
-            # Convert numeric columns
-            numeric_columns = [col for col in df.columns if any(term in col for term in ['Temp', 'Point', 'Humidity', 'Chill', 'Depth'])]
-            for col in numeric_columns:
-                df[col] = pd.to_numeric(df[col], errors='coerce')
-            # Parse wind speed and gusts
-            wind_col = next((col for col in df.columns if 'Wind Speed' in col), None)
-            if wind_col:
-                df[['Wind Speed', 'Wind Gust']] = df[wind_col].str.extract(r'(\d+)G(\d+)').astype(float)
-            # Calculate statistics
-            temp_col = next((col for col in df.columns if 'Temp' in col), None)
-            humidity_col = next((col for col in df.columns if 'Humidity' in col), None)
-            snow_col = next((col for col in df.columns if 'Snow Depth' in col), None)
-            stats = {
-                'Temperature Range': f"{df[temp_col].min():.1f}°F to {df[temp_col].max():.1f}°F",
-                'Average Temperature': f"{df[temp_col].mean():.1f}°F",
-                'Max Wind Speed': f"{df['Wind Speed'].max():.1f} mph",
-                'Max Wind Gust': f"{df['Wind Gust'].max():.1f} mph"
-            }
-            if humidity_col:
-                stats['Average Humidity'] = f"{df[humidity_col].mean():.1f}%"
-            if snow_col:
-                stats['Max Snow Depth'] = f"{df[snow_col].max():.1f} inches"
             return {
-                'status': 'Success',
-                'statistics': stats,
-                'data': df.to_dict('records'),
-                'columns': df.columns.tolist()
             }
     except Exception as e:
@@ -129,56 +93,14 @@ def scrape_weather_data(site_id="YCTIM", hours=720):
         }
 def format_output(result):
-    """Format the output for display in Gradio"""
     if result['status'] == 'Error':
         return f"Error: {result['error_message']}", None, None
-    # Create statistics HTML
-    stats_html = "<div style='font-size: 16px; line-height: 1.5;'>"
-    for key, value in result['statistics'].items():
-        stats_html += f"<p><strong>{key}:</strong> {value}</p>"
-    stats_html += "</div>"
-    # Convert data back to DataFrame for plotting
-    df = pd.DataFrame(result['data'])
-    # Find the date/time column
-    date_col = next((col for col in df.columns if 'Date' in col or 'Time' in col), None)
-    temp_col = next((col for col in df.columns if 'Temp' in col), None)
-    chill_col = next((col for col in df.columns if 'Chill' in col), None)
-    if date_col:
-        df[date_col] = pd.to_datetime(df[date_col])
-        # Create temperature plot
-        temp_fig = gr.Plot()
-        plot_cols = [temp_col]
-        if chill_col:
-            plot_cols.append(chill_col)
-        df.plot(x=date_col, y=plot_cols,
-                title='Temperature Over Time',
-                figsize=(12, 6))
-        temp_fig.pyplot()
-        # Create wind plot
-        wind_fig = gr.Plot()
-        df.plot(x=date_col, y=['Wind Speed', 'Wind Gust'],
-                title='Wind Speed and Gusts Over Time',
-                figsize=(12, 6))
-        wind_fig.pyplot()
-        return stats_html, temp_fig, wind_fig
-    else:
-        return "Error: Could not find date/time column in the data", None, None
 # Create Gradio interface
-with gr.Blocks(title="Weather Station Data Analyzer") as demo:
-    gr.Markdown("# Weather Station Data Analyzer")
-    gr.Markdown("""
-    This tool fetches and analyzes weather data from weather.gov.
-    - Default station: YCTIM
-    - Data range: Up to 720 hours (30 days)
-    """)
     with gr.Row():
         site_id = gr.Textbox(
@@ -193,19 +115,15 @@ with gr.Blocks(title="Weather Station Data Analyzer") as demo:
             maximum=1440
         )
-    analyze_btn = gr.Button("Fetch and Analyze Weather Data")
-    with gr.Row():
-        stats_output = gr.HTML(label="Statistics")
     with gr.Row():
-        temp_plot = gr.Plot(label="Temperature Plot")
-        wind_plot = gr.Plot(label="Wind Plot")
     analyze_btn.click(
         fn=lambda sid, hrs: format_output(scrape_weather_data(sid, hrs)),
         inputs=[site_id, hours],
-        outputs=[stats_output, temp_plot, wind_plot]
     )
 if __name__ == "__main__":

 import sys
 import os
 def install_playwright_browsers():
     try:
         if not os.path.exists('/home/user/.cache/ms-playwright'):
     except Exception as e:
         print(f"Error installing browsers: {e}")
 install_playwright_browsers()
 def scrape_weather_data(site_id="YCTIM", hours=720):
+    """Debug version that prints page content"""
     url = f"https://www.weather.gov/wrh/timeseries?site={site_id}&hours={hours}&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=full&font=12&plot="
     try:
         with sync_playwright() as p:
             browser = p.chromium.launch(
                 headless=True,
                 args=['--no-sandbox', '--disable-dev-shm-usage']
             )
             page = context.new_page()
+            print("\nNavigating to URL...")
+            response = page.goto(url)
+            print(f"Response status: {response.status}")
+            # Wait a bit and get the content
+            time.sleep(5)
+            # Print all HTML content
             content = page.content()
+            print("\nPage Content:")
+            print("=" * 80)
+            print(content)
+            print("=" * 80)
+            # Print all text content
+            text_content = page.evaluate('() => document.body.innerText')
+            print("\nText Content:")
+            print("=" * 80)
+            print(text_content)
+            print("=" * 80)
+            # List all elements
+            elements = page.evaluate('''() => {
+                const all = document.getElementsByTagName('*');
+                return Array.from(all).map(el => ({
+                    tag: el.tagName,
+                    id: el.id,
+                    class: el.className
+                }));
             }''')
+            print("\nPage Elements:")
+            print("=" * 80)
+            for el in elements:
+                print(f"Tag: {el['tag']}, ID: {el['id']}, Class: {el['class']}")
+            print("=" * 80)
+            # Save screenshot
+            page.screenshot(path="debug_screenshot.png")
+            print("\nSaved screenshot as debug_screenshot.png")
             browser.close()
             return {
+                'status': 'Debug',
+                'message': 'Check console output for page content'
             }
     except Exception as e:
         }
 def format_output(result):
+    """Simple output formatter for debug version"""
     if result['status'] == 'Error':
         return f"Error: {result['error_message']}", None, None
+    return result['message'], None, None
 # Create Gradio interface
+with gr.Blocks(title="Weather Data Scraper Debug") as demo:
+    gr.Markdown("# Weather Data Scraper (Debug Version)")
     with gr.Row():
         site_id = gr.Textbox(
             maximum=1440
         )
+    analyze_btn = gr.Button("Fetch and Print Page Content")
     with gr.Row():
+        output = gr.HTML(label="Output")
     analyze_btn.click(
         fn=lambda sid, hrs: format_output(scrape_weather_data(sid, hrs)),
         inputs=[site_id, hours],
+        outputs=[output]
     )
 if __name__ == "__main__":