nakas committed on
Commit 6cc6c47 · verified · 1 Parent(s): 130973c

Update app.py

Files changed (1)
  1. app.py +43 -125
app.py CHANGED
@@ -8,7 +8,6 @@ import subprocess
 import sys
 import os
 
-# Install Playwright browsers if they don't exist
 def install_playwright_browsers():
     try:
         if not os.path.exists('/home/user/.cache/ms-playwright'):
@@ -23,23 +22,14 @@ def install_playwright_browsers():
     except Exception as e:
         print(f"Error installing browsers: {e}")
 
-# Install browsers when the module loads
 install_playwright_browsers()
 
 def scrape_weather_data(site_id="YCTIM", hours=720):
-    """
-    Scrape weather data from weather.gov timeseries
-    Args:
-        site_id (str): The weather station ID
-        hours (int): Number of hours of data to retrieve
-    Returns:
-        dict: Dictionary containing parsed weather data and statistics
-    """
+    """Debug version that prints page content"""
     url = f"https://www.weather.gov/wrh/timeseries?site={site_id}&hours={hours}&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=full&font=12&plot="
 
     try:
         with sync_playwright() as p:
-            # Launch browser in headless mode with reduced arguments for compatibility
             browser = p.chromium.launch(
                 headless=True,
                 args=['--no-sandbox', '--disable-dev-shm-usage']
@@ -49,77 +39,51 @@ def scrape_weather_data(site_id="YCTIM", hours=720):
             )
             page = context.new_page()
 
-            # Navigate to the page and wait for initial load
-            page.goto(url)
-
-            # Wait for the main container to load
-            page.wait_for_selector('div.container-fluid', timeout=30000)
-
-            # Additional wait for dynamic content
-            time.sleep(10)
-
-            # Get all text content
+            print("\nNavigating to URL...")
+            response = page.goto(url)
+            print(f"Response status: {response.status}")
+
+            # Wait a bit and get the content
+            time.sleep(5)
+
+            # Print all HTML content
             content = page.content()
+            print("\nPage Content:")
+            print("=" * 80)
+            print(content)
+            print("=" * 80)
+
+            # Print all text content
+            text_content = page.evaluate('() => document.body.innerText')
+            print("\nText Content:")
+            print("=" * 80)
+            print(text_content)
+            print("=" * 80)
 
-            # Extract data using a more robust selector
-            data = page.evaluate('''() => {
-                const tables = Array.from(document.querySelectorAll('table'));
-                for (const table of tables) {
-                    if (table.textContent.includes('Date/Time')) {
-                        const headers = Array.from(table.querySelectorAll('th')).map(th => th.textContent.trim());
-                        const rows = Array.from(table.querySelectorAll('tbody tr')).map(row => {
-                            return Array.from(row.querySelectorAll('td')).map(td => td.textContent.trim());
-                        });
-                        return {headers, rows};
-                    }
-                }
-                return null;
+            # List all elements
+            elements = page.evaluate('''() => {
+                const all = document.getElementsByTagName('*');
+                return Array.from(all).map(el => ({
+                    tag: el.tagName,
+                    id: el.id,
+                    class: el.className
+                }));
             }''')
+            print("\nPage Elements:")
+            print("=" * 80)
+            for el in elements:
+                print(f"Tag: {el['tag']}, ID: {el['id']}, Class: {el['class']}")
+            print("=" * 80)
 
-            if not data:
-                # Take a screenshot for debugging
-                page.screenshot(path="error_screenshot.png")
-                raise Exception("Could not find weather data table. The page might not have loaded correctly.")
+            # Save screenshot
+            page.screenshot(path="debug_screenshot.png")
+            print("\nSaved screenshot as debug_screenshot.png")
 
-            # Close browser
             browser.close()
 
-            # Process the data
-            headers = [h.replace('\n', ' ').strip() for h in data['headers']]
-            df = pd.DataFrame(data['rows'], columns=headers)
-
-            # Convert numeric columns
-            numeric_columns = [col for col in df.columns if any(term in col for term in ['Temp', 'Point', 'Humidity', 'Chill', 'Depth'])]
-            for col in numeric_columns:
-                df[col] = pd.to_numeric(df[col], errors='coerce')
-
-            # Parse wind speed and gusts
-            wind_col = next((col for col in df.columns if 'Wind Speed' in col), None)
-            if wind_col:
-                df[['Wind Speed', 'Wind Gust']] = df[wind_col].str.extract(r'(\d+)G(\d+)').astype(float)
-
-            # Calculate statistics
-            temp_col = next((col for col in df.columns if 'Temp' in col), None)
-            humidity_col = next((col for col in df.columns if 'Humidity' in col), None)
-            snow_col = next((col for col in df.columns if 'Snow Depth' in col), None)
-
-            stats = {
-                'Temperature Range': f"{df[temp_col].min():.1f}°F to {df[temp_col].max():.1f}°F",
-                'Average Temperature': f"{df[temp_col].mean():.1f}°F",
-                'Max Wind Speed': f"{df['Wind Speed'].max():.1f} mph",
-                'Max Wind Gust': f"{df['Wind Gust'].max():.1f} mph"
-            }
-
-            if humidity_col:
-                stats['Average Humidity'] = f"{df[humidity_col].mean():.1f}%"
-            if snow_col:
-                stats['Max Snow Depth'] = f"{df[snow_col].max():.1f} inches"
-
             return {
-                'status': 'Success',
-                'statistics': stats,
-                'data': df.to_dict('records'),
-                'columns': df.columns.tolist()
+                'status': 'Debug',
+                'message': 'Check console output for page content'
             }
 
         except Exception as e:
@@ -129,56 +93,14 @@ def scrape_weather_data(site_id="YCTIM", hours=720):
         }
 
 def format_output(result):
-    """Format the output for display in Gradio"""
+    """Simple output formatter for debug version"""
     if result['status'] == 'Error':
         return f"Error: {result['error_message']}", None, None
-
-    # Create statistics HTML
-    stats_html = "<div style='font-size: 16px; line-height: 1.5;'>"
-    for key, value in result['statistics'].items():
-        stats_html += f"<p><strong>{key}:</strong> {value}</p>"
-    stats_html += "</div>"
-
-    # Convert data back to DataFrame for plotting
-    df = pd.DataFrame(result['data'])
-
-    # Find the date/time column
-    date_col = next((col for col in df.columns if 'Date' in col or 'Time' in col), None)
-    temp_col = next((col for col in df.columns if 'Temp' in col), None)
-    chill_col = next((col for col in df.columns if 'Chill' in col), None)
-
-    if date_col:
-        df[date_col] = pd.to_datetime(df[date_col])
-
-        # Create temperature plot
-        temp_fig = gr.Plot()
-        plot_cols = [temp_col]
-        if chill_col:
-            plot_cols.append(chill_col)
-        df.plot(x=date_col, y=plot_cols,
-                title='Temperature Over Time',
-                figsize=(12, 6))
-        temp_fig.pyplot()
-
-        # Create wind plot
-        wind_fig = gr.Plot()
-        df.plot(x=date_col, y=['Wind Speed', 'Wind Gust'],
-                title='Wind Speed and Gusts Over Time',
-                figsize=(12, 6))
-        wind_fig.pyplot()
-
-        return stats_html, temp_fig, wind_fig
-    else:
-        return "Error: Could not find date/time column in the data", None, None
+    return result['message'], None, None
 
 # Create Gradio interface
-with gr.Blocks(title="Weather Station Data Analyzer") as demo:
-    gr.Markdown("# Weather Station Data Analyzer")
-    gr.Markdown("""
-    This tool fetches and analyzes weather data from weather.gov.
-    - Default station: YCTIM
-    - Data range: Up to 720 hours (30 days)
-    """)
+with gr.Blocks(title="Weather Data Scraper Debug") as demo:
+    gr.Markdown("# Weather Data Scraper (Debug Version)")
 
     with gr.Row():
         site_id = gr.Textbox(
@@ -193,19 +115,15 @@ with gr.Blocks(title="Weather Station Data Analyzer") as demo:
             maximum=1440
         )
 
-    analyze_btn = gr.Button("Fetch and Analyze Weather Data")
-
-    with gr.Row():
-        stats_output = gr.HTML(label="Statistics")
+    analyze_btn = gr.Button("Fetch and Print Page Content")
 
     with gr.Row():
-        temp_plot = gr.Plot(label="Temperature Plot")
-        wind_plot = gr.Plot(label="Wind Plot")
+        output = gr.HTML(label="Output")
 
     analyze_btn.click(
        fn=lambda sid, hrs: format_output(scrape_weather_data(sid, hrs)),
        inputs=[site_id, hours],
-        outputs=[stats_output, temp_plot, wind_plot]
+        outputs=[output]
    )
 
 if __name__ == "__main__":
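
For reference, the new scrape_weather_data only dumps the fetched page and returns a debug status, so the same flow can be exercised without the Gradio UI. Below is a minimal standalone sketch, not part of this commit: it assumes Playwright and its Chromium build are installed (for example by running "playwright install chromium"), reuses the same weather.gov URL parameters, and the helper name dump_station_page is hypothetical.

# Hypothetical standalone check of the debug scraping flow (not part of app.py)
from playwright.sync_api import sync_playwright

def dump_station_page(site_id="YCTIM", hours=24):
    url = (
        f"https://www.weather.gov/wrh/timeseries?site={site_id}&hours={hours}"
        "&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=full&font=12&plot="
    )
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True, args=['--no-sandbox', '--disable-dev-shm-usage'])
        page = browser.new_page()
        response = page.goto(url)
        print(f"Response status: {response.status}")
        # Wait for a table to render rather than sleeping a fixed interval
        # (assumption: the observation table appears once the page's JS has run)
        page.wait_for_selector('table', timeout=30000)
        print(page.content()[:2000])  # print only the start of the rendered HTML
        browser.close()

if __name__ == "__main__":
    dump_station_page()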