Spaces:
Sleeping
Sleeping
| """ | |
| Collect OpenMeteo 1-week sample data for Sept 23-30, 2025 | |
| Collects hourly weather data for 52 strategic grid points across the Core FBMC zones: | |
| - Temperature (2m), wind speed (10m, 100m), wind direction (100m), solar radiation, cloud cover, surface pressure | |
| Matches the JAO and ENTSOE sample period for integrated analysis. | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from datetime import datetime, timedelta | |
| import pandas as pd | |
| import polars as pl | |
| import requests | |
| import time | |
# 52 strategic grid points (3-5 per bidding zone, covering major generation
# areas). Each row is (name, latitude, longitude, zone); the rows are expanded
# below into the list of dicts that the rest of the script iterates over.
_GRID_POINT_ROWS = (
    # Austria (5 points)
    ('AT_Vienna', 48.21, 16.37, 'AT'),
    ('AT_Graz', 47.07, 15.44, 'AT'),
    ('AT_Linz', 48.31, 14.29, 'AT'),
    ('AT_Salzburg', 47.81, 13.04, 'AT'),
    ('AT_Innsbruck', 47.27, 11.39, 'AT'),
    # Belgium (4 points)
    ('BE_Brussels', 50.85, 4.35, 'BE'),
    ('BE_Antwerp', 51.22, 4.40, 'BE'),
    ('BE_Liege', 50.63, 5.57, 'BE'),
    ('BE_Ghent', 51.05, 3.72, 'BE'),
    # Czech Republic (5 points)
    ('CZ_Prague', 50.08, 14.44, 'CZ'),
    ('CZ_Brno', 49.19, 16.61, 'CZ'),
    ('CZ_Ostrava', 49.82, 18.26, 'CZ'),
    ('CZ_Plzen', 49.75, 13.38, 'CZ'),
    ('CZ_Liberec', 50.77, 15.06, 'CZ'),
    # Germany-Luxembourg (5 points - major generation areas)
    ('DE_Berlin', 52.52, 13.40, 'DE_LU'),
    ('DE_Munich', 48.14, 11.58, 'DE_LU'),
    ('DE_Frankfurt', 50.11, 8.68, 'DE_LU'),
    ('DE_Hamburg', 53.55, 9.99, 'DE_LU'),
    ('DE_Cologne', 50.94, 6.96, 'DE_LU'),
    # France (5 points)
    ('FR_Paris', 48.86, 2.35, 'FR'),
    ('FR_Marseille', 43.30, 5.40, 'FR'),
    ('FR_Lyon', 45.76, 4.84, 'FR'),
    ('FR_Toulouse', 43.60, 1.44, 'FR'),
    ('FR_Nantes', 47.22, -1.55, 'FR'),
    # Croatia (4 points)
    ('HR_Zagreb', 45.81, 15.98, 'HR'),
    ('HR_Split', 43.51, 16.44, 'HR'),
    ('HR_Rijeka', 45.33, 14.44, 'HR'),
    ('HR_Osijek', 45.55, 18.69, 'HR'),
    # Hungary (5 points)
    ('HU_Budapest', 47.50, 19.04, 'HU'),
    ('HU_Debrecen', 47.53, 21.64, 'HU'),
    ('HU_Szeged', 46.25, 20.15, 'HU'),
    ('HU_Miskolc', 48.10, 20.78, 'HU'),
    ('HU_Pecs', 46.07, 18.23, 'HU'),
    # Netherlands (4 points)
    ('NL_Amsterdam', 52.37, 4.89, 'NL'),
    ('NL_Rotterdam', 51.92, 4.48, 'NL'),
    ('NL_Utrecht', 52.09, 5.12, 'NL'),
    ('NL_Groningen', 53.22, 6.57, 'NL'),
    # Poland (5 points)
    ('PL_Warsaw', 52.23, 21.01, 'PL'),
    ('PL_Krakow', 50.06, 19.94, 'PL'),
    ('PL_Gdansk', 54.35, 18.65, 'PL'),
    ('PL_Wroclaw', 51.11, 17.04, 'PL'),
    ('PL_Poznan', 52.41, 16.93, 'PL'),
    # Romania (4 points)
    ('RO_Bucharest', 44.43, 26.11, 'RO'),
    ('RO_Cluj', 46.77, 23.60, 'RO'),
    ('RO_Timisoara', 45.75, 21.23, 'RO'),
    ('RO_Iasi', 47.16, 27.59, 'RO'),
    # Slovenia (3 points)
    ('SI_Ljubljana', 46.06, 14.51, 'SI'),
    ('SI_Maribor', 46.56, 15.65, 'SI'),
    ('SI_Celje', 46.24, 15.27, 'SI'),
    # Slovakia (3 points)
    ('SK_Bratislava', 48.15, 17.11, 'SK'),
    ('SK_Kosice', 48.72, 21.26, 'SK'),
    ('SK_Zilina', 49.22, 18.74, 'SK'),
)

GRID_POINTS = [
    {'name': name, 'lat': lat, 'lon': lon, 'zone': zone}
    for name, lat, lon, zone in _GRID_POINT_ROWS
]
# The 7 hourly weather variables requested for every grid point (as specified
# in the feature plan), grouped by kind and concatenated in request order.
_TEMPERATURE_VARS = ['temperature_2m']
_WIND_VARS = ['windspeed_10m', 'windspeed_100m', 'winddirection_100m']
_SKY_AND_PRESSURE_VARS = ['shortwave_radiation', 'cloudcover', 'surface_pressure']

WEATHER_VARS = [*_TEMPERATURE_VARS, *_WIND_VARS, *_SKY_AND_PRESSURE_VARS]
# Sample window as ISO dates: Sept 23-30, 2025, chosen to line up with the
# JAO/ENTSOE sample.
START_DATE, END_DATE = '2025-09-23', '2025-09-30'
# Print the run banner: period, number of grid points, number of variables and
# the expected duration.
#
# The duration is derived from START_DATE/END_DATE instead of being hard-coded.
# OpenMeteo treats the date range as inclusive on both ends, so Sept 23-30
# covers 8 calendar days (192 hourly steps), not 7 days / 168 hours.
_BANNER_RULE = "=" * 70
_sample_days = (
    datetime.strptime(END_DATE, '%Y-%m-%d')
    - datetime.strptime(START_DATE, '%Y-%m-%d')
).days + 1  # +1 because the end date itself is part of the range

print(_BANNER_RULE)
print("OpenMeteo 1-Week Sample Data Collection")
print(_BANNER_RULE)
print(f"Period: {START_DATE} to {END_DATE}")
print(f"Grid Points: {len(GRID_POINTS)} strategic locations")
print(f"Variables: {len(WEATHER_VARS)} weather parameters")
print(f"Duration: {_sample_days} days = {_sample_days * 24} hours")
print()
# Collect data for all grid points.
#
# Each grid point is fetched independently and on a best-effort basis: a point
# whose request or parsing fails is reported and skipped, and the script only
# aborts when no point at all could be collected.
#
# NOTE(review): this calls the forecast endpoint with explicit start/end
# dates; confirm it still serves the sample window when the script is run long
# after the period has passed.
_FORECAST_URL = "https://api.open-meteo.com/v1/forecast"
_REQUEST_TIMEOUT_S = 30  # without a timeout one stalled connection hangs the run
_REQUEST_PAUSE_S = 0.25  # rate limiting: 270 req/min = ~0.22 sec between requests


def _fetch_hourly(point):
    """Request the hourly weather block for one grid point.

    Args:
        point: grid point dict with 'lat' and 'lon' keys.

    Returns:
        The 'hourly' mapping of the JSON response (variable name -> values).

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
        ValueError: when the response carries no hourly time axis.
    """
    response = requests.get(
        _FORECAST_URL,
        params={
            'latitude': point['lat'],
            'longitude': point['lon'],
            'hourly': ','.join(WEATHER_VARS),
            'start_date': START_DATE,
            'end_date': END_DATE,
            'timezone': 'UTC',
        },
        timeout=_REQUEST_TIMEOUT_S,
    )
    response.raise_for_status()
    hourly = response.json().get('hourly', {})
    if 'time' not in hourly:
        raise ValueError("response contains no hourly time axis")
    return hourly


def _frame_for_point(point, hourly):
    """Build the per-point DataFrame: identity columns plus one column per variable."""
    frame = pd.DataFrame({
        'timestamp': pd.to_datetime(hourly['time']),
        'grid_point': point['name'],
        'zone': point['zone'],
        'lat': point['lat'],
        'lon': point['lon'],
    })
    for var in WEATHER_VARS:
        # A variable absent from the response becomes an all-null column so
        # that every point contributes the same set of columns.
        frame[var] = hourly.get(var)
    return frame


all_weather_data = []
for i, point in enumerate(GRID_POINTS, 1):
    print(f"[{i:2d}/{len(GRID_POINTS)}] {point['name']}...", end=" ")
    try:
        point_df = _frame_for_point(point, _fetch_hourly(point))
    except Exception as e:  # per-point boundary: report and keep going
        print(f"[ERROR] {e}")
    else:
        all_weather_data.append(point_df)
        print(f"[OK] {len(point_df)} hours")
    # Pause after failures as well as successes; otherwise a run of errors
    # would fire the following requests back-to-back and defeat the limit.
    time.sleep(_REQUEST_PAUSE_S)

if not all_weather_data:
    print("\n[ERROR] No data collected")
    sys.exit(1)
# Combine the per-point frames into one long-format table, write it to
# parquet and print a summary of what was collected.
print("\n" + "=" * 70)
print("Processing collected data...")
combined_df = pd.concat(all_weather_data, axis=0, ignore_index=True)

# Base the statistics on the points that actually returned data: failed
# requests are skipped during collection, so dividing by len(GRID_POINTS)
# would under-report the hours per point after any failure.
points_collected = len(all_weather_data)
hours_per_point = len(combined_df) // points_collected
if points_collected < len(GRID_POINTS):
    print(f" [WARN] Only {points_collected}/{len(GRID_POINTS)} grid points returned data")

print(f" Combined shape: {combined_df.shape}")
print(f" Total hours: {hours_per_point} per point")
print(f" Columns: {list(combined_df.columns)}")

# Save to parquet
output_dir = Path("data/raw/sample")
output_dir.mkdir(parents=True, exist_ok=True)
output_file = output_dir / "weather_sample_sept2025.parquet"
combined_df.to_parquet(output_file, index=False)
print(f"\n[SUCCESS] Saved to: {output_file}")
print(f" File size: {output_file.stat().st_size / 1024:.1f} KB")
print()
print("=" * 70)
print("OpenMeteo Sample Collection Complete")
print("=" * 70)
# Report the real dimensions of the dataset rather than the planned ones.
print(
    f"\nCollected: {points_collected} points × {len(WEATHER_VARS)} variables"
    f" × {hours_per_point} hours"
)
print(f"Total records: {len(combined_df):,}")
print("\nNext: Add weather exploration to Marimo notebook")