"""
Collect OpenMeteo 1-week sample data for Sept 23-30, 2025

Collects weather data for 52 strategic grid points across Core FBMC zones:
- Temperature (2m), Wind (10m, 100m), Solar radiation, Cloud cover, Pressure

Matches the JAO and ENTSOE sample period for integrated analysis.

Note: OpenMeteo treats ``start_date`` and ``end_date`` as inclusive, so the
configured range yields 8 calendar days (192 hourly rows) per grid point.
"""

import os
import sys
from pathlib import Path
from datetime import datetime, timedelta

import pandas as pd
import polars as pl
import requests
import time

# 52 Strategic Grid Points (3-5 per country, covering major generation areas)
GRID_POINTS = [
    # Austria (5 points)
    {'name': 'AT_Vienna', 'lat': 48.21, 'lon': 16.37, 'zone': 'AT'},
    {'name': 'AT_Graz', 'lat': 47.07, 'lon': 15.44, 'zone': 'AT'},
    {'name': 'AT_Linz', 'lat': 48.31, 'lon': 14.29, 'zone': 'AT'},
    {'name': 'AT_Salzburg', 'lat': 47.81, 'lon': 13.04, 'zone': 'AT'},
    {'name': 'AT_Innsbruck', 'lat': 47.27, 'lon': 11.39, 'zone': 'AT'},

    # Belgium (4 points)
    {'name': 'BE_Brussels', 'lat': 50.85, 'lon': 4.35, 'zone': 'BE'},
    {'name': 'BE_Antwerp', 'lat': 51.22, 'lon': 4.40, 'zone': 'BE'},
    {'name': 'BE_Liege', 'lat': 50.63, 'lon': 5.57, 'zone': 'BE'},
    {'name': 'BE_Ghent', 'lat': 51.05, 'lon': 3.72, 'zone': 'BE'},

    # Czech Republic (5 points)
    {'name': 'CZ_Prague', 'lat': 50.08, 'lon': 14.44, 'zone': 'CZ'},
    {'name': 'CZ_Brno', 'lat': 49.19, 'lon': 16.61, 'zone': 'CZ'},
    {'name': 'CZ_Ostrava', 'lat': 49.82, 'lon': 18.26, 'zone': 'CZ'},
    {'name': 'CZ_Plzen', 'lat': 49.75, 'lon': 13.38, 'zone': 'CZ'},
    {'name': 'CZ_Liberec', 'lat': 50.77, 'lon': 15.06, 'zone': 'CZ'},

    # Germany-Luxembourg (5 points - major generation areas)
    {'name': 'DE_Berlin', 'lat': 52.52, 'lon': 13.40, 'zone': 'DE_LU'},
    {'name': 'DE_Munich', 'lat': 48.14, 'lon': 11.58, 'zone': 'DE_LU'},
    {'name': 'DE_Frankfurt', 'lat': 50.11, 'lon': 8.68, 'zone': 'DE_LU'},
    {'name': 'DE_Hamburg', 'lat': 53.55, 'lon': 9.99, 'zone': 'DE_LU'},
    {'name': 'DE_Cologne', 'lat': 50.94, 'lon': 6.96, 'zone': 'DE_LU'},

    # France (5 points)
    {'name': 'FR_Paris', 'lat': 48.86, 'lon': 2.35, 'zone': 'FR'},
    {'name': 'FR_Marseille', 'lat': 43.30, 'lon': 5.40, 'zone': 'FR'},
    {'name': 'FR_Lyon', 'lat': 45.76, 'lon': 4.84, 'zone': 'FR'},
    {'name': 'FR_Toulouse', 'lat': 43.60, 'lon': 1.44, 'zone': 'FR'},
    {'name': 'FR_Nantes', 'lat': 47.22, 'lon': -1.55, 'zone': 'FR'},

    # Croatia (4 points)
    {'name': 'HR_Zagreb', 'lat': 45.81, 'lon': 15.98, 'zone': 'HR'},
    {'name': 'HR_Split', 'lat': 43.51, 'lon': 16.44, 'zone': 'HR'},
    {'name': 'HR_Rijeka', 'lat': 45.33, 'lon': 14.44, 'zone': 'HR'},
    {'name': 'HR_Osijek', 'lat': 45.55, 'lon': 18.69, 'zone': 'HR'},

    # Hungary (5 points)
    {'name': 'HU_Budapest', 'lat': 47.50, 'lon': 19.04, 'zone': 'HU'},
    {'name': 'HU_Debrecen', 'lat': 47.53, 'lon': 21.64, 'zone': 'HU'},
    {'name': 'HU_Szeged', 'lat': 46.25, 'lon': 20.15, 'zone': 'HU'},
    {'name': 'HU_Miskolc', 'lat': 48.10, 'lon': 20.78, 'zone': 'HU'},
    {'name': 'HU_Pecs', 'lat': 46.07, 'lon': 18.23, 'zone': 'HU'},

    # Netherlands (4 points)
    {'name': 'NL_Amsterdam', 'lat': 52.37, 'lon': 4.89, 'zone': 'NL'},
    {'name': 'NL_Rotterdam', 'lat': 51.92, 'lon': 4.48, 'zone': 'NL'},
    {'name': 'NL_Utrecht', 'lat': 52.09, 'lon': 5.12, 'zone': 'NL'},
    {'name': 'NL_Groningen', 'lat': 53.22, 'lon': 6.57, 'zone': 'NL'},

    # Poland (5 points)
    {'name': 'PL_Warsaw', 'lat': 52.23, 'lon': 21.01, 'zone': 'PL'},
    {'name': 'PL_Krakow', 'lat': 50.06, 'lon': 19.94, 'zone': 'PL'},
    {'name': 'PL_Gdansk', 'lat': 54.35, 'lon': 18.65, 'zone': 'PL'},
    {'name': 'PL_Wroclaw', 'lat': 51.11, 'lon': 17.04, 'zone': 'PL'},
    {'name': 'PL_Poznan', 'lat': 52.41, 'lon': 16.93, 'zone': 'PL'},

    # Romania (4 points)
    {'name': 'RO_Bucharest', 'lat': 44.43, 'lon': 26.11, 'zone': 'RO'},
    {'name': 'RO_Cluj', 'lat': 46.77, 'lon': 23.60, 'zone': 'RO'},
    {'name': 'RO_Timisoara', 'lat': 45.75, 'lon': 21.23, 'zone': 'RO'},
    {'name': 'RO_Iasi', 'lat': 47.16, 'lon': 27.59, 'zone': 'RO'},

    # Slovenia (3 points)
    {'name': 'SI_Ljubljana', 'lat': 46.06, 'lon': 14.51, 'zone': 'SI'},
    {'name': 'SI_Maribor', 'lat': 46.56, 'lon': 15.65, 'zone': 'SI'},
    {'name': 'SI_Celje', 'lat': 46.24, 'lon': 15.27, 'zone': 'SI'},

    # Slovakia (3 points)
    {'name': 'SK_Bratislava', 'lat': 48.15, 'lon': 17.11, 'zone': 'SK'},
    {'name': 'SK_Kosice', 'lat': 48.72, 'lon': 21.26, 'zone': 'SK'},
    {'name': 'SK_Zilina', 'lat': 49.22, 'lon': 18.74, 'zone': 'SK'},
]

# 7 Weather variables (as specified in feature plan)
WEATHER_VARS = [
    'temperature_2m',
    'windspeed_10m',
    'windspeed_100m',
    'winddirection_100m',
    'shortwave_radiation',
    'cloudcover',
    'surface_pressure',
]

# Sample period: Sept 23-30, 2025 (matches JAO/ENTSOE sample)
START_DATE = '2025-09-23'
END_DATE = '2025-09-30'

# Both endpoints are inclusive in the API request, hence the "+ 1".
PERIOD_DAYS = (
    datetime.fromisoformat(END_DATE) - datetime.fromisoformat(START_DATE)
).days + 1
EXPECTED_HOURS = PERIOD_DAYS * 24

OPENMETEO_URL = "https://api.open-meteo.com/v1/forecast"

# Upper bound (seconds) on a single HTTP request so one stalled connection
# cannot hang the whole collection run.
REQUEST_TIMEOUT_S = 30

# Rate limiting: 270 req/min = ~0.22 sec between requests
REQUEST_DELAY_S = 0.25


def _fetch_point_frame(point):
    """Fetch hourly weather for one grid point as a pandas DataFrame.

    Args:
        point: Grid-point dict with 'name', 'lat', 'lon' and 'zone' keys.

    Returns:
        DataFrame with one row per hourly timestamp, the point's metadata
        columns, and one column per entry of WEATHER_VARS. A variable that
        is absent from the response is filled with None.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
        KeyError: If the response has no hourly 'time' series.
        ValueError: If the response cannot be parsed into a frame.
    """
    params = {
        'latitude': point['lat'],
        'longitude': point['lon'],
        'hourly': ','.join(WEATHER_VARS),
        'start_date': START_DATE,
        'end_date': END_DATE,
        'timezone': 'UTC'
    }

    response = requests.get(
        OPENMETEO_URL, params=params, timeout=REQUEST_TIMEOUT_S
    )
    response.raise_for_status()

    # Extract hourly data
    hourly = response.json().get('hourly', {})
    timestamps = pd.to_datetime(hourly['time'])

    # Scalar metadata values are broadcast across every timestamp row.
    frame = pd.DataFrame({
        'timestamp': timestamps,
        'grid_point': point['name'],
        'zone': point['zone'],
        'lat': point['lat'],
        'lon': point['lon'],
    })

    # Add all weather variables; .get() yields None for a missing variable.
    for var in WEATHER_VARS:
        frame[var] = hourly.get(var)

    return frame


print("=" * 70)
print("OpenMeteo 1-Week Sample Data Collection")
print("=" * 70)
print(f"Period: {START_DATE} to {END_DATE}")
print(f"Grid Points: {len(GRID_POINTS)} strategic locations")
print(f"Variables: {len(WEATHER_VARS)} weather parameters")
print(f"Duration: {PERIOD_DAYS} days = {EXPECTED_HOURS} hours")
print()

# Collect data for all grid points
all_weather_data = []
failed_points = []

for i, point in enumerate(GRID_POINTS, 1):
    # Flush so the progress prefix is visible while the request is in flight.
    print(f"[{i:2d}/{len(GRID_POINTS)}] {point['name']}...", end=" ", flush=True)

    try:
        point_df = _fetch_point_frame(point)
    except Exception as e:
        # Deliberate best-effort boundary: one bad point must not discard the
        # data already collected. The failure is recorded and reported below.
        print(f"[ERROR] {e}")
        failed_points.append(point['name'])
    else:
        all_weather_data.append(point_df)
        print(f"[OK] {len(point_df)} hours")

    # Throttle after every attempt, including failures, so that an error
    # response (e.g. HTTP 429) does not trigger an immediate burst of retries.
    time.sleep(REQUEST_DELAY_S)

if not all_weather_data:
    print("\n[ERROR] No data collected")
    sys.exit(1)

# Combine all grid points
print("\n" + "=" * 70)
print("Processing collected data...")
combined_df = pd.concat(all_weather_data, axis=0, ignore_index=True)

# Average over the points that actually returned data, not the planned count.
collected_points = len(all_weather_data)
hours_per_point = len(combined_df) // collected_points

print(f" Combined shape: {combined_df.shape}")
print(f" Total hours: {hours_per_point} per point")
print(f" Columns: {list(combined_df.columns)}")

if failed_points:
    print(f" [WARNING] {len(failed_points)} point(s) failed: {', '.join(failed_points)}")

# Save to parquet
output_dir = Path("data/raw/sample")
output_dir.mkdir(parents=True, exist_ok=True)

output_file = output_dir / "weather_sample_sept2025.parquet"
combined_df.to_parquet(output_file, index=False)

print(f"\n[SUCCESS] Saved to: {output_file}")
print(f" File size: {output_file.stat().st_size / 1024:.1f} KB")
print()
print("=" * 70)
print("OpenMeteo Sample Collection Complete")
print("=" * 70)
print(
    f"\nCollected: {collected_points} points × {len(WEATHER_VARS)} variables"
    f" × {hours_per_point} hours"
)
print(f"Total records: {len(combined_df):,}")
print("\nNext: Add weather exploration to Marimo notebook")