Spaces:
Sleeping
Sleeping
| """ | |
| Collect ENTSOE 1-week sample data for Sept 23-30, 2025 | |
| Collects generation by type for all 12 Core FBMC zones: | |
| - Wind, Solar, Thermal, Hydro, Nuclear generation | |
| Matches the JAO sample period for integrated analysis. | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from datetime import datetime, timedelta | |
| import pandas as pd | |
| from entsoe import EntsoePandasClient | |
| from dotenv import load_dotenv | |
# Put the "src" directory that sits one level above this script's folder at
# the front of the import path.
project_src = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(project_src))

# Load variables from the local .env file, then read the ENTSO-E API token.
load_dotenv()
API_KEY = os.environ.get('ENTSOE_API_KEY')

# A missing or empty key is fatal: explain how to fix it and exit with status 1.
if not API_KEY:
    for message in (
        "[ERROR] ENTSOE_API_KEY not found in .env file",
        "Please add: ENTSOE_API_KEY=your_key_here",
    ):
        print(message)
    sys.exit(1)

# Shared client used for every ENTSO-E query made by this script.
client = EntsoePandasClient(api_key=API_KEY)
# Core FBMC zones (12 total): zone label -> ENTSO-E EIC area code.
FBMC_ZONES = {
    'AT': '10YAT-APG------L',  # Austria
    'BE': '10YBE----------2',  # Belgium
    'CZ': '10YCZ-CEPS-----N',  # Czech Republic
    # Germany-Luxembourg bidding zone. 10Y1001A1001A83F is the Germany
    # *country* area, not the DE-LU bidding zone used in Core FBMC.
    'DE_LU': '10Y1001A1001A82H',
    'FR': '10YFR-RTE------C',  # France
    'HR': '10YHR-HEP------M',  # Croatia
    'HU': '10YHU-MAVIR----U',  # Hungary
    'NL': '10YNL----------L',  # Netherlands
    'PL': '10YPL-AREA-----S',  # Poland
    'RO': '10YRO-TEL------P',  # Romania
    'SI': '10YSI-ELES-----O',  # Slovenia
    'SK': '10YSK-SEPS-----K',  # Slovakia
}

# Generation types mapping: ENTSO-E PsrType code -> short snake_case label.
GENERATION_TYPES = {
    'B16': 'solar',              # Solar
    'B18': 'wind_offshore',      # Wind offshore
    'B19': 'wind_onshore',       # Wind onshore
    'B01': 'biomass',            # Biomass
    'B10': 'hydro_pumped',       # Hydro pumped storage
    'B11': 'hydro_run',          # Hydro run-of-river and poundage
    'B12': 'hydro_reservoir',    # Hydro water reservoir
    'B14': 'nuclear',            # Nuclear
    'B02': 'fossil_brown_coal',  # Fossil brown coal/lignite
    'B05': 'fossil_coal',        # Fossil hard coal
    'B04': 'fossil_gas',         # Fossil gas
    'B03': 'fossil_coal_gas',    # Fossil coal-derived gas
    'B06': 'fossil_oil',         # Fossil oil
}

# Sample period: Sept 23-30, 2025 (matches JAO sample).
# Both bounds are timezone-aware UTC timestamps.
START_DATE = pd.Timestamp('2025-09-23', tz='UTC')
END_DATE = pd.Timestamp('2025-09-30', tz='UTC')
# Announce the run. The duration is derived from the sample window so the
# banner cannot drift out of sync if START_DATE / END_DATE are edited.
sample_span = END_DATE - START_DATE
span_hours = int(sample_span / pd.Timedelta(hours=1))
banner_rule = "=" * 70
print(banner_rule)
print("ENTSOE 1-Week Sample Data Collection")
print(banner_rule)
print(f"Period: {START_DATE.date()} to {END_DATE.date()}")
print(f"Zones: {len(FBMC_ZONES)} Core FBMC zones")
print(f"Duration: {sample_span.days} days = {span_hours} hours")
print()
# Collect data: one normalized DataFrame per zone that answered successfully.
all_generation = []


def _prepare_zone_frame(gen_df, zone_code):
    """Return a copy of one zone's generation frame that concatenates cleanly.

    Three normalizations are applied so frames from different zones line up:

    * MultiIndex column labels are flattened to plain strings (levels joined
      with " - ", empty levels dropped). NOTE(review): entsoe-py appears to
      return two-level columns (type, metric) for some zones and flat columns
      for others - confirm against the installed version.
    * A timezone-aware DatetimeIndex is converted to UTC. NOTE(review):
      entsoe-py appears to localize results to each area's own timezone;
      without this the concatenated index would mix timezones.
    * A plain string ``zone`` column holding *zone_code* is added last, so it
      is always a single-level column regardless of the input's column shape.

    Args:
        gen_df: DataFrame returned by ``client.query_generation``.
        zone_code: Zone label (a key of ``FBMC_ZONES``).

    Returns:
        A new DataFrame; *gen_df* itself is not modified.
    """
    frame = gen_df.copy()
    if isinstance(frame.columns, pd.MultiIndex):
        frame.columns = [
            " - ".join(str(part) for part in label if str(part))
            for label in frame.columns
        ]
    if isinstance(frame.index, pd.DatetimeIndex) and frame.index.tz is not None:
        frame.index = frame.index.tz_convert('UTC')
    frame['zone'] = zone_code
    return frame


for zone_code, zone_eic in FBMC_ZONES.items():
    print(f"\n[{zone_code}] Collecting generation data...")
    try:
        # Query generation by type (psr_type=None requests all types).
        gen_df = client.query_generation(
            zone_eic,
            start=START_DATE,
            end=END_DATE,
            psr_type=None,
        )
        # Check the result type BEFORE touching it: assigning a 'zone' key to
        # a non-DataFrame would silently corrupt it or raise.
        if not isinstance(gen_df, pd.DataFrame):
            print(f" [WARNING] Unexpected format: {type(gen_df)}")
            continue
        zone_frame = _prepare_zone_frame(gen_df, zone_code)
    except Exception as e:
        # Deliberately best-effort: one failing zone must not abort the run.
        print(f" [ERROR] {e}")
        continue
    all_generation.append(zone_frame)
    print(f" [OK] Collected {len(zone_frame)} rows")

# Nothing usable came back from any zone: report and exit with status 1.
if not all_generation:
    print("\n[ERROR] No data collected - check API key and zone codes")
    sys.exit(1)
# Combine all zones into one long table (rows stacked zone after zone).
print("\n" + "=" * 70)
print("Processing collected data...")
combined_df = pd.concat(all_generation, axis=0)

# Turn the time index into a regular 'timestamp' column. The index is named
# explicitly first, so the column is called 'timestamp' whether or not the
# incoming index already carried a name (reset_index only emits a column
# called 'index' when the index is unnamed).
combined_df = combined_df.rename_axis('timestamp').reset_index()
print(f" Combined shape: {combined_df.shape}")
print(f" Columns: {list(combined_df.columns)}")

# Save to parquet under data/raw/sample, creating the directory if needed.
output_dir = Path("data/raw/sample")
output_dir.mkdir(parents=True, exist_ok=True)
output_file = output_dir / "entsoe_sample_sept2025.parquet"
combined_df.to_parquet(output_file, index=False)
print(f"\n[SUCCESS] Saved to: {output_file}")
print(f" File size: {output_file.stat().st_size / 1024:.1f} KB")
print()
print("=" * 70)
print("ENTSOE Sample Collection Complete")
print("=" * 70)
print("\nNext: Add ENTSOE exploration to Marimo notebook")