File size: 4,126 Bytes
27cb60a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Collect ENTSOE 1-week sample data for Sept 23-30, 2025

Collects generation by type for all 12 Core FBMC zones:
- Wind, Solar, Thermal, Hydro, Nuclear generation

Matches the JAO sample period for integrated analysis.
"""

import os
import sys
from pathlib import Path
from datetime import datetime, timedelta
import pandas as pd
from entsoe import EntsoePandasClient
from dotenv import load_dotenv

# Make the src/ directory that sits beside this script's folder importable
_SRC_DIR = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(_SRC_DIR))

# Pull variables from the .env file into the environment, then read the key
load_dotenv()
API_KEY = os.environ.get('ENTSOE_API_KEY')

# Nothing can be queried without a key: explain how to fix it and stop
if not API_KEY:
    for _message in (
        "[ERROR] ENTSOE_API_KEY not found in .env file",
        "Please add: ENTSOE_API_KEY=your_key_here",
    ):
        print(_message)
    sys.exit(1)

# Single client instance shared by every query in this script
client = EntsoePandasClient(api_key=API_KEY)

# Core FBMC zones (12 total): short zone label -> ENTSO-E EIC area code.
# The DE_LU entry uses the EIC of the Germany-Luxembourg bidding zone
# (10Y1001A1001A82H); 10Y1001A1001A83F identifies the Germany country area,
# which does not match the zone named by the key.
FBMC_ZONES = {
    'AT': '10YAT-APG------L',  # Austria
    'BE': '10YBE----------2',  # Belgium
    'CZ': '10YCZ-CEPS-----N',  # Czech Republic
    'DE_LU': '10Y1001A1001A82H',  # Germany-Luxembourg bidding zone
    'FR': '10YFR-RTE------C',  # France
    'HR': '10YHR-HEP------M',  # Croatia
    'HU': '10YHU-MAVIR----U',  # Hungary
    'NL': '10YNL----------L',  # Netherlands
    'PL': '10YPL-AREA-----S',  # Poland
    'RO': '10YRO-TEL------P',  # Romania
    'SI': '10YSI-ELES-----O',  # Slovenia
    'SK': '10YSK-SEPS-----K',  # Slovakia
}

# Generation types mapping: ENTSOE API PsrType code -> snake_case name.
# Codes follow the ENTSO-E PsrType code list. In particular B18 is wind
# offshore, B19 is wind onshore, and fossil oil is B06 (B03 is fossil
# coal-derived gas, which is not part of this mapping).
GENERATION_TYPES = {
    'B16': 'solar',  # Solar
    'B18': 'wind_offshore',  # Wind offshore
    'B19': 'wind_onshore',  # Wind onshore
    'B01': 'biomass',  # Biomass
    'B10': 'hydro_pumped',  # Hydro pumped storage
    'B11': 'hydro_run',  # Hydro run-of-river
    'B12': 'hydro_reservoir',  # Hydro reservoir
    'B14': 'nuclear',  # Nuclear
    'B02': 'fossil_brown_coal',  # Fossil brown coal/lignite
    'B05': 'fossil_coal',  # Fossil hard coal
    'B04': 'fossil_gas',  # Fossil gas
    'B06': 'fossil_oil',  # Fossil oil
}

# Sample window (matches the JAO sample): 2025-09-23 00:00 UTC up to
# 2025-09-30 00:00 UTC, both built as timezone-aware timestamps.
START_DATE, END_DATE = (
    pd.Timestamp(day, tz='UTC') for day in ('2025-09-23', '2025-09-30')
)

# Run banner: title framed by two rules, the run parameters, then a blank line
_RULE = "=" * 70
_banner_lines = (
    _RULE,
    "ENTSOE 1-Week Sample Data Collection",
    _RULE,
    f"Period: {START_DATE.date()} to {END_DATE.date()}",
    f"Zones: {len(FBMC_ZONES)} Core FBMC zones",
    "Duration: 7 days = 168 hours",
    "",  # trailing empty entry yields the blank separator line
)
print("\n".join(_banner_lines))

# Collect data
def _prepare_zone_frame(gen_df, zone_code):
    """Return a copy of one zone's generation frame, ready to be stacked.

    Args:
        gen_df: DataFrame returned by ``client.query_generation`` for one zone.
        zone_code: Short zone label (a key of ``FBMC_ZONES``) written to the
            added ``zone`` column.

    Returns:
        A new DataFrame; ``gen_df`` itself is left untouched.
    """
    frame = gen_df.copy()

    # NOTE(review): entsoe-py appears to localise each result to the queried
    # area's own time zone - confirm. Stacking indexes that carry different
    # time zones degrades the combined index to object dtype, so every zone
    # is put on UTC (the zone the query window is expressed in) first.
    if isinstance(frame.index, pd.DatetimeIndex) and frame.index.tz is not None:
        frame.index = frame.index.tz_convert('UTC')

    # NOTE(review): for zones reporting both generation and consumption,
    # entsoe-py appears to return two-level column labels such as
    # (type, 'Actual Aggregated') - confirm. Join the levels into one string
    # so every column name is a plain string once the zones are combined
    # and written out. Single-level columns are left as they are.
    if isinstance(frame.columns, pd.MultiIndex):
        frame.columns = [
            ' - '.join(str(part) for part in label if str(part))
            for label in frame.columns
        ]

    frame['zone'] = zone_code
    return frame


# One prepared DataFrame per zone that was collected successfully
all_generation = []

for zone_code, zone_eic in FBMC_ZONES.items():
    print(f"\n[{zone_code}] Collecting generation data...")

    try:
        # Query generation by type
        gen_df = client.query_generation(
            zone_eic,
            start=START_DATE,
            end=END_DATE,
            psr_type=None  # Get all generation types
        )

        # Check the type before touching the result: assigning ['zone'] on a
        # Series would add a bogus entry to it instead of a column.
        if not isinstance(gen_df, pd.DataFrame):
            print(f"  [WARNING] Unexpected format: {type(gen_df)}")
            continue

        all_generation.append(_prepare_zone_frame(gen_df, zone_code))
        print(f"  [OK] Collected {len(gen_df)} rows")

    except Exception as e:
        # Best effort: a single failing zone must not abort the whole run
        print(f"  [ERROR] {e}")
        continue

# Every zone failed: there is nothing to combine, so stop with an error code
if not all_generation:
    print("\n[ERROR] No data collected - check API key and zone codes")
    sys.exit(1)

# Announce the processing stage
print(f"\n{'=' * 70}")
print("Processing collected data...")

# Stack the per-zone frames row-wise and move the index into a regular column.
# rename() ignores labels that are absent, so the column only becomes
# "timestamp" when reset_index() produced one called "index".
combined_df = (
    pd.concat(all_generation, axis=0)
    .reset_index()
    .rename(columns={'index': 'timestamp'})
)

# Quick summary of what was assembled
_column_names = list(combined_df.columns)
print(f"  Combined shape: {combined_df.shape}")
print(f"  Columns: {_column_names}")

# Write the combined frame as parquet under data/raw/sample/ (path is relative
# to the current working directory); the folder is created when missing.
output_dir = Path("data/raw/sample")
output_dir.mkdir(parents=True, exist_ok=True)
output_file = output_dir.joinpath("entsoe_sample_sept2025.parquet")

combined_df.to_parquet(output_file, index=False)

# Report the destination and the resulting file size in kilobytes
_size_kb = output_file.stat().st_size / 1024
print(f"\n[SUCCESS] Saved to: {output_file}")
print(f"  File size: {_size_kb:.1f} KB")
print()

# Closing banner followed by a pointer to the next step
_closing_rule = "=" * 70
for _line in (_closing_rule, "ENTSOE Sample Collection Complete", _closing_rule):
    print(_line)
print("\nNext: Add ENTSOE exploration to Marimo notebook")