Spaces:
Sleeping
Sleeping
| """ | |
| Phase 1D: Comprehensive FBMC Border Query for Asset-Specific Outages | |
| ===================================================================== | |
| Queries all FBMC borders systematically to maximize CNEC coverage. | |
| Approach: | |
| 1. Define all FBMC bidding zone EIC codes | |
| 2. Query transmission outages for all border pairs | |
| 3. Parse XML to extract Asset_RegisteredResource.mRID from each | |
| 4. Aggregate all extracted EICs and match against 200 CNEC list | |
| 5. Report coverage statistics | |
| Expected outcome: 40-80% CNEC coverage (80-165 features) | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| import pandas as pd | |
| import polars as pl | |
| import zipfile | |
| from io import BytesIO | |
| import xml.etree.ElementTree as ET | |
| from dotenv import load_dotenv | |
| from entsoe import EntsoePandasClient | |
| import time | |
| sys.path.append(str(Path(__file__).parent.parent)) | |
| load_dotenv() | |
| API_KEY = os.getenv('ENTSOE_API_KEY') | |
| client = EntsoePandasClient(api_key=API_KEY) | |
| print("="*80) | |
| print("PHASE 1D: COMPREHENSIVE FBMC BORDER QUERY") | |
| print("="*80) | |
| print() | |
| # ============================================================================ | |
| # FBMC Bidding Zones (EIC Codes) | |
| # ============================================================================ | |
| FBMC_ZONES = { | |
| 'AT': '10YAT-APG------L', # Austria | |
| 'BE': '10YBE----------2', # Belgium | |
| 'HR': '10YHR-HEP------M', # Croatia | |
| 'CZ': '10YCZ-CEPS-----N', # Czech Republic | |
| 'FR': '10YFR-RTE------C', # France | |
| 'DE_LU': '10Y1001A1001A82H', # Germany-Luxembourg | |
| 'HU': '10YHU-MAVIR----U', # Hungary | |
| 'NL': '10YNL----------L', # Netherlands | |
| 'PL': '10YPL-AREA-----S', # Poland | |
| 'RO': '10YRO-TEL------P', # Romania | |
| 'SK': '10YSK-SEPS-----K', # Slovakia | |
| 'SI': '10YSI-ELES-----O', # Slovenia | |
| 'CH': '10YCH-SWISSGRIDZ' # Switzerland (also part of FBMC) | |
| } | |
| # ============================================================================ | |
| # FBMC Border Pairs (Known Interconnections) | |
| # ============================================================================ | |
| # Based on European transmission network topology | |
| FBMC_BORDERS = [ | |
| # Germany-Luxembourg borders | |
| ('DE_LU', 'FR'), | |
| ('DE_LU', 'BE'), | |
| ('DE_LU', 'NL'), | |
| ('DE_LU', 'AT'), | |
| ('DE_LU', 'CZ'), | |
| ('DE_LU', 'PL'), | |
| ('DE_LU', 'CH'), | |
| # France borders | |
| ('FR', 'BE'), | |
| ('FR', 'CH'), | |
| # Austria borders | |
| ('AT', 'CZ'), | |
| ('AT', 'HU'), | |
| ('AT', 'SI'), | |
| ('AT', 'CH'), | |
| # Czech Republic borders | |
| ('CZ', 'SK'), | |
| ('CZ', 'PL'), | |
| # Poland borders | |
| ('PL', 'SK'), | |
| # Slovakia borders | |
| ('SK', 'HU'), | |
| # Hungary borders | |
| ('HU', 'RO'), | |
| ('HU', 'HR'), | |
| ('HU', 'SI'), | |
| # Slovenia borders | |
| ('SI', 'HR'), | |
| # Belgium borders | |
| ('BE', 'NL'), | |
| ] | |
| print(f"FBMC Bidding Zones: {len(FBMC_ZONES)}") | |
| print(f"Border Pairs to Query: {len(FBMC_BORDERS)}") | |
| print() | |
| # ============================================================================ | |
| # Load CNEC EIC Codes | |
| # ============================================================================ | |
| print("Loading 200 CNEC EIC codes...") | |
| cnec_file = Path(__file__).parent.parent / 'data' / 'processed' / 'critical_cnecs_all.csv' | |
| cnec_df = pl.read_csv(cnec_file) | |
| cnec_eics = cnec_df.select('cnec_eic').to_series().to_list() | |
| print(f"[OK] Loaded {len(cnec_eics)} CNEC EICs") | |
| print() | |
| # ============================================================================ | |
| # Query All Borders for Transmission Outages | |
| # ============================================================================ | |
| print("-"*80) | |
| print("QUERYING ALL FBMC BORDERS") | |
| print("-"*80) | |
| print() | |
| all_extracted_eics = [] | |
| border_results = {} | |
| start_time = time.time() | |
| query_count = 0 | |
| for i, (zone1, zone2) in enumerate(FBMC_BORDERS, 1): | |
| border_name = f"{zone1} -> {zone2}" | |
| print(f"[{i}/{len(FBMC_BORDERS)}] {border_name}...") | |
| try: | |
| # Query transmission outages for this border | |
| response = client._base_request( | |
| params={ | |
| 'documentType': 'A78', # Transmission unavailability | |
| 'in_Domain': FBMC_ZONES[zone2], | |
| 'out_Domain': FBMC_ZONES[zone1] | |
| }, | |
| start=pd.Timestamp('2025-09-23', tz='UTC'), | |
| end=pd.Timestamp('2025-09-30', tz='UTC') | |
| ) | |
| outages_zip = response.content | |
| query_count += 1 | |
| # Parse ZIP and extract Asset_RegisteredResource.mRID | |
| border_eics = [] | |
| with zipfile.ZipFile(BytesIO(outages_zip), 'r') as zf: | |
| xml_files = [f for f in zf.namelist() if f.endswith('.xml')] | |
| for xml_file in xml_files: | |
| with zf.open(xml_file) as xf: | |
| xml_content = xf.read() | |
| root = ET.fromstring(xml_content) | |
| # Get namespace | |
| nsmap = dict([node for _, node in ET.iterparse(BytesIO(xml_content), events=['start-ns'])]) | |
| ns_uri = nsmap.get('', None) | |
| # Find TimeSeries elements | |
| if ns_uri: | |
| timeseries_found = root.findall('.//{' + ns_uri + '}TimeSeries') | |
| else: | |
| timeseries_found = root.findall('.//TimeSeries') | |
| for ts in timeseries_found: | |
| # Extract Asset_RegisteredResource.mRID | |
| if ns_uri: | |
| reg_resource = ts.find('.//{' + ns_uri + '}Asset_RegisteredResource') | |
| else: | |
| reg_resource = ts.find('.//Asset_RegisteredResource') | |
| if reg_resource is not None: | |
| if ns_uri: | |
| mrid_elem = reg_resource.find('.//{' + ns_uri + '}mRID') | |
| else: | |
| mrid_elem = reg_resource.find('.//mRID') | |
| if mrid_elem is not None: | |
| eic_code = mrid_elem.text | |
| border_eics.append(eic_code) | |
| # Store results | |
| unique_border_eics = list(set(border_eics)) | |
| border_matches = [eic for eic in unique_border_eics if eic in cnec_eics] | |
| border_results[border_name] = { | |
| 'total_eics': len(unique_border_eics), | |
| 'cnec_matches': len(border_matches), | |
| 'matched_eics': border_matches | |
| } | |
| all_extracted_eics.extend(border_eics) | |
| print(f" EICs extracted: {len(unique_border_eics)}, CNEC matches: {len(border_matches)}") | |
| # Rate limiting: 27 requests per minute | |
| if i < len(FBMC_BORDERS): | |
| time.sleep(2.2) | |
| except Exception as e: | |
| print(f" [FAIL] {e}") | |
| border_results[border_name] = { | |
| 'total_eics': 0, | |
| 'cnec_matches': 0, | |
| 'matched_eics': [], | |
| 'error': str(e) | |
| } | |
| total_time = time.time() - start_time | |
| print() | |
| print("="*80) | |
| print("AGGREGATED RESULTS") | |
| print("="*80) | |
| print() | |
| # Aggregate statistics | |
| unique_eics = list(set(all_extracted_eics)) | |
| cnec_matches = [eic for eic in unique_eics if eic in cnec_eics] | |
| match_rate = len(cnec_matches) / len(cnec_eics) * 100 | |
| print(f"Query Statistics:") | |
| print(f" Borders queried: {query_count}") | |
| print(f" Total time: {total_time / 60:.1f} minutes") | |
| print(f" Avg time per border: {total_time / query_count:.1f} seconds") | |
| print() | |
| print(f"EIC Extraction Results:") | |
| print(f" Total asset EICs extracted: {len(all_extracted_eics)} (with duplicates)") | |
| print(f" Unique asset EICs: {len(unique_eics)}") | |
| print() | |
| print(f"CNEC Matching Results:") | |
| print(f" CNEC EICs matched: {len(cnec_matches)} / {len(cnec_eics)}") | |
| print(f" Match rate: {match_rate:.1f}%") | |
| print() | |
| # ============================================================================ | |
| # Detailed Border Breakdown | |
| # ============================================================================ | |
| print("-"*80) | |
| print("BORDER-BY-BORDER BREAKDOWN") | |
| print("-"*80) | |
| print() | |
| # Sort borders by number of CNEC matches (descending) | |
| sorted_borders = sorted( | |
| border_results.items(), | |
| key=lambda x: x[1]['cnec_matches'], | |
| reverse=True | |
| ) | |
| for border_name, result in sorted_borders: | |
| if result['cnec_matches'] > 0: | |
| print(f"{border_name}:") | |
| print(f" Total EICs: {result['total_eics']}") | |
| print(f" CNEC matches: {result['cnec_matches']}") | |
| # Show matched CNEC names | |
| for eic in result['matched_eics'][:5]: # First 5 | |
| try: | |
| cnec_name = cnec_df.filter(pl.col('cnec_eic') == eic).select('cnec_name').item(0, 0) | |
| print(f" - {eic}: {cnec_name}") | |
| except: | |
| print(f" - {eic}") | |
| if result['cnec_matches'] > 5: | |
| print(f" ... and {result['cnec_matches'] - 5} more") | |
| print() | |
| print() | |
| # ============================================================================ | |
| # Coverage Analysis | |
| # ============================================================================ | |
| print("="*80) | |
| print("COVERAGE ANALYSIS") | |
| print("="*80) | |
| print() | |
| if match_rate >= 80: | |
| print(f"[EXCELLENT] {match_rate:.1f}% CNEC coverage achieved!") | |
| print(f">> Can implement {len(cnec_matches)}-feature asset-specific outages") | |
| print(f">> Exceeds 80% target - comprehensive coverage") | |
| elif match_rate >= 40: | |
| print(f"[GOOD] {match_rate:.1f}% CNEC coverage achieved!") | |
| print(f">> Can implement {len(cnec_matches)}-feature asset-specific outages") | |
| print(f">> Meets 40-80% target range") | |
| elif match_rate >= 20: | |
| print(f"[PARTIAL] {match_rate:.1f}% CNEC coverage") | |
| print(f">> Can implement {len(cnec_matches)}-feature asset-specific outages") | |
| print(f">> Below 40% target but still useful") | |
| else: | |
| print(f"[LIMITED] {match_rate:.1f}% CNEC coverage") | |
| print(f">> Only {len(cnec_matches)} CNECs matched") | |
| print(f">> May need to investigate EIC code mapping or alternative approaches") | |
| print() | |
| # ============================================================================ | |
| # Non-Matching EICs (for investigation) | |
| # ============================================================================ | |
| non_matches = [eic for eic in unique_eics if eic not in cnec_eics] | |
| if non_matches: | |
| print("-"*80) | |
| print("NON-MATCHING TRANSMISSION ELEMENT EICs") | |
| print("-"*80) | |
| print() | |
| print(f"Total non-matching EICs: {len(non_matches)}") | |
| print() | |
| print("Sample non-matching EICs (first 20):") | |
| for eic in non_matches[:20]: | |
| print(f" - {eic}") | |
| if len(non_matches) > 20: | |
| print(f" ... and {len(non_matches) - 20} more") | |
| print() | |
| print("These are transmission elements NOT in the 200 CNEC list.") | |
| print("They may be:") | |
| print(" 1. Non-critical transmission lines (not in JAO CNEC list)") | |
| print(" 2. Internal lines (not cross-border)") | |
| print(" 3. Different EIC code format (JAO vs ENTSO-E)") | |
| print() | |
| # ============================================================================ | |
| # SUMMARY & NEXT STEPS | |
| # ============================================================================ | |
| print("="*80) | |
| print("PHASE 1D SUMMARY") | |
| print("="*80) | |
| print() | |
| print(f"Asset-Specific Transmission Outages: {len(cnec_matches)} features") | |
| print(f" Coverage: {match_rate:.1f}% of 200 CNECs") | |
| print(f" Implementation: Parse border-level XML, filter to CNEC EICs") | |
| print() | |
| print("Combined ENTSO-E Features (Estimated):") | |
| print(f" - Generation (12 zones × 8 types): 96 features") | |
| print(f" - Demand (12 zones): 12 features") | |
| print(f" - Day-ahead prices (12 zones): 12 features") | |
| print(f" - Hydro reservoirs (7 zones): 7 features") | |
| print(f" - Pumped storage generation (7 zones): 7 features") | |
| print(f" - Load forecasts (12 zones): 12 features") | |
| print(f" - Transmission outages (asset-specific): {len(cnec_matches)} features") | |
| print(f" - Generation outages (nuclear): ~20 features") | |
| print(f" TOTAL ENTSO-E: {146 + len(cnec_matches)} features") | |
| print() | |
| print("Combined with JAO (726 features):") | |
| print(f" GRAND TOTAL: {726 + 146 + len(cnec_matches)} features") | |
| print() | |
| print("="*80) | |
| print("NEXT STEPS:") | |
| print("1. Extend collect_entsoe.py with XML parsing method") | |
| print("2. Implement process_entsoe_features.py for outage encoding") | |
| print("3. Collect 24-month historical ENTSO-E data") | |
| print("4. Create ENTSO-E features EDA notebook") | |
| print("5. Merge JAO + ENTSO-E features") | |
| print("="*80) | |