fbmc-chronos2 / scripts /find_alegro_real_eic.py
Evgueni Poloukarov
feat: Alegro HVDC investigation complete - documented data limitation
0df759f
raw
history blame
7.23 kB
"""
Find the real Alegro HVDC cable EIC code from ENTSO-E transmission outages.
Strategy: Query BE-DE border transmission outages, parse XML to extract all
Asset_RegisteredResource.mRID codes, and identify which one is Alegro HVDC.
Author: Claude + Evgueni Poloukarov
Date: 2025-11-09
"""
import sys
from pathlib import Path
import pandas as pd
from entsoe import EntsoePandasClient
from dotenv import load_dotenv
import os
import zipfile
from io import BytesIO
import xml.etree.ElementTree as ET
# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / 'src'))
# Load environment
load_dotenv()
def find_alegro_eic():
"""Find Alegro EIC by querying BE-DE border outages and parsing XML."""
print("=" * 80)
print("FINDING ALEGRO REAL EIC CODE FROM ENTSO-E")
print("=" * 80)
print()
api_key = os.getenv('ENTSOE_API_KEY')
if not api_key:
print("[ERROR] ENTSOE_API_KEY not found in .env")
return None
client = EntsoePandasClient(api_key=api_key)
# Belgium and Germany EIC codes
be_eic = '10YBE----------2'
de_eic = '10Y1001A1001A82H' # Germany-Luxembourg
print(f"Querying BE-DE border transmission outages...")
print(f" Belgium EIC: {be_eic}")
print(f" Germany EIC: {de_eic}")
print()
# Query recent period (last 3 months) to get sample data
start = pd.Timestamp('2024-09-01', tz='UTC')
end = pd.Timestamp('2024-11-09', tz='UTC')
print(f"Period: {start} to {end}")
print()
try:
# Query using _base_request to get raw XML
print("Making API request...")
response = client._base_request(
params={
'documentType': 'A78', # Transmission unavailability
'in_Domain': de_eic,
'out_Domain': be_eic
},
start=start,
end=end
)
print(f"[SUCCESS] Got response ({len(response.content)} bytes)")
print()
# Parse ZIP file
outages_zip = response.content
print("Parsing ZIP file...")
with zipfile.ZipFile(BytesIO(outages_zip), 'r') as zf:
xml_files = [f for f in zf.namelist() if f.endswith('.xml')]
print(f" Found {len(xml_files)} XML files")
print()
all_assets = {} # EIC -> (name, count)
for xml_file in xml_files:
with zf.open(xml_file) as xf:
xml_content = xf.read()
root = ET.fromstring(xml_content)
# Get namespace
nsmap = dict([node for _, node in ET.iterparse(
BytesIO(xml_content), events=['start-ns']
)])
ns_uri = nsmap.get('', None)
# Find TimeSeries elements
if ns_uri:
timeseries_found = root.findall('.//{' + ns_uri + '}TimeSeries')
else:
timeseries_found = root.findall('.//TimeSeries')
for ts in timeseries_found:
# Extract Asset_RegisteredResource.mRID
if ns_uri:
reg_resource = ts.find('.//{' + ns_uri + '}Asset_RegisteredResource')
else:
reg_resource = ts.find('.//Asset_RegisteredResource')
if reg_resource is not None:
# Get asset EIC and name
if ns_uri:
mrid_elem = reg_resource.find('.//{' + ns_uri + '}mRID')
name_elem = reg_resource.find('.//{' + ns_uri + '}name')
else:
mrid_elem = reg_resource.find('.//mRID')
name_elem = reg_resource.find('.//name')
if mrid_elem is not None:
asset_eic = mrid_elem.text
asset_name = name_elem.text if name_elem is not None else '(no name)'
# Count occurrences
if asset_eic in all_assets:
all_assets[asset_eic] = (asset_name, all_assets[asset_eic][1] + 1)
else:
all_assets[asset_eic] = (asset_name, 1)
print("=" * 80)
print(f"FOUND {len(all_assets)} UNIQUE TRANSMISSION ASSETS ON BE-DE BORDER")
print("=" * 80)
print()
# Sort by count (descending) to see most frequently affected assets
sorted_assets = sorted(all_assets.items(), key=lambda x: x[1][1], reverse=True)
print(f"{'EIC Code':<20} {'Asset Name':<50} {'Outages':<10}")
print("-" * 80)
alegro_candidates = []
for eic, (name, count) in sorted_assets:
print(f"{eic:<20} {name:<50} {count:<10}")
# Identify Alegro by name keywords
name_lower = name.lower()
if any(keyword in name_lower for keyword in ['alegro', 'aachen', 'liege', 'oberzier', 'lixhe', 'alde']):
alegro_candidates.append((eic, name, count))
print()
print("=" * 80)
if alegro_candidates:
print("ALEGRO CANDIDATES FOUND:")
print("=" * 80)
for eic, name, count in alegro_candidates:
print(f" EIC: {eic}")
print(f" Name: {name}")
print(f" Outages: {count}")
print()
# Return the first candidate (most likely)
alegro_eic = alegro_candidates[0][0]
alegro_name = alegro_candidates[0][1]
print(f"[IDENTIFIED] Alegro EIC: {alegro_eic}")
print(f"[IDENTIFIED] Alegro Name: {alegro_name}")
print()
return alegro_eic, alegro_name
else:
print("[WARNING] No Alegro assets found in BE-DE border outages")
print("Possible reasons:")
print(" 1. Alegro had no outages in the query period")
print(" 2. HVDC outages are reported differently (separate endpoint)")
print(" 3. Alegro is classified under a different border/domain")
print()
print("Try:")
print(" 1. Expanding date range (query last 12-24 months)")
print(" 2. Querying bidirectional (DE->BE as well as BE->DE)")
print(" 3. Checking if HVDC requires different documentType")
print()
return None, None
except Exception as e:
print(f"[ERROR] Failed to query transmission outages: {e}")
import traceback
traceback.print_exc()
return None, None
if __name__ == '__main__':
alegro_eic, alegro_name = find_alegro_eic()
if alegro_eic:
print()
print("=" * 80)
print("NEXT STEPS:")
print("=" * 80)
print(f"1. Replace custom Alegro EICs in cnecs_alegro_8.csv with: {alegro_eic}")
print(f"2. Update master CNEC list to use real EIC code")
print(f"3. Re-run asset-specific outage collection with real EIC")
print()