Spaces:

evgueni-p
/

fbmc-chronos2

Sleeping

fbmc-chronos2 / scripts /collect_entsoe_sample.py

Evgueni Poloukarov

feat: complete Phase 1 ENTSO-E asset-specific outage validation

27cb60a about 1 month ago

4.13 kB

	"""
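	Collect a 1-week ENTSO-E sample dataset for Sept 23-30, 2025.

	Queries generation by type for each of the 12 Core FBMC zones:
	- Wind, Solar, Thermal, Hydro, Nuclear generation

	The period matches the JAO sample so both datasets can be analysed together.
	Requires ENTSOE_API_KEY in a .env file; the combined result is written to
	data/raw/sample/entsoe_sample_sept2025.parquet.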
	"""

	import os
	import sys
	from pathlib import Path
	from datetime import datetime, timedelta
	import pandas as pd
	from entsoe import EntsoePandasClient
	from dotenv import load_dotenv

	# Make the sibling src/ directory (one level above scripts/) importable
	sys.path.insert(0, str(Path(__file__).parent.parent.joinpath("src")))

	# Pull variables from .env into the environment, then read the API token
	load_dotenv()
	API_KEY = os.environ.get('ENTSOE_API_KEY')

	# Abort early with setup instructions when the token is missing or empty
	if not API_KEY:
	    print(
	        "[ERROR] ENTSOE_API_KEY not found in .env file",
	        "Please add: ENTSOE_API_KEY=your_key_here",
	        sep="\n",
	    )
	    sys.exit(1)

	# Single client instance reused for all ENTSO-E queries in this script
	client = EntsoePandasClient(api_key=API_KEY)

	# Core FBMC zones (12 total): short zone code -> ENTSO-E EIC area code
	FBMC_ZONES = {
	    'AT': '10YAT-APG------L',     # Austria
	    'BE': '10YBE----------2',     # Belgium
	    'CZ': '10YCZ-CEPS-----N',     # Czech Republic
	    # 10Y1001A1001A82H is the DE-LU bidding zone. The code used previously,
	    # 10Y1001A1001A83F, is the EIC of Germany as a country, which does not
	    # match the 'DE_LU' label of this entry.
	    'DE_LU': '10Y1001A1001A82H',  # Germany-Luxembourg
	    'FR': '10YFR-RTE------C',     # France
	    'HR': '10YHR-HEP------M',     # Croatia
	    'HU': '10YHU-MAVIR----U',     # Hungary
	    'NL': '10YNL----------L',     # Netherlands
	    'PL': '10YPL-AREA-----S',     # Poland
	    'RO': '10YRO-TEL------P',     # Romania
	    'SI': '10YSI-ELES-----O',     # Slovenia
	    'SK': '10YSK-SEPS-----K',     # Slovakia
	}

	# Generation types mapping: ENTSO-E production-type (PsrType) code -> label.
	# Codes follow the ENTSO-E PsrType code list: B18 is Wind Offshore, B19 is
	# Wind Onshore, and Fossil Oil is B06 (B03 is Fossil Coal-derived gas).
	GENERATION_TYPES = {
	    'B16': 'solar',              # Solar
	    'B18': 'wind_offshore',      # Wind offshore
	    'B19': 'wind_onshore',       # Wind onshore
	    'B01': 'biomass',            # Biomass
	    'B10': 'hydro_pumped',       # Hydro pumped storage
	    'B11': 'hydro_run',          # Hydro run-of-river
	    'B12': 'hydro_reservoir',    # Hydro reservoir
	    'B14': 'nuclear',            # Nuclear
	    'B02': 'fossil_brown_coal',  # Fossil brown coal/lignite
	    'B05': 'fossil_coal',        # Fossil hard coal
	    'B04': 'fossil_gas',         # Fossil gas
	    'B06': 'fossil_oil',         # Fossil oil
	}

	# Sample period: Sept 23-30, 2025 (matches JAO sample)
	START_DATE = pd.Timestamp('2025-09-23', tz='UTC')
	END_DATE = pd.Timestamp('2025-09-30', tz='UTC')

	# Derive the reported duration from the constants above so the banner
	# cannot drift from the period that is actually queried.
	_sample_span = END_DATE - START_DATE

	print("=" * 70)
	print("ENTSOE 1-Week Sample Data Collection")
	print("=" * 70)
	print(f"Period: {START_DATE.date()} to {END_DATE.date()}")
	print(f"Zones: {len(FBMC_ZONES)} Core FBMC zones")
	print(f"Duration: {_sample_span.days} days = {_sample_span // pd.Timedelta(hours=1)} hours")
	print()

	def _prepare_zone_frame(gen_df, zone_code):
	    """Return a copy of one zone's generation frame that is safe to concatenate.

	    Args:
	        gen_df: DataFrame returned by ``client.query_generation`` for one zone.
	        zone_code: Short zone label (a key of ``FBMC_ZONES``) stored in the
	            added ``zone`` column.

	    Returns:
	        A new DataFrame with flat string column names, a UTC index when the
	        input index is timezone-aware, and a ``zone`` column.
	    """
	    frame = gen_df.copy()

	    # NOTE(review): entsoe-py appears to return two-level columns
	    # (production type, "Actual Aggregated"/"Actual Consumption") for zones
	    # that report consumption and flat columns otherwise - confirm against
	    # the installed version. Flattening keeps every zone on plain string
	    # column names so the frames line up when concatenated.
	    if isinstance(frame.columns, pd.MultiIndex):
	        frame.columns = [
	            " - ".join(str(part) for part in col if str(part))
	            for col in frame.columns
	        ]

	    # NOTE(review): entsoe-py appears to index each result in the zone's
	    # local timezone - confirm. Converting to UTC (the timezone of
	    # START_DATE/END_DATE) keeps one datetime dtype across all zones.
	    if isinstance(frame.index, pd.DatetimeIndex) and frame.index.tz is not None:
	        frame.index = frame.index.tz_convert('UTC')

	    # Add zone identifier
	    frame['zone'] = zone_code
	    return frame


	# Collect data: one generation frame per zone that answered successfully
	all_generation = []

	for zone_code, zone_eic in FBMC_ZONES.items():
	    print(f"\n[{zone_code}] Collecting generation data...")

	    try:
	        # Query generation by type
	        gen_df = client.query_generation(
	            zone_eic,
	            start=START_DATE,
	            end=END_DATE,
	            psr_type=None  # Get all generation types
	        )

	        # Validate the result type BEFORE touching it; previously the zone
	        # column was assigned first, so a non-DataFrame result was mutated
	        # and only then reported.
	        if not isinstance(gen_df, pd.DataFrame):
	            print(f" [WARNING] Unexpected format: {type(gen_df)}")
	            continue

	        all_generation.append(_prepare_zone_frame(gen_df, zone_code))
	        print(f" [OK] Collected {len(gen_df)} rows")

	    except Exception as e:
	        # Best-effort collection: report the failing zone and move on so a
	        # single bad zone does not abort the whole run.
	        print(f" [ERROR] {e}")
	        continue

	# Nothing usable came back from any zone: stop with a non-zero exit status
	if not all_generation:
	    print("\n[ERROR] No data collected - check API key and zone codes")
	    sys.exit(1)

	# Stack the per-zone frames into one table
	print("\n" + "=" * 70)
	print("Processing collected data...")

	# Move the index into a regular column; an unnamed index comes out of
	# reset_index() as 'index' and is relabelled 'timestamp'
	combined_df = pd.concat(all_generation, axis=0).reset_index()
	if 'index' in combined_df.columns:
	    combined_df = combined_df.rename(columns={'index': 'timestamp'})

	print(f" Combined shape: {combined_df.shape}")
	print(f" Columns: {list(combined_df.columns)}")

	# Write the combined table to parquet, creating the target folder if needed
	output_dir = Path("data/raw/sample")
	output_dir.mkdir(parents=True, exist_ok=True)
	output_file = output_dir.joinpath("entsoe_sample_sept2025.parquet")
	combined_df.to_parquet(output_file, index=False)

	# Report where the file landed and how large it is, then the closing banner
	print(f"\n[SUCCESS] Saved to: {output_file}")
	print(f" File size: {output_file.stat().st_size / 1024:.1f} KB")
	print()
	print("=" * 70, "ENTSOE Sample Collection Complete", "=" * 70, sep="\n")
	print("\nNext: Add ENTSOE exploration to Marimo notebook")