Spaces:

maaroufabousaleh
/

advisorai-data-enhanced

Running

advisorai-data-enhanced / src /merge /norm /crypto.py

Maaroufabousaleh

c49b21b 4 months ago

31.4 kB

	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, LabelEncoder, PowerTransformer
	import json
	import pickle
	from datetime import datetime
	import warnings
	warnings.filterwarnings('ignore')
	import os

	class CryptoDataNormalizer:
	    """
	    Enhanced normalization pipeline for cryptocurrency features data with crypto-specific handling.

	    Columns are sorted into feature groups by name and dtype (see
	    ``_categorize_features``). Each group is scaled with its own scaler,
	    categorical columns are label-encoded, timestamp columns are expanded into
	    calendar features and, optionally, a few engineered features are derived
	    from the raw (unscaled) input values.
	    """

	    # Groups whose columns are IQR-clipped before scaling when handle_outliers is on.
	    _OUTLIER_CLIPPED_TYPES = ('crypto_specific_features', 'price_volume_features')

	    # Groups that receive a scaler, in the order transform() processes them.
	    _SCALED_TYPES = (
	        'crypto_specific_features', 'price_volume_features', 'exchange_features',
	        'performance_features', 'rank_diff_features', 'technical_features',
	        'social_features', 'transaction_features', 'quality_features', 'numerical_features',
	    )

	    def __init__(self, preserve_symbol=True, handle_outliers=True, feature_engineering=True):
	        """
	        Args:
	            preserve_symbol: copy the 'symbol' column into the output unchanged.
	            handle_outliers: clip rank/dominance and price/volume columns to
	                IQR bounds learned during fit.
	            feature_engineering: append the engineered crypto features.
	        """
	        self.scalers = {}          # feature group name -> fitted scaler
	        self.encoders = {}         # categorical column -> fitted LabelEncoder
	        self.feature_info = {}     # group name -> columns, plus '<group>_existing' after fit
	        self.is_fitted = False
	        self.preserve_symbol = preserve_symbol
	        self.handle_outliers = handle_outliers
	        self.feature_engineering = feature_engineering
	        self.outlier_bounds = {}   # column -> (lower_bound, upper_bound)

	    def _detect_outliers(self, df, column):
	        """Detect outliers using IQR method; returns (lower_bound, upper_bound)."""
	        q1 = df[column].quantile(0.25)
	        q3 = df[column].quantile(0.75)
	        iqr = q3 - q1
	        return q1 - 1.5 * iqr, q3 + 1.5 * iqr

	    def _handle_outliers(self, df, column, method='clip'):
	        """
	        Handle outliers in numerical data.

	        Bounds are computed on first use of a column and cached in
	        ``self.outlier_bounds``; later calls reuse the cached bounds.

	        Args:
	            df: frame holding ``column``.
	            column: column name to process.
	            method: 'clip' limits values to the bounds, 'remove' replaces
	                out-of-bounds values with NaN, anything else returns the
	                column unchanged.
	        """
	        if column not in self.outlier_bounds:
	            self.outlier_bounds[column] = self._detect_outliers(df, column)
	        lower_bound, upper_bound = self.outlier_bounds[column]

	        if method == 'clip':
	            return df[column].clip(lower_bound, upper_bound)
	        if method == 'remove':
	            return df[column].where((df[column] >= lower_bound) & (df[column] <= upper_bound))
	        return df[column]

	    def _categorize_features(self, df):
	        """
	        Enhanced feature categorization for crypto data.

	        Groups are assigned in a fixed priority order; a column taken by an
	        earlier group is skipped by later ones. The three prefix groups
	        ('exchangePrices.', 'performance.', 'rankDiffs.') are the exception:
	        they match on prefix alone, so such a column can additionally sit in
	        an earlier keyword group (an 'exchangePrices.*' name also contains
	        'price', a 'rankDiffs.*' name also contains 'rank').

	        Returns:
	            dict mapping group name -> list of column names. 'id_features' is
	            the fixed identifier list whether or not those columns exist.
	        """
	        id_features = ['symbol', 'backup_id', '__index_level_0__', 'cg_id']
	        columns = list(df.columns)

	        timestamp_features = [col for col in columns if 'timestamp' in col.lower()]

	        # Every column already assigned to a group. A set keeps the membership
	        # test O(1) instead of re-concatenating lists for each column.
	        claimed = set(id_features) | set(timestamp_features)

	        def take(predicate, exclusive=True):
	            """Collect matching columns (skipping claimed ones if exclusive) and claim them."""
	            picked = [col for col in columns
	                      if (not exclusive or col not in claimed) and predicate(col)]
	            claimed.update(picked)
	            return picked

	        def has_keyword(keywords):
	            """Build a predicate: does the lower-cased column name contain any keyword?"""
	            return lambda col: any(keyword in col.lower() for keyword in keywords)

	        def looks_binary(col):
	            series = df[col]
	            try:
	                unique_vals = set(series.dropna().unique())
	            except TypeError:
	                # Cells holding unhashable values (lists, dicts) cannot be 0/1 flags.
	                unique_vals = None
	            return (series.dtype == bool
	                    or (unique_vals is not None and len(unique_vals) <= 2
	                        and unique_vals.issubset({0, 1}))  # True/False compare equal to 1/0
	                    or col == 'stable')

	        def looks_categorical(col):
	            dtype = df[col].dtype
	            return (dtype == 'object'
	                    or dtype.name == 'category'
	                    or (df[col].nunique() < 20 and dtype in ['int64', 'int32']))

	        # Binary features (0/1, True/False, or boolean-like)
	        binary_features = take(looks_binary)
	        # Categorical features (strings, objects, or low cardinality integers)
	        categorical_features = take(looks_categorical)
	        # Crypto-specific features
	        crypto_specific_features = take(has_keyword(['dominance', 'rank']))
	        # Price/volume/market features
	        price_volume_features = take(has_keyword(['price', 'volume', 'marketcap', 'open']))
	        # Prefix groups: matched regardless of earlier assignments
	        exchange_features = take(lambda col: col.startswith('exchangePrices.'), exclusive=False)
	        performance_features = take(lambda col: col.startswith('performance.'), exclusive=False)
	        rank_diff_features = take(lambda col: col.startswith('rankDiffs.'), exclusive=False)
	        # Technical indicator features
	        technical_features = take(has_keyword(
	            ['rsi', 'macd', 'ema', 'sma', 'bb_', 'cci', 'mfi', 'atr', 'stoch', 'roc']))
	        # Social sentiment features
	        social_features = take(has_keyword(
	            ['social', 'sentiment', 'confidence', 'pos', 'neg', 'neu']))
	        # Transaction/blockchain features
	        transaction_features = take(has_keyword(['transaction', 'tx_', 'gas', 'fees']))
	        # Data quality features
	        quality_features = take(has_keyword(['completeness', 'quality', 'correlation']))
	        # Remaining numerical features
	        numerical_features = take(lambda col: pd.api.types.is_numeric_dtype(df[col]))

	        return {
	            'id_features': id_features,
	            'timestamp_features': timestamp_features,
	            'binary_features': binary_features,
	            'categorical_features': categorical_features,
	            'crypto_specific_features': crypto_specific_features,
	            'price_volume_features': price_volume_features,
	            'exchange_features': exchange_features,
	            'performance_features': performance_features,
	            'rank_diff_features': rank_diff_features,
	            'technical_features': technical_features,
	            'social_features': social_features,
	            'transaction_features': transaction_features,
	            'quality_features': quality_features,
	            'numerical_features': numerical_features
	        }

	    def _engineer_crypto_features(self, df, normalized_df):
	        """
	        Create crypto-specific engineered features.

	        Features are computed from the raw input ``df`` and written as new
	        columns into ``normalized_df``, which is returned. Nothing is added
	        when ``feature_engineering`` is disabled.
	        """
	        if not self.feature_engineering:
	            return normalized_df

	        def finite(values):
	            """Turn +/-inf into NaN so aggregations skip them."""
	            return values.replace([np.inf, -np.inf], np.nan)

	        # Exchange price spread analysis (needs at least two exchanges)
	        exchange_cols = [col for col in df.columns if col.startswith('exchangePrices.')]
	        if len(exchange_cols) > 1:
	            exchange_prices = finite(df[exchange_cols])
	            if not exchange_prices.empty and exchange_prices.notna().any().any():
	                price_mean = exchange_prices.mean(axis=1)
	                # Only calculate if at least one row has a positive mean price
	                if ((price_mean > 0) & price_mean.notna()).any():
	                    price_range = exchange_prices.max(axis=1) - exchange_prices.min(axis=1)
	                    normalized_df['exchange_price_spread'] = (price_range / price_mean).fillna(0)
	                    normalized_df['exchange_price_std'] = (
	                        exchange_prices.std(axis=1) / price_mean).fillna(0)

	        # Performance momentum
	        perf_cols = [col for col in df.columns if col.startswith('performance.')]
	        perf_short_cols = [col for col in perf_cols
	                           if any(timeframe in col for timeframe in ['min1', 'min5', 'min15', 'hour'])]
	        perf_long_cols = [col for col in perf_cols
	                          if any(timeframe in col for timeframe in ['day', 'week', 'month'])]
	        if perf_short_cols:
	            normalized_df['short_term_momentum'] = finite(df[perf_short_cols]).mean(axis=1).fillna(0)
	        if perf_long_cols:
	            normalized_df['long_term_momentum'] = finite(df[perf_long_cols]).mean(axis=1).fillna(0)

	        # Rank stability: 1 when no rank movement, approaching 0 as movement grows
	        rank_diff_cols = [col for col in df.columns if col.startswith('rankDiffs.')]
	        if rank_diff_cols:
	            rank_diffs = finite(df[rank_diff_cols]).fillna(0)
	            normalized_df['rank_stability'] = 1 / (1 + rank_diffs.abs().sum(axis=1) + 1e-8)

	        # Social sentiment aggregation
	        social_sentiment_cols = [col for col in df.columns if 'social_sentiment' in col.lower()]
	        if social_sentiment_cols:
	            normalized_df['avg_social_sentiment'] = (
	                finite(df[social_sentiment_cols]).mean(axis=1).fillna(0.5))  # Neutral sentiment

	        # Technical strength (similar to stocks but crypto-focused)
	        tech_cols = [col for col in df.columns
	                     if any(tech in col.lower() for tech in ['rsi', 'macd', 'cci'])]
	        if tech_cols:
	            normalized_df['technical_strength'] = finite(df[tech_cols]).mean(axis=1).fillna(0)

	        # Volume-price relationship
	        if 'volume' in df.columns and 'price' in df.columns:
	            volume = finite(df['volume'])
	            price = finite(df['price'])
	            if ((price > 0) & price.notna() & volume.notna()).any():
	                # Rows with price == 0 yield inf here; transform() zeroes them later.
	                normalized_df['volume_price_ratio'] = (volume / price).fillna(0)

	        # Market dominance relative to rank
	        if 'dominance' in df.columns and 'rank' in df.columns:
	            dominance = finite(df['dominance']).fillna(0)
	            rank = finite(df['rank']).fillna(1000)  # High rank for unknown
	            # Epsilon avoids division by zero when rank is 0
	            rank_reciprocal = 1 / (rank + 1e-8)
	            normalized_df['dominance_rank_ratio'] = (dominance / rank_reciprocal).fillna(0)

	        return normalized_df

	    def _fill_missing(self, block, feature_type, fitting):
	        """
	        Remove inf/NaN from one feature group's columns before scaling.

	        Args:
	            block: frame holding only this group's columns.
	            feature_type: group name, selects the fill strategy.
	            fitting: True during fit (column medians are available as a
	                fallback), False during transform.

	        Returns:
	            A cleaned frame with the same columns.
	        """
	        block = block.replace([np.inf, -np.inf], np.nan)

	        if feature_type in self._OUTLIER_CLIPPED_TYPES:
	            # Price/volume style data: carry the last observation forward first
	            for col in block.columns:
	                series = block[col]
	                if fitting:
	                    block[col] = series.ffill().fillna(series.median()).fillna(0)
	                else:
	                    block[col] = series.ffill().bfill().fillna(0)
	        elif feature_type in ('performance_features', 'rank_diff_features'):
	            # Performance and rank diffs can be 0 when no change
	            block = block.fillna(0)
	        elif feature_type == 'quality_features':
	            # Quality features should default to reasonable values
	            block = block.fillna(0.5)
	        elif fitting:
	            # General strategy: median then 0
	            for col in block.columns:
	                series = block[col]
	                block[col] = series.fillna(series.median()).fillna(0)
	        else:
	            # NOTE: training medians are not stored, so transform falls back to 0
	            block = block.fillna(0)

	        # Ensure no infinite values remain
	        return block.replace([np.inf, -np.inf], 0)

	    def fit(self, df):
	        """
	        Fit the normalizer on training data with crypto-specific preprocessing.

	        Args:
	            df: DataFrame, or a dict describing a single record.

	        Returns:
	            self, so calls can be chained.

	        A feature group whose scaler cannot be fitted is reported with a
	        warning and skipped rather than aborting the whole fit.
	        """
	        if isinstance(df, dict):
	            df = pd.DataFrame([df])

	        # Start from a clean slate: without this a refit would keep scalers,
	        # encoders and cached outlier bounds learned from the previous data.
	        self.scalers = {}
	        self.encoders = {}
	        self.outlier_bounds = {}
	        self.is_fitted = False

	        self.feature_info = self._categorize_features(df)

	        # Fit scalers for different feature types
	        scaler_by_type = {
	            'crypto_specific_features': RobustScaler(),   # Rank and dominance can have outliers
	            'price_volume_features': RobustScaler(),      # Price and volume data often has outliers
	            'exchange_features': StandardScaler(),        # Exchange prices should be similar
	            'performance_features': StandardScaler(),     # Performance percentages
	            'rank_diff_features': StandardScaler(),       # Rank differences are usually small integers
	            'technical_features': StandardScaler(),       # Technical indicators are usually normalized
	            'social_features': StandardScaler(),          # Sentiment scores
	            'transaction_features': PowerTransformer(),   # Transaction data can be very skewed
	            'quality_features': MinMaxScaler(),           # Quality scores are usually 0-1
	            'numerical_features': PowerTransformer()      # General numerical features
	        }

	        for feature_type, scaler in scaler_by_type.items():
	            existing_features = [col for col in self.feature_info[feature_type] if col in df.columns]
	            if not existing_features:
	                continue

	            # Copy only this group's columns rather than the whole frame.
	            block = df[existing_features].copy()
	            if self.handle_outliers and feature_type in self._OUTLIER_CLIPPED_TYPES:
	                for col in existing_features:
	                    block[col] = self._handle_outliers(block, col)

	            try:
	                block = self._fill_missing(block, feature_type, fitting=True)
	                scaler.fit(block)
	                self.scalers[feature_type] = scaler
	                self.feature_info[f'{feature_type}_existing'] = existing_features
	            except Exception as e:
	                # Best effort: skip this feature type if fitting fails
	                print(f"Warning: Could not fit scaler for {feature_type}: {e}")
	                continue

	        # Fit encoders for categorical features
	        for col in self.feature_info['categorical_features']:
	            if col in df.columns:
	                self.encoders[col] = LabelEncoder()
	                self.encoders[col].fit(df[col].astype(str).fillna('unknown'))

	        self.is_fitted = True
	        return self

	    def transform(self, data):
	        """
	        Transform data using fitted normalizers with crypto-specific handling.

	        Args:
	            data: DataFrame, or a dict describing a single record.

	        Returns:
	            A new DataFrame indexed like the input. Every feature column is
	            free of NaN/inf; the optional 'symbol' column is passed through
	            untouched, including any missing values.

	        Raises:
	            ValueError: if called before ``fit``.
	        """
	        if not self.is_fitted:
	            raise ValueError("Normalizer must be fitted before transform")

	        df = pd.DataFrame([data]) if isinstance(data, dict) else data.copy()
	        normalized_df = pd.DataFrame(index=df.index)

	        # 1. Preserve symbol if requested
	        if self.preserve_symbol and 'symbol' in df.columns:
	            normalized_df['symbol'] = df['symbol']

	        # 2. Enhanced timestamp features (values are read as epoch milliseconds)
	        for col in self.feature_info['timestamp_features']:
	            if col not in df.columns:
	                continue
	            ts = pd.to_datetime(df[col], unit='ms', errors='coerce')
	            hour = ts.dt.hour
	            weekday = ts.dt.dayofweek
	            normalized_df[f'{col}_hour'] = hour / 23.0
	            normalized_df[f'{col}_day_of_week'] = weekday / 6.0
	            normalized_df[f'{col}_month'] = (ts.dt.month - 1) / 11.0
	            normalized_df[f'{col}_quarter'] = (ts.dt.quarter - 1) / 3.0
	            normalized_df[f'{col}_is_weekend'] = (weekday >= 5).astype(int)
	            # Crypto markets are 24/7, so flag coarse trading sessions instead.
	            # The windows share their boundary hours (8 and 16).
	            normalized_df[f'{col}_is_asian_hours'] = ((hour >= 0) & (hour <= 8)).astype(int)
	            normalized_df[f'{col}_is_european_hours'] = ((hour >= 8) & (hour <= 16)).astype(int)
	            normalized_df[f'{col}_is_american_hours'] = ((hour >= 16) & (hour <= 24)).astype(int)

	        # 3. Binary features (keep as is, fill NaN with 0)
	        for col in self.feature_info['binary_features']:
	            if col in df.columns:
	                normalized_df[col] = df[col].fillna(0).astype(int)

	        # 4. Categorical features
	        for col in self.feature_info['categorical_features']:
	            if col not in df.columns or col not in self.encoders:
	                continue
	            try:
	                # One dict lookup per value instead of one encoder.transform
	                # call per row. Labels unseen during fit get code 0, i.e. the
	                # first class in the encoder's sorted order.
	                codes = {str(label): code
	                         for code, label in enumerate(self.encoders[col].classes_)}
	                values = df[col].astype(str).fillna('unknown')
	                normalized_df[f'{col}_encoded'] = values.map(codes).fillna(0).astype(int)
	            except Exception as e:
	                print(f"Warning: Could not encode {col}: {e}")
	                normalized_df[f'{col}_encoded'] = 0

	        # 5. Scale different feature types with appropriate scalers
	        for feature_type in self._SCALED_TYPES:
	            if feature_type not in self.scalers:
	                continue
	            existing_features = self.feature_info.get(f'{feature_type}_existing', [])
	            available_features = [col for col in existing_features if col in df.columns]
	            if not available_features:
	                continue

	            scaler = self.scalers[feature_type]
	            try:
	                block = df[available_features].copy()

	                # Clip with the bounds learned during fit
	                if self.handle_outliers and feature_type in self._OUTLIER_CLIPPED_TYPES:
	                    for col in available_features:
	                        if col in self.outlier_bounds:
	                            lower_bound, upper_bound = self.outlier_bounds[col]
	                            block[col] = block[col].clip(lower_bound, upper_bound)

	                block = self._fill_missing(block, feature_type, fitting=False)
	                scaled_data = scaler.transform(block)

	                # Add scaled features with descriptive names, e.g. '<col>_robust_scaled'
	                scaler_name = (type(scaler).__name__.lower()
	                               .replace('scaler', '').replace('transformer', ''))
	                for i, col in enumerate(available_features):
	                    normalized_df[f'{col}_{scaler_name}_scaled'] = scaled_data[:, i]

	            except Exception as e:
	                print(f"Warning: Could not transform {feature_type}: {e}")
	                # If transformation fails, add original features with minimal processing
	                for col in available_features:
	                    if col in df.columns:
	                        clean_col = df[col].replace([np.inf, -np.inf], np.nan).fillna(0)
	                        normalized_df[f'{col}_raw'] = clean_col

	        # 6. Crypto-specific feature engineering
	        normalized_df = self._engineer_crypto_features(df, normalized_df)

	        # 7. Final cleanup: infinities become NaN, then NaN gets a neutral default
	        normalized_df = normalized_df.replace([np.inf, -np.inf], np.nan)
	        for col in normalized_df.columns:
	            if col == 'symbol' or not normalized_df[col].isna().any():
	                continue  # Don't fill symbol
	            fill_value = 0.5 if 'sentiment' in col.lower() else 0  # 0.5 = neutral sentiment
	            normalized_df[col] = normalized_df[col].fillna(fill_value)

	        # Final validation on feature columns only. 'symbol' is excluded so a
	        # missing symbol is not overwritten with 0, and explicit checks are
	        # used because assert statements vanish under ``python -O``.
	        feature_cols = [col for col in normalized_df.columns if col != 'symbol']
	        features = normalized_df[feature_cols]
	        has_nan = features.isnull().any().any()
	        has_inf = np.isinf(features.select_dtypes(include=[np.number])).any().any()
	        if has_nan or has_inf:
	            problem = ("Still contains NaN values after cleanup" if has_nan
	                       else "Still contains infinite values after cleanup")
	            print(f"Warning: {problem}")
	            # Emergency cleanup
	            normalized_df[feature_cols] = features.fillna(0).replace([np.inf, -np.inf], 0)

	        return normalized_df

	    def fit_transform(self, data):
	        """Fit and transform in one step"""
	        return self.fit(data).transform(data)

	    def get_feature_importance_info(self):
	        """
	        Return information about feature categories for model interpretation.

	        'total_features' sums the sizes of the category lists. The
	        '<group>_existing' lists added by fit() repeat columns that are
	        already listed under their group and are therefore not counted.
	        """
	        total_features = sum(
	            len(features)
	            for name, features in self.feature_info.items()
	            if isinstance(features, list) and not name.endswith('_existing')
	        )
	        return {
	            'feature_categories': self.feature_info,
	            'scalers_used': {k: type(v).__name__ for k, v in self.scalers.items()},
	            'total_features': total_features
	        }

	    def save(self, filepath):
	        """Save the fitted normalizer state to ``filepath`` as a pickle"""
	        state = {
	            'scalers': self.scalers,
	            'encoders': self.encoders,
	            'feature_info': self.feature_info,
	            'is_fitted': self.is_fitted,
	            'preserve_symbol': self.preserve_symbol,
	            'handle_outliers': self.handle_outliers,
	            'feature_engineering': self.feature_engineering,
	            'outlier_bounds': self.outlier_bounds
	        }
	        with open(filepath, 'wb') as f:
	            pickle.dump(state, f)

	    def load(self, filepath):
	        """
	        Load a fitted normalizer saved with ``save`` and return self.

	        Option flags and outlier bounds fall back to defaults when the file
	        does not contain them.
	        """
	        # SECURITY: unpickling can execute arbitrary code; only load trusted files.
	        with open(filepath, 'rb') as f:
	            data = pickle.load(f)
	        self.scalers = data['scalers']
	        self.encoders = data['encoders']
	        self.feature_info = data['feature_info']
	        self.is_fitted = data['is_fitted']
	        self.preserve_symbol = data.get('preserve_symbol', True)
	        self.handle_outliers = data.get('handle_outliers', True)
	        self.feature_engineering = data.get('feature_engineering', True)
	        self.outlier_bounds = data.get('outlier_bounds', {})
	        return self

	def normalize_crypto_data_file(input_file, output_file, save_normalizer=True, **kwargs):
	    """
	    Enhanced normalization function for crypto data.

	    Loads a feature file, fits a CryptoDataNormalizer on it, transforms the
	    same data and pickles the result.

	    Args:
	        input_file: path to a ``.parquet`` file; any other extension is read
	            as JSON Lines (one JSON object per line).
	        output_file: destination for the normalized frame. A ``.csv``
	            extension is swapped for ``.pkl`` because the frame is written as
	            a pickle; any other path is used as given.
	        save_normalizer: also pickle the fitted normalizer next to the data,
	            as ``<output stem>_crypto_normalizer.pkl``.
	        **kwargs: forwarded to ``CryptoDataNormalizer``.

	    Returns:
	        (normalized_df, normalizer)
	    """
	    import os

	    # Load data
	    if input_file.endswith('.parquet'):
	        df = pd.read_parquet(input_file)
	        print(f"Loaded {len(df)} records with {len(df.columns)} features from parquet")
	    else:
	        with open(input_file, 'r', encoding='utf-8') as f:
	            # Skip blank lines (e.g. a trailing newline) instead of failing in json.loads
	            data = [json.loads(line) for line in f if line.strip()]
	        df = pd.DataFrame(data)
	        print(f"Loaded {len(df)} records with {len(df.columns)} features from jsonl")

	    # Initialize crypto normalizer
	    normalizer = CryptoDataNormalizer(**kwargs)

	    # Show feature categorization
	    feature_info = normalizer._categorize_features(df)
	    print("\nCrypto Feature Categorization:")
	    for category, features in feature_info.items():
	        if features:
	            print(f" {category}: {len(features)} features")

	    # Fit and transform
	    normalized_df = normalizer.fit_transform(df)

	    print(f"\nNormalized to {len(normalized_df.columns)} features")
	    print(f"Data shape: {normalized_df.shape}")

	    # Show feature importance info
	    importance_info = normalizer.get_feature_importance_info()
	    print(f"\nScalers used: {importance_info['scalers_used']}")

	    # Ensure output directory exists
	    output_dir = os.path.dirname(output_file)
	    if output_dir:
	        os.makedirs(output_dir, exist_ok=True)

	    # Derive both paths from the output stem. Substituting '.csv' in the raw
	    # string left a '.pkl' output path unchanged for BOTH files, so the
	    # normalizer pickle overwrote the normalized data.
	    stem, extension = os.path.splitext(output_file)

	    # Save normalized data as pickle instead of CSV
	    pkl_output_file = stem + '.pkl' if extension.lower() == '.csv' else output_file
	    normalized_df.to_pickle(pkl_output_file)
	    print(f"Saved normalized data to {pkl_output_file}")

	    # Save normalizer
	    if save_normalizer:
	        normalizer_file = stem + '_crypto_normalizer.pkl'
	        normalizer.save(normalizer_file)
	        print(f"Saved normalizer to {normalizer_file}")

	    return normalized_df, normalizer

	# CLI function
	import argparse

	def main():
	    """
	    Command-line entry point.

	    Parses the arguments, builds the normalizer options from the ``--no-*``
	    switches and runs ``normalize_crypto_data_file`` either on the given
	    input/output pair or, with ``--train``, on the fixed train file paths.
	    """
	    cli = argparse.ArgumentParser(
	        description="Enhanced normalization for cryptocurrency features with crypto-specific handling"
	    )
	    cli.add_argument('input', nargs='?', default='data/merged/features/crypto_features.parquet',
	                     help='Input file (.parquet or .jsonl)')
	    cli.add_argument('output', nargs='?',
	                     default='data/merged/features/norm/crypto_features_normalized.pkl',
	                     help='Output PKL file for normalized features')

	    # All remaining options are plain on/off switches.
	    switches = (
	        ('--no-save-normalizer', 'Do not save the normalizer pickle'),
	        ('--no-preserve-symbol', 'Do not preserve symbol column'),
	        ('--no-handle-outliers', 'Do not handle outliers'),
	        ('--no-feature-engineering', 'Do not create engineered features'),
	        ('--train', 'Normalize the train file and save under train/norm/'),
	    )
	    for flag, description in switches:
	        cli.add_argument(flag, action='store_true', help=description)

	    opts = cli.parse_args()

	    normalizer_options = dict(
	        preserve_symbol=not opts.no_preserve_symbol,
	        handle_outliers=not opts.no_handle_outliers,
	        feature_engineering=not opts.no_feature_engineering,
	    )

	    if opts.train:
	        # Train mode ignores the positional paths and uses the fixed train layout.
	        source = 'data/merged/train/crypto_features_train.parquet'
	        norm_dir = 'data/merged/train/norm'
	        os.makedirs(norm_dir, exist_ok=True)
	        target = os.path.join(norm_dir, 'crypto_features_train_normalized.pkl')
	        print(f"[INFO] Normalizing train file: {source} -> {target}")
	    else:
	        source, target = opts.input, opts.output
	        print(f"[INFO] Enhanced crypto normalizing: {source} -> {target}")
	        print(f"[INFO] Options: {normalizer_options}")

	    normalize_crypto_data_file(
	        source,
	        target,
	        save_normalizer=not opts.no_save_normalizer,
	        **normalizer_options
	    )


	# Run the CLI only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()