Mezura / pipelines /utils /common.py
nmmursit's picture
initial commit
3232d64 verified
raw
history blame
5.74 kB
"""
Common utility functions for pipelines.
"""
import logging
import re
from typing import Dict, Any, List, Optional

import pandas as pd
# Module-level logger used by the helpers in this file
logger = logging.getLogger("mezura.pipelines.utils")
# UI component registry to share components between modules
# (written by set_ui_component, read by get_ui_component)
_ui_components = {}
# Most recently stored evaluation type; stays None until set_evaluation_type is called
_current_evaluation_type = None
def set_ui_component(name: str, component: Any) -> None:
    """
    Store a UI component in the shared registry.

    An entry already registered under the same name is overwritten.

    Args:
        name: Key under which the component is stored
        component: The component object to store
    """
    _ui_components.update({name: component})
def get_ui_component(name: str) -> Optional[Any]:
    """
    Look up a UI component in the shared registry.

    Args:
        name: Key the component was registered under

    Returns:
        The registered component, or None when nothing is stored under that name
    """
    try:
        return _ui_components[name]
    except KeyError:
        return None
def set_evaluation_type(evaluation_type: Any) -> None:
    """
    Remember the current evaluation type and log the stored value.

    Args:
        evaluation_type: Either an object exposing a ``value`` attribute
            (e.g. a Gradio component), in which case that attribute is
            stored, or the value itself (assumed to be a string)
    """
    global _current_evaluation_type
    # Unwrap component-like objects; anything else is stored unchanged
    _current_evaluation_type = (
        evaluation_type.value if hasattr(evaluation_type, "value") else evaluation_type
    )
    logger.info(f"Set evaluation type to: {_current_evaluation_type}")
def get_evaluation_type() -> Optional[str]:
    """
    Return the evaluation type most recently stored by set_evaluation_type.

    Returns:
        The stored evaluation type, or None if none has been set yet
    """
    current = _current_evaluation_type
    return current
def update_evaluation_params(evaluation_type: str) -> List[Any]:
    """
    Produce the Gradio updates for the evaluation parameter components.

    Args:
        evaluation_type: The current evaluation type (only logged)

    Returns:
        List of update objects for Gradio; always empty in this simplified
        version, since no parameter switching is performed
    """
    logger.info(f"Updating evaluation params for type: {evaluation_type}")
    # Simplified design: there is nothing to toggle, so no updates are emitted
    no_updates: List[Any] = []
    return no_updates
def setup_request_logging():
    """
    Set up simplified HTTP request logging.

    Creates the module-level ``http_logger`` and, when the ``requests``
    library can be imported, wraps ``requests.Session.request`` so that every
    request and its response status (plus the JSON body, when the response
    declares a JSON content type) is logged.

    Calling this function more than once replaces ``http_logger`` but installs
    the ``requests`` hook only once, so requests are never logged twice.
    """

    # Custom HTTP request tracker
    class SimpleHTTPRequestLogger:
        """Minimal request/response logger writing to the "mezura.http" logger."""

        def __init__(self):
            self.logger = logging.getLogger("mezura.http")
            self.logger.setLevel(logging.INFO)

        def log_request(self, method, url, data=None):
            """Log an outgoing request, including its payload when one is given."""
            if data:
                self.logger.info(f"HTTP Request: {method} {url} with data: {data}")
            else:
                self.logger.info(f"HTTP Request: {method} {url}")

        def log_response(self, status_code, body=None):
            """Log a response status, including its body when one is given."""
            if body:
                self.logger.info(f"HTTP Response: {status_code} with body: {body}")
            else:
                self.logger.info(f"HTTP Response: {status_code}")

    # Create the HTTP logger as a module-level global
    global http_logger
    http_logger = SimpleHTTPRequestLogger()

    # If the requests library is available, add a hook for it as well
    try:
        import requests
    except ImportError:
        logger.debug("requests library not found, skipping HTTP request logging")
        return

    # The wrapper looks up http_logger at call time, so an already installed
    # hook automatically uses the logger created above; wrapping again would
    # only stack wrappers and duplicate every log line.
    if getattr(requests.Session.request, "_mezura_logging_hook", False):
        return

    old_request = requests.Session.request

    def new_request(self, method, url, *args, **kwargs):
        http_logger.log_request(method, url, kwargs.get('json') or kwargs.get('data'))
        response = old_request(self, method, url, *args, **kwargs)

        body = None
        # Compare only the media type so that values such as
        # "application/json; charset=utf-8" are recognised too
        content_type = response.headers.get('content-type') or ''
        if content_type.split(';', 1)[0].strip().lower() == 'application/json':
            try:
                body = response.json()
            except Exception:
                # Logging is best-effort: an undecodable body must never
                # break the request itself
                body = None

        http_logger.log_response(response.status_code, body)
        return response

    # Marker used by the idempotency check above
    new_request._mezura_logging_hook = True
    requests.Session.request = new_request
    logger.info("Installed simplified HTTP request logging for requests")
# Initialise the module-level HTTP logger, then call the function that sets up HTTP request logging
http_logger = None
setup_request_logging()
def search_and_filter(query: str, df: pd.DataFrame, filter_type: str = "All") -> pd.DataFrame:
    """
    Search the dataframe for rows matching a query.

    Every cell is converted to a string and matched case-insensitively. Each
    query term is treated as a regular expression; a term that is not a valid
    regular expression (for example one with an unbalanced "(") is matched as
    literal text instead of raising.

    Args:
        query: Search query string (can include multiple queries separated by ';');
            a row is kept when it matches at least one of them
        df: DataFrame to search in
        filter_type: Filter type (default: "All"); it is only logged and does
            not affect the result

    Returns:
        pd.DataFrame: ``df`` itself when there is nothing to search (empty or
        placeholder frame, or blank query), otherwise the matching rows, or a
        single-cell "Result" frame when no row matches
    """
    logger.debug(f"Searching with query: '{query}', filter_type: '{filter_type}'")

    # Nothing to search in an empty frame or in the "No data available" placeholder
    if df.empty or 'No data available' in str(df.iloc[0].values[0]):
        return df

    # If no query, return original data
    if not query or not query.strip():
        return df

    # Split multiple queries by ';'
    queries = [q.strip() for q in query.split(';') if q.strip()]

    filtered_df = df.copy()
    if queries:
        # The string view does not depend on the query, so build it once
        text = filtered_df.astype(str)

        # Start with no row selected, using the same index as the DataFrame
        mask = pd.Series(False, index=filtered_df.index)

        for q in queries:
            # User input is not guaranteed to be a valid regular expression;
            # fall back to a literal match rather than raising re.error
            try:
                re.compile(q)
                pattern = q
            except re.error:
                pattern = re.escape(q)

            query_mask = text.apply(
                lambda col, pattern=pattern: col.str.contains(
                    pattern, case=False, regex=True, na=False
                )
            ).any(axis=1)
            mask = mask | query_mask

        # Apply the combined mask to the dataframe
        filtered_df = filtered_df[mask]

    logger.debug(f"Search returned {len(filtered_df)} results")

    # If we filtered out all rows, return a message
    if filtered_df.empty:
        return pd.DataFrame({"Result": [f"No results found for query: {query}"]})

    return filtered_df