import json
import os
from typing import Optional


def find_file_by_task_id(task_id: str, metadata_path: str = "validation/metadata.jsonl") -> Optional[str]:
    """
    Search for a filename that matches a given task_id in the metadata.jsonl file.

    The metadata file is read as JSON Lines: one JSON object per line. Lines
    that are blank, are not valid JSON, or do not decode to a JSON object are
    skipped. The first record whose ``task_id`` equals the requested one wins.

    If ``metadata_path`` does not exist, the function falls back to
    ``validation/metadata.jsonl`` located next to this module.

    Args:
        task_id (str): The task_id to search for
        metadata_path (str): Path to the metadata.jsonl file. Defaults to the
            validation directory path.

    Returns:
        Optional[str]: The filename if found, None if not found or if task_id
        has no associated file

    Raises:
        FileNotFoundError: If neither ``metadata_path`` nor the module-relative
            fallback exists.

    Example:
        >>> find_file_by_task_id("32102e3e-d12a-4209-9163-7b3a104efe5d")
        '32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx'
    """
    if not os.path.exists(metadata_path):
        # The given path is usually relative to the current working directory;
        # retry relative to this module so the lookup also works when the
        # process was started from somewhere else.
        try:
            module_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is not defined in some interactive/embedded interpreters.
            fallback_path = None
        else:
            fallback_path = os.path.join(module_dir, "validation", "metadata.jsonl")

        if fallback_path is None or not os.path.exists(fallback_path):
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}")
        metadata_path = fallback_path

    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Keep the try body minimal: only the decode can raise here.
            try:
                data = json.loads(line.strip())
            except json.JSONDecodeError:
                continue

            # Valid JSON that is not an object (null, list, number, ...) has
            # no .get(); skip it instead of crashing the whole lookup.
            if not isinstance(data, dict):
                continue

            if data.get('task_id') == task_id:
                # An empty or missing file_name means "no associated file".
                return data.get('file_name') or None

    return None


def get_full_file_path(
    task_id: str,
    base_dir: str = "validation",
    metadata_path: Optional[str] = None,
) -> Optional[str]:
    """
    Get the full file path for a given task_id if it exists.

    Args:
        task_id (str): The task_id to search for
        base_dir (str): Base directory where files are stored. Defaults to
            validation directory.
        metadata_path (Optional[str]): Path to the metadata.jsonl file. When
            omitted, ``<base_dir>/metadata.jsonl`` is used if it exists;
            otherwise the default lookup of ``find_file_by_task_id`` applies.

    Returns:
        Optional[str]: Full path to the file if found, None if not found

    Raises:
        FileNotFoundError: If no metadata file can be located.

    Example:
        >>> get_full_file_path("32102e3e-d12a-4209-9163-7b3a104efe5d")
        'validation/32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx'
    """
    if metadata_path is None:
        # Prefer the metadata that lives alongside the files in base_dir, so a
        # non-default base_dir is not silently paired with the default metadata.
        candidate = os.path.join(base_dir, "metadata.jsonl")
        if os.path.exists(candidate):
            metadata_path = candidate

    if metadata_path is None:
        filename = find_file_by_task_id(task_id)
    else:
        filename = find_file_by_task_id(task_id, metadata_path)

    if not filename:
        print("FILE NOT FOUND FOR TASK ID: ", task_id)
        return None

    full_path = os.path.join(base_dir, filename)
    return full_path if os.path.exists(full_path) else None