Spaces:
Sleeping
Sleeping
import json | |
import os | |
from typing import Optional | |
def find_file_by_task_id(task_id: str, metadata_path: str = "validation/metadata.jsonl") -> Optional[str]:
    """
    Look up the attachment filename recorded for a task in a JSON Lines metadata file.

    The file is scanned line by line and the first JSON object whose
    ``task_id`` field equals ``task_id`` decides the result. Blank lines,
    lines that are not valid JSON, and JSON values that are not objects
    (lists, strings, numbers, ...) are skipped.

    Args:
        task_id (str): The task_id to search for.
        metadata_path (str): Path to the metadata.jsonl file. If this path
            does not exist, ``validation/metadata.jsonl`` next to this module
            is tried instead.

    Returns:
        Optional[str]: The value of the record's ``file_name`` field, or None
        when no record matches or the matching record has a missing/empty
        ``file_name``.

    Raises:
        FileNotFoundError: If the module directory cannot be determined for
            the fallback, or if the metadata file that ends up being opened
            does not exist.

    Example:
        >>> find_file_by_task_id("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    if not os.path.exists(metadata_path):
        # The given path is usually relative to the current working directory;
        # fall back to the copy located beside this module.
        try:
            module_dir = os.path.dirname(os.path.abspath(__file__))
        except (NameError, OSError) as exc:
            # __file__ is undefined in some interactive/embedded contexts.
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}") from exc
        metadata_path = os.path.join(module_dir, "validation", "metadata.jsonl")

    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Best-effort scan: one corrupt line must not abort the search.
                continue
            if not isinstance(record, dict):
                # Valid JSON but not an object; it has no task_id to compare.
                continue
            if record.get('task_id') == task_id:
                # An empty file_name means the task has no attachment.
                return record.get('file_name', '') or None
    return None
def get_full_file_path(task_id: str, base_dir: str = "validation") -> Optional[str]:
    """
    Get the full path of the file attached to a given task_id, if it exists on disk.

    The filename is resolved through ``find_file_by_task_id``. When
    ``base_dir`` contains its own ``metadata.jsonl`` that index is used, so a
    non-default ``base_dir`` is searched with the metadata stored beside its
    files; otherwise the default metadata lookup of ``find_file_by_task_id``
    is used.

    Args:
        task_id (str): The task_id to search for.
        base_dir (str): Base directory where files are stored. Defaults to
            the validation directory.

    Returns:
        Optional[str]: ``base_dir`` joined with the filename if that path
        exists, None if the task has no filename in the metadata or the file
        is not present on disk.

    Raises:
        Whatever ``find_file_by_task_id`` raises (e.g. FileNotFoundError when
        no metadata file can be located).

    Example:
        >>> get_full_file_path("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "validation/32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    # Prefer the index that lives with the files; with the default base_dir
    # this is the same path the lookup would use anyway.
    local_metadata = os.path.join(base_dir, "metadata.jsonl")
    if os.path.isfile(local_metadata):
        filename = find_file_by_task_id(task_id, local_metadata)
    else:
        filename = find_file_by_task_id(task_id)

    if not filename:
        print("FILE NOT FOUND FOR TASK ID: ", task_id)
        return None

    full_path = os.path.join(base_dir, filename)
    # The metadata may name a file that was never downloaded.
    return full_path if os.path.exists(full_path) else None