Spaces:
Sleeping
Sleeping
File size: 2,496 Bytes
30ffa0e f6dabb0 30ffa0e 8d48756 30ffa0e f6dabb0 4ae85c7 30ffa0e f6dabb0 30ffa0e f6dabb0 30ffa0e 9368de3 30ffa0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import json
import os
from typing import Optional
def find_file_by_task_id(task_id: str, metadata_path: str = "validation/metadata.jsonl") -> Optional[str]:
    """
    Search for a filename that matches a given task_id in the metadata.jsonl file.

    The metadata file is read as JSON Lines: one JSON object per line. Lines
    that are blank, are not valid JSON, or decode to something other than a
    JSON object are skipped. The first record whose ``task_id`` equals the
    requested one decides the result.

    If ``metadata_path`` does not exist, the lookup falls back to
    ``validation/metadata.jsonl`` located next to this module.

    Args:
        task_id (str): The task_id to search for
        metadata_path (str): Path to the metadata.jsonl file. Defaults to the validation directory path.

    Returns:
        Optional[str]: The filename if found, None if not found or if task_id has no associated file

    Raises:
        FileNotFoundError: If neither ``metadata_path`` nor the module-relative
            fallback exists.

    Example:
        >>> find_file_by_task_id("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    if not os.path.exists(metadata_path):
        try:
            module_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is undefined (e.g. in an interactive session), so there
            # is no module-relative location to fall back to.
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}") from None
        fallback_path = os.path.join(module_dir, "validation", "metadata.jsonl")
        if not os.path.exists(fallback_path):
            # Report the path the caller asked for rather than the fallback.
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}")
        metadata_path = fallback_path

    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line.strip())
            except json.JSONDecodeError:
                # Blank or malformed line: ignore it and keep scanning.
                continue
            if not isinstance(data, dict):
                # Valid JSON but not an object (list, number, null, ...):
                # it cannot carry a task_id, so skip it instead of crashing.
                continue
            if data.get('task_id') == task_id:
                # A missing or empty file_name means the task has no file.
                return data.get('file_name') or None
    return None
def get_full_file_path(task_id: str, base_dir: str = "validation") -> Optional[str]:
    """
    Resolve a task_id to the path of its associated file, if that file exists.

    The filename is looked up with ``find_file_by_task_id`` using its default
    metadata path; ``base_dir`` only controls where the file itself is
    expected to live.

    Args:
        task_id (str): The task_id to search for
        base_dir (str): Base directory where files are stored. Defaults to validation directory.

    Returns:
        Optional[str]: ``base_dir`` joined with the filename when that path
        exists on disk; None when the task has no filename or the path is absent

    Example:
        >>> get_full_file_path("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "validation/32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    name = find_file_by_task_id(task_id)
    if name:
        candidate = os.path.join(base_dir, name)
        if os.path.exists(candidate):
            return candidate
        # Metadata names a file, but it is not present under base_dir.
        return None

    # No filename recorded for this task: report it and give up.
    print("FILE NOT FOUND FOR TASK ID: ", task_id)
    return None
|