File size: 2,496 Bytes
30ffa0e
 
 
 
f6dabb0
30ffa0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d48756
 
 
 
 
30ffa0e
 
 
 
 
f6dabb0
 
4ae85c7
 
30ffa0e
 
 
 
 
 
 
 
f6dabb0
30ffa0e
 
 
 
 
 
 
 
 
 
 
 
f6dabb0
30ffa0e
 
 
9368de3
30ffa0e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import json
import os
from typing import Optional

def find_file_by_task_id(task_id: str, metadata_path: str = "validation/metadata.jsonl") -> Optional[str]:
    """
    Search for a filename that matches a given task_id in the metadata.jsonl file.

    The metadata file is read line by line, each line being parsed as one JSON
    document. Lines that are blank, are not valid JSON, or do not decode to a
    JSON object are skipped. The search stops at the first record whose
    ``task_id`` equals the requested one.

    If ``metadata_path`` does not exist, the function falls back to
    ``validation/metadata.jsonl`` located next to this module.

    Args:
        task_id (str): The task_id to search for
        metadata_path (str): Path to the metadata.jsonl file. Defaults to the validation directory path.

    Returns:
        Optional[str]: The filename if found, None if not found or if task_id has no associated file

    Raises:
        FileNotFoundError: If neither ``metadata_path`` nor the module-relative
            fallback exists.

    Example:
        >>> find_file_by_task_id("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    if not os.path.exists(metadata_path):
        # The given path is resolved against the current working directory;
        # retry relative to this module so the lookup also works when the
        # caller runs from a different directory.
        try:
            module_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError as err:
            # __file__ is undefined (e.g. interactive session): no fallback available.
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}") from err

        fallback_path = os.path.join(module_dir, "validation", "metadata.jsonl")
        if not os.path.exists(fallback_path):
            raise FileNotFoundError(
                f"Metadata file not found at {metadata_path} or {fallback_path}"
            )
        metadata_path = fallback_path

    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Tolerate corrupt lines instead of aborting the whole search.
                continue

            # A line can be valid JSON without being an object (e.g. a list or
            # a number); such values have no .get(), so skip them.
            if not isinstance(record, dict):
                continue

            if record.get('task_id') == task_id:
                # First match wins; an empty or missing file_name means the
                # task has no associated file.
                filename = record.get('file_name', '')
                return filename if filename else None

    return None

def get_full_file_path(task_id: str, base_dir: str = "validation") -> Optional[str]:
    """
    Resolve the on-disk path of the file attached to a task_id.

    The filename is looked up with find_file_by_task_id using that function's
    default metadata path (base_dir is not used for the metadata lookup), then
    joined onto base_dir. The joined path is returned only if it exists.

    Args:
        task_id (str): The task_id to search for
        base_dir (str): Directory the filename is joined onto. Defaults to validation directory.

    Returns:
        Optional[str]: The joined path if a filename was found and the path exists,
        None otherwise. A message is printed when no filename is found.

    Example:
        >>> get_full_file_path("32102e3e-d12a-4209-9163-7b3a104efe5d")
        "validation/32102e3e-d12a-4209-9163-7b3a104efe5d.xlsx"
    """
    matched_name = find_file_by_task_id(task_id)

    if matched_name:
        candidate = os.path.join(base_dir, matched_name)
        if os.path.exists(candidate):
            return candidate
        # Metadata names a file, but it is not present under base_dir.
        return None

    print("FILE NOT FOUND FOR TASK ID: ", task_id)
    return None