File size: 5,651 Bytes
295a9df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# environment_loader.py
'''Utility module to load .xlsx files from environment-specific folders and optional Huggingface repositories.'''

import os
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Optional

import pandas as pd

# Optional Huggingface support
try:
    from huggingface_hub import hf_hub_download
except ImportError:
    hf_hub_download = None


def get_latest_file_in_directory(directory_path: str, pattern: str = '*.xlsx') -> Optional[Path]:
    '''Return the most recently modified regular file matching pattern in directory_path.

    Args:
        directory_path: Directory to search; a str or a Path.
        pattern: Glob pattern applied to directory_path.

    Returns:
        The matching file with the greatest st_mtime, or None when no usable
        file matches. On equal timestamps the first one yielded by the glob wins.
    '''
    newest = None
    newest_mtime = None
    for candidate in Path(directory_path).glob(pattern):
        # Excel keeps a '~$<name>' owner/lock file next to every open workbook.
        # It matches '*.xlsx' and is freshly written, but it is not a workbook,
        # so picking it as "latest" would make the subsequent read fail.
        # Directories that happen to match the pattern are skipped as well.
        if candidate.name.startswith('~$') or not candidate.is_file():
            continue
        mtime = candidate.stat().st_mtime
        if newest_mtime is None or mtime > newest_mtime:
            newest, newest_mtime = candidate, mtime
    return newest


def get_file_by_date(directory_path: str, target_date: date, pattern: str = '*.xlsx') -> Optional[Path]:
    '''Return the file for target_date, matched by file name first and by modification date second.

    Args:
        directory_path: Directory to search; a str or a Path.
        target_date: Day to look for. A datetime is accepted and reduced to its date.
        pattern: Glob pattern applied to directory_path.

    Returns:
        The newest (by st_mtime) file whose name contains target_date formatted
        as YYYY-MM-DD. If no name matches, the newest file whose local
        modification date equals target_date. None when neither lookup matches.
    '''
    # A datetime never compares equal to a date, so without this the mtime
    # fallback below would silently find nothing for datetime arguments.
    if isinstance(target_date, datetime):
        target_date = target_date.date()
    date_str = target_date.strftime('%Y-%m-%d')

    # Stat every usable file exactly once. Excel '~$' owner/lock files and
    # directories match the glob but are not workbooks, so they are dropped.
    entries = [
        (path, path.stat().st_mtime)
        for path in Path(directory_path).glob(pattern)
        if path.is_file() and not path.name.startswith('~$')
    ]

    # First try matching the date string in the file name.
    matches = [entry for entry in entries if date_str in entry[0].name]
    if not matches:
        # Fall back to the file's modification date (local time).
        matches = [
            entry for entry in entries
            if datetime.fromtimestamp(entry[1]).date() == target_date
        ]
    if not matches:
        return None
    return max(matches, key=lambda entry: entry[1])[0]


def load_latest_xlsx_for_env(env_code: str,
                             base_path: str = 'Q:/Selenium_Reports',
                             use_date: bool = False,
                             target_date: Optional[date] = None) -> pd.DataFrame:
    '''Load the latest or date-specific .xlsx file for the given environment code.

    File selection is delegated to get_latest_xlsx_path_for_env so that loading
    and path lookup always agree on which file is chosen and on how failures
    are reported.

    Args:
        env_code: Environment sub-folder name under base_path.
        base_path: Root directory; the file is searched in base_path/env_code/XLSX.
        use_date: When True, select the file for target_date instead of the newest one.
        target_date: Day to select; required when use_date is True.

    Returns:
        The selected workbook read with pandas.read_excel using its defaults.

    Raises:
        FileNotFoundError: The environment folder or a suitable file is missing.
        ValueError: use_date is True but target_date is None.
    '''
    file_path = get_latest_xlsx_path_for_env(
        env_code=env_code,
        base_path=base_path,
        use_date=use_date,
        target_date=target_date,
    )
    return pd.read_excel(file_path)


def load_environments(env_codes: list,
                      base_path: str = 'Q:/Selenium_Reports',
                      by_date: bool = False,
                      days_ago: int = 1) -> dict:
    '''Build a mapping of environment code to the DataFrame loaded for it.

    Each code is passed to load_latest_xlsx_for_env. by_date is forwarded as
    its use_date flag, and the target date handed over is today's local date
    minus days_ago days, computed once for the whole batch.
    '''
    today = datetime.now().date()
    wanted_day = today - timedelta(days=days_ago)
    return {
        code: load_latest_xlsx_for_env(
            env_code=code,
            base_path=base_path,
            use_date=by_date,
            target_date=wanted_day,
        )
        for code in env_codes
    }


def load_from_huggingface(repo_id: str,
                          filenames: list,
                          revision: str = 'main') -> dict:
    '''Fetch the named files from a Huggingface repo and parse each with pandas.read_excel.

    Returns a dict keyed by the entries of filenames, each mapped to the
    DataFrame read from the path hf_hub_download returned for it.
    Raises ImportError when huggingface_hub could not be imported.
    '''
    if hf_hub_download is None:
        raise ImportError('huggingface_hub is not installed. Please pip install huggingface_hub')

    def _fetch_frame(name):
        # Download one file, then read it immediately, as the files are handled one by one.
        downloaded = hf_hub_download(repo_id=repo_id, filename=name, revision=revision)
        return pd.read_excel(downloaded)

    return {name: _fetch_frame(name) for name in filenames}


def get_latest_xlsx_path_for_env(env_code: str,
                                 base_path: str = 'Q:/Selenium_Reports',
                                 use_date: bool = False,
                                 target_date: Optional[date] = None) -> Path:
    '''Return the Path to the desired .xlsx file for the given environment code without loading.

    Args:
        env_code: Environment sub-folder name under base_path.
        base_path: Root directory; the file is searched in base_path/env_code/XLSX.
        use_date: When True, select the file for target_date (via get_file_by_date)
            instead of the newest one (via get_latest_file_in_directory).
        target_date: Day to select; required when use_date is True.

    Returns:
        Path of the selected file.

    Raises:
        FileNotFoundError: base_path/env_code/XLSX is not a directory, or no
            file could be selected in it.
        ValueError: use_date is True but target_date is None.
    '''
    folder = Path(base_path) / env_code / 'XLSX'
    # is_dir rather than exists: a plain file at this location cannot hold reports.
    if not folder.is_dir():
        raise FileNotFoundError(f"Environment folder not found: {folder}")

    if use_date:
        if target_date is None:
            raise ValueError('target_date must be provided when use_date is True')
        file_path = get_file_by_date(folder, target_date)
        if file_path is None:
            # The folder may well contain workbooks, just none for this day,
            # so say which date was looked for instead of "no files found".
            raise FileNotFoundError(
                f'No .xlsx file for date {target_date:%Y-%m-%d} found '
                f'for environment {env_code} in {folder}'
            )
        return file_path

    file_path = get_latest_file_in_directory(folder)
    if file_path is None:
        raise FileNotFoundError(f'No .xlsx files found for environment {env_code} in {folder}')
    return file_path


def get_environments_paths(env_codes: list,
                           base_path: str = 'Q:/Selenium_Reports',
                           by_date: bool = False,
                           days_ago: int = 1) -> dict:
    '''Build a mapping of environment code to the Path of its selected .xlsx file.

    Each code is passed to get_latest_xlsx_path_for_env. by_date is forwarded
    as its use_date flag, and the target date handed over is today's local
    date minus days_ago days, computed once for the whole batch.
    '''
    today = datetime.now().date()
    wanted_day = today - timedelta(days=days_ago)
    return {
        code: get_latest_xlsx_path_for_env(
            env_code=code,
            base_path=base_path,
            use_date=by_date,
            target_date=wanted_day,
        )
        for code in env_codes
    }


def get_huggingface_paths(repo_id: str,
                          filenames: list,
                          revision: str = 'main') -> dict:
    '''Fetch the named files from a Huggingface repo and report where each one landed.

    Returns a dict keyed by the entries of filenames, each mapped to a Path
    built from the value hf_hub_download returned for it; nothing is parsed.
    Raises ImportError when huggingface_hub could not be imported.
    '''
    if hf_hub_download is None:
        raise ImportError('huggingface_hub is not installed. Please pip install huggingface_hub')

    def _fetch_path(name):
        downloaded = hf_hub_download(repo_id=repo_id, filename=name, revision=revision)
        return Path(downloaded)

    return {name: _fetch_path(name) for name in filenames}