Spaces:
Paused
Paused
File size: 3,378 Bytes
ff8f4ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import os
import shutil
import fnmatch
import json
from ..globals import Global
def init_data_dir():
    """Populate the data directory with the bundled sample folders.

    The project directory is resolved as two levels above the directory
    that contains this file. For each of ``templates``, ``datasets`` and
    ``lora_models``, the folder of that name in the project directory is
    handed to ``copy_sample_data_if_not_exists`` together with the
    same-named destination under ``Global.data_dir``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.abspath(os.path.join(here, "..", ".."))

    for folder in ("templates", "datasets", "lora_models"):
        copy_sample_data_if_not_exists(
            os.path.join(project_root, folder),
            os.path.join(Global.data_dir, folder),
        )
def copy_sample_data_if_not_exists(source, destination):
    """Copy the ``source`` tree to ``destination`` unless it already exists.

    Args:
        source: Path of the directory tree to copy from.
        destination: Path to create. If anything already exists at this
            path, the function returns without touching it.

    Returns:
        None.
    """
    if not os.path.exists(destination):
        print(f"Copying sample data to \"{destination}\"")
        shutil.copytree(source, destination)
def get_available_template_names():
    """List the template names found in the ``templates`` data folder.

    Returns:
        A list with one entry per ``*.json`` file in
        ``<Global.data_dir>/templates``, each being the file name with its
        extension removed. The order is whatever ``os.listdir`` yields.
    """
    templates_dir = os.path.join(Global.data_dir, "templates")
    json_files = fnmatch.filter(os.listdir(templates_dir), "*.json")
    names = []
    for json_file in json_files:
        stem, _extension = os.path.splitext(json_file)
        names.append(stem)
    return names
def get_available_dataset_names():
    """List the dataset files found in the ``datasets`` data folder.

    Returns:
        A list of the file names (extension included) in
        ``<Global.data_dir>/datasets`` that match ``*.json`` or
        ``*.jsonl``. The order is whatever ``os.listdir`` yields.
    """
    datasets_dir = os.path.join(Global.data_dir, "datasets")
    accepted_patterns = ("*.json", "*.jsonl")

    dataset_names = []
    for entry in os.listdir(datasets_dir):
        if any(fnmatch.fnmatch(entry, pattern) for pattern in accepted_patterns):
            dataset_names.append(entry)
    return dataset_names
def get_available_lora_model_names():
    """List the sub-directories of the ``lora_models`` data folder.

    Returns:
        A list of the entry names in ``<Global.data_dir>/lora_models``
        that are directories. Plain files are left out. The order is
        whatever ``os.listdir`` yields.
    """
    lora_models_dir = os.path.join(Global.data_dir, "lora_models")

    def is_directory(entry):
        return os.path.isdir(os.path.join(lora_models_dir, entry))

    return list(filter(is_directory, os.listdir(lora_models_dir)))
def get_path_of_available_lora_model(name):
    """Resolve a model name to its directory under ``lora_models``.

    Args:
        name: Entry name to look up in ``<Global.data_dir>/lora_models``.

    Returns:
        The joined path when it is an existing directory, otherwise
        ``None``.
    """
    candidate = os.path.join(Global.data_dir, "lora_models", name)
    return candidate if os.path.isdir(candidate) else None
def get_info_of_available_lora_model(name):
    """Return the parsed ``info.json`` of a local LoRA model, if available.

    Args:
        name: Directory name of the model inside the ``lora_models`` data
            folder.

    Returns:
        The decoded content of ``info.json`` from the model's directory,
        or ``None`` when ``name`` contains ``"/"``, when no such model
        directory exists, or when the file cannot be read or parsed.
    """
    try:
        # NOTE(review): names containing "/" are presumably not local
        # folder names (e.g. hub-style ids) - confirm with callers.
        if "/" in name:
            return None

        model_path = get_path_of_available_lora_model(name)
        if not model_path:
            return None

        # JSON text is UTF-8; do not rely on the platform's default
        # locale encoding, which differs between systems.
        info_path = os.path.join(model_path, "info.json")
        with open(info_path, "r", encoding="utf-8") as json_file:
            return json.load(json_file)
    except Exception:
        # Best-effort lookup: any failure (missing file, invalid JSON,
        # unexpected ``name`` type, ...) is reported as "no info".
        return None
def get_dataset_content(name):
    """Load and parse a dataset file from the ``datasets`` data folder.

    Args:
        name: File name inside ``<Global.data_dir>/datasets``. Its
            extension selects the parser: ``*.json`` is read as a single
            JSON document, ``*.jsonl`` as one JSON value per line.

    Returns:
        The decoded JSON document for a ``*.json`` file, or a list holding
        one decoded value per non-blank line for a ``*.jsonl`` file.

    Raises:
        ValueError: If the file does not exist, if its name matches
            neither ``*.json`` nor ``*.jsonl``, or if a ``*.jsonl`` line
            is not valid JSON (the message carries the line number).
            Decoding errors of a ``*.json`` file propagate from
            ``json.load``.
    """
    file_name = os.path.join(Global.data_dir, "datasets", name)
    if not os.path.exists(file_name):
        raise ValueError(
            f"Can't read {file_name} from datasets. File does not exist.")

    # JSON text is UTF-8; do not rely on the platform's default locale
    # encoding, which differs between systems.
    with open(file_name, "r", encoding="utf-8") as file:
        if fnmatch.fnmatch(name, "*.json"):
            return json.load(file)

        if fnmatch.fnmatch(name, "*.jsonl"):
            data = []
            # Keep counting every physical line so that reported line
            # numbers match what an editor shows.
            for line_number, line in enumerate(file, start=1):
                # Blank lines (e.g. extra newlines at the end of the
                # file) carry no record; skip them instead of failing.
                if not line.strip():
                    continue
                try:
                    data.append(json.loads(line))
                except Exception as e:
                    raise ValueError(
                        f"Error parsing JSON on line {line_number}: {e}") from e
            return data

        raise ValueError(
            f"Unknown file format: {file_name}. Expects '*.json' or '*.jsonl'")
|