Alignment-Lab-AI
committed on
Commit
•
5a9b2ce
1
Parent(s):
c6a30f0
Delete script2.py
Browse files- script2.py +0 -47
script2.py
DELETED
@@ -1,47 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import json
|
3 |
-
import random
|
4 |
-
from glob import glob
|
5 |
-
from huggingface_hub import Repository
|
6 |
-
|
7 |
-
# File name that the merged, shuffled JSONL records are written to
# (relative to the current working directory).
output_file_name = "combined_conversations.jsonl"

# Hugging Face Hub repository ID in "<namespace>/<name>" form. The rest of
# the script also uses this string as the local clone directory.
repo_id = "AlignmentLab-AI/idonteven"
|
11 |
-
|
12 |
-
# Shuffle and combine jsonl files
|
13 |
-
def shuffle_and_combine_jsonls(output_file_name):
    """Merge every ``*.jsonl`` file in the current directory into one shuffled file.

    All lines from all matching files are loaded into memory, shuffled with
    ``random.shuffle`` and written to ``output_file_name``.

    Args:
        output_file_name: Path of the combined file to write. A file already
            present at this path is never read as an input, so re-running the
            script does not fold a previous run's output back into the result.

    Returns:
        ``output_file_name``, unchanged.
    """
    output_path = os.path.abspath(output_file_name)
    all_lines = []
    for jsonl_file in glob("*.jsonl"):
        # The output file itself matches "*.jsonl"; skip it so a stale copy
        # from an earlier run does not duplicate every record.
        if os.path.abspath(jsonl_file) == output_path:
            continue
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(jsonl_file, "r", encoding="utf-8") as file:
            for line in file:
                # The last record of a file often has no trailing newline.
                # Once shuffled it would fuse with the following record, so
                # terminate every line explicitly.
                all_lines.append(line if line.endswith("\n") else line + "\n")
    random.shuffle(all_lines)
    with open(output_file_name, "w", encoding="utf-8") as outfile:
        outfile.writelines(all_lines)
    return output_file_name
|
22 |
-
|
23 |
-
# Clone your repository from Hugging Face and return the local path
|
24 |
-
def clone_repository(repo_id):
    """Clone the Hub repository ``repo_id`` and return its ``Repository`` handle.

    The repository ID doubles as the local directory passed to
    ``Repository``, so the working copy is placed at ``./<repo_id>``.
    """
    return Repository(local_dir=repo_id, clone_from=repo_id)
|
27 |
-
|
28 |
-
# Copy the combined jsonl file and scripts to the cloned repository
|
29 |
-
def copy_files_to_repo(combined_jsonl_path, dest_dir=None):
    """Copy the combined JSONL file and the working directory's files into the clone.

    The combined file is copied first, followed by every regular file matched
    by ``glob("*")`` in the current directory. Directories (including the
    clone itself) are skipped, and the combined file is not copied twice.

    Files are copied with ``shutil.copy`` instead of a shell ``cp`` command,
    so names containing spaces or shell metacharacters are handled safely and
    a failed copy raises instead of being silently ignored.

    Args:
        combined_jsonl_path: Path of the combined JSONL file to copy.
        dest_dir: Directory to copy into. Defaults to the module-level
            ``repo_id``, which the script uses as the local clone directory.

    Raises:
        FileNotFoundError: If the destination directory does not exist.
        OSError: If a file cannot be copied.
    """
    import shutil

    if dest_dir is None:
        dest_dir = repo_id
    # Without this check shutil.copy would create a plain *file* named
    # ``dest_dir`` when the clone directory is missing.
    if not os.path.isdir(dest_dir):
        raise FileNotFoundError(f"Destination directory does not exist: {dest_dir}")

    # Copy the combined jsonl file
    shutil.copy(combined_jsonl_path, dest_dir)
    already_copied = os.path.abspath(combined_jsonl_path)

    # Copy other necessary files, e.g., scripts
    for file in glob("*"):
        # glob("*") yields top-level names only, so the clone directory shows
        # up as a directory entry; plain file copies cannot handle directories.
        if not os.path.isfile(file):
            continue
        if os.path.abspath(file) == already_copied:
            continue
        shutil.copy(file, dest_dir)
|
36 |
-
|
37 |
-
# Commit and push changes to the Hugging Face repository
|
38 |
-
def push_to_hub(repo):
    """Stage, commit and push the clone's pending changes.

    Calls ``git_add``, ``git_commit`` and ``git_push`` on ``repo`` in that
    order, using a fixed commit message.
    """
    commit_message = "Update dataset"
    repo.git_add()
    repo.git_commit(commit_message)
    repo.git_push()
|
42 |
-
|
43 |
-
# Run all steps
|
44 |
-
# Pipeline order matters: build the combined file first, then clone, then
# copy the local files into the clone, and finally commit and push.
combined_jsonl_path = shuffle_and_combine_jsonls(output_file_name)
repo = clone_repository(repo_id)
copy_files_to_repo(combined_jsonl_path)
push_to_hub(repo)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|