Alignment-Lab-AI committed on
Commit
5a9b2ce
1 Parent(s): c6a30f0

Delete script2.py

Browse files
Files changed (1) hide show
  1. script2.py +0 -47
script2.py DELETED
@@ -1,47 +0,0 @@
1
- import os
2
- import json
3
- import random
4
- from glob import glob
5
- from huggingface_hub import Repository
6
-
7
# File that receives the shuffled, merged contents of the *.jsonl inputs.
output_file_name = "combined_conversations.jsonl"
# Hugging Face Hub repository ID; the script also uses it as the local
# clone directory.
repo_id = "AlignmentLab-AI/idonteven"
11
-
12
- # Shuffle and combine jsonl files
13
def shuffle_and_combine_jsonls(output_file_name):
    """Merge every ``*.jsonl`` file in the current directory into one shuffled file.

    The lines of all matching files are pooled, shuffled in place with
    ``random.shuffle`` and written to ``output_file_name``.

    Args:
        output_file_name: Path of the combined file to write. If a file with
            this path already exists and matches ``*.jsonl`` it is NOT used as
            an input, so re-running the script does not duplicate records.

    Returns:
        ``output_file_name``, unchanged.
    """
    output_path = os.path.abspath(output_file_name)
    all_lines = []
    for jsonl_file in glob("*.jsonl"):
        # The combined file from a previous run also matches the pattern;
        # reading it back would double the dataset on every run.
        if os.path.abspath(jsonl_file) == output_path:
            continue
        with open(jsonl_file, "r", encoding="utf-8") as file:
            for line in file:
                # The last line of a file may lack a newline; without one it
                # would fuse with whichever record follows it after shuffling.
                all_lines.append(line if line.endswith("\n") else line + "\n")
    random.shuffle(all_lines)
    with open(output_file_name, "w", encoding="utf-8") as outfile:
        outfile.writelines(all_lines)
    return output_file_name
22
-
23
- # Clone your repository from Hugging Face and return the Repository object
24
def clone_repository(repo_id):
    """Clone the Hub repository ``repo_id`` and return the ``Repository`` object.

    ``repo_id`` is passed both as the first positional argument and as
    ``clone_from``, so the same string names the local directory and the
    remote repository.
    """
    return Repository(repo_id, clone_from=repo_id)
27
-
28
- # Copy the combined jsonl file and scripts to the cloned repository
29
def copy_files_to_repo(combined_jsonl_path, target_dir=None):
    """Copy the combined JSONL file and the other local files into the clone.

    Files are copied with ``shutil.copy`` rather than by building ``cp`` shell
    commands, so names containing spaces or shell metacharacters are handled
    correctly and a failed copy raises instead of being silently ignored.

    Args:
        combined_jsonl_path: Path of the combined JSONL file to copy.
        target_dir: Destination directory. Defaults to the module-level
            ``repo_id``, which is where the script clones the repository.

    Raises:
        FileNotFoundError: If the destination directory does not exist.
        OSError: If a file cannot be copied.
    """
    import shutil  # local import: keeps this block self-contained

    if target_dir is None:
        target_dir = repo_id
    if not os.path.isdir(target_dir):
        # Copying into a missing directory would create a plain file with
        # that name instead of populating the clone.
        raise FileNotFoundError(f"destination directory not found: {target_dir}")

    # Copy the combined jsonl file
    shutil.copy(combined_jsonl_path, target_dir)
    combined_abs = os.path.abspath(combined_jsonl_path)

    # Copy other necessary files, e.g., scripts
    for entry in glob("*"):
        # Only regular files are copied: this skips the clone directory
        # itself and any other directory in the working directory.
        if not os.path.isfile(entry):
            continue
        # The combined file was already copied above.
        if os.path.abspath(entry) == combined_abs:
            continue
        shutil.copy(entry, target_dir)
36
-
37
- # Commit and push changes to the Hugging Face repository
38
def push_to_hub(repo):
    """Stage, commit and push the changes in ``repo``.

    Calls ``git_add``, ``git_commit`` and ``git_push`` on ``repo`` in that
    order; the commit message is always "Update dataset".
    """
    commit_message = "Update dataset"
    repo.git_add()
    repo.git_commit(commit_message)
    repo.git_push()
42
-
43
- # Run all steps
44
# Pipeline: build the combined file, clone the repository, copy the files
# into the clone, then commit and push.
combined_jsonl_path = shuffle_and_combine_jsonls(output_file_name)
repo = clone_repository(repo_id)
copy_files_to_repo(combined_jsonl_path)
push_to_hub(repo)