Alignment-Lab-AI committed on
Commit
d952c8a
1 Parent(s): d721e7b

Update caption.py

Browse files
Files changed (1) hide show
  1. caption.py +20 -16
caption.py CHANGED
@@ -26,6 +26,7 @@ def process_file(llm, filepath, sampling_params):
26
  prev_eps = 0
27
  batch_texts = []
28
  df = pd.DataFrame()
 
29
 
30
  if filepath.endswith('.parquet'):
31
  print("Reading from a parquet file...")
@@ -73,27 +74,30 @@ def process_file(llm, filepath, sampling_params):
73
  executor.submit(df.to_parquet, filepath)
74
 
75
  i += BATCH_SIZE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  # Delete the original parquet file if it is empty
78
  if df.empty:
79
  os.remove(filepath)
80
  print(f"Deleted the original file: {filepath}")
81
 
82
- # Initialize the HuggingFace API
83
- api = HfApi()
84
-
85
- # Upload the processed file to the repository
86
- try:
87
- api.upload_file(
88
- path_or_fileobj=new_filepath,
89
- path_in_repo=new_filepath,
90
- repo_id="AlignmentLab-AI/caption_creation_0.8",
91
- repo_type="dataset",
92
- )
93
- print(f"Uploaded {new_filepath} to AlignmentLab-AI/caption_creation_0.8 repository.")
94
- except Exception as e:
95
- print(f"Error uploading file: {e}")
96
-
97
  def main():
98
  folder_name = 'captionate'
99
  sampling_params = SamplingParams(temperature=0.7, top_p=0.95, max_tokens=100)
@@ -108,4 +112,4 @@ def main():
108
 
109
  if __name__ == "__main__":
110
  main()
111
-
 
26
  prev_eps = 0
27
  batch_texts = []
28
  df = pd.DataFrame()
29
+ batch_counter = 0 # Counter to keep track of batches processed
30
 
31
  if filepath.endswith('.parquet'):
32
  print("Reading from a parquet file...")
 
74
  executor.submit(df.to_parquet, filepath)
75
 
76
  i += BATCH_SIZE
77
+ batch_counter += 1
78
+
79
+ # Push to hub every 10 batches
80
+ if batch_counter % 10 == 0:
81
+ # Initialize the HuggingFace API
82
+ api = HfApi()
83
+
84
+ # Upload the processed file to the repository
85
+ try:
86
+ api.upload_file(
87
+ path_or_fileobj=new_filepath,
88
+ path_in_repo=new_filepath,
89
+ repo_id="AlignmentLab-AI/caption_creation_0.8",
90
+ repo_type="dataset",
91
+ )
92
+ print(f"Uploaded {new_filepath} to AlignmentLab-AI/caption_creation_0.8 repository.")
93
+ except Exception as e:
94
+ print(f"Error uploading file: {e}")
95
 
96
  # Delete the original parquet file if it is empty
97
  if df.empty:
98
  os.remove(filepath)
99
  print(f"Deleted the original file: {filepath}")
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def main():
102
  folder_name = 'captionate'
103
  sampling_params = SamplingParams(temperature=0.7, top_p=0.95, max_tokens=100)
 
112
 
113
  if __name__ == "__main__":
114
  main()
115
+ `