cstr committed on
Commit
3233c26
1 Parent(s): 2e13b19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -9
app.py CHANGED
@@ -12,7 +12,7 @@ import json
12
  import io
13
  from tqdm import tqdm
14
  import subprocess
15
- from huggingface_hub import snapshot_download, upload_file
16
 
17
  # Function to download a Parquet file from a specified URL
18
  def download_parquet(url, local_path):
@@ -454,14 +454,33 @@ def generate_failed_items_str(indices):
454
 
455
  # Function to upload the output file to Hugging Face
456
  def upload_output_to_huggingface(output_file_path, repo_name, token):
457
- upload_file(
458
- path_or_fileobj=output_file_path,
459
- path_in_repo=output_file_path,
460
- repo_id=repo_name,
461
- repo_type="dataset",
462
- token=token
463
- )
464
- print(f"Uploaded {output_file_path} to Hugging Face repository: {repo_name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
 
466
  def translate_dataset(train_url, local_parquet_path, input_file_path, output_file_path, raw_file_path, range_specification, model_type, output_dir, output_repo_name, token, translator, tokenizer):
467
  try:
 
12
  import io
13
  from tqdm import tqdm
14
  import subprocess
15
+ from huggingface_hub import snapshot_download, upload_file, HfApi, create_repo
16
 
17
  # Function to download a Parquet file from a specified URL
18
  def download_parquet(url, local_path):
 
454
 
455
  # Function to upload the output file to Hugging Face
456
  def upload_output_to_huggingface(output_file_path, repo_name, token):
457
+ api = HfApi()
458
+
459
+ # Check if the repository exists
460
+ try:
461
+ api.repo_info(repo_id=repo_name, repo_type="dataset", token=token)
462
+ except Exception as e:
463
+ if "404" in str(e):
464
+ # Create the repository if it doesn't exist
465
+ create_repo(repo_id=repo_name, repo_type="dataset", token=token)
466
+ print(f"Created repository: {repo_name}")
467
+ else:
468
+ print(f"Failed to check repository existence: {e}")
469
+ return
470
+
471
+ # Upload the file to the repository
472
+ try:
473
+ upload_file(
474
+ path_or_fileobj=output_file_path,
475
+ path_in_repo=output_file_path,
476
+ repo_id=repo_name,
477
+ repo_type="dataset",
478
+ token=token
479
+ )
480
+ print(f"Uploaded {output_file_path} to Hugging Face repository: {repo_name}")
481
+ except Exception as e:
482
+ print(f"Failed to upload {output_file_path} to Hugging Face: {e}")
483
+ raise
484
 
485
  def translate_dataset(train_url, local_parquet_path, input_file_path, output_file_path, raw_file_path, range_specification, model_type, output_dir, output_repo_name, token, translator, tokenizer):
486
  try: