Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ import json
|
|
12 |
import io
|
13 |
from tqdm import tqdm
|
14 |
import subprocess
|
15 |
-
from huggingface_hub import snapshot_download, upload_file
|
16 |
|
17 |
# Function to download a Parquet file from a specified URL
|
18 |
def download_parquet(url, local_path):
|
@@ -454,14 +454,33 @@ def generate_failed_items_str(indices):
|
|
454 |
|
455 |
# Function to upload the output file to Hugging Face
|
456 |
def upload_output_to_huggingface(output_file_path, repo_name, token):
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
repo_type="dataset",
|
462 |
-
|
463 |
-
|
464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
465 |
|
466 |
def translate_dataset(train_url, local_parquet_path, input_file_path, output_file_path, raw_file_path, range_specification, model_type, output_dir, output_repo_name, token, translator, tokenizer):
|
467 |
try:
|
|
|
12 |
import io
|
13 |
from tqdm import tqdm
|
14 |
import subprocess
|
15 |
+
from huggingface_hub import snapshot_download, upload_file, HfApi, create_repo
|
16 |
|
17 |
# Function to download a Parquet file from a specified URL
|
18 |
def download_parquet(url, local_path):
|
|
|
454 |
|
455 |
# Function to upload the output file to Hugging Face
|
456 |
def upload_output_to_huggingface(output_file_path, repo_name, token):
    """Upload a local file to a Hugging Face dataset repository.

    The dataset repository ``repo_name`` is created first when it does not
    exist yet.

    Args:
        output_file_path: Path of the local file to upload. The same string is
            used as the destination path inside the repository.
        repo_name: Repository id of the target dataset repository.
        token: Hugging Face access token passed to every Hub call.

    Returns:
        None. When the existence check fails for a reason other than the
        repository being missing, the error is printed and the function
        returns without uploading.

    Raises:
        Exception: Whatever ``create_repo`` or ``upload_file`` raises; upload
            failures are printed and then re-raised unchanged.
    """
    # Local import: the file's top-level import only brings in
    # snapshot_download, upload_file, HfApi and create_repo.
    from huggingface_hub.utils import RepositoryNotFoundError

    api = HfApi()

    # Check if the repository exists
    try:
        api.repo_info(repo_id=repo_name, repo_type="dataset", token=token)
    except RepositoryNotFoundError:
        # Match on the exception type rather than searching the message for
        # "404": the message embeds the request URL (and so the repo name),
        # which can contain "404" for unrelated failures.
        # NOTE(review): the Hub may also report a private repo the token
        # cannot access as "not found"; create_repo will then raise.
        # exist_ok=True keeps this safe if the repo appears between the check
        # and the create call.
        create_repo(
            repo_id=repo_name,
            repo_type="dataset",
            token=token,
            exist_ok=True,
        )
        print(f"Created repository: {repo_name}")
    except Exception as e:
        # Best-effort: report the problem and skip the upload.
        print(f"Failed to check repository existence: {e}")
        return

    # Upload the file to the repository
    try:
        # NOTE(review): path_in_repo reuses the local path verbatim, so any
        # directory components end up in the repository path - confirm intended.
        upload_file(
            path_or_fileobj=output_file_path,
            path_in_repo=output_file_path,
            repo_id=repo_name,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        print(f"Failed to upload {output_file_path} to Hugging Face: {e}")
        raise
    else:
        print(f"Uploaded {output_file_path} to Hugging Face repository: {repo_name}")
|
484 |
|
485 |
def translate_dataset(train_url, local_parquet_path, input_file_path, output_file_path, raw_file_path, range_specification, model_type, output_dir, output_repo_name, token, translator, tokenizer):
|
486 |
try:
|