dinhquangson committed on
Commit
fde2b19
·
verified ·
1 Parent(s): fe8dc94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -77,12 +77,21 @@ async def create_upload_file(file: UploadFile = File(...)):
77
  with open(file_savePath,'wb') as f:
78
  shutil.copyfileobj(file.file, f)
79
  # Here you can save the file and do other operations as needed
80
- full_dataset = load_dataset("parquet",
81
- data_files=file_savePath,
82
- split="train",
83
- cache_dir=temp_path,
84
- keep_in_memory=True,
85
- num_proc=NUM_PROC*2)
 
 
 
 
 
 
 
 
 
86
  # Generate and append embeddings to the train split
87
  law_embeddings = generate_embeddings(full_dataset)
88
  full_dataset= full_dataset.add_column("embeddings", law_embeddings)
 
77
  with open(file_savePath,'wb') as f:
78
  shutil.copyfileobj(file.file, f)
79
  # Here you can save the file and do other operations as needed
80
+ if '.json' in file_savePath:
81
+ full_dataset = load_dataset('json',
82
+ data_files='my_file.json',
83
+ cache_dir=temp_path,
84
+ keep_in_memory=True,
85
+ num_proc=NUM_PROC*2)
86
+ elif '.parquet' in file_savePath:
87
+ full_dataset = load_dataset("parquet",
88
+ data_files=file_savePath,
89
+ split="train",
90
+ cache_dir=temp_path,
91
+ keep_in_memory=True,
92
+ num_proc=NUM_PROC*2)
93
+ else:
94
+ raise NotImplementedError("This feature is not supported yet")
95
  # Generate and append embeddings to the train split
96
  law_embeddings = generate_embeddings(full_dataset)
97
  full_dataset= full_dataset.add_column("embeddings", law_embeddings)