QDrantRAG9

Sleeping

dinhquangson committed on Jun 18, 2024

Commit

fde2b19

verified ·

1 Parent(s): fe8dc94

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -77,12 +77,21 @@ async def create_upload_file(file: UploadFile = File(...)):
     with open(file_savePath,'wb') as f:
         shutil.copyfileobj(file.file, f)
     # Here you can save the file and do other operations as needed
-    full_dataset = load_dataset("parquet",
-                data_files=file_savePath,
-                split="train",
-                cache_dir=temp_path,
-                keep_in_memory=True,
-                num_proc=NUM_PROC*2)
     # Generate and append embeddings to the train split
     law_embeddings = generate_embeddings(full_dataset)
     full_dataset= full_dataset.add_column("embeddings", law_embeddings)

     with open(file_savePath,'wb') as f:
         shutil.copyfileobj(file.file, f)
     # Here you can save the file and do other operations as needed
+    if '.json' in file_savePath:
+        full_dataset = load_dataset('json',
+                    data_files='my_file.json',
+                    cache_dir=temp_path,
+                    keep_in_memory=True,
+                    num_proc=NUM_PROC*2)
+    elif '.parquet' in file_savePath:
+        full_dataset = load_dataset("parquet",
+                    data_files=file_savePath,
+                    split="train",
+                    cache_dir=temp_path,
+                    keep_in_memory=True,
+                    num_proc=NUM_PROC*2)
+    else:
+        raise NotImplementedError("This feature is not supported yet")
     # Generate and append embeddings to the train split
     law_embeddings = generate_embeddings(full_dataset)
     full_dataset= full_dataset.add_column("embeddings", law_embeddings)