Spaces:

HumbleBeeAI
/

al-ghazali-rag-retrieval

Running

App Files Community

eli02 committed 10 days ago

Commit

32046db

1 Parent(s): d3b717e

update: Enhance dataset management by loading existing data and pushing updates to the Hugging Face Hub

Browse files

Files changed (1) hide show

app.py +33 -3

app.py CHANGED Viewed

@@ -3,7 +3,10 @@ import torch as t
 import pandas as pd
 from sentence_transformers import SentenceTransformer, util
 from time import perf_counter as timer
-from datasets import Dataset
 def load_data(database_file):
     df = pd.read_parquet(database_file)
@@ -26,8 +29,28 @@ def save_reactions_to_dataset(user_type, query, results):
         data["retrieved_text"].append(result["text"])
         data["reaction"].append(result["reaction"])
-    dataset = Dataset.from_dict(data)
-    dataset.save_to_disk("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
 def main():
     st.title("Semantic Text Retrieval Evaluation Interface")
@@ -42,6 +65,13 @@ def main():
     if "results_saved" not in st.session_state:
         st.session_state.results_saved = False
     # Select device
     device = "cuda" if t.cuda.is_available() else "cpu"
     st.write(f"Using device: {device}")

 import pandas as pd
 from sentence_transformers import SentenceTransformer, util
 from time import perf_counter as timer
+from datasets import Dataset, load_dataset
+from huggingface_hub import login
+import os
 def load_data(database_file):
     df = pd.read_parquet(database_file)
         data["retrieved_text"].append(result["text"])
         data["reaction"].append(result["reaction"])
+    # Load existing dataset from the Hub (if it exists)
+    try:
+        dataset = load_dataset("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation", split="train")
+        existing_data = dataset.to_dict()
+    except Exception:
+        # If the dataset doesn't exist, start with an empty dataset
+        existing_data = {
+            "user_type": [],
+            "query": [],
+            "retrieved_text": [],
+            "reaction": []
+        }
+    # Append new data to existing data
+    for key in data:
+        existing_data[key].extend(data[key])
+    # Create a new dataset from the combined data
+    updated_dataset = Dataset.from_dict(existing_data)
+    # Push the updated dataset to the Hub
+    updated_dataset.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
 def main():
     st.title("Semantic Text Retrieval Evaluation Interface")
     if "results_saved" not in st.session_state:
         st.session_state.results_saved = False
+    # Access the Hugging Face token from the environment variable
+    huggingface_token = os.environ.get("al_ghazali_rag_retrieval_evaluation")
+    if huggingface_token:
+        login(token=huggingface_token)
+    else:
+        st.error("Hugging Face API token not found in environment variables.")
     # Select device
     device = "cuda" if t.cuda.is_available() else "cpu"
     st.write(f"Using device: {device}")