eli02 committed on
Commit
32046db
·
1 Parent(s): d3b717e

update: Enhance dataset management by loading existing data and pushing updates to the Hugging Face Hub

Browse files
Files changed (1) hide show
  1. app.py +33 -3
app.py CHANGED
@@ -3,7 +3,10 @@ import torch as t
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer, util
5
  from time import perf_counter as timer
6
- from datasets import Dataset
 
 
 
7
 
8
  def load_data(database_file):
9
  df = pd.read_parquet(database_file)
@@ -26,8 +29,28 @@ def save_reactions_to_dataset(user_type, query, results):
26
  data["retrieved_text"].append(result["text"])
27
  data["reaction"].append(result["reaction"])
28
 
29
- dataset = Dataset.from_dict(data)
30
- dataset.save_to_disk("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  def main():
33
  st.title("Semantic Text Retrieval Evaluation Interface")
@@ -42,6 +65,13 @@ def main():
42
  if "results_saved" not in st.session_state:
43
  st.session_state.results_saved = False
44
 
 
 
 
 
 
 
 
45
  # Select device
46
  device = "cuda" if t.cuda.is_available() else "cpu"
47
  st.write(f"Using device: {device}")
 
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer, util
5
  from time import perf_counter as timer
6
+ from datasets import Dataset, load_dataset
7
+ from huggingface_hub import login
8
+ import os
9
+
10
 
11
  def load_data(database_file):
12
  df = pd.read_parquet(database_file)
 
29
  data["retrieved_text"].append(result["text"])
30
  data["reaction"].append(result["reaction"])
31
 
32
+ # Load existing dataset from the Hub (if it exists)
33
+ try:
34
+ dataset = load_dataset("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation", split="train")
35
+ existing_data = dataset.to_dict()
36
+ except Exception:
37
+ # If the dataset doesn't exist, start with an empty dataset
38
+ existing_data = {
39
+ "user_type": [],
40
+ "query": [],
41
+ "retrieved_text": [],
42
+ "reaction": []
43
+ }
44
+
45
+ # Append new data to existing data
46
+ for key in data:
47
+ existing_data[key].extend(data[key])
48
+
49
+ # Create a new dataset from the combined data
50
+ updated_dataset = Dataset.from_dict(existing_data)
51
+
52
+ # Push the updated dataset to the Hub
53
+ updated_dataset.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
54
 
55
  def main():
56
  st.title("Semantic Text Retrieval Evaluation Interface")
 
65
  if "results_saved" not in st.session_state:
66
  st.session_state.results_saved = False
67
 
68
+ # Access the Hugging Face token from the environment variable
69
+ huggingface_token = os.environ.get("al_ghazali_rag_retrieval_evaluation")
70
+ if huggingface_token:
71
+ login(token=huggingface_token)
72
+ else:
73
+ st.error("Hugging Face API token not found in environment variables.")
74
+
75
  # Select device
76
  device = "cuda" if t.cuda.is_available() else "cpu"
77
  st.write(f"Using device: {device}")