File size: 6,292 Bytes
eee239e f0539b9 32046db 72d58ce 32046db 9c1e786 6fda102 69f5cc3 9c1e786 f0539b9 72d58ce f0539b9 7ffa9fc 9b0bd3d 7ffa9fc 9b0bd3d 86f7fa2 9b0bd3d 7ffa9fc 9b0bd3d 32046db 9b0bd3d 72d58ce eee239e 9c1e786 eee239e 3adff1b 32046db eee239e 32046db 72d58ce f0539b9 d4bf6ac 72d58ce f0539b9 d4bf6ac 72d58ce eee239e 63cf07b d4bf6ac eee239e 767e056 eee239e 767e056 eee239e f0539b9 eee239e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
import gradio as gr
import pandas as pd
from time import perf_counter as timer
from datasets import Dataset, load_dataset
from huggingface_hub import login
import os
from openai import OpenAI
# Load credentials from environment variables or a secure source
def load_credentials(max_credentials=50):
    """Collect username/password pairs from numbered environment variables.

    For every slot ``i`` from 1 through ``max_credentials`` the variables
    ``login_<i>`` and ``password_<i>`` are read; a pair is kept only when
    both are set and non-empty.

    Args:
        max_credentials: Highest slot number to probe. Defaults to 50,
            the bound that used to be hard-coded.

    Returns:
        A dict mapping each username to its password. If a username
        occurs in more than one slot, the highest-numbered slot wins.
    """
    credentials = {}
    for slot in range(1, max_credentials + 1):
        username = os.environ.get(f"login_{slot}")
        password = os.environ.get(f"password_{slot}")
        if username and password:
            credentials[username] = password
    return credentials
# Authentication function
def authenticate(username, password, credentials):
    """Check a username/password pair against the known credentials.

    Args:
        username: Login name supplied by the user.
        password: Password supplied by the user.
        credentials: Mapping of username to expected password.

    Returns:
        True only when ``username`` is present in ``credentials`` and the
        supplied password matches the stored one; False otherwise.
    """
    import hmac  # local import so the block does not depend on file-level imports

    expected = credentials.get(username)
    # An unknown user yields None; without this guard a None password would
    # compare equal to it and be accepted.
    if expected is None or password is None:
        return False
    if isinstance(expected, str) and isinstance(password, str):
        # Constant-time comparison. Encode first because compare_digest
        # rejects str arguments containing non-ASCII characters.
        return hmac.compare_digest(
            expected.encode("utf-8"), password.encode("utf-8")
        )
    return expected == password
def load_data(database_file):
    """Read the parquet file at ``database_file`` and return it as a DataFrame."""
    return pd.read_parquet(database_file)
def save_reactions_to_dataset(user_type, query, results):
    """Append reaction rows to the evaluation dataset on the Hugging Face Hub.

    One row is produced per entry of ``results``; the rows are appended to
    the current ``train`` split of the Hub dataset (or to an empty table
    when the dataset is not found) and the combined table is pushed back.

    Args:
        user_type: Value stored in the ``user_type`` column of every new row.
        query: Value stored in the ``query`` column of every new row.
        results: Iterable of dicts, each providing ``"text"`` and
            ``"reaction"`` keys.

    Raises:
        Any error from ``load_dataset`` other than ``FileNotFoundError``
        (for example a connection failure) now propagates instead of being
        treated as "dataset missing": pushing after such a failure would
        replace the stored history with only the new rows.
    """
    repo_id = "HumbleBeeAI/al-ghazali-rag-retrieval-evaluation"
    columns = ("user_type", "query", "retrieved_text", "reaction")

    rows = list(results)
    new_data = {
        "user_type": [user_type] * len(rows),
        "query": [query] * len(rows),
        "retrieved_text": [row["text"] for row in rows],
        "reaction": [row["reaction"] for row in rows],
    }

    # Load existing dataset from the Hub (if it exists)
    try:
        existing_data = load_dataset(repo_id, split="train").to_dict()
    except FileNotFoundError:
        # NOTE(review): the `datasets` "not found" / "no data files" errors
        # are expected to derive from FileNotFoundError - confirm against
        # the installed `datasets` version.
        existing_data = {column: [] for column in columns}

    # Append new data to existing data
    for column in columns:
        existing_data[column].extend(new_data[column])

    # Create a new dataset from the combined data and push it to the Hub
    Dataset.from_dict(existing_data).push_to_hub(repo_id)
def generate_openai_embeddings(client, text):
    """Request an embedding for ``text`` and return the first result's vector.

    Calls ``client.embeddings.create`` with the ``text-embedding-3-small``
    model and returns ``.data[0].embedding`` of the response.
    """
    request = {"input": text, "model": "text-embedding-3-small"}
    reply = client.embeddings.create(**request)
    first_item = reply.data[0]
    return first_item.embedding
def cosine_similarity(embedding_0, embedding_1):
    """Return the cosine similarity of two numeric vectors.

    Args:
        embedding_0: First vector (any re-iterable of numbers).
        embedding_1: Second vector (any re-iterable of numbers).

    Returns:
        The dot product divided by the product of the Euclidean norms.
        When either vector has zero norm (including empty input) the
        similarity is undefined and ``0.0`` is returned instead of
        dividing by zero.

    Note:
        The dot product pairs elements with ``zip``, so if the vectors
        differ in length the surplus elements contribute only to the norm.
    """
    dot_product = sum(a * b for a, b in zip(embedding_0, embedding_1))
    norm_0 = sum(a * a for a in embedding_0) ** 0.5
    norm_1 = sum(b * b for b in embedding_1) ** 0.5
    denominator = norm_0 * norm_1
    if denominator == 0:
        return 0.0
    return dot_product / denominator
def search_query(client, query, df, n=3):
    """Return the ``n`` rows of ``df`` most similar to ``query``.

    The query is embedded with ``generate_openai_embeddings`` and compared
    against every value of the ``openai_embedding`` column using
    ``cosine_similarity``.

    Args:
        client: Client object passed through to ``generate_openai_embeddings``.
        query: Text to search for.
        df: DataFrame with an ``openai_embedding`` column. It is not
            modified; scores are attached to a copy.
        n: Number of rows to return.

    Returns:
        A DataFrame holding the top ``n`` rows sorted by descending score,
        with the score in an added ``similarities`` column.
    """
    query_embedding = generate_openai_embeddings(client, query)
    scores = df.openai_embedding.apply(
        lambda stored: cosine_similarity(stored, query_embedding)
    )
    # assign() works on a copy, so the caller's frame does not silently gain
    # a 'similarities' column.
    scored = df.assign(similarities=scores)
    return scored.sort_values('similarities', ascending=False).head(n)
def main(username, password, user_type, query, reactions=None):
    """Authenticate, run a similarity search and optionally store reactions.

    Args:
        username: Login name to check against the configured credentials.
        password: Password to check against the configured credentials.
        user_type: Stored with every saved reaction row.
        query: Text to search for.
        reactions: Optional mapping of result index to reaction. When
            truthy, the reactions are saved to the Hub dataset.

    Returns:
        A 3-tuple ``(status_message, results, save_message)``:
        ``status_message`` is the timing line or an error description,
        ``results`` is a list of ``{"text": ..., "index": ...}`` dicts
        (empty on failure) and ``save_message`` is always a string.
    """
    credentials = load_credentials()
    if not authenticate(username, password, credentials):
        return "Invalid username or password", [], ""

    # Access the Hugging Face token from the environment variable
    huggingface_token = os.environ.get("al_ghazali_rag_retrieval_evaluation")
    if not huggingface_token:
        return "Hugging Face API token not found in environment variables.", [], ""
    login(token=huggingface_token)

    # An empty query cannot be embedded; answer directly instead of
    # surfacing an API error.
    if not query or not query.strip():
        return "Please enter a query.", [], ""

    # Load database from predefined path.
    # NOTE(review): the author name in this path was mis-encoded in the
    # source as seen; restored here to "Ghazzālī" - confirm it matches the
    # file on disk.
    database_file = '[openai_embedded] The Alchemy of Happiness (Ghazzālī, Claud Field) (Z-Library).parquet'
    try:
        df = load_data(database_file)
    except Exception as e:
        return f"Failed to load database: {str(e)}", [], ""

    try:
        # Initialize OpenAI client
        client = OpenAI()
        start_time = timer()
        res = search_query(client, query, df, n=3)
        end_time = timer()
        # NOTE(review): rows are looked up by position using index labels,
        # which presumably relies on df having a default integer index;
        # the "ext" column name is kept as found - confirm both.
        results = [
            {"text": df.iloc[int(idx)]["ext"], "index": idx}
            for idx in res.index.tolist()
        ]
    except Exception as e:
        return f"Failed to process query: {str(e)}", [], ""

    timing_message = f"Time taken to compute scores: {end_time - start_time:.5f} seconds"

    # If reactions are provided, save them to the dataset
    if not reactions:
        return timing_message, results, ""

    try:
        reaction_results = [
            {"text": df.iloc[int(idx)]["ext"], "reaction": reaction}
            for idx, reaction in reactions.items()
        ]
        save_reactions_to_dataset(user_type, query, reaction_results)
    except Exception as e:
        # Keep the search results and report the failure in the save-status
        # slot, the one field callers read after a save attempt.
        return timing_message, results, f"Failed to save reactions: {str(e)}"
    return timing_message, results, "Reactions saved successfully!"
# Gradio interface for collecting reactions
def collect_reactions(results, reaction_1, reaction_2, reaction_3):
    """Pair each of the first three results with its reaction.

    Returns a dict keyed by each result's ``"index"`` value; reactions
    without a corresponding result are ignored, and an empty or missing
    ``results`` yields an empty dict.
    """
    if not results:
        return {}
    supplied = (reaction_1, reaction_2, reaction_3)
    return {hit["index"]: choice for hit, choice in zip(results, supplied)}
# Define the Gradio interface
def gradio_interface(username, password, user_type, query, reaction_1=None, reaction_2=None, reaction_3=None):
    """Gradio callback: run the search, then save reactions if any were given.

    ``main`` is first called without reactions to obtain the timing text
    and the results. If at least one reaction argument is not None, the
    reactions are matched to those results and, when any of them is truthy,
    ``main`` is called again with them; only the save message of that
    second call is used.

    Returns:
        ``(time_taken, results, save_message)`` for the three output widgets.
    """
    time_taken, results, save_message = main(username, password, user_type, query)

    supplied = (reaction_1, reaction_2, reaction_3)
    if all(choice is None for choice in supplied):
        # Nothing was rated, so there is nothing to save.
        return time_taken, results, save_message

    chosen = collect_reactions(results, *supplied)
    if any(chosen.values()):
        _, _, save_message = main(username, password, user_type, query, chosen)
    return time_taken, results, save_message
# Input and output components for Gradio
# Input widgets, in the positional order expected by gradio_interface.
# The reaction choices were mis-encoded in the source as seen (thumbs-up and
# thumbs-down rendered as the same character); they are restored here to
# thumbs-up / shrug / thumbs-down.
# NOTE(review): confirm the intended order of the three reactions.
inputs = [
    gr.Textbox(label="Username"),
    gr.Textbox(label="Password", type="password"),
    gr.Radio(["Layman", "Enthusiast", "Ustaz (Expert)"], label="Select your user type:"),
    gr.Textbox(label="Enter your query:"),
] + [
    # One reaction selector per result slot.
    gr.Radio(["👍", "🤷", "👎"], label=f"Reaction for Result {rank}")
    for rank in range(1, 4)
]
# Output widgets, matching the (time_taken, results, save_message) tuple
# returned by gradio_interface.
outputs = [
    gr.Textbox(label="Time taken"),   # timing line or error text
    gr.JSON(label="Results"),         # list of retrieved passages
    gr.Textbox(label="Save Status"),  # outcome of saving reactions
]
# Wire the callback and the widget lists into a single Gradio interface.
iface = gr.Interface(
    title="EnlightenQalb (Alchemy of Happiness)",
    description="Search and rate results from The Alchemy of Happiness.",
    fn=gradio_interface,
    inputs=inputs,
    outputs=outputs,
)
if __name__ == "__main__":
    # Start the Gradio app only when executed as a script, not on import.
    iface.launch()