import os
import asyncio

import gradio as gr
import cohere
from dotenv import load_dotenv
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from openai import AzureOpenAI, AsyncAzureOpenAI
# === Load .env Variables ===
load_dotenv()
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_EMBEDDING_DEPLOYMENT = "embedding"  # must match the Azure embedding deployment name
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
EMBED_API = os.getenv("EMBED_API")
EMBED_ENDPOINT = os.getenv("EMBED_ENDPOINT")
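
# Expected .env layout (values below are placeholders, not real credentials):
#   AZURE_OPENAI_KEY=<azure-openai-api-key>
#   AZURE_OPENAI_ENDPOINT=https://<resource-name>.openai.azure.com
#   PINECONE_API_KEY=<pinecone-api-key>
#   COHERE_API_KEY=<cohere-api-key>
#   EMBED_API=<azure-embedding-resource-api-key>
#   EMBED_ENDPOINT=https://<embedding-resource-name>.openai.azure.com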
# === Pinecone Setup ===
EMBED_INDEXES = {
    "cohere": {
        "name": "cohere-pdf-index",
        "dimension": 1536,
        "region": "us-east-1"
    },
    "qwen": {
        "name": "gwen-embeddings",
        "dimension": 1024,
        "region": "us-west-2"
    },
    "ada": {
        "name": "ada-embeddings",
        "dimension": 1536,
        "region": "us-east-1"
    }
}
pc = Pinecone(api_key=PINECONE_API_KEY)
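
# The app assumes the three indexes above already exist and are populated.
# A minimal sketch of how they could be created as serverless indexes if they
# are missing; the cloud provider and "cosine" metric are assumptions, and the
# helper is not called anywhere by default:
def ensure_indexes_exist():
    from pinecone import ServerlessSpec

    existing = pc.list_indexes().names()
    for cfg in EMBED_INDEXES.values():
        if cfg["name"] not in existing:
            pc.create_index(
                name=cfg["name"],
                dimension=cfg["dimension"],
                metric="cosine",  # assumed; must match how the documents were indexed
                spec=ServerlessSpec(cloud="aws", region=cfg["region"]),
            )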

# === Azure OpenAI Client (chat completions) ===
llm_client = AzureOpenAI(
    api_key=AZURE_OPENAI_KEY,
    api_version="2024-12-01-preview",
    azure_endpoint=AZURE_OPENAI_ENDPOINT
)
# === Async Embedding Function for Ada ===
async def get_ada_embedding(texts):
    """Embed a list of texts with the Azure OpenAI embedding deployment."""
    client = AsyncAzureOpenAI(
        api_key=EMBED_API,
        api_version="2023-05-15",
        base_url=f"{EMBED_ENDPOINT}/openai/deployments/{AZURE_EMBEDDING_DEPLOYMENT}"
    )
    response = await client.embeddings.create(
        input=texts,
        model=AZURE_EMBEDDING_DEPLOYMENT
    )
    return [d.embedding for d in response.data]
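
# Example of calling the async embedder from synchronous code (assumes the
# EMBED_API / EMBED_ENDPOINT resource hosts a text-embedding-ada-002 deployment,
# which returns 1536-dimensional vectors):
#   vectors = asyncio.run(get_ada_embedding(["sample query"]))
#   assert len(vectors[0]) == 1536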

# === RAG Query Function ===
def run_rag_query(query: str, model_choice: str) -> str:
    """Embed the query, retrieve matching chunks from Pinecone, and answer with the LLM."""
    if model_choice not in EMBED_INDEXES:
        return f"Invalid model selected. Choose from {list(EMBED_INDEXES.keys())}"

    index_config = EMBED_INDEXES[model_choice]
    index = pc.Index(index_config["name"])

    # --- Embedding Generation ---
    if model_choice == "cohere":
        co = cohere.Client(COHERE_API_KEY)
        embedding = co.embed(
            model="embed-v4.0",
            texts=[query],
            input_type="search_query",
            truncate="NONE"
        ).embeddings[0]
    elif model_choice == "qwen":
        model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
        embedding = model.encode([query], prompt_name="query")[0].tolist()
    elif model_choice == "ada":
        embedding = asyncio.run(get_ada_embedding([query]))[0]
    else:
        return "Unsupported model."

    # --- Pinecone Query ---
    results = index.query(vector=embedding, top_k=15, include_metadata=True)
    context = "\n\n".join([m.metadata.get("text", "") for m in results.matches])

    # --- LLM Prompt ---
    prompt = f"""You are a helpful assistant. Use the following context to answer the question:
Context:
{context}
Question:
{query}
Answer:"""
    response = llm_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    answer = response.choices[0].message.content

    # --- Source Display ---
    top_matches = "\n\n".join([
        f"**Rank {i+1}**  \n"
        f"**Source:** {m.metadata.get('source', 'N/A')}  \n"
        f"**Text:** {m.metadata.get('text', '').strip()[:500]}..."
        for i, m in enumerate(results.matches)
    ])

    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
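
# Example of exercising the pipeline directly, without the UI (hypothetical
# query; requires valid credentials and populated indexes):
#   print(run_rag_query("What topics do the uploaded PDFs cover?", "cohere"))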

# === Gradio Interface ===
iface = gr.Interface(
    fn=run_rag_query,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Radio(["cohere", "qwen", "ada"], label="Choose embedding model")
    ],
    outputs=gr.Markdown(label="RAG Response"),
    title="QWEN vs COHERE vs ADA RAG App",
    description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
)

if __name__ == "__main__":
    iface.launch()