# gwen-cohere / app.py
import os
import gradio as gr
import asyncio
from dotenv import load_dotenv
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from openai import AzureOpenAI, AsyncAzureOpenAI
import cohere
# === Load .env Variables ===
load_dotenv()
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_EMBEDDING_DEPLOYMENT = "embedding"  # must match the Azure embedding deployment name
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
EMBED_API = os.getenv("EMBED_API")
EMBED_ENDPOINT = os.getenv("EMBED_ENDPOINT")
# === Pinecone Setup ===
EMBED_INDEXES = {
    "cohere": {
        "name": "cohere-pdf-index",
        "dimension": 1536,
        "region": "us-east-1"
    },
    "qwen": {
        "name": "gwen-embeddings",  # index name kept as-is from the original deployment
        "dimension": 1024,
        "region": "us-west-2"
    },
    "ada": {
        "name": "ada-embeddings",
        "dimension": 1536,
        "region": "us-east-1"
    }
}
pc = Pinecone(api_key=PINECONE_API_KEY)
llm_client = AzureOpenAI(
    api_key=AZURE_OPENAI_KEY,
    api_version="2024-12-01-preview",
    azure_endpoint=AZURE_OPENAI_ENDPOINT
)
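# --- Optional sketch (not part of the original app): create any missing index up
# front using the dimensions declared above. `create_index` and `ServerlessSpec`
# are the standard pinecone v3+ client calls; the cloud value and metric here are
# assumptions and should match however the vectors were originally upserted.
from pinecone import ServerlessSpec

def ensure_index_exists(model_choice: str) -> None:
    cfg = EMBED_INDEXES[model_choice]
    if cfg["name"] not in pc.list_indexes().names():
        pc.create_index(
            name=cfg["name"],
            dimension=cfg["dimension"],
            metric="cosine",  # assumption: cosine similarity
            spec=ServerlessSpec(cloud="aws", region=cfg["region"]),
        )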
# === Async Embedding Function for Ada ===
async def get_ada_embedding(texts):
    # Passing the bare resource endpoint via `azure_endpoint` lets the client
    # build the deployment path itself; `model` is the Azure deployment name.
    client = AsyncAzureOpenAI(
        api_key=EMBED_API,
        api_version="2023-05-15",
        azure_endpoint=EMBED_ENDPOINT
    )
    response = await client.embeddings.create(
        input=texts,
        model=AZURE_EMBEDDING_DEPLOYMENT
    )
    return [d.embedding for d in response.data]
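# Usage sketch (assumes EMBED_API / EMBED_ENDPOINT point at a
# text-embedding-ada-002 deployment, whose vectors are 1536-dimensional,
# matching the "ada" index config above):
#   vecs = asyncio.run(get_ada_embedding(["hello world"]))
#   assert len(vecs) == 1 and len(vecs[0]) == 1536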
# === RAG Query Function ===
_qwen_model = None  # cached so the SentenceTransformer is loaded only once

def run_rag_query(query: str, model_choice: str) -> str:
    if model_choice not in EMBED_INDEXES:
        return f"Invalid model selected. Choose from {list(EMBED_INDEXES.keys())}"
    index_config = EMBED_INDEXES[model_choice]
    index = pc.Index(index_config["name"])

    # --- Embedding Generation ---
    if model_choice == "cohere":
        co = cohere.Client(COHERE_API_KEY)
        embedding = co.embed(
            model="embed-v4.0",
            texts=[query],
            input_type="search_query",
            truncate="NONE"
        ).embeddings[0]
    elif model_choice == "qwen":
        global _qwen_model
        if _qwen_model is None:
            _qwen_model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
        embedding = _qwen_model.encode([query], prompt_name="query")[0].tolist()
    elif model_choice == "ada":
        embedding = asyncio.run(get_ada_embedding([query]))[0]
    else:
        return "Unsupported model."

    # --- Pinecone Query ---
    results = index.query(vector=embedding, top_k=15, include_metadata=True)
    context = "\n\n".join(m.metadata.get("text", "") for m in results.matches)

    # --- LLM Prompt ---
    prompt = f"""You are a helpful assistant. Use the following context to answer the question:

Context:
{context}

Question:
{query}

Answer:"""
    response = llm_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    answer = response.choices[0].message.content

    # --- Source Display ---
    top_matches = "\n\n".join(
        f"**Rank {i+1}**  \n"
        f"📄 **Source:** {m.metadata.get('source', 'N/A')}  \n"
        f"📝 **Text:** {m.metadata.get('text', '').strip()[:500]}..."
        for i, m in enumerate(results.matches)
    )
    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
# === Gradio Interface ===
iface = gr.Interface(
    fn=run_rag_query,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Radio(["cohere", "qwen", "ada"], label="Choose embedding model")
    ],
    outputs=gr.Markdown(label="RAG Response"),
    title="QWEN vs COHERE vs ADA RAG App",
    description=(
        "Ask a question and retrieve contextual answers from your embedded documents.\n"
        "[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
    )
)
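# Smoke-test sketch (assumption, bypasses the UI; the question string is
# illustrative):
#   print(run_rag_query("What does chapter 1 cover?", "qwen"))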
if __name__ == "__main__":
    iface.launch()