import os
import asyncio

import gradio as gr
import cohere
from dotenv import load_dotenv
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from openai import AzureOpenAI, AsyncAzureOpenAI
# === Load .env Variables ===
load_dotenv()
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_EMBEDDING_DEPLOYMENT = "embedding"  # must match the Azure embedding deployment name
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
EMBED_API = os.getenv("EMBED_API")
EMBED_ENDPOINT = os.getenv("EMBED_ENDPOINT")
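
# Expected .env layout (values below are placeholders, not real credentials):
#   AZURE_OPENAI_KEY=<azure-openai-api-key>
#   AZURE_OPENAI_ENDPOINT=https://<resource-name>.openai.azure.com
#   PINECONE_API_KEY=<pinecone-api-key>
#   COHERE_API_KEY=<cohere-api-key>
#   EMBED_API=<azure-embedding-resource-api-key>
#   EMBED_ENDPOINT=https://<embedding-resource-name>.openai.azure.com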
# === Pinecone Setup ===
EMBED_INDEXES = {
    "cohere": {
        "name": "cohere-pdf-index",
        "dimension": 1536,
        "region": "us-east-1"
    },
    "qwen": {
        "name": "gwen-embeddings",
        "dimension": 1024,
        "region": "us-west-2"
    },
    "ada": {
        "name": "ada-embeddings",
        "dimension": 1536,
        "region": "us-east-1"
    }
}
pc = Pinecone(api_key=PINECONE_API_KEY)
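
# The app assumes the three indexes above already exist and are populated.
# A minimal sketch of how they could be created as serverless indexes if they
# are missing; the cloud provider and "cosine" metric are assumptions, and the
# helper is not called anywhere by default:
def ensure_indexes_exist():
    from pinecone import ServerlessSpec

    existing = pc.list_indexes().names()
    for cfg in EMBED_INDEXES.values():
        if cfg["name"] not in existing:
            pc.create_index(
                name=cfg["name"],
                dimension=cfg["dimension"],
                metric="cosine",  # assumed; must match how the documents were indexed
                spec=ServerlessSpec(cloud="aws", region=cfg["region"]),
            )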

# === Azure OpenAI Client (chat completions) ===
llm_client = AzureOpenAI(
    api_key=AZURE_OPENAI_KEY,
    api_version="2024-12-01-preview",
    azure_endpoint=AZURE_OPENAI_ENDPOINT
)
# === Async Embedding Function for Ada ===
async def get_ada_embedding(texts):
    """Embed a list of texts with the Azure OpenAI embedding deployment."""
    client = AsyncAzureOpenAI(
        api_key=EMBED_API,
        api_version="2023-05-15",
        base_url=f"{EMBED_ENDPOINT}/openai/deployments/{AZURE_EMBEDDING_DEPLOYMENT}"
    )
    response = await client.embeddings.create(
        input=texts,
        model=AZURE_EMBEDDING_DEPLOYMENT
    )
    return [d.embedding for d in response.data]
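
# Example of calling the async embedder from synchronous code (assumes the
# EMBED_API / EMBED_ENDPOINT resource hosts a text-embedding-ada-002 deployment,
# which returns 1536-dimensional vectors):
#   vectors = asyncio.run(get_ada_embedding(["sample query"]))
#   assert len(vectors[0]) == 1536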

# === RAG Query Function ===
def run_rag_query(query: str, model_choice: str) -> str:
    """Embed the query, retrieve matching chunks from Pinecone, and answer with the LLM."""
    if model_choice not in EMBED_INDEXES:
        return f"Invalid model selected. Choose from {list(EMBED_INDEXES.keys())}"

    index_config = EMBED_INDEXES[model_choice]
    index = pc.Index(index_config["name"])

    # --- Embedding Generation ---
    if model_choice == "cohere":
        co = cohere.Client(COHERE_API_KEY)
        embedding = co.embed(
            model="embed-v4.0",
            texts=[query],
            input_type="search_query",
            truncate="NONE"
        ).embeddings[0]
    elif model_choice == "qwen":
        model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")
        embedding = model.encode([query], prompt_name="query")[0].tolist()
    elif model_choice == "ada":
        embedding = asyncio.run(get_ada_embedding([query]))[0]
    else:
        return "Unsupported model."

    # --- Pinecone Query ---
    results = index.query(vector=embedding, top_k=15, include_metadata=True)
    context = "\n\n".join([m.metadata.get("text", "") for m in results.matches])

    # --- LLM Prompt ---
    prompt = f"""You are a helpful assistant. Use the following context to answer the question:
Context:
{context}
Question:
{query}
Answer:"""
    response = llm_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.3
    )
    answer = response.choices[0].message.content

    # --- Source Display ---
    top_matches = "\n\n".join([
        f"**Rank {i+1}**  \n"
        f"**Source:** {m.metadata.get('source', 'N/A')}  \n"
        f"**Text:** {m.metadata.get('text', '').strip()[:500]}..."
        for i, m in enumerate(results.matches)
    ])

    return f"### Answer:\n{answer}\n\n---\n### Top Retrieved Chunks:\n{top_matches}"
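
# Example of exercising the pipeline directly, without the UI (hypothetical
# query; requires valid credentials and populated indexes):
#   print(run_rag_query("What topics do the uploaded PDFs cover?", "cohere"))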

# === Gradio Interface ===
iface = gr.Interface(
    fn=run_rag_query,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Radio(["cohere", "qwen", "ada"], label="Choose embedding model")
    ],
    outputs=gr.Markdown(label="RAG Response"),
    title="QWEN vs COHERE vs ADA RAG App",
    description="Ask a question and retrieve contextual answers from your embedded documents.\n[PDF Files Here](https://drive.google.com/drive/folders/1fq-PyNptFg20cknkzNrmW6Tev-869RY9?usp=sharing)"
)

if __name__ == "__main__":
    iface.launch()