# ------------Libraries------------------
import os

import gradio as gr
from dotenv import load_dotenv
from groq import Groq
from openai import OpenAI
from pinecone import Pinecone
from transformers import AutoTokenizer

load_dotenv()
# -----------ENVs-------------------
PINECONE_API = os.getenv("PINECONE_API")
PINECONE_ENV = os.getenv("PINECONE_ENV")
PINECONE_NAMESPACE = ["policies", "management"]
#GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_API_KEY = os.getenv("TEST_API")
GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
LLM_MODEL = "llama-3.3-70b-versatile"
# LLM_MODEL = "meta-llama/llama-4-maverick-17b-128e-instruct"
NVIDIA_API = os.getenv("NVIDEA_EMBEDDING_API")
# ----------NVIDIA Embedding Client--------------------
client = OpenAI(
    api_key=NVIDIA_API,
    base_url="https://integrate.api.nvidia.com/v1",
)
# ----------Pinecone--------------------
pc = Pinecone(api_key=PINECONE_API)
# ----------Index Init--------------------
# index = pc.Index("unilever")
index = pc.Index("unilever-without-metadata")
# ----------NVIDIA Embedding Generator--------------------
def get_embedding(text: str):
    """Embed `text` with NVIDIA's nv-embed-v1 model and return the vector."""
    response = client.embeddings.create(
        input=text,
        model="nvidia/nv-embed-v1",
        encoding_format="float",
        extra_body={"input_type": "query", "truncate": "NONE"},
    )
    return response.data[0].embedding
# ----------query_pinecone--------------------
# def query_pinecone(embedding):
# # Use keyword arguments to pass the embedding and other parameters
# result = index.query(vector=embedding, top_k=20, namespace="management_with_metadata", include_metadata=True)
# return result['matches']
# ------------------------
# Pinecone Query
# ------------------------
def query_pinecone(embedding):
    """Blocking Pinecone query across all configured namespaces."""
    result = index.query_namespaces(
        vector=embedding,
        namespaces=PINECONE_NAMESPACE,
        metric="cosine",
        top_k=25,
        include_metadata=True,
    )
    return result["matches"]
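# A minimal retrieval sanity check, illustrative only and not wired into the
# app: it runs the embedding + Pinecone path end-to-end and prints the top
# matches. It assumes each match exposes its chunk text under
# metadata["text"], the same schema the pipeline below relies on.
def debug_retrieval(question: str, k: int = 3):
    matches = query_pinecone(get_embedding(question))
    for match in matches[:k]:
        print(f"{match['score']:.3f} | {match['metadata']['text'][:80]}")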
# ----------Groq Streaming LLM--------------------
groq_client = Groq(api_key=GROQ_API_KEY)
def query_groq_stream(user_input, relevant_context):
    """Stream a Groq chat completion, yielding response text chunk by chunk."""
    try:
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": f"""
You are **MAQ**, a helpful assistant for Unilever employees.
🎯 Respond professionally, using this document context:
{relevant_context}
✅ Always format responses in Markdown.
✅ At the end of each answer, include a section:
## Sources:
- [Document Name](Link)
""",
                },
                {"role": "user", "content": user_input},
            ],
            model=LLM_MODEL,
            temperature=0.3,
            stream=True,
        )
        for chunk in chat_completion:
            yield chunk.choices[0].delta.content or ""
    except Exception as e:
        print("Groq streaming failed:", e)
        yield "[Error] Unable to process the request at the moment."
# --------Tokenizer and Token Counter----------------------
# Tokenizer used to count tokens in a piece of text.
tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-en")

def count_tokens(text: str) -> int:
    """Return the number of tokens `text` encodes to."""
    return len(tokenizer.encode(text))
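# A hedged sketch of how count_tokens() could be put to use (the pipeline
# below does not call it yet): cap the retrieved context at a token budget
# before prompting the LLM. The 6000-token budget is an assumption, not a
# documented model limit.
def trim_to_token_budget(chunks: list[str], budget: int = 6000) -> str:
    kept, used = [], 0
    for chunk in chunks:
        cost = count_tokens(chunk)
        if used + cost > budget:
            break
        kept.append(chunk)
        used += cost
    return "\n".join(kept)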
# --------process_user_query_stream----------------------
def process_user_query_stream(user_query, chat_state, memory_state):
    """Embed the query, retrieve context, and stream the LLM answer to the UI."""
    embedding = get_embedding(user_query)
    relevant_chunks = query_pinecone(embedding)
    context = "\n".join(chunk["metadata"]["text"] for chunk in relevant_chunks)
    history_str = "\n".join(memory_state)
    prompt = f"""
Context from Unilever documents:
{context}
Previous interaction:
{history_str}
Provide a clear, insightful, and professional response to the user's question.
"""
    partial_response = ""
    for chunk in query_groq_stream(f"{user_query}. Provide links if any.", prompt):
        partial_response += chunk
        yield (
            chat_state + [(user_query, partial_response)],
            chat_state,
            memory_state,
            "",  # clears the input box while streaming
        )
    memory_state.append(f"User: {user_query}")
    memory_state.append(f"Unilever Assistant: {partial_response}")
    chat_state.append((user_query, partial_response))
    # Final yield: a generator's `return` value never reaches Gradio, so the
    # committed history must be yielded explicitly.
    yield chat_state, chat_state, memory_state, ""
def clear_chat():
    """Reset the chatbot display and both session states."""
    return [], [], []
# ----------- Gradio Interface ------------
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="blue"),
    css="""
    #centered-logo {
        display: flex;
        flex-direction: column;
        align-items: center;
        justify-content: center;
        text-align: center;
    }
    #input-row {
        display: flex;
        gap: 8px;
        align-items: center;
    }
    .user-textbox {
        flex: 5;
    }
    .tiny-send-btn {
        padding: 4px 12px !important;
        font-size: 12px !important;
        min-width: 60px !important;
        height: 36px !important;
        line-height: 1 !important;
    }
    """,
    fill_height=True,
    fill_width=True,
) as unilever_chat:
    with gr.Row():
        with gr.Column(elem_id="centered-logo"):
            gr.Image(
                value="unilever_logo.png",
                show_label=False,
                container=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
                height=150,
            )
            gr.Markdown("#### **Unilever AI Assistant** — Document-Aware Expert Chat")
            gr.Markdown("### MANAGEMENT DOCUMENTS — **Version 1.1 (With Metadata)**")

    chatbot = gr.Chatbot(
        label="Unilever Document Assistant",
        height=500,
        autoscroll=True,
        show_copy_button=True,
        show_share_button=False,
        avatar_images=["🧑", "🤖"],
    )
    with gr.Row(elem_id="input-row"):
        user_input = gr.Textbox(
            placeholder="Ask your compliance, policy, or SHE question...",
            label="Your Query",
            lines=1,
            scale=5,
            elem_classes=["user-textbox"],
        )
        submit_btn = gr.Button(
            "Send",
            variant="primary",
            scale=1,
            elem_classes=["tiny-send-btn"],
        )
    with gr.Row():
        clear_btn = gr.Button("Clear Chat 🗑️", variant="secondary")

    # Session-specific state
    chat_state = gr.State([])
    memory_state = gr.State([])
    # Event bindings; the fourth output resets the textbox after each turn
    submit_btn.click(
        fn=process_user_query_stream,
        inputs=[user_input, chat_state, memory_state],
        outputs=[chatbot, chat_state, memory_state, user_input],
    )
    user_input.submit(
        fn=process_user_query_stream,
        inputs=[user_input, chat_state, memory_state],
        outputs=[chatbot, chat_state, memory_state, user_input],
    )
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, chat_state, memory_state],
    )
    # # 🚀 Force normal clicks to open links in a new tab and focus it
    # unilever_chat.load(
    #     fn=None,
    #     js="""
    #     document.querySelectorAll('.gr-markdown a').forEach(link => {
    #         link.addEventListener('click', function(e) {
    #             e.preventDefault();
    #             window.open(this.href, '_blank').focus();
    #         });
    #     });
    #     """,
    #     inputs=[],
    #     outputs=[]
    # )
# Launch the interface
unilever_chat.launch()
# ------------------------------