import os
import streamlit as st
import streamlit.components.v1 as components
import openai
import anthropic
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, PropertyGraphIndex
from llama_index.core.indices.property_graph import (
    ImplicitPathExtractor,
    SimpleLLMPathExtractor,
)
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llmlingua import PromptCompressor
from rouge_score import rouge_scorer
from semantic_text_similarity.models import WebBertSimilarity
import nest_asyncio
# Apply nest_asyncio
nest_asyncio.apply()
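# (nest_asyncio allows nested use of the asyncio event loop, which some of
# LlamaIndex's helpers need when they run async code in an environment where a
# loop may already be running, such as Streamlit.)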
# OpenAI credentials
# key = os.getenv('OPENAI_API_KEY')
# openai.api_key = key
# os.environ["OPENAI_API_KEY"] = key
# key = os.getenv('MISTRAL_API_KEY')
# os.environ["MISTRAL_API_KEY"] = key
# Anthropic credentials
key = os.getenv('CLAUDE_API_KEY')
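# Stop with a clear message if the key is missing (assumption: the app should
# not run without Anthropic credentials).
if not key:
    st.error("CLAUDE_API_KEY environment variable is not set.")
    st.stop()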
os.environ["ANTHROPIC_API_KEY"] = key | |
# Streamlit UI | |
st.title("Prompt Optimization for a Policy Bot") | |
uploaded_files = st.file_uploader("Upload a Policy document in pdf format", type="pdf", accept_multiple_files=True) | |
if uploaded_files: | |
for uploaded_file in uploaded_files: | |
with open(f"./data/{uploaded_file.name}", 'wb') as f: | |
f.write(uploaded_file.getbuffer()) | |
reader = SimpleDirectoryReader(input_files=[f"./data/{uploaded_file.name}"]) | |
documents = reader.load_data() | |
st.write(documents) | |
st.success("File uploaded...") | |
    # # Indexing
    # index = PropertyGraphIndex.from_documents(
    #     documents,
    #     embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
    #     kg_extractors=[
    #         ImplicitPathExtractor(),
    #         SimpleLLMPathExtractor(
    #             llm=OpenAI(model="gpt-3.5-turbo", temperature=0.3),
    #             num_workers=4,
    #             max_paths_per_chunk=10,
    #         ),
    #     ],
    #     show_progress=True,
    # )
    # # Save Knowledge Graph
    # index.property_graph_store.save_networkx_graph(name="./data/kg.html")
    # # Display the graph in Streamlit
    # st.success("File Processed...")
    # st.success("Creating Knowledge Graph...")
    # HtmlFile = open("./data/kg.html", 'r', encoding='utf-8')
    # source_code = HtmlFile.read()
    # components.html(source_code, height=500, width=700)
    # # Retrieval
    # kg_retriever = index.as_retriever(
    #     include_text=True,  # include source text, default True
    # )
    # Indexing
    splitter = SentenceSplitter(chunk_size=256)
    nodes = splitter.get_nodes_from_documents(documents)
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)
    index = VectorStoreIndex(nodes=nodes, storage_context=storage_context)
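    # Optionally persist the index so it does not have to be rebuilt on every
    # rerun (a sketch; "./storage" is an assumed location):
    # index.storage_context.persist(persist_dir="./storage")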
    # Retrieval
    bm25_retriever = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=10)
    vector_retriever = index.as_retriever(similarity_top_k=10)

    # Hybrid Retriever class
    class HybridRetriever(BaseRetriever):
        def __init__(self, vector_retriever, bm25_retriever):
            self.vector_retriever = vector_retriever
            self.bm25_retriever = bm25_retriever
            super().__init__()

        def _retrieve(self, query, **kwargs):
            bm25_nodes = self.bm25_retriever.retrieve(query, **kwargs)
            vector_nodes = self.vector_retriever.retrieve(query, **kwargs)
            all_nodes = []
            node_ids = set()
            for n in bm25_nodes + vector_nodes:
                if n.node.node_id not in node_ids:
                    all_nodes.append(n)
                    node_ids.add(n.node.node_id)
            return all_nodes

    hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)
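    # hybrid_retriever unions the BM25 (keyword) and vector (embedding) results
    # and de-duplicates them by node_id, so each of the up-to-20 candidate chunks
    # is passed downstream only once.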
    # Generation
    # model = "gpt-3.5-turbo"
    model = "claude-3-opus-20240229"

    # def get_context(query):
    #     contexts = kg_retriever.retrieve(query)
    #     context_list = [n.text for n in contexts]
    #     return context_list

    def get_context(query):
        contexts = hybrid_retriever.retrieve(query)
        context_list = [n.get_content() for n in contexts]
        return context_list
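    # get_context returns the plain text of each retrieved chunk, e.g. (hypothetical)
    # get_context("How many leave days are allowed?") -> ["chunk 1 text", "chunk 2 text", ...]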
    # The selected model is an Anthropic Claude model, so the request goes through
    # the Anthropic SDK rather than the OpenAI client.
    client = anthropic.Anthropic()

    def res(prompt):
        response = client.messages.create(
            model=model,
            max_tokens=1024,  # assumed response cap; adjust as needed
            system="You are a helpful assistant who answers from the following context. If the answer can't be found in context, politely refuse",
            messages=[
                {"role": "user", "content": prompt},
            ],
        )
        prompt_tokens = response.usage.input_tokens
        completion_tokens = response.usage.output_tokens
        return [prompt_tokens, completion_tokens, prompt_tokens + completion_tokens, response.content[0].text]
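    # res() returns [prompt_tokens, completion_tokens, total_tokens, answer_text];
    # later code indexes into this list positionally (e.g. orig_res[3] is the answer).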
    # Initialize session state for token summary, evaluation details, and chat messages
    if "token_summary" not in st.session_state:
        st.session_state.token_summary = []
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    # Accept user input
    if prompt := st.chat_input("Enter your query:"):
        st.success("Fetching info...")
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response
        # st.success("Fetching info...")
        context_list = get_context(prompt)
        context = " ".join(context_list)
        # Original prompt response
        full_prompt = "\n\n".join([context, prompt])
        orig_res = res(full_prompt)
        st.session_state.messages.append({"role": "assistant", "content": "Generating Original prompt response..."})
        st.session_state.messages.append({"role": "assistant", "content": orig_res[3]})
        st.success("Generating Original prompt response...")
        with st.chat_message("assistant"):
            st.markdown(orig_res[3])
        # # Compressed Response
        # st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
        # st.success("Generating Optimized prompt response...")
        # llm_lingua = PromptCompressor(
        #     model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
        #     use_llmlingua2=True, device_map="cpu"
        # )
        # def prompt_compression(context, rate=0.5):
        #     compressed_context = llm_lingua.compress_prompt(
        #         context,
        #         rate=rate,
        #         force_tokens=["!", ".", "?", "\n"],
        #         drop_consecutive=True,
        #     )
        #     return compressed_context
        # compressed_context = prompt_compression(context)
        # full_opt_prompt = "\n\n".join([compressed_context['compressed_prompt'], prompt])
        # compressed_res = res(full_opt_prompt)
        # st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
        # with st.chat_message("assistant"):
        #     st.markdown(compressed_res[3])
        # # Save token summary and evaluation details to session state
        # scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        # scores = scorer.score(compressed_res[3], orig_res[3])
        # webert_model = WebBertSimilarity(device='cpu')
        # similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100
        # # Display token summary
        # st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
        # st.success('Token Length Summary...')
        # st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
        # st.write(f"Original Prompt has {orig_res[0]} tokens")
        # st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
        # st.write(f"Optimized Prompt has {compressed_res[0]} tokens")
        # st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
        # st.success("Comparing Original and Optimized Prompt Response...")
        # st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score : {scores['rougeL'].fmeasure * 100}"})
        # st.write(f"Rouge Score : {scores['rougeL'].fmeasure * 100}")
        # st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score : {similarity_score}"})
        # st.write(f"Semantic Text Similarity Score : {similarity_score}")
        # st.write(" ")
        # # origin_tokens = compressed_context['origin_tokens']
        # # compressed_tokens = compressed_context['compressed_tokens']
        # origin_tokens = orig_res[0]
        # compressed_tokens = compressed_res[0]
        # gpt_saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
        # claude_saving = (origin_tokens - compressed_tokens) * 0.015 / 1000
        # mistral_saving = (origin_tokens - compressed_tokens) * 0.004 / 1000
        # # st.session_state.messages.append({"role": "assistant", "content": f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral"""})
        # # st.success(f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral""")
        # st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt has saved ${gpt_saving:.4f} in GPT-4."})
        # st.success(f"The optimized prompt has saved ${gpt_saving:.4f} in GPT-4.")
        # st.success("Downloading Optimized Prompt...")
        # st.download_button(label="Download Optimized Prompt",
        #                    data=full_opt_prompt, file_name='optimized_prompt.txt')