Update app.py
app.py CHANGED
@@ -14,6 +14,7 @@ from llama_index.retrievers.bm25 import BM25Retriever
 from llama_index.core.retrievers import BaseRetriever
 from llama_index.core.node_parser import SentenceSplitter
 from llama_index.embeddings.openai import OpenAIEmbedding
+from llama_index.llms.mistralai import MistralAI
 from llmlingua import PromptCompressor
 from rouge_score import rouge_scorer
 from semantic_text_similarity.models import WebBertSimilarity
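The only change in this hunk is the new MistralAI import, yet the rest of the commit wires credentials to Anthropic and selects a Claude model, so the import appears unused. If it were wired up, typical llama_index usage would look like the sketch below; the model name and key handling are assumptions, not code from app.py.

```python
# Hypothetical wiring for the newly imported class (not present in app.py).
# Assumes the llama-index-llms-mistralai package and a MISTRAL_API_KEY env var.
import os
from llama_index.llms.mistralai import MistralAI

llm = MistralAI(model="mistral-small", api_key=os.getenv("MISTRAL_API_KEY"))
```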
@@ -27,6 +28,13 @@ nest_asyncio.apply()
 # openai.api_key = key
 # os.environ["OPENAI_API_KEY"] = key
 
+# key = os.getenv('MISTRAL_API_KEY')
+# os.environ["MISTRAL_API_KEY"] = key
+
+# Anthropic credentials
+key = os.getenv('CLAUDE_API_KEY')
+os.environ["ANTHROPIC_API_KEY"] = key
+
 # Streamlit UI
 st.title("Prompt Optimization for a Policy Bot")
 
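One caveat with the new credential block: `os.getenv` returns `None` when `CLAUDE_API_KEY` is unset, and assigning `None` into `os.environ` raises a `TypeError`. A minimal defensive variant (a sketch, not the committed code):

```python
import os

key = os.getenv("CLAUDE_API_KEY")
if not key:
    # Fail fast with a clear message instead of a TypeError from os.environ
    raise RuntimeError("CLAUDE_API_KEY is not set; configure it as a Space secret")
os.environ["ANTHROPIC_API_KEY"] = key
```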
@@ -38,6 +46,7 @@ if uploaded_files:
         f.write(uploaded_file.getbuffer())
     reader = SimpleDirectoryReader(input_files=[f"./data/{uploaded_file.name}"])
     documents = reader.load_data()
+    st.write(documents)
     st.success("File uploaded...")
 
     # # Indexing
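The added `st.write(documents)` renders the full `Document` objects (text plus metadata) in the page, which is useful for debugging but noisy for end users. A lighter confirmation might look like this sketch, assuming the surrounding app.py scope where `documents` and `uploaded_file` are defined:

```python
import streamlit as st  # already imported in app.py

# Sketch only: a compact upload summary instead of dumping raw Document objects.
st.write(f"Loaded {len(documents)} document(s) from {uploaded_file.name}")
```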
@@ -103,7 +112,8 @@ if uploaded_files:
     hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)
 
     # Generation
-    model = "gpt-3.5-turbo"
+    # model = "gpt-3.5-turbo"
+    model = "claude-3-opus-20240229"
 
     # def get_context(query):
     #     contexts = kg_retriever.retrieve(query)
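`claude-3-opus-20240229` is an Anthropic model id, consumed via the `ANTHROPIC_API_KEY` set earlier. The diff does not show how the app's `res()` helper calls the model; for reference, a minimal standalone call with the official `anthropic` SDK (an assumption — app.py may route through llama_index instead) looks like:

```python
import anthropic  # assumes the `anthropic` package; reads ANTHROPIC_API_KEY from env

client = anthropic.Anthropic()
message = client.messages.create(
    model="claude-3-opus-20240229",  # same id the commit switches to
    max_tokens=512,
    messages=[{"role": "user", "content": "What does the leave policy say about carryover?"}],
)
print(message.content[0].text)
```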
@@ -145,6 +155,10 @@ if uploaded_files:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
 
+    # Summarize
+    full_prompt = "\n\n".join([context + prompt])
+    orig_res = res(full_prompt)
+
     # Accept user input
     if prompt := st.chat_input("Enter your query:"):
         st.success("Fetching info...")
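Note that the relocated Summarize block now executes before `prompt` and `context` are assigned in this script pass (they are set further down, inside the `if prompt := st.chat_input(...)` handler), so it will raise a `NameError` on a fresh run, since Streamlit re-executes the whole script and only `st.session_state` persists across reruns. A guarded variant, sketched under the assumption that the app caches both values in `st.session_state` (`res` is app.py's existing response helper):

```python
import streamlit as st  # already imported in app.py

# Hypothetical guard; only runs once both values exist from a prior pass.
if "context" in st.session_state and "prompt" in st.session_state:
    full_prompt = "\n\n".join([st.session_state["context"] + st.session_state["prompt"]])
    orig_res = res(full_prompt)
```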
@@ -158,6 +172,11 @@ if uploaded_files:
         context_list = get_context(prompt)
         context = " ".join(context_list)
 
+        # # Summarize
+        # full_prompt = "\n\n".join([context + prompt])
+        # orig_res = res(full_prompt)
+
+
 
         # Original prompt response
         full_prompt = "\n\n".join([context + prompt])
@@ -168,66 +187,66 @@ if uploaded_files:
         with st.chat_message("assistant"):
             st.markdown(orig_res[3])
 
-        # Compressed Response
-        st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
-        st.success("Generating Optimized prompt response...")
-
-        llm_lingua = PromptCompressor(
-            model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
-            use_llmlingua2=True, device_map="cpu"
-        )
-
-        def prompt_compression(context, rate=0.5):
-            compressed_context = llm_lingua.compress_prompt(
-                context,
-                rate=rate,
-                force_tokens=["!", ".", "?", "\n"],
-                drop_consecutive=True,
-            )
-            return compressed_context
-        compressed_context = prompt_compression(context)
-        full_opt_prompt = "\n\n".join([compressed_context['compressed_prompt'] + prompt])
-        compressed_res = res(full_opt_prompt)
-        st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
-        with st.chat_message("assistant"):
-            st.markdown(compressed_res[3])
-
-        # Save token summary and evaluation details to session state
-        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
-        scores = scorer.score(compressed_res[3],orig_res[3])
-        webert_model = WebBertSimilarity(device='cpu')
-        similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100
+        # # Compressed Response
+        # st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
+        # st.success("Generating Optimized prompt response...")
+
+        # llm_lingua = PromptCompressor(
+        #     model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
+        #     use_llmlingua2=True, device_map="cpu"
+        # )
+
+        # def prompt_compression(context, rate=0.5):
+        #     compressed_context = llm_lingua.compress_prompt(
+        #         context,
+        #         rate=rate,
+        #         force_tokens=["!", ".", "?", "\n"],
+        #         drop_consecutive=True,
+        #     )
+        #     return compressed_context
+        # compressed_context = prompt_compression(context)
+        # full_opt_prompt = "\n\n".join([compressed_context['compressed_prompt'] + prompt])
+        # compressed_res = res(full_opt_prompt)
+        # st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
+        # with st.chat_message("assistant"):
+        #     st.markdown(compressed_res[3])
+
+        # # Save token summary and evaluation details to session state
+        # scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
+        # scores = scorer.score(compressed_res[3],orig_res[3])
+        # webert_model = WebBertSimilarity(device='cpu')
+        # similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100
 
 
-        # Display token summary
-        st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
-        st.success('Token Length Summary...')
-        st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
-        st.write(f"Original Prompt has {orig_res[0]} tokens")
-        st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
-        st.write(f"Optimized Prompt has {compressed_res[0]} tokens")
-
-        st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
-        st.success("Comparing Original and Optimized Prompt Response...")
-        st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score : {scores['rougeL'].fmeasure * 100}"})
-        st.write(f"Rouge Score : {scores['rougeL'].fmeasure * 100}")
-        st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score : {similarity_score}"})
-        st.write(f"Semantic Text Similarity Score : {similarity_score}")
-
-        st.write(" ")
-        # origin_tokens = compressed_context['origin_tokens']
-        # compressed_tokens = compressed_context['compressed_tokens']
-        origin_tokens = orig_res[0]
-        compressed_tokens = compressed_res[0]
-        gpt_saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
-        claude_saving = (origin_tokens - compressed_tokens) * 0.015 / 1000
-        mistral_saving = (origin_tokens - compressed_tokens) * 0.004 / 1000
-        # st.session_state.messages.append({"role": "assistant", "content": f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral"""})
-        # st.success(f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral""")
-        st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4."})
-        st.success(f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4.")
-
-        st.success("Downloading Optimized Prompt...")
-        st.download_button(label = "Download Optimized Prompt",
-                           data = full_opt_prompt, file_name='./data/optimized_prompt.txt')
+        # # Display token summary
+        # st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
+        # st.success('Token Length Summary...')
+        # st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
+        # st.write(f"Original Prompt has {orig_res[0]} tokens")
+        # st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
+        # st.write(f"Optimized Prompt has {compressed_res[0]} tokens")
+
+        # st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
+        # st.success("Comparing Original and Optimized Prompt Response...")
+        # st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score : {scores['rougeL'].fmeasure * 100}"})
+        # st.write(f"Rouge Score : {scores['rougeL'].fmeasure * 100}")
+        # st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score : {similarity_score}"})
+        # st.write(f"Semantic Text Similarity Score : {similarity_score}")
+
+        # st.write(" ")
+        # # origin_tokens = compressed_context['origin_tokens']
+        # # compressed_tokens = compressed_context['compressed_tokens']
+        # origin_tokens = orig_res[0]
+        # compressed_tokens = compressed_res[0]
+        # gpt_saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
+        # claude_saving = (origin_tokens - compressed_tokens) * 0.015 / 1000
+        # mistral_saving = (origin_tokens - compressed_tokens) * 0.004 / 1000
+        # # st.session_state.messages.append({"role": "assistant", "content": f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral"""})
+        # # st.success(f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral""")
+        # st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4."})
+        # st.success(f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4.")
+
+        # st.success("Downloading Optimized Prompt...")
+        # st.download_button(label = "Download Optimized Prompt",
+        #                    data = full_opt_prompt, file_name='./data/optimized_prompt.txt')
 
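The commit disables the entire LLMLingua-2 compression and evaluation path rather than deleting it. For readers following the diff, the sketch below runs that pipeline standalone; every call mirrors the commented-out code (PromptCompressor, compress_prompt with rate/force_tokens/drop_consecutive, ROUGE-L, and the per-1K-token savings arithmetic), while the sample strings and token counts are made up for illustration.

```python
from llmlingua import PromptCompressor
from rouge_score import rouge_scorer

# Same compressor configuration as the disabled block above
llm_lingua = PromptCompressor(
    model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
    use_llmlingua2=True, device_map="cpu",
)

context = "Employees accrue 1.5 vacation days per month. Unused days carry over."  # sample text
compressed = llm_lingua.compress_prompt(
    context,
    rate=0.5,                            # keep roughly half the tokens
    force_tokens=["!", ".", "?", "\n"],  # punctuation is never dropped
    drop_consecutive=True,
)
print(compressed["compressed_prompt"])

# ROUGE-L F-measure between the two answers, as in the disabled block
scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)
scores = scorer.score("answer from compressed prompt", "answer from original prompt")
print(scores["rougeL"].fmeasure * 100)

# Savings arithmetic with the diff's per-1K-token prices and made-up token counts
origin_tokens, compressed_tokens = 1200, 600
gpt_saving = (origin_tokens - compressed_tokens) * 0.06 / 1000       # $0.0360
claude_saving = (origin_tokens - compressed_tokens) * 0.015 / 1000   # $0.0090
mistral_saving = (origin_tokens - compressed_tokens) * 0.004 / 1000  # $0.0024
print(gpt_saving, claude_saving, mistral_saving)
```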