File size: 4,277 Bytes
4e00df7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import anthropic
import streamlit as st
from streamlit.logger import get_logger
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
from langchain.llms import HuggingFaceEndpoint
from langchain.chat_models import ChatAnthropic
from langchain.vectorstores import SupabaseVectorStore
from stats import add_usage
# Logger for this module, obtained through Streamlit's logging helper.
logger = get_logger(__name__)

# Provider credentials, read from Streamlit secrets when the module is loaded.
hf_api_key = st.secrets.hf_api_key
anthropic_api_key = st.secrets.anthropic_api_key
openai_api_key = st.secrets.openai_api_key

# Conversation memory handed to every retrieval chain built in this module.
# NOTE(review): this object is created once at module load, so it is presumably
# shared by every session served by the same process -- confirm that is intended.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)
def count_tokens(question: str, model: str) -> str:
    """Return a short size summary of *question* for display.

    The summary always contains the whitespace-separated word count. When
    *model* starts with ``"claude"``, the token count reported by
    ``anthropic.count_tokens`` is appended.

    Args:
        question: The text the user typed.
        model: Name of the selected model.

    Returns:
        ``"Words: N"``, or ``"Words: N | Tokens: M"`` for claude models.
    """
    count = f'Words: {len(question.split())}'
    if model.startswith("claude"):
        count += f' | Tokens: {anthropic.count_tokens(question)}'
    # Returned outside the ``if`` so non-claude models still get the word count.
    return count
def _build_qa_chain(model, vector_store):
    """Build the retrieval chain for *model*, or return ``None`` if unavailable.

    The provider is chosen from the prefix of *model* (``gpt``, ``claude``,
    ``llama``). The claude and llama branches also require their API key to be
    truthy. Sampling settings are read from ``st.session_state``.

    Args:
        model: Name of the selected model; only its prefix is inspected here.
        vector_store: Store whose ``as_retriever()`` feeds the chain.

    Returns:
        A ``ConversationalRetrievalChain``, or ``None`` when no branch matches.
    """
    if model.startswith("gpt"):
        logger.info('Using OpenAI model %s', model)
        llm = OpenAI(
            model_name=st.session_state['model'],
            openai_api_key=openai_api_key,
            temperature=st.session_state['temperature'],
            max_tokens=st.session_state['max_tokens'],
        )
        return ConversationalRetrievalChain.from_llm(
            llm, vector_store.as_retriever(), memory=memory, verbose=True)

    if anthropic_api_key and model.startswith("claude"):
        logger.info('Using Anthropics model %s', model)
        llm = ChatAnthropic(
            model=st.session_state['model'],
            anthropic_api_key=anthropic_api_key,
            temperature=st.session_state['temperature'],
            max_tokens_to_sample=st.session_state['max_tokens'],
        )
        return ConversationalRetrievalChain.from_llm(
            llm, vector_store.as_retriever(), memory=memory, verbose=True,
            max_tokens_limit=102400)

    if hf_api_key and model.startswith("llama"):
        logger.info('Using Llama model %s', model)
        endpoint_url = ("https://api-inference.huggingface.co/models/meta-llama/Llama-2-70b-chat-hf")
        model_kwargs = {
            "temperature": st.session_state['temperature'],
            "max_new_tokens": st.session_state['max_tokens'],
            "return_full_text": False,
        }
        hf = HuggingFaceEndpoint(
            endpoint_url=endpoint_url,
            task="text-generation",
            huggingfacehub_api_token=hf_api_key,
            model_kwargs=model_kwargs,
        )
        return ConversationalRetrievalChain.from_llm(
            hf, retriever=vector_store.as_retriever(), memory=memory, verbose=True)

    # Unknown model prefix, or the matching provider has no API key configured.
    return None


def chat_with_doc(model, vector_store: SupabaseVectorStore, stats_db):
    """Render the chat widgets and answer a question against *vector_store*.

    Draws a text area and three buttons (Ask, Count Tokens, Clear History).
    "Ask" records the usage, runs the question through a conversational
    retrieval chain, and prints the accumulated chat history. "Count Tokens"
    writes the result of ``count_tokens``. "Clear History" empties both the
    LangChain memory and the session's chat history, then reruns the script.

    Args:
        model: Name of the selected model; its prefix selects the provider.
        vector_store: Vector store used as the chain's retriever.
        stats_db: Handle passed through to ``add_usage``.
    """
    if 'chat_history' not in st.session_state:
        st.session_state['chat_history'] = []

    question = st.text_area("## Ask a question")
    columns = st.columns(3)
    with columns[0]:
        button = st.button("Ask")
    with columns[1]:
        count_button = st.button("Count Tokens", type='secondary')
    with columns[2]:
        clear_history = st.button("Clear History", type='secondary')

    if clear_history:
        # Clear memory in Langchain
        memory.clear()
        st.session_state['chat_history'] = []
        st.experimental_rerun()

    if button:
        if st.session_state["overused"]:
            st.error("You have used all your free credits. Please try again later or self host.")
        elif not question.strip():
            # Do not record usage or call the model for an empty prompt.
            st.warning("Please enter a question before asking.")
        else:
            add_usage(stats_db, "chat", "prompt" + question, {"model": model, "temperature": st.session_state['temperature']})
            qa = _build_qa_chain(model, vector_store)
            if qa is None:
                # Previously this path crashed by calling ``None``.
                st.error(f"Model '{model}' is not available: unsupported model or missing API key.")
            else:
                st.session_state['chat_history'].append(("You", question))

                # Generate model's response and add it to chat history
                model_response = qa({"question": question})
                logger.info('Result: %s', model_response)
                st.session_state['chat_history'].append(("meraKB", model_response["answer"]))

                # Display chat history
                st.empty()
                for speaker, text in st.session_state['chat_history']:
                    st.markdown(f"**{speaker}:** {text}")

    if count_button:
        st.write(count_tokens(question, model))
|