from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain_community.callbacks import StreamlitCallbackHandler
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferWindowMemory
import streamlit as st
import requests
import time
import json
import os
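
# Model service configuration: MODEL_ENDPOINT points at an OpenAI-compatible
# model server (llama.cpp-python or Ollama); an optional bearer token is
# forwarded on every request.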
model_service = os.getenv("MODEL_ENDPOINT",
                          "http://localhost:8001")
model_service = f"{model_service}/v1"

model_service_bearer = os.getenv("MODEL_ENDPOINT_BEARER")
request_kwargs = {}
if model_service_bearer is not None:
    request_kwargs = {"headers": {"Authorization": f"Bearer {model_service_bearer}"}}
def checking_model_service():
    start = time.time()
    print("Checking Model Service Availability...")
    ready = False
    while not ready:
        try:
            request_cpp = requests.get(f'{model_service}/models', **request_kwargs)
            # model_service[:-2] drops the "v1" suffix, leaving the base URL for the Ollama API
            request_ollama = requests.get(f'{model_service[:-2]}api/tags', **request_kwargs)
            if request_cpp.status_code == 200:
                server = "Llamacpp_Python"
                ready = True
            elif request_ollama.status_code == 200:
                server = "Ollama"
                ready = True
        except requests.exceptions.RequestException:
            pass
        time.sleep(1)
    print(f"{server} Model Service Available")
    print(f"{time.time()-start} seconds")
    return server
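
# Only used when the backend is Ollama: list the locally installed models
# (tags) so the user can pick one in the sidebar.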
def get_models():
    try:
        response = requests.get(f"{model_service[:-2]}api/tags", **request_kwargs)
        return [i["name"].split(":")[0] for i in
                json.loads(response.content)["models"]]
    except (requests.exceptions.RequestException, KeyError, json.JSONDecodeError):
        return None

with st.spinner("Checking Model Service Availability..."):
    server = checking_model_service()

def enableInput():
    st.session_state["input_disabled"] = False

def disableInput():
    st.session_state["input_disabled"] = True

st.title("💬 Chatbot")

if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant",
                                     "content": "How can I help you?"}]
if "input_disabled" not in st.session_state:
    enableInput()

for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])
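
# Windowed conversation memory: only the last k=3 exchanges are replayed into
# the prompt's "history" placeholder.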
def memory():
    memory = ConversationBufferWindowMemory(return_messages=True, k=3)
    return memory
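
# Model selection: MODEL_NAME is taken from the environment; when the backend
# is Ollama, a sidebar radio button lets the user choose among installed models.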
model_name = os.getenv("MODEL_NAME", "")

if server == "Ollama":
    models = get_models()
    with st.sidebar:
        model_name = st.radio(label="Select Model",
                              options=models)
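
# The chat model is reached through the OpenAI-compatible API of the local
# model service; the placeholder "sk-no-key-required" key is sent when no
# bearer token is configured, and tokens are streamed into a Streamlit callback.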
llm = ChatOpenAI(base_url=model_service,
                 api_key="sk-no-key-required" if model_service_bearer is None else model_service_bearer,
                 model=model_name,
                 streaming=True,
                 callbacks=[StreamlitCallbackHandler(st.empty(),
                                                     expand_new_thoughts=True,
                                                     collapse_completed_thoughts=True)])
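
# Prompt + LLMChain: a fixed system message, the windowed history, and the
# latest user input.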
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a world-class technical advisor."),
    MessagesPlaceholder(variable_name="history"),
    ("user", "{input}")
])

chain = LLMChain(llm=llm,
                 prompt=prompt,
                 verbose=False,
                 memory=memory())
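
# Main chat turn: input is disabled while a response is being generated, then
# re-enabled and the app rerun so the full transcript is redrawn.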
if prompt := st.chat_input(disabled=st.session_state["input_disabled"], on_submit=disableInput):
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").markdown(prompt)
    response = chain.invoke(prompt)
    st.chat_message("assistant").markdown(response["text"])
    st.session_state.messages.append({"role": "assistant", "content": response["text"]})
    enableInput()
    st.rerun()
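
# Usage sketch (assuming this file is saved as chatbot_ui.py -- the actual
# filename is not given here):
#   MODEL_ENDPOINT=http://localhost:8001 streamlit run chatbot_ui.py
# MODEL_ENDPOINT_BEARER and MODEL_NAME are optional; the defaults above are
# used when they are unset.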