# CoreMind AI: a Streamlit app for question answering over documents with LangChain.
import hashlib
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
# Page chrome: browser-tab title, wide layout, and the visible heading.
st.set_page_config(page_title="CoreMind AI", layout="wide")
st.header("CoreMind AI")

# ====================================================================================================
# SIDEBAR
st.sidebar.title("Options")
# The key is entered at runtime through a masked field rather than being
# hard-coded in the source.
openai_key = st.sidebar.text_input("OpenAI API Key", type="password", key="openai_api_key")
# Export the key only once one has actually been entered, so an empty field
# does not overwrite an OPENAI_API_KEY already provided by the host environment.
if openai_key:
    os.environ["OPENAI_API_KEY"] = openai_key
# just go directly to the llm
# (the import stays live; the example calls below are disabled scratch code)
from langchain.llms import OpenAI
# llm = OpenAI(temperature=0.9)
# st.write(llm("What is Berkshire Hathaway?"))
# ==================================================================================================== | |
# chains do stuff in a prespecified order | |
# from langchain.prompts import PromptTemplate | |
# prompt = PromptTemplate( | |
# input_variables=["product"], | |
# template="Would Berkshire Hathaway typically invest in {product}?", | |
# ) | |
# st.write(prompt.format(product="colorful socks")) | |
# from langchain.chains import LLMChain | |
# chain = LLMChain(llm=llm, prompt=prompt) | |
# st.write(chain.run("colorful socks")) | |
# ==================================================================================================== | |
# agents decide what to do
# from langchain.agents import load_tools
# from langchain.agents import initialize_agent
# from langchain.agents import AgentType
from langchain.llms import OpenAI  # live import; everything else in this section is disabled
# First, let's load the language model we're going to use to control the agent.
# llm = OpenAI(temperature=0)
# Next, let's load some tools to use. Note that the `llm-math` tool uses an LLM, so we need to pass that in.
# tools = load_tools(["requests", "llm-math"], llm=llm)
# Finally, let's initialize an agent with the tools, the language model, and the type of agent we want to use.
# agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
# Now let's test it out!
# st.write(agent.run("Is there a Q2 XXX? what was the 1987 relative perf?"))
# # ==================================================================================================== | |
# from langchain.chat_models import ChatOpenAI | |
# This is more of a chat model;
# what's cool is that you can batch messages.
# batch_messages = [ | |
# [ | |
# SystemMessage(content="You are a helpful assistant that translates English to French."), | |
# HumanMessage(content="I love programming.") | |
# ], | |
# [ | |
# SystemMessage(content="You are a helpful assistant that translates English to French."), | |
# HumanMessage(content="I love artificial intelligence.") | |
# ], | |
# ] | |
# result = chat.generate(batch_messages) | |
# result | |
# # -> LLMResult(generations=[[ChatGeneration(text="J'aime programmer.", generation_info=None, message=AIMessage(content="J'aime programmer.", additional_kwargs={}))], [ChatGeneration(text="J'aime l'intelligence artificielle.", generation_info=None, message=AIMessage(content="J'aime l'intelligence artificielle.", additional_kwargs={}))]], llm_output={'token_usage': {'prompt_tokens': 57, 'completion_tokens': 20, 'total_tokens': 77}}) | |
# from langchain.embeddings.openai import OpenAIEmbeddings | |
# from langchain.vectorstores import Chroma | |
# from langchain.document_loaders import TextLoader | |
# loader = TextLoader("raw_data.txt") | |
# embeddings = OpenAIEmbeddings() | |
# docsearch = Chroma(persist_directory="data", embedding_function=embeddings) | |
# qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever()) | |
# st.write(qa("What is Berkshire Hathaway?")) | |
# st.markdown("----") | |
def question_answer(user_text, qa_temperature):
    """Answer a question from the persisted Chroma collection.

    Args:
        user_text: The question to answer.
        qa_temperature: Sampling temperature handed to the chat model.

    Returns:
        The answer text returned by ``RetrievalQA.run``.

    Reads the module-level ``docsearch`` and ``openai_key``, so it must only
    be called after both have been set (i.e. once a key has been entered).
    """
    # gpt-3.5-turbo is a chat model, so it is driven through ChatOpenAI
    # rather than the completion-style OpenAI wrapper.
    chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(
            temperature=qa_temperature,
            model_name="gpt-3.5-turbo",
            openai_api_key=openai_key,
        ),
        retriever=docsearch.as_retriever(),
    )
    return chain.run(user_text)


def _build_pdf_index(pdf_bytes, api_key):
    """Split a PDF into pages and embed them into an in-memory FAISS index.

    Args:
        pdf_bytes: Raw bytes of the uploaded PDF.
        api_key: OpenAI API key used for the embedding calls.

    Returns:
        A FAISS vector store, or ``None`` when the loader yields no pages.
    """
    # PyPDFLoader needs a path on disk. A unique temporary file keeps
    # concurrent sessions from overwriting each other's upload, and it is
    # removed as soon as the pages have been read.
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        with tmp:
            tmp.write(pdf_bytes)
        pages = PyPDFLoader(tmp.name).load_and_split()
    finally:
        os.remove(tmp.name)

    if not pages:
        return None
    return FAISS.from_documents(pages, OpenAIEmbeddings(openai_api_key=api_key))


if openai_key:
    # The key is passed explicitly so each session uses the key typed into
    # its own sidebar rather than whatever is in the process environment.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
    docsearch = Chroma(persist_directory="data", embedding_function=embeddings)

    qa_tab, understanding_tab = st.tabs(["Database Understanding", "PDF Understanding"])

    with qa_tab:
        st.header("Question Answering")
        st.write("Dataset is Berkshire Hathaway's end of year reports for 1995, 1996 and 1997.")
        qa_query = st.text_area("Enter your query", key="qa_query", help="Got a question you think your docs can answer? Just ask!")
        if qa_query:
            st.write(question_answer(qa_query, 0.9))

    with understanding_tab:
        pdf_file = st.file_uploader("Upload a PDF", type=["pdf"], key="pdf_file")
        if pdf_file:
            pdf_bytes = pdf_file.getvalue()
            # Streamlit reruns the script on every interaction. Keying the
            # index on a content hash means the PDF is embedded once per
            # upload instead of once per rerun.
            digest = hashlib.sha256(pdf_bytes).hexdigest()
            cached = st.session_state.get("pdf_index")
            if cached is None or cached[0] != digest:
                cached = (digest, _build_pdf_index(pdf_bytes, openai_key))
                st.session_state["pdf_index"] = cached
            faiss_index = cached[1]

            if faiss_index is None:
                st.warning("No text could be extracted from this PDF.")
            else:
                qa_prompt = st.text_area("Query your pdf", key="qa_prompt")
                if qa_prompt:
                    llm = OpenAI(temperature=0.9, openai_api_key=openai_key)
                    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=faiss_index.as_retriever())
                    # .run() returns just the answer text, matching the
                    # database tab; calling the chain directly returns a
                    # dict holding both the query and the result.
                    st.write(qa.run(qa_prompt))
else:
    st.info("Enter your OpenAI API key in the sidebar to get started.")