import os
import re  # retained: may be used by other tooling around this script
from tempfile import NamedTemporaryFile

import streamlit as st
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS  # retained: alternative vector store
from langchain_community.vectorstores import Qdrant

# System instruction for the LLM. NOTE(review): in the original code this
# string was assigned to `systemquery` but never used — the instruction never
# reached the model. It is now prepended to every user question.
SYSTEM_PROMPT = (
    "You are a fraud analyst. You must help your colleague to answer the "
    "question below. Do not hallucinate. Provide all the relevant legal text."
)

# The OpenAI API key is expected to be provided via the environment /
# deployment settings (OPENAI_API_KEY), not entered in the UI.


def _load_and_split(uploaded_file):
    """Persist the upload to a temp file, load it as UTF-8 text, and split it.

    Returns a list of langchain Documents chunked at 1000 chars with a
    200-char overlap. The temp file is always removed afterwards (the
    original code leaked one file per upload via delete=False).
    """
    with NamedTemporaryFile(delete=False, suffix=".txt") as f:
        f.write(uploaded_file.getbuffer())
        tmp_path = f.name
    try:
        documents = TextLoader(tmp_path, encoding="utf-8").load()
    finally:
        os.unlink(tmp_path)
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(documents)


def _build_chain(chunks):
    """Embed the chunks into an in-memory Qdrant store and build the QA chain.

    The chain carries a ConversationBufferMemory so follow-up questions can
    reference earlier answers, and returns source documents for citation.
    """
    embeddings = OpenAIEmbeddings()
    vectorstore = Qdrant.from_documents(
        chunks,
        embeddings,
        location=":memory:",  # local mode, in-memory storage only
        collection_name="my_documents",
    )
    llm = ChatOpenAI(temperature=0.3, model_name="gpt-4-turbo")
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )


def main():
    """Streamlit entry point: document upload + conversational Q&A UI."""
    st.title('Dokument-basiertes Q&A System')

    uploaded_file = st.file_uploader("Dokument hochladen", type=['txt'])
    if uploaded_file is None:
        return

    # Build the chain once per uploaded file and cache it in session state.
    # The original rebuilt everything on every Streamlit rerun, which
    # re-embedded the document each time and wiped the conversation memory.
    cache_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("qa_cache_key") != cache_key:
        chunks = _load_and_split(uploaded_file)
        st.session_state["qa_chain"] = _build_chain(chunks)
        st.session_state["qa_cache_key"] = cache_key
    conversation_chain = st.session_state["qa_chain"]

    query = st.text_input("Frag deinen Dokumenten!")
    if query:
        # Prepend the analyst system instruction (previously dead code).
        result = conversation_chain({"question": f"{SYSTEM_PROMPT}\n\n{query}"})
        st.write("Antwort:", result["answer"])
        st.write("Quellen:")
        for doc in result["source_documents"]:
            # Show only the first line of each source chunk as its citation
            # (equivalent to the original regex-span slicing, minus the
            # crash on empty content).
            lines = doc.page_content.splitlines()
            st.write(lines[0] if lines else "")


if __name__ == "__main__":
    main()