File size: 3,331 Bytes
892f4c0
 
 
 
 
 
 
 
76c1eb0
 
892f4c0
4b62a43
 
 
892f4c0
13977d9
79ab819
2ca54f8
79ab819
76c1eb0
 
 
 
 
 
 
79ab819
 
2ca54f8
79ab819
 
4b62a43
 
 
 
 
 
79ab819
4b62a43
79ab819
 
76c1eb0
79ab819
76c1eb0
 
 
 
 
 
 
79ab819
 
 
 
 
 
 
 
 
 
 
 
 
2ca54f8
79ab819
f1a21dd
 
79ab819
2ca54f8
 
52b4bc3
 
 
aea3c48
 
 
 
 
 
13977d9
 
 
 
892f4c0
13977d9
892f4c0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import streamlit as st
import os
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.vectorstores import Qdrant

from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import TextLoader
from tempfile import NamedTemporaryFile

import re
def main():
# Initialize the Streamlit app
    st.title('Dokument-basiertes Q&A System')

# API Key input securely, API KEY defined in settings
#   api_key = st.text_input("Enter your OpenAI API key:", type="password")
#    if api_key:
#        os.environ["OPENAI_API_KEY"] = api_key
#        st.success("API Key has been set!")

    

# File uploader
    uploaded_file = st.file_uploader("Dokument hochladen", type=['txt'])
    if uploaded_file is not None:
# Read and process the document
        with NamedTemporaryFile(delete=False) as f:
            f.write(uploaded_file.getbuffer())
            loader =  TextLoader(f.name, encoding="utf-8")
            data = loader.load()

        
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        data = text_splitter.split_documents(data)

    # Create vector store
        
        embeddings = OpenAIEmbeddings()
        #vectorstore = FAISS.from_documents(data, embedding=embeddings)
        vectorstore = Qdrant.from_documents(
            data,
            embeddings,
            location=":memory:",  # Local mode with in-memory storage only
            collection_name="my_documents",
    )
    # Create conversation chain
        llm = ChatOpenAI(temperature=0.3, model_name="gpt-4-turbo")
        memory = ConversationBufferMemory(
            memory_key='chat_history', return_messages=True, output_key='answer')
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(),
            memory=memory,
            return_source_documents=True
        )

    # Question input
        query = st.text_input("Frag deinen Dokumenten!")
        if query:
            systemquery = "You are a fraud analyst. You must help your colleague to answer the question below. Do not hallucinate. Provide all the relevant legal text. Answer in German"
            result = conversation_chain({"question": systemquery +"\n\n"+query})
            answer = result["answer"]
            st.write("Antwort:", answer)
            st.write("Quellen:")
            for i in result["source_documents"]:
                res = re.search(r'^[^\n]*', i.page_content)
                st.write(i.page_content[res.span()[0]:res.span()[1]])
        # Optionally display source text snippets
        #    if st.checkbox("Show source text snippets"):
        #        st.write("Source documents:")
        #        for i in result["source_documents"]:
        #            res = re.search(r'^[^\n]*', i.page_content)
        #            st.write(i.page_content[res.span()[0]:res.span()[1]])


# Entry point: executed via `streamlit run <this file>`; Streamlit calls
# the script top-to-bottom on every user interaction (rerun).
if __name__ == "__main__":
    main()