# FAG_RAG / app.py — Streamlit document Q&A app
# (source: Hugging Face Space by htafer, commit f1a21dd)
import streamlit as st
import os
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_community.vectorstores import Qdrant
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import TextLoader
from tempfile import NamedTemporaryFile
import re
def main():
    """Streamlit app: upload a .txt document, index it in an in-memory Qdrant
    store, and answer questions about it via a conversational retrieval chain.

    Answers are produced in German (the system prompt requests German output).
    Requires OPENAI_API_KEY in the environment (set in deployment settings).
    """
    st.title('Dokument-basiertes Q&A System')

    # API key input was removed; the key is provided via the environment
    # (OPENAI_API_KEY defined in the Space/deployment settings).

    uploaded_file = st.file_uploader("Dokument hochladen", type=['txt'])
    if uploaded_file is not None:
        # Persist the upload to a real file so TextLoader can read it.
        # Close the handle *before* loading so buffered bytes are flushed
        # to disk (the original read f.name while the handle was still open).
        with NamedTemporaryFile(delete=False, suffix=".txt") as f:
            f.write(uploaded_file.getbuffer())
            tmp_path = f.name
        try:
            data = TextLoader(tmp_path, encoding="utf-8").load()
        finally:
            # delete=False means the OS never cleans this up; unlink it
            # ourselves so Streamlit reruns don't leak temp files.
            os.unlink(tmp_path)

        # Chunk the document for embedding (1000 chars, 200 overlap).
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        data = text_splitter.split_documents(data)

        # Build an in-memory vector store over the chunks.
        # NOTE(review): this rebuilds embeddings on every Streamlit rerun;
        # caching in st.session_state would avoid repeated embedding cost.
        embeddings = OpenAIEmbeddings()
        vectorstore = Qdrant.from_documents(
            data,
            embeddings,
            location=":memory:",  # Local mode with in-memory storage only
            collection_name="my_documents",
        )

        # Conversational chain: LLM + retriever + chat-history memory.
        llm = ChatOpenAI(temperature=0.3, model_name="gpt-4-turbo")
        memory = ConversationBufferMemory(
            memory_key='chat_history', return_messages=True, output_key='answer')
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(),
            memory=memory,
            return_source_documents=True,
        )

        # Question input
        query = st.text_input("Frag deinen Dokumenten!")
        if query:
            # System instruction is prepended to the user question.
            systemquery = "You are a fraud analyst. You must help your colleague to answer the question below. Do not hallucinate. Provide all the relevant legal text. Answer in German"
            result = conversation_chain({"question": systemquery + "\n\n" + query})
            st.write("Antwort:", result["answer"])

            # Cite sources: show only the first line of each retrieved chunk.
            st.write("Quellen:")
            for doc in result["source_documents"]:
                st.write(doc.page_content.split("\n", 1)[0])
# Entry point: Streamlit executes this module as a script.
if __name__ == "__main__":
    main()
# Initialize the Streamlit app
# st.title('Document-Based Q&A System')