JBHF committed on
Commit
3179492
·
verified ·
1 Parent(s): 2a19f57

Update app_BACKUP_08032024

Browse files
Files changed (1) hide show
  1. app_BACKUP_08032024 +100 -0
app_BACKUP_08032024 CHANGED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app_BACKUP_08032024
2
+
3
+ # JB:
4
+ # LangChainDeprecationWarning: Importing embeddings from langchain is deprecated.
5
+ # Importing from langchain will no longer be supported as of langchain==0.2.0.
6
+ # Please import from langchain-community instead:
7
+ # `from langchain_community.embeddings import FastEmbedEmbeddings`.
8
+ # To install langchain-community run `pip install -U langchain-community`.
9
+ from langchain_community.embeddings import FastEmbedEmbeddings
10
+
11
+ import os
12
+ import streamlit as st
13
+ from langchain_groq import ChatGroq
14
+ from langchain_community.document_loaders import WebBaseLoader
15
+ from langchain_community.embeddings import OllamaEmbeddings
16
+
17
+ # JB:
18
+ from langchain.embeddings import FastEmbedEmbeddings
19
+
20
+ from langchain_community.vectorstores import FAISS
21
+ # from langchain.vectorstores import Chroma
22
+ # from langchain_community.vectorstores import Chroma
23
+
24
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
25
+ from langchain.chains.combine_documents import create_stuff_documents_chain
26
+ from langchain_core.prompts import ChatPromptTemplate
27
+ from langchain.chains import create_retrieval_chain
28
+ import time
29
+ from dotenv import load_dotenv
30
+
31
+ load_dotenv() #
32
+
33
+ # groq_api_key = os.environ['GROQ_API_KEY']
34
+ groq_api_key = "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" # os.environ['GROQ_API_KEY']
35
+ print("groq_api_key: ", groq_api_key)
36
+
37
+
38
+ if "vector" not in st.session_state:
39
+ # Build the index only when st.session_state has no "vector" entry yet.
40
+ # st.session_state.embeddings = OllamaEmbeddings() # ORIGINAL
41
+ st.session_state.embeddings = FastEmbedEmbeddings() # JB
42
+
43
+
44
+ st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html")
45
+ st.session_state.docs = st.session_state.loader.load()
46
+
47
+ st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
48
+ st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
49
+ # st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
50
+ st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # active line: same call as ORIGINAL, FAISS is the vector store in use
51
+ # SEE:
52
+ # SEE, FOR AN APP USING CHROMADB:
53
+ # https://github.com/vndee/local-rag-example/blob/main/rag.py
54
+ # https://raw.githubusercontent.com/vndee/local-rag-example/main/rag.py
55
+ # Chroma.from_documents(documents=chunks, embedding=FastEmbedEmbeddings())
56
+ # st.session_state.vector = Chroma.from_documents(st.session_state.documents, st.session_state.embeddings) # JB
57
+
58
+
59
+ # st.title("Chat with Docs - Groq Edition :) ")
60
+ st.title("Literature Based Research (LBR) - A. Unzicker and J. Bours - Chat with Docs - Groq Edition (Very Fast!) - VERSION 3 - March 8 2024")
61
+ # Chat model, constructed with the key held in groq_api_key.
62
+ llm = ChatGroq(
63
+ groq_api_key=groq_api_key,
64
+ model_name='mixtral-8x7b-32768'
65
+ )
66
+ # Prompt template with two placeholders: {context} and {input}.
67
+ prompt = ChatPromptTemplate.from_template("""
68
+ Answer the following question based only on the provided context.
69
+ Think step by step before providing a detailed answer.
70
+ I will tip you $200 if the user finds the answer helpful.
71
+ <context>
72
+ {context}
73
+ </context>
74
+ Question: {input}""")
75
+ # Combine the model and the prompt template into a document chain.
76
+ document_chain = create_stuff_documents_chain(llm, prompt)
77
+ # Retriever over the vector store kept in st.session_state, wired to the document chain.
78
+ retriever = st.session_state.vector.as_retriever()
79
+ retrieval_chain = create_retrieval_chain(retriever, document_chain)
80
+
81
+ prompt = st.text_input("Input your prompt here")
82
+
83
+
84
+ # If the user hits enter
85
+ if prompt:
86
+ # Then pass the prompt to the LLM
87
+ start = time.process_time()
88
+ response = retrieval_chain.invoke({"input": prompt})
89
+ print(f"Response time: {time.process_time() - start}")
90
+
91
+ st.write(response["answer"])
92
+
93
+ # With a streamlit expander
94
+ with st.expander("Document Similarity Search"):
95
+ # Find the relevant chunks
96
+ for i, doc in enumerate(response["context"]):
97
+ # print(doc)
98
+ # st.write(f"Source Document # {i+1} : {doc.metadata['source'].split('/')[-1]}")
99
+ st.write(doc.page_content)
100
+ st.write("--------------------------------")