JBHF committed on
Commit
abaae3f
·
verified ·
1 Parent(s): 16f3929

Upload 3 files

Browse files
Files changed (3) hide show
  1. README_JB.md +15 -0
  2. langchain_groq_rag_JB.py +99 -0
  3. requirements.txt +13 -0
README_JB.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YouTube-Tutorial
2
+ This repo contains the code covered in the YouTube tutorials.
3
+
4
+ To run the Groq RAG example code, use the command `streamlit run langchain_groq_rag_JB.py`
5
+
6
+ | Video link | Notebook |
7
+ | --- | ----------- |
8
+ | [Getting Started with Groq API](https://youtu.be/S53BanCP14c) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://tinyurl.com/2nxdv2m8)|
9
+ | [Better RAG: Hybrid Search in LangChain with BM25 and Ensemble](https://youtu.be/r2m9DbEmeqI) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://tinyurl.com/33wc8sav)|
10
+ | [Fine-Tune Your Own Tiny-Llama on Custom Dataset](https://youtu.be/OVqe6GTrDFM) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://tinyurl.com/4eny9cvc)|
11
+ | [Run Mixtral 8x7B MoE in Google Colab](https://youtu.be/Zo3CTapKJ4I) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://tinyurl.com/2nn5snb4)|
12
+ | [GEMINI Pro with LangChain - Chat, MultiModal and Chat with your Documents](https://youtu.be/7h8ZHSkAkas) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://tinyurl.com/28bw3ntv)|
13
+ | [GEMINI Pro with LangChain - Chat, MultiModal and Chat with your Documents](https://youtu.be/7h8ZHSkAkas) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://tinyurl.com/28bw3ntv)|
14
+
15
+
langchain_groq_rag_JB.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # JB:
2
+ # LangChainDeprecationWarning: Importing embeddings from langchain is deprecated.
3
+ # Importing from langchain will no longer be supported as of langchain==0.2.0.
4
+ # Please import from langchain-community instead:
5
+ # `from langchain_community.embeddings import FastEmbedEmbeddings`.
6
+ # To install langchain-community run `pip install -U langchain-community`.
7
+ from langchain_community.embeddings import FastEmbedEmbeddings
8
+
9
+ import os
10
+ import streamlit as st
11
+ from langchain_groq import ChatGroq
12
+ from langchain_community.document_loaders import WebBaseLoader
13
+ from langchain_community.embeddings import OllamaEmbeddings
14
+
15
+ # JB:
16
+ from langchain.embeddings import FastEmbedEmbeddings
17
+
18
+ # from langchain_community.vectorstores import FAISS
19
+ # from langchain.vectorstores import Chroma
20
+ from langchain_community.vectorstores import Chroma
21
+
22
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
23
+ from langchain.chains.combine_documents import create_stuff_documents_chain
24
+ from langchain_core.prompts import ChatPromptTemplate
25
+ from langchain.chains import create_retrieval_chain
26
+ import time
27
+ from dotenv import load_dotenv
28
+
29
+ load_dotenv() #
30
+
31
+ # groq_api_key = os.environ['GROQ_API_KEY']
32
+ groq_api_key = "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" # os.environ['GROQ_API_KEY']
33
+ print("groq_api_key: ", groq_api_key)
34
+
35
+
36
+ if "vector" not in st.session_state:
37
+ # Runs only while st.session_state has no "vector" entry, so the index below is built once per session.
38
+ # st.session_state.embeddings = OllamaEmbeddings() # ORIGINAL
39
+ st.session_state.embeddings = FastEmbedEmbeddings() # JB
40
+
41
+ # Load the source web page, then split it into overlapping chunks for embedding.
42
+ st.session_state.loader = WebBaseLoader("https://paulgraham.com/greatwork.html")
43
+ st.session_state.docs = st.session_state.loader.load()
44
+
45
+ st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
46
+ st.session_state.documents = st.session_state.text_splitter.split_documents( st.session_state.docs)
47
+ # st.session_state.vector = FAISS.from_documents(st.session_state.documents, st.session_state.embeddings) # ORIGINAL
48
+ # SEE:
49
+ # SEE, FOR AN APP USING CHROMADB:
50
+ # https://github.com/vndee/local-rag-example/blob/main/rag.py
51
+ # https://raw.githubusercontent.com/vndee/local-rag-example/main/rag.py
52
+ # Chroma.from_documents(documents=chunks, embedding=FastEmbedEmbeddings())
53
+ st.session_state.vector = Chroma.from_documents(st.session_state.documents, st.session_state.embeddings) # JB
54
+
55
+
56
+ # Original title: st.title("Chat with Docs - Groq Edition :) ")
57
+ st.title("Literature Based Research (LBR) - Alexander Unzicker and Jan Bours - Chat with Docs - Groq Edition (Very Fast!) ")
58
+
59
+ # Chat model served through Groq, authenticated with groq_api_key.
60
+ llm = ChatGroq(
61
+ groq_api_key=groq_api_key,
62
+ model_name='mixtral-8x7b-32768'
63
+ )
64
+ # Prompt template with two placeholders: {context} and {input} (the question passed to invoke()).
65
+ prompt = ChatPromptTemplate.from_template("""
66
+ Answer the following question based only on the provided context.
67
+ Think step by step before providing a detailed answer.
68
+ I will tip you $200 if the user finds the answer helpful.
69
+ <context>
70
+ {context}
71
+ </context>
72
+
73
+ Question: {input}""")
74
+ # Chain that combines documents with the prompt above and sends them to the LLM.
75
+ document_chain = create_stuff_documents_chain(llm, prompt)
76
+ # Retrieval chain: the vector store retriever supplies documents to document_chain.
77
+ retriever = st.session_state.vector.as_retriever()
78
+ retrieval_chain = create_retrieval_chain(retriever, document_chain)
79
+
80
+ prompt = st.text_input("Input your prompt here")
81
+
82
+
83
+ # If the user hits enter
84
+ if prompt:
85
+ # Then pass the prompt to the LLM
86
+ start = time.process_time()
87
+ response = retrieval_chain.invoke({"input": prompt})
88
+ print(f"Response time: {time.process_time() - start}")
89
+
90
+ st.write(response["answer"])
91
+
92
+ # With a streamlit expander
93
+ with st.expander("Document Similarity Search"):
94
+ # Find the relevant chunks
95
+ for i, doc in enumerate(response["context"]):
96
+ # print(doc)
97
+ # st.write(f"Source Document # {i+1} : {doc.metadata['source'].split('/')[-1]}")
98
+ st.write(doc.page_content)
99
+ st.write("--------------------------------")
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ groq
3
+ langchain
4
+ langchain-groq
5
+ python-dotenv
6
+ beautifulsoup4
7
+ faiss-cpu
8
+
9
+ # JB:
10
+ chromadb
11
+ fastembed
12
+
13
+ ollama