prsdm committed on
Commit
0fefbe1
1 Parent(s): 3dcc08e

Upload 15 files

Browse files
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WEAVIATE_API_KEY = "<REDACTED - rotate this key; never commit real credentials>"
2
+ WEAVIATE_CLUSTER = "https://rag-sample-lawtn4cr.weaviate.network"
.env.example ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WEAVIATE_API_KEY =
2
+ WEAVIATE_CLUSTER =
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Ignoring following:
2
+ poetry.lock
3
+ pyproject.toml
4
+ .streamlit/secrets.toml
5
+ .env
.streamlit/secrets.toml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [weaviate_credentials]
2
+
3
+ weaviate_api_key = "<REDACTED - rotate this key; never commit real credentials>"
4
+ weaviate_cluster = "https://rag-sample-lawtn4cr.weaviate.network"
README.md CHANGED
@@ -1,13 +1,31 @@
1
- ---
2
- title: Rag Weaviate
3
- emoji: 🐠
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: streamlit
7
- sdk_version: 1.35.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Roman Empire Q&A Application
2
+
3
+ https://weaviate-rag.streamlit.app/
4
+
5
+ This project is a Retrieval-Augmented Generation (RAG) application designed to answer questions related to the Roman Empire. It leverages Weaviate for vector storage and retrieval, OpenAI for generating responses, LangChain for wiring the retrieval chain together, and Streamlit for the user interface.
6
+
7
+
8
+ ![RAG-Roman Empire Banner](public/images/workflow.jpg)
9
+
10
+ ## Features
11
+
12
+ - **Interactive Question-Answering**: Users can input questions about Roman history and culture and receive informative responses.
13
+ - **Sample Questions**: The app provides a list of sample questions to guide users and demonstrate its capabilities.
14
+ - **API Key Management**: Users can securely enter their OpenAI API key via a password input field in the sidebar.
15
+ - **Built With**: The sidebar includes links to the technologies and frameworks used in building the app.
16
+
17
+ ## Getting Started
18
+
19
+ To run the app locally, follow these steps:
20
+
21
+ 1. Clone this repository to your local machine.
22
+ 2. Install the required dependencies by running `pip install -r requirements.txt`. Streamlit is not listed in that file, so also run `pip install streamlit`.
23
+ 3. Create a `.env` file in the root directory of the project and add your Weaviate API key and cluster URL in the following format:
24
+
25
+ ```
26
+ WEAVIATE_API_KEY="your_api_key"
27
+ WEAVIATE_CLUSTER="your_cluster_url"
28
+ ```
29
+
30
+ 4. Run the Streamlit app using the command `streamlit run app.py`.
31
+ 5. Access the app in your web browser at `localhost:8501`.
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import weaviate
4
+ from dotenv import load_dotenv,find_dotenv
5
+ from langchain.vectorstores import Weaviate
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from langchain_community.chat_models import ChatOpenAI
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+
11
# Load the .env file located by find_dotenv() into the process environment,
# then read the two Weaviate settings. Each value is None when the variable
# is not set.
load_dotenv(find_dotenv())
weaviate_api_key = os.environ.get('WEAVIATE_API_KEY')
weaviate_cluster = os.environ.get('WEAVIATE_CLUSTER')
14
+
15
# --- Main page header -------------------------------------------------------
# Page title and the banner image shown above everything else.
st.set_page_config(page_title="RAG-Roman Empire")
st.image("public/images/banner.png")

# Centered greeting, rendered as raw HTML.
_GREETING_HTML = """
<div style='text-align: center;'>
<p>Salvete! I'm here to help you learn about the Roman Empire.
Feel free to ask me anything about Roman history and culture!</p>
</div>
"""
st.write(_GREETING_HTML, unsafe_allow_html=True)

# Collapsible list of example questions, also rendered as raw HTML.
_SAMPLE_QUESTIONS_HTML = """
<div>
<ul>
<li>What were the major achievements of the Roman Empire?</li>
<li>How did the Roman Empire rise to power?</li>
<li>How did the Roman Empire expand its territory over time? etc.</li>
</ul>
<p><b>Note:</b> you can also ask irrelevant questions to test how the RAG framework is working.</p>
</div>
"""
with st.expander("Sample questions you can ask"):
    st.markdown(_SAMPLE_QUESTIONS_HTML, unsafe_allow_html=True)
42
+
43
# --- Sidebar: credentials ---------------------------------------------------
# The OpenAI key is typed in by the user (masked input); the Weaviate settings
# come from the environment.
st.sidebar.subheader("Your OpenAI Key Please")
openai_api_key = st.sidebar.text_input('Put Your Key Here: ', type="password")

with st.sidebar:
    # Credentials are incomplete if the OpenAI key lacks the 'sk-' prefix or
    # either Weaviate setting is empty/unset.
    credentials_missing = (
        not openai_api_key.startswith('sk-')
        or not weaviate_api_key
        or not weaviate_cluster
    )
    if credentials_missing:
        st.warning('Please enter your credentials!', icon='⚠️')
    else:
        st.success('API keys and URL already provided!', icon='✅')

# Notice about the limited lifetime of the demo's Weaviate cluster.
_DISCLAIMER_TEXT = """We appreciate your engagement! Please note, This demo app can be shut down
after the Weaviate cluster expires on 6/22/2024, 2:57 PM.
"""
with st.sidebar.expander("ℹ️ Disclaimer"):
    st.caption(_DISCLAIMER_TEXT)
59
+
60
# --- Sidebar: credits -------------------------------------------------------
# Static HTML listing the technologies used and the author's links.
# The markup is kept balanced: every <p> and <div> that is opened is closed,
# and there are no stray closing tags.
st.sidebar.markdown(
    """
<p style='text-align: center'> This RAG App is built using: </p>
<div style='text-align: center;'>
<a href='https://weaviate.io/' target='_blank'> Weaviate</a>
<a href='https://www.langchain.com/' target='_blank'> LangChain</a>
<a href='https://openai.com/' target='_blank'><img src="https://img.icons8.com/?size=100&id=ka3InxFU3QZa&format=png&color=000000" height="26"></a>
<a href='https://huggingface.co/' target='_blank'> <img src="https://img.icons8.com/?size=100&id=sop9ROXku5bb&format=png&color=000000" height="25"></a>
</div>
""",
    unsafe_allow_html=True,
)

# NOTE(review): the "Repository" link below points at https://weaviate.io/,
# not at a GitHub repository -- looks like a copy/paste slip; confirm the
# intended URL before changing it.
# NOTE(review): 'margin-top: 10;' has no CSS unit; '10px' was probably meant.
st.sidebar.markdown(
    """
<div style='margin-top: 20px;'>
<p style='text-align: center; margin-bottom: 0; margin-top: 10;'>App created by</p>
<p style='text-align: center; margin-top: 0;'><b>Prasad Mahamulkar</b></p>
<p style='text-align: center'> GitHub
<a href='https://weaviate.io/' target='_blank'> Repository</a></p>
<p style='text-align: center'> Follow me for more! </p>
<div style='text-align: center;'>
<a href='https://github.com/prsdm' target='_blank'><img src="https://img.icons8.com/fluency/48/000000/github.png" height="26"></a>
<a href='https://x.com/prsdm17' target='_blank'> <img src="https://img.icons8.com/?size=100&id=phOKFKYpe00C&format=png&color=000000" height="25"></a>
<a href='https://www.linkedin.com/in/prasad-mahamulkar/' target='_blank'><img src="https://img.icons8.com/?size=100&id=8808&format=png&color=000000" height="25"></a>
<a href='https://medium.com/@prasadmahamulkar' target='_blank'><img src="https://img.icons8.com/?size=100&id=XVNvUWCvvlD9&format=png&color=000000" height="25"></a>
</div>
</div>
""",
    unsafe_allow_html=True,
)
94
+
95
+ def get_qa_chain():
96
+ auth_config = weaviate.auth.AuthApiKey(api_key=weaviate_api_key)
97
+ client = weaviate.Client(
98
+ url=weaviate_cluster,
99
+ additional_headers={"X-OpenAI-Api-key": openai_api_key},
100
+ auth_client_secret=auth_config,
101
+ startup_period=10
102
+ )
103
+
104
+ vectorstore = Weaviate(client, "RomanEmpire", "content", attributes=["source"])
105
+ retriever = vectorstore.as_retriever()
106
+
107
+ template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
108
+ If the answer is not in the context, just say that you I don't have access to this information. Use five sentences maximum and keep the answer concise.
109
+ Question: {question}
110
+ Context: {context}
111
+ Answer:
112
+ """
113
+ prompt = ChatPromptTemplate.from_template(template)
114
+
115
+ llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=openai_api_key)
116
+
117
+ chain = (
118
+ {"context": retriever, "question": RunnablePassthrough()}
119
+ | prompt
120
+ | llm
121
+ | StrOutputParser()
122
+ )
123
+
124
+ return chain
125
+
126
+ def generate_response(input_text):
127
+ if openai_api_key and weaviate_api_key and weaviate_cluster:
128
+ chain = get_qa_chain()
129
+ answer = chain.invoke(input_text)
130
+ st.info(answer)
131
+ else:
132
+ st.warning("Please provide all required API keys and URL!")
133
+
134
# --- Question form ----------------------------------------------------------
# NOTE(review): SOURCE has lost its indentation; the final `if` is assumed to
# sit at top level (outside the form), as the blank lines before it suggest.
with st.form('my_form'):
    text = st.text_input('Enter text:', 'Tell me interesting facts about Roman Empire')
    submitted = st.form_submit_button('Submit')
    # Evaluate the key-prefix check once and reuse it below.
    has_openai_key = openai_api_key.startswith('sk-')
    if not has_openai_key:
        st.warning('Please enter your OpenAI API key!', icon='⚠')

# Only query the chain after an explicit submit with a plausible key.
if submitted and has_openai_key:
    generate_response(text)
backend/Create_Vectorstore.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
backend/data/RomanEmpire.pdf ADDED
Binary file (792 kB). View file
 
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
public/images/banner.png ADDED
public/images/weaviate_logo.png ADDED
public/images/workflow.jpg ADDED
pyproject.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "RAG Application"
3
+ version = "0.1.0"
4
+ description = "App by prasad mahamulkar"
5
+ authors = ["Prsd17 <prsd.mahamulkar@gmail.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.12"
10
+ weaviate-client = "^4.6.4"
11
+ langchain-community = "^0.2.4"
12
+ langchain = "^0.2.3"
13
+ openai = "^1.33.0"
14
+ python-dotenv = "^1.0.1"
15
+
16
+
17
+ [build-system]
18
+ requires = ["poetry-core"]
19
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ weaviate-client == 4.6.4
2
+ langchain-community == 0.2.4
3
+ langchain == 0.2.3
4
+ openai == 1.33.0
5
+ python-dotenv == 1.0.1