andreped committed on
Commit
60a59c6
1 Parent(s): 183d24b

Use gdown python API to automatically download test data during launch

Browse files
Files changed (3) hide show
  1. .gitignore +1 -2
  2. README.md +10 -4
  3. app.py +9 -0
.gitignore CHANGED
@@ -1,5 +1,4 @@
1
- secrets.toml
2
  venv/
3
  data/
4
  .DS_Store
5
- config.json
 
 
1
  venv/
2
  data/
3
  .DS_Store
4
+ config.json
README.md CHANGED
@@ -20,14 +20,20 @@ pip install -r requirements.txt
20
  export OPENAI_API_KEY=<insert key here>
21
  ```
22
 
23
- 3. Download test data:
24
  ```
25
- gdown https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y -O ./data/ --folder
 
 
 
 
 
 
26
  ```
27
 
28
- 3. Launch the app:
29
  ```
30
  streamlit run app.py
31
  ```
32
 
33
- You can then access the app in your browser at `http://localhost:8501`
 
20
  export OPENAI_API_KEY=<insert key here>
21
  ```
22
 
23
+ 3. Create the `config.json` file and fill in the relevant info:
24
  ```
25
+ {
26
+ "CHATGPT_MODEL":"<insert model name>",
27
+ "OPENAI_API_BASE":"https://<insert-openai-service-name>.openai.azure.com",
28
+ "OPENAI_API_VERSION":"<insert version>",
29
+ "ENGINE": "<insert deployment model name>",
30
+ "ENGINE_EMBEDDING": "<insert deployment embedding name>"
31
+ }
32
  ```
33
 
34
+ 4. Launch the app:
35
  ```
36
  streamlit run app.py
37
  ```
38
 
39
+ A Streamlit browser window should automatically open. If not, the app can be accessed at `http://localhost:8501`
app.py CHANGED
@@ -2,6 +2,7 @@ import json
2
  import os
3
 
4
  import streamlit as st
 
5
  from llama_index import ServiceContext
6
  from llama_index import SimpleDirectoryReader
7
  from llama_index import VectorStoreIndex
@@ -9,6 +10,7 @@ from llama_index import set_global_service_context
9
  from llama_index.embeddings import OpenAIEmbedding
10
  from llama_index.llms import AzureOpenAI
11
 
 
12
  # Initialize message history
13
  st.header("Chat with André's research 💬 📚")
14
 
@@ -20,6 +22,12 @@ with open(r"config.json") as config_file:
20
  config_details = json.load(config_file)
21
 
22
 
 
 
 
 
 
 
23
  @st.cache_resource(show_spinner=False)
24
  def load_data():
25
  with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
@@ -53,6 +61,7 @@ def load_data():
53
 
54
 
55
  def main():
 
56
  index = load_data()
57
  chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
58
 
 
2
  import os
3
 
4
  import streamlit as st
5
+ from gdown import download_folder
6
  from llama_index import ServiceContext
7
  from llama_index import SimpleDirectoryReader
8
  from llama_index import VectorStoreIndex
 
10
  from llama_index.embeddings import OpenAIEmbedding
11
  from llama_index.llms import AzureOpenAI
12
 
13
+
14
  # Initialize message history
15
  st.header("Chat with André's research 💬 📚")
16
 
 
22
  config_details = json.load(config_file)
23
 
24
 
25
+ def download_test_data():
26
+ url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
27
+ with st.spinner(text="Downloading test data. Might take a few seconds."):
28
+ download_folder(url, quiet=True, use_cookies=False, output="./data/")
29
+
30
+
31
  @st.cache_resource(show_spinner=False)
32
  def load_data():
33
  with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
 
61
 
62
 
63
  def main():
64
+ download_test_data()
65
  index = load_data()
66
  chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
67