andreped committed
Commit a22f65f
1 Parent(s): c2595aa

Reformatted code; updated linting

Files changed (6)
  1. app.py +4 -47
  2. chatbot/__init__.py +0 -0
  3. chatbot/utils.py +50 -0
  4. setup.cfg +2 -1
  5. shell/format.sh +3 -3
  6. shell/lint.sh +3 -3
app.py CHANGED
@@ -1,14 +1,8 @@
 import json
-import os
 
 import streamlit as st
-from gdown import download_folder
-from llama_index import ServiceContext
-from llama_index import SimpleDirectoryReader
-from llama_index import VectorStoreIndex
-from llama_index import set_global_service_context
-from llama_index.embeddings import OpenAIEmbedding
-from llama_index.llms import AzureOpenAI
+from chatbot.utils import download_test_data
+from chatbot.utils import load_data
 
 # Initialize message history
 st.header("Chat with André's research 💬 📚")
@@ -21,47 +15,10 @@ with open(r"config.json") as config_file:
 config_details = json.load(config_file)
 
 
-def download_test_data():
-    url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
-    with st.spinner(text="Downloading test data. Might take a few seconds."):
-        download_folder(url, quiet=True, use_cookies=False, output="./data/")
-
-
-@st.cache_resource(show_spinner=False)
-def load_data():
-    with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
-        documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
-        llm = AzureOpenAI(
-            model="gpt-3.5-turbo",
-            engine=config_details["ENGINE"],
-            temperature=0.5,
-            api_key=os.getenv("OPENAI_API_KEY"),
-            api_base=config_details["OPENAI_API_BASE"],
-            api_type="azure",
-            api_version=config_details["OPENAI_API_VERSION"],
-            system_prompt="You are an expert on André's research and your job is to answer "
-            "technical questions. Assume that all questions are related to "
-            "André's research. Keep your answers technical and based on facts"
-            " – do not hallucinate features.",
-        )
-        # You need to deploy your own embedding model as well as your own chat completion model
-        embed_model = OpenAIEmbedding(
-            model="text-embedding-ada-002",
-            deployment_name=config_details["ENGINE_EMBEDDING"],
-            api_key=os.getenv("OPENAI_API_KEY"),
-            api_base=config_details["OPENAI_API_BASE"],
-            api_type="azure",
-            api_version=config_details["OPENAI_API_VERSION"],
-        )
-        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
-        set_global_service_context(service_context)
-        index = VectorStoreIndex.from_documents(documents)  # , service_context=service_context)
-        return index
-
-
 def main():
+    # setup dataset
     download_test_data()
-    index = load_data()
+    index = load_data(config_details)
     chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)
 
     if prompt := st.chat_input("Your question"):  # Prompt for user input and save to chat history
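
Note: reassembling the new side of this diff, app.py after the commit reduces to a thin entry point (a sketch; the chat-history handling below the last line shown in the hunk is untouched by this commit and elided here):

import json

import streamlit as st
from chatbot.utils import download_test_data
from chatbot.utils import load_data

# Initialize message history
st.header("Chat with André's research 💬 📚")

with open(r"config.json") as config_file:
    config_details = json.load(config_file)


def main():
    # setup dataset
    download_test_data()
    index = load_data(config_details)
    chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)

    if prompt := st.chat_input("Your question"):  # Prompt for user input and save to chat history
        ...  # remainder of the chat loop unchanged (not shown in this diff)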
chatbot/__init__.py ADDED
File without changes
chatbot/utils.py ADDED
@@ -0,0 +1,50 @@
+import os
+
+import streamlit as st
+from gdown import download_folder
+from llama_index import ServiceContext
+from llama_index import SimpleDirectoryReader
+from llama_index import VectorStoreIndex
+from llama_index import set_global_service_context
+from llama_index.embeddings import OpenAIEmbedding
+from llama_index.llms import AzureOpenAI
+
+
+@st.cache_resource(show_spinner=False)
+def download_test_data():
+    # url = f"https://drive.google.com/drive/folders/uc?export=download&confirm=pbef&id={file_id}"
+    url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
+    with st.spinner(text="Downloading test data. Might take a few seconds."):
+        download_folder(url=url, quiet=False, use_cookies=False, output="./data/")
+
+
+@st.cache_resource(show_spinner=False)
+def load_data(config_details):
+    with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
+        documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
+        llm = AzureOpenAI(
+            model="gpt-3.5-turbo",
+            engine=config_details["ENGINE"],
+            temperature=0.5,
+            api_key=os.getenv("OPENAI_API_KEY"),
+            api_base=config_details["OPENAI_API_BASE"],
+            api_type="azure",
+            api_version=config_details["OPENAI_API_VERSION"],
+            system_prompt="You are an expert on André's research and your job is to answer "
+            "technical questions. Assume that all questions are related to "
+            "André's research. Keep your answers technical and based on facts"
+            " – do not hallucinate features.",
+        )
+        # You need to deploy your own embedding model as well as your own chat completion model
+        embed_model = OpenAIEmbedding(
+            model="text-embedding-ada-002",
+            deployment_name=config_details["ENGINE_EMBEDDING"],
+            api_key=os.getenv("OPENAI_API_KEY"),
+            api_base=config_details["OPENAI_API_BASE"],
+            api_type="azure",
+            api_version=config_details["OPENAI_API_VERSION"],
+        )
+        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
+        set_global_service_context(service_context)
+        index = VectorStoreIndex.from_documents(documents)  # , service_context=service_context)
+        return index
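
For context, load_data(config_details) expects the Azure OpenAI deployment details loaded from config.json; a minimal sketch of the expected keys (all values below are hypothetical placeholders — substitute your own deployment names and endpoint, and export OPENAI_API_KEY separately):

# hypothetical placeholder values for the keys read by chatbot/utils.py
config_details = {
    "ENGINE": "my-gpt-35-turbo-deployment",  # Azure chat-completion deployment name
    "ENGINE_EMBEDDING": "my-ada-002-deployment",  # Azure embedding deployment name
    "OPENAI_API_BASE": "https://my-resource.openai.azure.com/",
    "OPENAI_API_VERSION": "2023-05-15",  # example version; use the one your resource supports
}

index = load_data(config_details)  # assumes OPENAI_API_KEY is set in the environment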
setup.cfg CHANGED
@@ -3,7 +3,7 @@ description-file = README.md
 
 [isort]
 force_single_line=True
-known_first_party=gradient_accumulator
+known_first_party=chatbot
 line_length=120
 profile=black
 
@@ -12,3 +12,4 @@ profile=black
 per-file-ignores=*__init__.py:F401
 ignore=E203,W503,W605,F632,E266,E731,E712,E741
 max-line-length=120
+exclude=venv/
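
With force_single_line=True and known_first_party=chatbot, isort emits one import per line and sorts chatbot imports into the first-party group — roughly this layout for app.py (a sketch of the expected grouping, not tool output):

import json  # standard library

import streamlit as st  # third-party

from chatbot.utils import download_test_data  # first-party (known_first_party=chatbot)
from chatbot.utils import load_data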
shell/format.sh CHANGED
@@ -1,4 +1,4 @@
 #!/bin/bash
-isort --sl app.py
-black --line-length 120 app.py
-flake8 app.py
+isort --sl .
+black --line-length 120 .
+flake8 .
shell/lint.sh CHANGED
@@ -1,19 +1,19 @@
 #!/bin/bash
-isort --check --sl -c app.py
+isort --check --sl -c .
 if ! [ $? -eq 0 ]
 then
     echo "Please run \"sh shell/format.sh\" to format the code."
     exit 1
 fi
 echo "no issues with isort"
-flake8 app.py
+flake8 .
 if ! [ $? -eq 0 ]
 then
     echo "Please fix the code style issue."
     exit 1
 fi
 echo "no issues with flake8"
-black --check --line-length 120 app.py
+black --check --line-length 120 .
 if ! [ $? -eq 0 ]
 then
     echo "Please run \"sh shell/format.sh\" to format the code."