facehugger92 committed on
Commit
1679e12
β€’
1 Parent(s): 386ceec

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  test_index_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  test_index_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text
37
+ 100_test_docs_persist/vector_store.json filter=lfs diff=lfs merge=lfs -text
100_test_docs_persist/docstore.json ADDED
The diff for this file is too large to render. See raw diff
 
100_test_docs_persist/graph_store.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"graph_dict": {}}
100_test_docs_persist/index_store.json ADDED
The diff for this file is too large to render. See raw diff
 
100_test_docs_persist/vector_store.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb2826fe8f6ba32acd8b2545e07d089ffd0d876dd80a8e27df8300c987af9f8
3
+ size 62556594
app.py CHANGED
@@ -1,70 +1,103 @@
1
  from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context, load_index_from_storage, StorageContext, PromptHelper
2
  from llama_index.llms import OpenAI
3
  from langchain.chat_models import ChatOpenAI
 
4
  import gradio as gr
5
  import sys
6
  import os
7
 
8
- # Disabled for HF
9
- # def construct_index(directory_path, index_path):
10
- # max_input_size = 4096
11
- # num_outputs = 512
12
- # chunk_overlap_ratio = 0.2
13
- # chunk_size_limit = 600
14
 
15
- # if os.listdir(index_path) != []:
16
-
17
- # return index
18
 
19
- # # Load in documents
20
- # documents = SimpleDirectoryReader(directory_path).load_data()
 
 
21
 
22
- # # Define LLM: gpt-3.5-turbo, temp:0.7
23
- # llm = OpenAI(model="gpt-3.5-turbo", temperature=0.7, max_tokens=num_outputs)
24
 
25
- # # Define prompt helper
26
- # prompt_helper = PromptHelper(context_window=max_input_size, num_output=num_outputs, chunk_overlap_ratio=chunk_overlap_ratio, chunk_size_limit=chunk_size_limit)
27
 
28
- # # Set service context
29
- # service_context = ServiceContext.from_defaults(llm=llm, prompt_helper=prompt_helper)
30
- # set_global_service_context(service_context)
31
 
32
- # # Parse documents into nodes
33
- # # parser = SimpleNodeParser.from_defaults()
34
- # # nodes = parser.get_nodes_from_documents(documents)
35
 
36
- # # Index documents
37
- # index = GPTVectorStoreIndex.from_documents(documents)
 
38
 
39
- # # Save index
40
- # index.storage_context.persist(persist_dir=index_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- # return index
43
 
44
- STORAGE_CONTEXT = StorageContext.from_defaults(persist_dir="test_index_persist")
45
- INDEX = load_index_from_storage(STORAGE_CONTEXT)
46
  QE = INDEX.as_query_engine()
 
47
 
48
- def format_chat(message, chat_history, max_chat_length=10):
49
- prompt = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  for chat in chat_history[~max_chat_length:]:
51
  user_chat, bot_chat = chat
52
  prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"
53
- prompt = f"{prompt}\nUser: {message}\nAssistant:"
54
- return prompt
55
-
56
- def chatfunc(input_text, chat_history):
57
- response = QE.query(input_text)
58
  chat_history.append([input_text, response.response])
59
  return "", chat_history
60
 
61
  with gr.Blocks() as iface:
62
  chatbot = gr.Chatbot(height=400)
63
- msg = gr.Textbox(label="Ask the Standard Bot anything about curriculum standards")
64
- submit = gr.Button("Submit")
65
  with gr.Row():
66
-
67
- clear = gr.ClearButton(components=[msg, chatbot])
68
- msg.submit(chatfunc, [msg, chatbot], [msg, chatbot])
69
-
70
- iface.launch(share=False, debug=True)
 
 
 
 
1
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader, ServiceContext, set_global_service_context, load_index_from_storage, StorageContext, PromptHelper
from llama_index.llms import OpenAI
from langchain.chat_models import ChatOpenAI
# requirements.txt pins `pypdf` (the maintained successor of PyPDF2), so the
# import must come from pypdf — `from PyPDF2 import PdfReader` would fail at
# runtime because PyPDF2 itself is not a declared dependency.
from pypdf import PdfReader
import gradio as gr
import sys
import os

# Optional local-development configuration: if a config.py providing
# OPEN_AI_KEY exists, export it for the OpenAI client. On the hosted Space
# the key is expected to come from the environment instead, so a missing
# config module is normal — but only ImportError should be swallowed here
# (a bare `except:` would also hide real bugs such as a typo in config.py).
try:
    from config import OPEN_AI_KEY
    os.environ["OPENAI_API_KEY"] = OPEN_AI_KEY
except ImportError:
    pass
 
14
 
15
# ===============================
# Settings
# ===============================

# Prompt/window sizing handed to the LLM and the PromptHelper below.
MAX_INPUT_SIZE = 4096        # model context window, in tokens
NUM_OUTPUT = 700             # max tokens the model may generate per answer
CHUNK_OVERLAP_RATIO = 0.15   # fractional overlap between adjacent text chunks
CHUNK_SIZE_LIMIT = 600       # upper bound on chunk size when splitting docs

# Define LLM: gpt-3.5-turbo, temp:0.7
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.7, max_tokens=NUM_OUTPUT)

# Define prompt helper
prompt_helper = PromptHelper(context_window=MAX_INPUT_SIZE, num_output=NUM_OUTPUT, chunk_overlap_ratio=CHUNK_OVERLAP_RATIO, chunk_size_limit=CHUNK_SIZE_LIMIT)

# Set service context — set_global_service_context makes this the default
# for all llama_index operations in the process (indexing and querying).
service_context = ServiceContext.from_defaults(llm=llm, prompt_helper=prompt_helper)
set_global_service_context(service_context)


# ===============================
# Functions
# ===============================
38
 
39
def construct_index(directory_path, index_path):
    """Load a persisted vector index, or build and persist a new one.

    Args:
        directory_path: Folder of source documents to index on a cold start.
        index_path: Persist directory for the llama_index storage context.

    Returns:
        A GPTVectorStoreIndex, either loaded from ``index_path`` or freshly
        built from the documents under ``directory_path``.
    """
    # Fast path: reuse the persisted index when the persist directory already
    # contains files. The isdir() guard also tolerates the directory not
    # existing yet (fresh checkout) instead of os.listdir raising
    # FileNotFoundError.
    if os.path.isdir(index_path) and os.listdir(index_path):
        storage_context = StorageContext.from_defaults(persist_dir=index_path)
        return load_index_from_storage(storage_context)

    # Cold start: read every document under directory_path...
    documents = SimpleDirectoryReader(directory_path).load_data()

    # ...embed and index them (service_context pins the LLM + prompt sizing)...
    index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)

    # ...and persist so subsequent launches take the fast path above.
    index.storage_context.persist(persist_dir=index_path)

    return index
57
 
58
# Build (or load) the index once at import time, then create the query
# engine that chatfunc uses for every question.
INDEX = construct_index("100_test_docs", "100_test_docs_persist")
QE = INDEX.as_query_engine()
# NOTE(review): a gr.State created at module level (outside gr.Blocks) is a
# single shared object, not per-session state — uploaded syllabi would be
# visible across users. Confirm this is acceptable for a multi-user Space.
PDF_CONTENT = gr.State("")
61
 
62
def upload_file(file):
    """Extract the text of an uploaded PDF and stash it for chatfunc.

    Args:
        file: The gradio upload object; ``file.name`` is a local file path.

    Returns:
        The extracted text (also stored in ``PDF_CONTENT.value``), or an
        ``"Error: ..."`` string shown to the user when the PDF is unreadable.
    """
    try:
        reader = PdfReader(file.name)
        # extract_text() can return None for pages with no extractable text
        # (e.g. scanned images); substitute "" so join() cannot raise
        # TypeError and one bad page doesn't discard the whole document.
        pdf_text = "\n\n".join([(page.extract_text() or "") for page in reader.pages])
        PDF_CONTENT.value = pdf_text
        return pdf_text
    except Exception as e:
        # Deliberate best-effort: surface the failure in the UI textbox
        # instead of crashing the event handler.
        return f"Error: {str(e)}"
70
+
71
def chatfunc(input_text, chat_history, max_chat_length=6):
    """Answer a user message via the global vector-index query engine.

    Builds a prompt from a fixed system preamble, the uploaded syllabus (if
    any), and the most recent ``max_chat_length`` turns of history, queries
    ``QE``, and appends the new exchange to the history.

    Args:
        input_text: The user's new message.
        chat_history: List of ``[user, assistant]`` pairs (gradio Chatbot state).
        max_chat_length: Maximum number of past turns replayed into the prompt.

    Returns:
        ``("", chat_history)`` — the empty string clears the input textbox.
    """
    prompt = """
    <|SYSTEM|># You are the Common Core State Standards Bot; or CCSSB in short.
    - CCSSB is a helpful model that helps the User learn about Common Core State Standards.
    - CCSSB can access a vector data base of reports on how specific contents align to Common Core State Standards.
    - Users will sometimes provide CCSSB with their syllabus or homework and ask CCSSB how they align to Common Core State Standards.
    - CCSSB will use the database as much as it can to answer Users' questions with as much detail as possible with specific attention to Common Core State Standards.
    - CCSSB will be provided with its past conversation with Users. CCSSB can use this chat history to answer questions specific to the User.
    \n\n
    """
    # Include the uploaded syllabus (if any). The ": " separator is a fix:
    # previously the sentence and the syllabus text were fused together.
    if PDF_CONTENT.value:
        prompt = prompt + "The following is the syllabus provided by the user: " + PDF_CONTENT.value + "\n\n"
    # Replay only the most recent turns to stay inside the context window.
    # Fix: the original sliced with ~max_chat_length (~6 == -7), which kept
    # max_chat_length + 1 turns; -max_chat_length keeps exactly that many.
    for user_chat, bot_chat in chat_history[-max_chat_length:]:
        prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"
    prompt = f"{prompt}\nUser: {input_text}\nAssistant:"
    response = QE.query(prompt)
    chat_history.append([input_text, response.response])
    return "", chat_history
90
 
91
with gr.Blocks() as iface:
    # Chat transcript, input box, and explicit submit button.
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask the Common Core State Standard Bot anything about curriculum standards")
    submit = gr.Button("💬 Submit")
    with gr.Row():
        clear = gr.ClearButton(value="🧹 Clear outputs", components=[msg, chatbot])
        upload_button = gr.UploadButton("📁 Upload a Syllabus", file_types=[".pdf"], file_count="single")
    with gr.Accordion("📝 View your syllabus"):
        syl = gr.Textbox(label="Your syllabus' content will show here")
    # Event wiring. Bug fix: the Submit button was created but never bound
    # to any handler, so clicking it did nothing — bind it to the same
    # callback as pressing Enter in the textbox.
    msg.submit(chatfunc, [msg, chatbot], [msg, chatbot])
    submit.click(chatfunc, [msg, chatbot], [msg, chatbot])
    upload_button.upload(upload_file, upload_button, syl)

iface.launch(share=False)
requirements.txt CHANGED
@@ -83,3 +83,4 @@ uvicorn==0.23.2
83
  websockets==11.0.3
84
  yarl==1.9.2
85
  zipp==3.17.0
 
 
83
  websockets==11.0.3
84
  yarl==1.9.2
85
  zipp==3.17.0
86
+ pypdf==3.16.4