llamazookeeper committed on
Commit
81cf5f3
1 Parent(s): e2dccc5
Files changed (2) hide show
  1. pages/Tesla_Alerts.py +42 -32
  2. requirements.txt +3 -1
pages/Tesla_Alerts.py CHANGED
@@ -1,21 +1,20 @@
1
  from langchain.prompts import PromptTemplate
2
  from langchain.output_parsers import PydanticOutputParser
 
3
 
4
- from llama_index import VectorStoreIndex, ServiceContext, StorageContext
5
  from llama_index.vector_stores import FaissVectorStore
6
  from llama_index.tools import QueryEngineTool, ToolMetadata
7
  from llama_index.query_engine import SubQuestionQueryEngine
8
  from llama_index.embeddings import OpenAIEmbedding
9
  from llama_index.schema import Document
10
  from llama_index.node_parser import UnstructuredElementNodeParser
11
-
12
- from src.utils import get_model, process_pdf2
13
 
14
  import streamlit as st
15
  import os
16
  import faiss
17
  import time
18
- from pypdf import PdfReader
19
 
20
 
21
  st.set_page_config(page_title="Yield Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
@@ -25,24 +24,15 @@ st.info("""
25
  Begin by uploading the case report in PDF format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
26
  """)
27
 
28
- def process_pdf(pdf):
29
- file = PdfReader(pdf)
30
- print("in process pdf")
31
- document_list = []
32
- for page in file.pages:
33
- document_list.append(Document(text=str(page.extract_text())))
34
- print("in process pdf 1")
35
-
36
- node_paser = UnstructuredElementNodeParser()
37
- print("in process pdf 1")
38
-
39
- nodes = node_paser.get_nodes_from_documents(document_list, show_progress=True)
40
 
41
- return nodes
 
 
 
 
42
 
43
-
44
- def get_vector_index(nodes, vector_store):
45
- print(nodes)
46
  llm = get_model("openai")
47
  if vector_store == "faiss":
48
  d = 1536
@@ -52,12 +42,12 @@ def get_vector_index(nodes, vector_store):
52
  # embed_model = OpenAIEmbedding()
53
  # service_context = ServiceContext.from_defaults(embed_model=embed_model)
54
  service_context = ServiceContext.from_defaults(llm=llm)
55
- index = VectorStoreIndex(nodes,
56
  service_context=service_context,
57
  storage_context=storage_context
58
  )
59
  elif vector_store == "simple":
60
- index = VectorStoreIndex.from_documents(nodes)
61
 
62
 
63
  return index
@@ -89,7 +79,7 @@ def get_query_engine(engine):
89
  query_engine=engine,
90
  metadata=ToolMetadata(
91
  name="Alert Report",
92
- description=f"Provides information about the cases from its case report.",
93
  ),
94
  ),
95
  ]
@@ -111,20 +101,39 @@ os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
111
 
112
 
113
  if OPENAI_API_KEY:
114
- pptx_files = st.sidebar.file_uploader("Upload the case report in PDF format", type="pptx")
115
  st.sidebar.info("""
116
  Example pdf reports you can upload here:
117
  """)
118
 
119
  if st.sidebar.button("Process Document"):
120
  with st.spinner("Processing Document..."):
121
- nodes = process_pptx(pptx_files)
122
- st.session_state.index = get_vector_index(nodes, vector_store="faiss")
123
- #st.session_state.index = get_vector_index(nodes, vector_store="simple")
124
- st.session_state.process_doc = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  st.toast("Document Processsed!")
126
 
127
- st.session_state.process_doc = True
128
 
129
  if st.session_state.process_doc:
130
  search_text = st.text_input("Enter your question")
@@ -134,10 +143,11 @@ if OPENAI_API_KEY:
134
 
135
  st.write("Alert search result...")
136
  response = generate_insight(engine, search_text)
137
- st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
 
138
 
139
  st.toast("Report Analysis Complete!")
140
 
141
- if st.session_state.end_time:
142
- st.write("Report Analysis Time: ", st.session_state.end_time, "s")
143
 
 
1
  from langchain.prompts import PromptTemplate
2
  from langchain.output_parsers import PydanticOutputParser
3
+ from langchain.chat_models import ChatOpenAI
4
 
5
+ from llama_index import VectorStoreIndex, ServiceContext, StorageContext, download_loader, SimpleDirectoryReader
6
  from llama_index.vector_stores import FaissVectorStore
7
  from llama_index.tools import QueryEngineTool, ToolMetadata
8
  from llama_index.query_engine import SubQuestionQueryEngine
9
  from llama_index.embeddings import OpenAIEmbedding
10
  from llama_index.schema import Document
11
  from llama_index.node_parser import UnstructuredElementNodeParser
12
+ from llama_index.llms import OpenAI
 
13
 
14
  import streamlit as st
15
  import os
16
  import faiss
17
  import time
 
18
 
19
 
20
  st.set_page_config(page_title="Yield Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
 
24
  Begin by uploading the case report in PDF format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
25
  """)
26
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
def get_model(model_name):
    """Return a chat LLM client for the requested provider.

    Args:
        model_name: Provider identifier; only "openai" is supported.

    Returns:
        A ``ChatOpenAI`` instance configured from the ``OPENAI_API_KEY``
        environment variable (set upstream from the Streamlit sidebar).

    Raises:
        ValueError: If ``model_name`` is not a supported provider.
    """
    # Read the key from the environment rather than a module global so the
    # value entered in the sidebar (exported via os.environ) is picked up.
    api_key = os.environ.get("OPENAI_API_KEY")
    if model_name == "openai":
        return ChatOpenAI(openai_api_key=api_key, model_name="gpt-3.5-turbo")
    # The original fell through to `return model` with `model` unbound,
    # raising an opaque NameError; fail loudly with a clear message instead.
    raise ValueError(f"Unsupported model name: {model_name!r}")
33
 
34
+ def get_vector_index(docs, vector_store):
35
+ print(docs)
 
36
  llm = get_model("openai")
37
  if vector_store == "faiss":
38
  d = 1536
 
42
  # embed_model = OpenAIEmbedding()
43
  # service_context = ServiceContext.from_defaults(embed_model=embed_model)
44
  service_context = ServiceContext.from_defaults(llm=llm)
45
+ index = VectorStoreIndex(docs,
46
  service_context=service_context,
47
  storage_context=storage_context
48
  )
49
  elif vector_store == "simple":
50
+ index = VectorStoreIndex.from_documents(docs)
51
 
52
 
53
  return index
 
79
  query_engine=engine,
80
  metadata=ToolMetadata(
81
  name="Alert Report",
82
+ description=f"Provides information about the alerts from alerts files uploaded.",
83
  ),
84
  ),
85
  ]
 
101
 
102
 
103
  if OPENAI_API_KEY:
104
+ files_uploaded = st.sidebar.file_uploader("Upload the case report in PDF format", type="pptx")
105
  st.sidebar.info("""
106
  Example pdf reports you can upload here:
107
  """)
108
 
109
  if st.sidebar.button("Process Document"):
110
  with st.spinner("Processing Document..."):
111
+
112
+ data_dir = "./data"
113
+ if not os.path.exists(data_dir):
114
+ os.makedirs(data_dir)
115
+
116
+ for file in files_uploaded:
117
+ print(f'file named {file.name}')
118
+ fname=f'{data_dir}/{file.name}'
119
+ with open(fname, 'wb') as f:
120
+ f.write(file.read())
121
+
122
def fmetadata(dummy: str):
    """Return blank metadata regardless of the file path given.

    Supplied as the ``file_metadata`` callback of ``SimpleDirectoryReader``
    so every loaded document carries an empty "file_path" entry.
    """
    metadata = {"file_path": ""}
    return metadata
123
+
124
+ PptxReader = download_loader("PptxReader")
125
+ loader = SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)
126
+
127
+ documents = loader.load_data()
128
+ for doc in documents:
129
+ doc.metadata["file_path"]=""
130
+
131
+ st.session_state.index = get_vector_index(documents, vector_store="faiss")
132
+ #st.session_state.index = get_vector_index(documents, vector_store="simple")
133
+ st.session_state.process_doc = True
134
  st.toast("Document Processsed!")
135
 
136
+ #st.session_state.process_doc = True
137
 
138
  if st.session_state.process_doc:
139
  search_text = st.text_input("Enter your question")
 
143
 
144
  st.write("Alert search result...")
145
  response = generate_insight(engine, search_text)
146
+ st.write(response)
147
+ #st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
148
 
149
  st.toast("Report Analysis Complete!")
150
 
151
+ #if st.session_state.end_time:
152
+ # st.write("Report Analysis Time: ", st.session_state.end_time, "s")
153
 
requirements.txt CHANGED
@@ -1,8 +1,10 @@
1
  llama-index
2
  llama_hub
 
3
  transformers
4
  accelerate
5
  openai
6
  pypdf
7
  streamlit
8
- chromadb
 
 
1
  llama-index
2
  llama_hub
3
+ langchain
4
  transformers
5
  accelerate
6
  openai
7
  pypdf
8
  streamlit
9
+ chromadb
10
+ faiss-cpu