llamazookeeper commited on
Commit
e2dccc5
1 Parent(s): f7e5eb3
Files changed (2) hide show
  1. pages/Tesla_Alerts.py +143 -0
  2. pages/tc1.ppy +129 -0
pages/Tesla_Alerts.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import PromptTemplate
2
+ from langchain.output_parsers import PydanticOutputParser
3
+
4
+ from llama_index import VectorStoreIndex, ServiceContext, StorageContext
5
+ from llama_index.vector_stores import FaissVectorStore
6
+ from llama_index.tools import QueryEngineTool, ToolMetadata
7
+ from llama_index.query_engine import SubQuestionQueryEngine
8
+ from llama_index.embeddings import OpenAIEmbedding
9
+ from llama_index.schema import Document
10
+ from llama_index.node_parser import UnstructuredElementNodeParser
11
+
12
+ from src.utils import get_model, process_pdf2
13
+
14
+ import streamlit as st
15
+ import os
16
+ import faiss
17
+ import time
18
+ from pypdf import PdfReader
19
+
20
+
21
+ st.set_page_config(page_title="Yield Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
22
+
23
+ st.title(":card_index_dividers: Yield Case Analyzer")
24
+ st.info("""
25
+ Begin by uploading the case report in PDF format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
26
+ """)
27
+
28
+ def process_pdf(pdf):
29
+ file = PdfReader(pdf)
30
+ print("in process pdf")
31
+ document_list = []
32
+ for page in file.pages:
33
+ document_list.append(Document(text=str(page.extract_text())))
34
+ print("in process pdf 1")
35
+
36
+ node_paser = UnstructuredElementNodeParser()
37
+ print("in process pdf 1")
38
+
39
+ nodes = node_paser.get_nodes_from_documents(document_list, show_progress=True)
40
+
41
+ return nodes
42
+
43
+
44
+ def get_vector_index(nodes, vector_store):
45
+ print(nodes)
46
+ llm = get_model("openai")
47
+ if vector_store == "faiss":
48
+ d = 1536
49
+ faiss_index = faiss.IndexFlatL2(d)
50
+ vector_store = FaissVectorStore(faiss_index=faiss_index)
51
+ storage_context = StorageContext.from_defaults(vector_store=vector_store)
52
+ # embed_model = OpenAIEmbedding()
53
+ # service_context = ServiceContext.from_defaults(embed_model=embed_model)
54
+ service_context = ServiceContext.from_defaults(llm=llm)
55
+ index = VectorStoreIndex(nodes,
56
+ service_context=service_context,
57
+ storage_context=storage_context
58
+ )
59
+ elif vector_store == "simple":
60
+ index = VectorStoreIndex.from_documents(nodes)
61
+
62
+
63
+ return index
64
+
65
+
66
+
67
+ def generate_insight(engine, search_string):
68
+
69
+ with open("prompts/main.prompt", "r") as f:
70
+ template = f.read()
71
+
72
+ prompt_template = PromptTemplate(
73
+ template=template,
74
+ input_variables=['search_string']
75
+ )
76
+
77
+ formatted_input = prompt_template.format(search_string=search_string)
78
+ print(formatted_input)
79
+ response = engine.query(formatted_input)
80
+ return response.response
81
+
82
+
83
+ def get_query_engine(engine):
84
+ llm = get_model("openai")
85
+ service_context = ServiceContext.from_defaults(llm=llm)
86
+
87
+ query_engine_tools = [
88
+ QueryEngineTool(
89
+ query_engine=engine,
90
+ metadata=ToolMetadata(
91
+ name="Alert Report",
92
+ description=f"Provides information about the cases from its case report.",
93
+ ),
94
+ ),
95
+ ]
96
+
97
+
98
+ s_engine = SubQuestionQueryEngine.from_defaults(
99
+ query_engine_tools=query_engine_tools,
100
+ service_context=service_context
101
+ )
102
+ return s_engine
103
+
104
+
105
+ if "process_doc" not in st.session_state:
106
+ st.session_state.process_doc = False
107
+
108
+
109
+ OPENAI_API_KEY = "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"
110
+ os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
111
+
112
+
113
+ if OPENAI_API_KEY:
114
+ pptx_files = st.sidebar.file_uploader("Upload the case report in PDF format", type="pptx")
115
+ st.sidebar.info("""
116
+ Example pdf reports you can upload here:
117
+ """)
118
+
119
+ if st.sidebar.button("Process Document"):
120
+ with st.spinner("Processing Document..."):
121
+ nodes = process_pptx(pptx_files)
122
+ st.session_state.index = get_vector_index(nodes, vector_store="faiss")
123
+ #st.session_state.index = get_vector_index(nodes, vector_store="simple")
124
+ st.session_state.process_doc = True
125
+ st.toast("Document Processsed!")
126
+
127
+ st.session_state.process_doc = True
128
+
129
+ if st.session_state.process_doc:
130
+ search_text = st.text_input("Enter your question")
131
+ if st.button("Submit"):
132
+ engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
133
+ start_time = time.time()
134
+
135
+ st.write("Alert search result...")
136
+ response = generate_insight(engine, search_text)
137
+ st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
138
+
139
+ st.toast("Report Analysis Complete!")
140
+
141
+ if st.session_state.end_time:
142
+ st.write("Report Analysis Time: ", st.session_state.end_time, "s")
143
+
pages/tc1.ppy ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
4
+ from llama_index import download_loader, SimpleDirectoryReader, StorageContext, load_index_from_storage
5
+ from llama_index.llms import HuggingFaceLLM
6
+ from llama_index.embeddings import HuggingFaceEmbedding
7
+
8
+ from IPython.display import Markdown, display
9
+
10
+ import chromadb
11
+
12
+ import streamlit as st
13
+ import time
14
+ from pypdf import PdfReader
15
+
16
+ from pathlib import Path
17
+
18
+ import os
19
+
20
+ import torch
21
+ #torch.set_default_device('cuda')
22
+
23
+
24
+ st.set_page_config(page_title="Tesla Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
25
+
26
+ st.title(":card_index_dividers: Tesla Case Analyzer")
27
+ st.info("""
28
+ Begin by uploading the case report in pptx format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
29
+ """)
30
+
31
+ if "process_doc" not in st.session_state:
32
+ st.session_state.process_doc = False
33
+
34
+
35
+
36
+ llm = HuggingFaceLLM(
37
+ context_window=8000,
38
+ max_new_tokens=256,
39
+ generate_kwargs={"temperature": 0.1, "do_sample": True},
40
+ system_prompt=system_prompt,
41
+ query_wrapper_prompt=query_wrapper_prompt,
42
+ tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
43
+ model_name="mistralai/Mistral-7B-Instruct-v0.1",
44
+ device_map="auto",
45
+ tokenizer_kwargs={"max_length": 8000},
46
+ model_kwargs={"torch_dtype": torch.float16}
47
+ )
48
+
49
+ embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base")
50
+
51
+ service_context = ServiceContext.from_defaults(
52
+ chunk_size=1024,
53
+ llm=llm,
54
+ embed_model=embed_model
55
+ )
56
+
57
+
58
+ files_uploaded = st.sidebar.file_uploader("Upload the case report in pptx format", type="pptx",accept_multiple_files=True)
59
+ st.sidebar.info("""
60
+ Example pdf reports you can upload here:
61
+ """)
62
+
63
+ if st.sidebar.button("Process Document"):
64
+ with st.spinner("Processing Document..."):
65
+
66
+ data_dir = './data'
67
+ if not os.path.exists(data_dir):
68
+ os.makedirs(data_dir)
69
+
70
+ for pdf in files_uploaded:
71
+ print(f'file named {pdf.name}')
72
+ fname=f'{data_dir}/{pdf.name}'
73
+ with open(fname, 'wb') as f:
74
+ f.write(pdf.read())
75
+
76
+
77
+ def fmetadata(dummy: str): return {"file_path": ""}
78
+
79
+ PptxReader = download_loader("PptxReader")
80
+ loader = SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)
81
+
82
+ documents = loader.load_data()
83
+ for doc in documents:
84
+ doc.metadata["file_path"]=""
85
+
86
+ print('stored')
87
+
88
+ st.session_state.process_doc = True
89
+
90
+ st.toast("Document Processsed!")
91
+
92
+ #st.session_state.process_doc = True
93
+
94
+ OPENAI_API_KEY = "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"
95
+ os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
96
+
97
+
98
+ if OPENAI_API_KEY:
99
+ pdfs = st.sidebar.file_uploader("Upload the case report in PDF format", type="pdf")
100
+ st.sidebar.info("""
101
+ Example pdf reports you can upload here:
102
+ """)
103
+
104
+ if st.sidebar.button("Process Document"):
105
+ with st.spinner("Processing Document..."):
106
+ nodes = process_pdf(pdfs)
107
+ #st.session_state.index = get_vector_index(nodes, vector_store="faiss")
108
+ st.session_state.index = get_vector_index(nodes, vector_store="simple")
109
+ st.session_state.process_doc = True
110
+ st.toast("Document Processsed!")
111
+
112
+ #st.session_state.process_doc = True
113
+
114
+ if st.session_state.process_doc:
115
+ search_text = st.text_input("Enter your question")
116
+ if st.button("Submit"):
117
+ engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
118
+ start_time = time.time()
119
+
120
+ with st.status("**Analyzing Report...**"):
121
+ st.write("Case search result...")
122
+ response = generate_insight(engine, search_text)
123
+ st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
124
+
125
+ st.toast("Report Analysis Complete!")
126
+
127
+ if st.session_state.end_time:
128
+ st.write("Report Analysis Time: ", st.session_state.end_time, "s")
129
+