llamazookeeper committed on
Commit
f64588d
1 Parent(s): 5f9b358
data/TeslaTroubleshootingCase1_w009.pptx ADDED
Binary file (36.3 kB).
 
data/TeslaTroubleshootingCase2_w048.pptx ADDED
Binary file (36.4 kB).
 
data/TeslaTroubleshootingCase3_w027.pptx ADDED
Binary file (36.7 kB).
 
data/TeslaTroubleshootingCase4_w218.pptx ADDED
Binary file (36.4 kB).
 
data/TeslaTroubleshootingCase4_w218_follow_up.pptx ADDED
Binary file (36.2 kB).
 
data/TeslaTroubleshootingCase4_w221.pptx ADDED
Binary file (36.4 kB).
 
pages/Tesla_Alerts.py ADDED
@@ -0,0 +1,153 @@
+ import os
+ import sys
+ import time
+
+ import torch
+ import chromadb
+ import streamlit as st
+
+ from llama_index import SimpleDirectoryReader, ServiceContext, StorageContext, VectorStoreIndex, download_loader, load_index_from_storage
+ from llama_index.llms import HuggingFaceLLM
+ from llama_index.embeddings import HuggingFaceEmbedding
+ from llama_index.vector_stores import ChromaVectorStore
+ from llama_index.storage.index_store import SimpleIndexStore
+ from llama_index.indices.query.schema import QueryBundle, QueryType
+
+ st.set_page_config(page_title="Tesla Alert Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
+
+ st.title(":card_index_dividers: Tesla Alert Analyzer")
+ st.info("""
+ Begin by uploading a case report in .pptx format, then click 'Process Document'. Once the document has been processed, enter a question and click Submit; the system will answer it from the report.
+ """)
+
+ if "process_doc" not in st.session_state:
+     st.session_state.process_doc = False
+
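+ # Helpers: load .pptx decks from a directory and blank out the file_path
+ # metadata so local upload paths are not embedded alongside the slide text.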
+ def fmetadata(dummy: str): return {"file_path": ""}
+
+ def load_files(file_dir):
+     PptxReader = download_loader("PptxReader")
+     loader = SimpleDirectoryReader(input_dir=file_dir, file_extractor={".pptx": PptxReader()}, file_metadata=fmetadata)
+     documents = loader.load_data()
+
+     for doc in documents:
+         doc.metadata["file_path"] = ""
+
+     return documents
+
+ system_prompt = "You are a Q&A assistant. "
+ system_prompt += "Your goal is to answer questions as accurately as possible based on the instructions and context provided. "
+ system_prompt += "Please say you do not know if you cannot find the answer."
+
+ # This will wrap the default prompts that are internal to llama-index
+ query_wrapper_prompt = "<|USER|>{query_str}<|ASSISTANT|>"
+
+ #torch.set_default_device('cuda')
+
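+ # Cache the LLM and embedding model across Streamlit reruns; loading Mistral-7B
+ # in float16 roughly halves its memory footprint versus float32.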
+ @st.cache_resource
+ def llm_loading():
+     print("before huggingfacellm")
+     llm = HuggingFaceLLM(
+         context_window=8000,
+         max_new_tokens=500,
+         generate_kwargs={"temperature": 0.1, "do_sample": True},
+         system_prompt=system_prompt,
+         query_wrapper_prompt=query_wrapper_prompt,
+         tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
+         model_name="mistralai/Mistral-7B-Instruct-v0.1",
+         device_map="auto",
+         tokenizer_kwargs={"max_length": 8000},
+         model_kwargs={"torch_dtype": torch.float16}
+     )
+     print("after huggingfacellm")
+
+     embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base")
+     print("after embed_model")
+     return llm, embed_model
+
+ llm, embed_model = llm_loading()
+
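+ # Sidebar: collect one or more .pptx case reports and build the index on demand.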
+ files_uploaded = st.sidebar.file_uploader("Upload the case report in PPTX format", type="pptx", accept_multiple_files=True)
+
+ st.sidebar.info("""
+ Example .pptx reports are included in this repo's data folder.
+ """)
+
+ if st.sidebar.button("Process Document"):
+     with st.spinner("Processing Document..."):
+         data_dir = "data"
+         if not os.path.exists(data_dir):
+             os.makedirs(data_dir)
+
+         for uploaded_file in files_uploaded:
+             print(f'file named {uploaded_file.name}')
+             fname = f'{data_dir}/{uploaded_file.name}'
+             with open(fname, 'wb') as f:
+                 f.write(uploaded_file.read())
+
+         documents = load_files(data_dir)
+
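+         # Embed the slides into a persistent local Chroma collection so the
+         # index can be reloaded later without reprocessing the documents.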
+         collection_name = "tesla_report"
+         chroma_client = chromadb.PersistentClient()
+         chroma_collection = chroma_client.get_or_create_collection(collection_name)
+         vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+         storage_context = StorageContext.from_defaults(vector_store=vector_store)
+         service_context = ServiceContext.from_defaults(
+             chunk_size=8000,
+             llm=llm,
+             embed_model=embed_model
+         )
+         index = VectorStoreIndex.from_documents(documents, service_context=service_context, storage_context=storage_context)
+         index.storage_context.persist()
+
+         #chroma_collection.peek()
+         #st.session_state.index = index
+         st.session_state.process_doc = True
+
+     st.toast("Document Processed!")
+
+ def clear_form():
+     st.session_state.query_text = st.session_state["question"]
+     st.session_state["question"] = ""
+     st.session_state["response"] = ""
+
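+ # The leading underscores on _llm and _embed_model tell st.cache_resource not
+ # to hash those arguments when caching the reloaded index.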
+ @st.cache_resource
+ def reload_index(_llm, _embed_model, col):
+     chroma_client = chromadb.PersistentClient()
+     chroma_collection = chroma_client.get_or_create_collection(col)
+     vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+     service_context = ServiceContext.from_defaults(llm=_llm, embed_model=_embed_model)
+     load_index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)
+     return load_index
+
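+ # Q&A flow: reload the persisted index and answer the question with a query engine.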
+ if st.session_state.process_doc:
+     # Example question: alert numbers look like APP_wnnn where nnn is a number.
+     # "Please list out all the alerts uploaded in these files!"
+     search_text = st.text_input("Enter your question", key='question')
+     if st.button(label="Submit", on_click=clear_form):
+         index = reload_index(llm, embed_model, "tesla_report")
+         query_engine = index.as_query_engine()
+         start_time = time.time()
+         #qry = QueryBundle(search_text)
+         st.write("Processing...")
+         search_text = st.session_state.query_text
+         print(search_text)
+         response = query_engine.query(search_text)
+         st.write(response.response)
+         #st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
+
+         st.toast("Report Analysis Complete!")