llamazookeeper committed
Commit: 4595f47
Parent: 7afc766
README.md CHANGED
@@ -11,3 +11,5 @@ license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+This project is used in the Hugging Face Space "teslaalerts".
data/TeslaTroubleshootingCase1_w009.pptx ADDED
Binary file (36.3 kB)

data/TeslaTroubleshootingCase2_w048.pptx ADDED
Binary file (36.4 kB)

data/TeslaTroubleshootingCase3_w027.pptx ADDED
Binary file (36.7 kB)

data/TeslaTroubleshootingCase4_w218.pptx ADDED
Binary file (36.4 kB)

data/TeslaTroubleshootingCase4_w218_follow_up.pptx ADDED
Binary file (36.2 kB)

data/TeslaTroubleshootingCase4_w221.pptx ADDED
Binary file (36.4 kB)
 
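These slide decks are the corpus the Space ingests. As a quick local sanity check, here is a minimal sketch that reads them with the same PptxReader-based loader the page below uses (assuming the legacy llama-index `download_loader` API that this commit itself imports):

```python
from llama_index import SimpleDirectoryReader, download_loader

# Same loader combination as pages/Tesla_Alerts.py: PptxReader handles the
# slide decks, wired into SimpleDirectoryReader via file_extractor.
PptxReader = download_loader("PptxReader")
loader = SimpleDirectoryReader(
    input_dir="data",
    file_extractor={".pptx": PptxReader()},
)
documents = loader.load_data()
print(f"loaded {len(documents)} document(s) from the pptx decks")
```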
pages/Tesla_Alerts.py CHANGED
@@ -1,153 +1,153 @@
-from langchain.prompts import PromptTemplate
-from langchain.output_parsers import PydanticOutputParser
-from langchain.chat_models import ChatOpenAI
-
-from llama_index import VectorStoreIndex, ServiceContext, StorageContext, download_loader, SimpleDirectoryReader
-from llama_index.vector_stores import FaissVectorStore
-from llama_index.tools import QueryEngineTool, ToolMetadata
-from llama_index.query_engine import SubQuestionQueryEngine
-from llama_index.embeddings import OpenAIEmbedding
-from llama_index.schema import Document
-from llama_index.node_parser import UnstructuredElementNodeParser
-from llama_index.llms import OpenAI
-
-import streamlit as st
-import os
-import faiss
-import time
-
-
-st.set_page_config(page_title="Yield Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
-
-st.title(":card_index_dividers: Yield Case Analyzer")
-st.info("""
-Begin by uploading the case report in PDF format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
-""")
-
-
-def get_model(model_name):
-    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
-    if model_name == "openai":
-        model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo")
-    return model
-
-def get_vector_index(docs, vector_store):
-    print(docs)
-    llm = get_model("openai")
-    if vector_store == "faiss":
-        d = 1536
-        faiss_index = faiss.IndexFlatL2(d)
-        vector_store = FaissVectorStore(faiss_index=faiss_index)
-        storage_context = StorageContext.from_defaults(vector_store=vector_store)
-        # embed_model = OpenAIEmbedding()
-        # service_context = ServiceContext.from_defaults(embed_model=embed_model)
-        service_context = ServiceContext.from_defaults(llm=llm)
-        index = VectorStoreIndex(docs,
-                                 service_context=service_context,
-                                 storage_context=storage_context
-                                 )
-    elif vector_store == "simple":
-        index = VectorStoreIndex.from_documents(docs)
-
-    return index
-
-
-def generate_insight(engine, search_string):
-
-    with open("prompts/main.prompt", "r") as f:
-        template = f.read()
-
-    prompt_template = PromptTemplate(
-        template=template,
-        input_variables=['search_string']
-    )
-
-    formatted_input = prompt_template.format(search_string=search_string)
-    print(formatted_input)
-    response = engine.query(formatted_input)
-    return response.response
-
-
-def get_query_engine(engine):
-    llm = get_model("openai")
-    service_context = ServiceContext.from_defaults(llm=llm)
-
-    query_engine_tools = [
-        QueryEngineTool(
-            query_engine=engine,
-            metadata=ToolMetadata(
-                name="Alert Report",
-                description=f"Provides information about the alerts from alerts files uploaded.",
-            ),
-        ),
-    ]
-
-    s_engine = SubQuestionQueryEngine.from_defaults(
-        query_engine_tools=query_engine_tools,
-        service_context=service_context
-    )
-    return s_engine
-
-
-if "process_doc" not in st.session_state:
-    st.session_state.process_doc = False
-
-
-OPENAI_API_KEY = "sk-[REDACTED]"
-os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
-
-
-if OPENAI_API_KEY:
-    files_uploaded = st.sidebar.file_uploader("Upload the case report in PDF format", type="pptx")
-    st.sidebar.info("""
-    Example pdf reports you can upload here:
-    """)
-
-    if st.sidebar.button("Process Document"):
-        with st.spinner("Processing Document..."):
-
-            data_dir = "./data"
-            if not os.path.exists(data_dir):
-                os.makedirs(data_dir)
-
-            for file in files_uploaded:
-                print(f'file named {file.name}')
-                fname = f'{data_dir}/{file.name}'
-                with open(fname, 'wb') as f:
-                    f.write(file.read())
-
-            def fmetadata(dummy: str): return {"file_path": ""}
-
-            PptxReader = download_loader("PptxReader")
-            loader = SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)
-
-            documents = loader.load_data()
-            for doc in documents:
-                doc.metadata["file_path"] = ""
-
-            st.session_state.index = get_vector_index(documents, vector_store="faiss")
-            #st.session_state.index = get_vector_index(documents, vector_store="simple")
-            st.session_state.process_doc = True
-            st.toast("Document Processsed!")
-
-    #st.session_state.process_doc = True
-
-    if st.session_state.process_doc:
-        search_text = st.text_input("Enter your question")
-        if st.button("Submit"):
-            engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
-            start_time = time.time()
-
-            st.write("Alert search result...")
-            response = generate_insight(engine, search_text)
-            st.write(response)
-            #st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
-
-            st.toast("Report Analysis Complete!")
-
-    #if st.session_state.end_time:
-    #    st.write("Report Analysis Time: ", st.session_state.end_time, "s")
+import os
+import sys
+
+from llama_index import SimpleDirectoryReader, ServiceContext, StorageContext, VectorStoreIndex, download_loader, load_index_from_storage
+from llama_index.llms import HuggingFaceLLM
+from llama_index.embeddings import HuggingFaceEmbedding
+from llama_index.vector_stores import ChromaVectorStore
+from llama_index.storage.index_store import SimpleIndexStore
+from llama_index.indices.query.schema import QueryBundle, QueryType
+import chromadb
+import streamlit as st
+import time
+
+
+st.set_page_config(page_title="Tesla Alert Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")
+
+st.title(":card_index_dividers: Tesla Alert Analyzer")
+st.info("""
+Begin by uploading the case report in pptx format, then click 'Process Document'. Once the document has been processed, enter a question and click Submit; the system will answer it.
+""")
+
+
+if "process_doc" not in st.session_state:
+    st.session_state.process_doc = False
+
+
+def fmetadata(dummy: str): return {"file_path": ""}
+
+def load_files(file_dir):
+    # Read every .pptx in file_dir with the PptxReader loader and blank out
+    # the file_path metadata so it does not leak into the prompt context.
+    PptxReader = download_loader("PptxReader")
+    loader = SimpleDirectoryReader(input_dir=file_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)
+    documents = loader.load_data()
+
+    for doc in documents:
+        doc.metadata["file_path"] = ""
+
+    return documents
+
+
+system_prompt = "You are a Q&A assistant. "
+system_prompt += "Your goal is to answer questions as accurately as possible based on the instructions and context provided. "
+system_prompt += "Please say you do not know if you do not find the answer."
+
+# This will wrap the default prompts that are internal to llama-index
+query_wrapper_prompt = "<|USER|>{query_str}<|ASSISTANT|>"
+
+
+import torch
+#torch.set_default_device('cuda')
+
+@st.cache_resource
+def llm_loading():
+    # Load Mistral-7B-Instruct and the GTE embedding model once per process;
+    # st.cache_resource keeps them alive across Streamlit reruns.
+    print("before huggingfacellm")
+    llm = HuggingFaceLLM(
+        context_window=8000,
+        max_new_tokens=500,
+        generate_kwargs={"temperature": 0.1, "do_sample": True},
+        system_prompt=system_prompt,
+        query_wrapper_prompt=query_wrapper_prompt,
+        tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
+        model_name="mistralai/Mistral-7B-Instruct-v0.1",
+        device_map="auto",
+        tokenizer_kwargs={"max_length": 8000},
+        model_kwargs={"torch_dtype": torch.float16}
+    )
+
+    print("after huggingfacellm")
+    embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base")
+    print("after embed_model")
+    return llm, embed_model
+
+llm, embed_model = llm_loading()
+
+files_uploaded = st.sidebar.file_uploader("Upload the case report in PPT format", type="pptx", accept_multiple_files=True)
+
+st.sidebar.info("""
+Example pptx reports you can upload here:
+""")
+
+if st.sidebar.button("Process Document"):
+
+    with st.spinner("Processing Document..."):
+
+        data_dir = "data"
+        if not os.path.exists(data_dir):
+            os.makedirs(data_dir)
+
+        for uploaded_file in files_uploaded:
+            print(f'file named {uploaded_file.name}')
+            fname = f'{data_dir}/{uploaded_file.name}'
+            with open(fname, 'wb') as f:
+                f.write(uploaded_file.read())
+
+        documents = load_files(data_dir)
+
+        # Embed the documents and persist them into a local Chroma collection
+        # so the index can be re-attached on later reruns.
+        collection_name = "tesla_report"
+        chroma_client = chromadb.PersistentClient()
+        chroma_collection = chroma_client.get_or_create_collection(collection_name)
+        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+        storage_context = StorageContext.from_defaults(vector_store=vector_store)
+        service_context = ServiceContext.from_defaults(
+            chunk_size=8000,
+            llm=llm,
+            embed_model=embed_model
+        )
+        index = VectorStoreIndex.from_documents(documents, service_context=service_context, storage_context=storage_context)
+        index.storage_context.persist()
+
+        #chroma_collection.peek()
+
+        #st.session_state.index = index
+        st.session_state.process_doc = True
+
+        st.toast("Document Processed!")
+
+    #st.session_state.process_doc = True
+
+def clear_form():
+    # Stash the typed question and clear the widgets before the rerun.
+    st.session_state.query_text = st.session_state["question"]
+    st.session_state["question"] = ""
+    st.session_state["response"] = ""
+
+@st.cache_resource
+def reload_index(_llm, _embed_model, col):
+    # Re-open the persisted Chroma collection and build an index view over it;
+    # the leading underscores tell st.cache_resource not to hash the models.
+    chroma_client = chromadb.PersistentClient()
+    chroma_collection = chroma_client.get_or_create_collection(col)
+    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
+    service_context = ServiceContext.from_defaults(llm=_llm, embed_model=_embed_model)
+    load_index = VectorStoreIndex.from_vector_store(service_context=service_context,
+                                                    vector_store=vector_store)
+    return load_index
+
+if st.session_state.process_doc:
+    # Alert numbers look like APP_wnnn; e.g. ask "Please list out all the alerts uploaded in these files!"
+    search_text = st.text_input("Enter your question", key='question')
+    if st.button(label="Submit", on_click=clear_form):
+        index = reload_index(llm, embed_model, "tesla_report")
+        query_engine = index.as_query_engine()
+        start_time = time.time()
+        #qry = QueryBundle(search_text)
+        st.write("Processing....")
+        search_text = st.session_state.query_text
+        print(search_text)
+        response = query_engine.query(search_text)
+        st.write(response.response)
+        #st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))
+
+        st.toast("Report Analysis Complete!")
 
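The substantive change in this file is architectural: instead of building a FAISS index held in Streamlit session state, the page now embeds locally with Mistral-7B-Instruct plus a GTE embedding model, persists the vectors into a Chroma collection, and re-attaches to that collection on later reruns. A condensed sketch of that round trip, assuming the same legacy llama-index and chromadb APIs imported above ("tesla_report" is the collection name the page uses):

```python
import chromadb
from llama_index import ServiceContext, StorageContext, VectorStoreIndex
from llama_index.vector_stores import ChromaVectorStore

def build_or_attach(documents, llm, embed_model, col="tesla_report"):
    # Open (or create) the persisted Chroma collection and wrap it for llama-index.
    chroma_collection = chromadb.PersistentClient().get_or_create_collection(col)
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
    if documents:
        # First run: chunk, embed, and persist the documents.
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        return VectorStoreIndex.from_documents(
            documents, service_context=service_context, storage_context=storage_context
        )
    # Later runs: build an index view over the stored embeddings;
    # nothing is re-embedded here.
    return VectorStoreIndex.from_vector_store(
        vector_store=vector_store, service_context=service_context
    )
```

The payoff of this design is that `from_vector_store` only re-attaches to existing embeddings, so a Streamlit rerun is cheap compared with re-processing the decks.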
prompts/main.prompt CHANGED
@@ -1,3 +1,3 @@
 You are a Tesla alert analyzer.
 Your job is to provide a detailed analysis of the alert and follow-up.
-
+Question: {search_string}
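The prompt file now ends with a `{search_string}` placeholder. The pre-commit page code filled it with LangChain's `PromptTemplate`; for a single placeholder, plain `str.format` is equivalent (the question string below is a made-up example):

```python
# Fill the {search_string} placeholder in prompts/main.prompt.
with open("prompts/main.prompt", "r") as f:
    template = f.read()

question = "Please analyze alert APP_w009 and its follow-up."  # hypothetical question
formatted_input = template.format(search_string=question)
print(formatted_input)
```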
requirements.txt CHANGED
@@ -5,3 +5,4 @@ transformers
 accelerate
 openai
 streamlit
+faiss
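`faiss` backed the `IndexFlatL2` that the previous version of the page built (d=1536 matches OpenAI ada-002 embeddings). Note that the official wheels are normally published as `faiss-cpu` or `faiss-gpu`, so the bare `faiss` pin may need adjusting for the target environment. A minimal sketch of that flat L2 index:

```python
import faiss
import numpy as np

d = 1536                      # embedding width used by the old page code
index = faiss.IndexFlatL2(d)  # exact L2 search, no training step required
index.add(np.random.rand(10, d).astype("float32"))  # toy corpus vectors
distances, ids = index.search(np.random.rand(1, d).astype("float32"), 3)
print(ids)  # indices of the 3 nearest stored vectors
```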