# NOTE(review): the lines that stood here were page-scrape artifacts
# ("Spaces:", "Build error" x2, a file-size line, a commit hash, and a
# line-number gutter) — not part of the program. Converted to this comment
# so the file parses as Python.
import os
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index import download_loader, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.llms import HuggingFaceLLM
from llama_index.embeddings import HuggingFaceEmbedding
from IPython.display import Markdown, display
import chromadb
import streamlit as st
import time
from pypdf import PdfReader
from pathlib import Path
import os
import torch
#torch.set_default_device('cuda')
# --- Page setup -------------------------------------------------------------
# st.set_page_config must be the first Streamlit call on the page.
st.set_page_config(
    page_title="Tesla Case Analyzer",
    page_icon=":card_index_dividers:",
    initial_sidebar_state="expanded",
    layout="wide",
)
st.title(":card_index_dividers: Tesla Case Analyzer")
# Fixed grammar of the user-facing instructions (original read
# "Once the document has been processed. You can enter question...").
st.info("""
Begin by uploading the case report in pptx format, then click 'Process Document'.
Once the document has been processed, you can enter a question and click Send;
the system will answer your question.
""")

# Flag gating the Q&A section further down; set True after ingestion succeeds.
if "process_doc" not in st.session_state:
    st.session_state.process_doc = False
# BUG FIX: system_prompt and query_wrapper_prompt were referenced below but
# never defined anywhere in this script, causing a NameError at startup.
# Define them here. Mistral-Instruct expects the [INST] ... [/INST] wrapper
# around the user query — TODO confirm this matches the deployed model card.
system_prompt = (
    "You are a helpful assistant that answers questions about the uploaded "
    "case report. Answer only from the provided context; if the answer is "
    "not in the context, say that you do not know."
)
query_wrapper_prompt = "<s>[INST] {query_str} [/INST]"

# Local Mistral-7B-Instruct via HuggingFace; fp16 halves GPU memory use.
llm = HuggingFaceLLM(
    context_window=8000,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1, "do_sample": True},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",
    tokenizer_kwargs={"max_length": 8000},  # cap inputs to the context window
    model_kwargs={"torch_dtype": torch.float16},
)

# Embedding model used for retrieval.
embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base")

# Bundle LLM + embedder + chunking policy for index build/query.
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model,
)
# Sidebar widget: accepts one or more .pptx case reports from the user.
files_uploaded = st.sidebar.file_uploader(
    "Upload the case report in pptx format",
    type="pptx",
    accept_multiple_files=True,
)
st.sidebar.info("""
Example pdf reports you can upload here:
""")
# Sidebar action: persist uploads to ./data, parse them, and build the index.
# `key=` avoids a Streamlit DuplicateWidgetID clash with the identically
# labelled "Process Document" button in the PDF branch below.
if st.sidebar.button("Process Document", key="process_pptx"):
    with st.spinner("Processing Document..."):
        data_dir = './data'
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)

        # SimpleDirectoryReader reads from disk, so write each upload out.
        for pdf in files_uploaded:
            print(f'file named {pdf.name}')
            fname = f'{data_dir}/{pdf.name}'
            with open(fname, 'wb') as f:
                f.write(pdf.read())

        # Blank out file_path metadata so local paths don't leak into the
        # index / prompts (also scrubbed again on each document below).
        def fmetadata(dummy: str):
            return {"file_path": ""}

        PptxReader = download_loader("PptxReader")
        loader = SimpleDirectoryReader(
            input_dir=data_dir,
            file_extractor={".pptx": PptxReader()},
            file_metadata=fmetadata,
        )
        documents = loader.load_data()
        for doc in documents:
            doc.metadata["file_path"] = ""
        print('stored')

        # BUG FIX: the original never built an index in this branch, so the
        # Q&A section below crashed reading st.session_state.index. Build it
        # with the service_context configured above.
        st.session_state.index = VectorStoreIndex.from_documents(
            documents, service_context=service_context
        )

        st.session_state.process_doc = True
        st.toast("Document Processed!")  # typo fix ("Processsed")
# --- PDF branch (OpenAI-backed) ---------------------------------------------
# SECURITY FIX: a live OpenAI API key was hard-coded here (a leaked secret
# that must be rotated). Read it from the environment instead; the branch is
# simply skipped when no key is configured.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
    pdfs = st.sidebar.file_uploader("Upload the case report in PDF format", type="pdf")
    st.sidebar.info("""
Example pdf reports you can upload here:
""")
    # Distinct `key=` avoids a DuplicateWidgetID clash with the pptx branch's
    # identically labelled button above.
    if st.sidebar.button("Process Document", key="process_pdf"):
        with st.spinner("Processing Document..."):
            # NOTE(review): process_pdf / get_vector_index are not defined in
            # this file chunk — presumably imported elsewhere; verify.
            nodes = process_pdf(pdfs)
            #st.session_state.index = get_vector_index(nodes, vector_store="faiss")
            st.session_state.index = get_vector_index(nodes, vector_store="simple")
            st.session_state.process_doc = True
            st.toast("Document Processed!")  # typo fix ("Processsed")
# --- Q&A section: shown only after a document has been ingested --------------
if st.session_state.process_doc:
    search_text = st.text_input("Enter your question")
    if st.button("Submit"):
        # NOTE(review): get_query_engine / generate_insight are not defined in
        # this file chunk — presumably imported elsewhere; verify.
        engine = get_query_engine(
            st.session_state.index.as_query_engine(similarity_top_k=3)
        )
        start_time = time.time()
        with st.status("**Analyzing Report...**"):
            st.write("Case search result...")
            response = generate_insight(engine, search_text)
            # Elapsed wall-clock time, formatted to 2 decimals for display.
            st.session_state["end_time"] = "{:.2f}".format(time.time() - start_time)
        st.toast("Report Analysis Complete!")

    # BUG FIX: attribute access (st.session_state.end_time) raised
    # AttributeError on the first render, before any Submit click had set it;
    # .get() returns None safely instead.
    if st.session_state.get("end_time"):
        st.write("Report Analysis Time: ", st.session_state.end_time, "s")
# (stray "|" extraction artifact removed — end of file)