File size: 4,166 Bytes
e2dccc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index import download_loader, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.llms import HuggingFaceLLM
from llama_index.embeddings import HuggingFaceEmbedding

from IPython.display import Markdown, display

import chromadb

import streamlit as st
import time
from pypdf import PdfReader

from pathlib import Path

import os

import torch
#torch.set_default_device('cuda')


st.set_page_config(page_title="Tesla Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")

st.title(":card_index_dividers: Tesla Case Analyzer")
st.info("""
Begin by uploading the case report in pptx format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
""")

if "process_doc" not in st.session_state:
        st.session_state.process_doc = False



llm = HuggingFaceLLM(
    context_window=8000,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1, "do_sample": True},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
    model_name="mistralai/Mistral-7B-Instruct-v0.1",
    device_map="auto",
    tokenizer_kwargs={"max_length": 8000},
    model_kwargs={"torch_dtype": torch.float16}
)

embed_model =  HuggingFaceEmbedding(model_name="thenlper/gte-base")

service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model
)


files_uploaded = st.sidebar.file_uploader("Upload the case report in pptx format", type="pptx",accept_multiple_files=True)
st.sidebar.info("""
Example pdf reports you can upload here:
""")

if st.sidebar.button("Process Document"):
    with st.spinner("Processing Document..."):

        data_dir = './data'
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)

        for pdf in files_uploaded:
            print(f'file named {pdf.name}')
            fname=f'{data_dir}/{pdf.name}'
            with open(fname, 'wb') as f:
                f.write(pdf.read())


        def fmetadata(dummy: str): return {"file_path": ""}

        PptxReader = download_loader("PptxReader")
        loader =  SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)

        documents = loader.load_data()
        for doc in documents:
            doc.metadata["file_path"]=""
        
        print('stored')

        st.session_state.process_doc = True

    st.toast("Document Processsed!")

    #st.session_state.process_doc = True

OPENAI_API_KEY = "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"
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


if OPENAI_API_KEY:
    pdfs = st.sidebar.file_uploader("Upload the case report in PDF format", type="pdf")
    st.sidebar.info("""
    Example pdf reports you can upload here:
    """)

    if st.sidebar.button("Process Document"):
        with st.spinner("Processing Document..."):
            nodes = process_pdf(pdfs)
            #st.session_state.index = get_vector_index(nodes, vector_store="faiss")
            st.session_state.index = get_vector_index(nodes, vector_store="simple")
            st.session_state.process_doc = True
        st.toast("Document Processsed!")

    #st.session_state.process_doc = True

    if st.session_state.process_doc:
        search_text = st.text_input("Enter your question")
        if st.button("Submit"):
            engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
            start_time = time.time()

            with st.status("**Analyzing Report...**"):
                st.write("Case search result...")
                response = generate_insight(engine, search_text)
                st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))

                st.toast("Report Analysis Complete!")

        if st.session_state.end_time:
            st.write("Report Analysis Time: ", st.session_state.end_time, "s")