Spaces:

llamazookeeper
/

teslaalerts

Build error

e2dccc5 6 months ago

No virus

4.17 kB

	import os

	from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
	from llama_index import download_loader, SimpleDirectoryReader, StorageContext, load_index_from_storage
	from llama_index.llms import HuggingFaceLLM
	from llama_index.embeddings import HuggingFaceEmbedding

	from IPython.display import Markdown, display

	import chromadb

	import streamlit as st
	import time
	from pypdf import PdfReader

	from pathlib import Path

	import os

	import torch
	#torch.set_default_device('cuda')


	st.set_page_config(page_title="Tesla Case Analyzer", page_icon=":card_index_dividers:", initial_sidebar_state="expanded", layout="wide")

	st.title(":card_index_dividers: Tesla Case Analyzer")
	st.info("""
	Begin by uploading the case report in pptx format. Afterward, click on 'Process Document'. Once the document has been processed. You can enter question and click send, system will answer your question.
	""")

	if "process_doc" not in st.session_state:
	st.session_state.process_doc = False



	llm = HuggingFaceLLM(
	context_window=8000,
	max_new_tokens=256,
	generate_kwargs={"temperature": 0.1, "do_sample": True},
	system_prompt=system_prompt,
	query_wrapper_prompt=query_wrapper_prompt,
	tokenizer_name="mistralai/Mistral-7B-Instruct-v0.1",
	model_name="mistralai/Mistral-7B-Instruct-v0.1",
	device_map="auto",
	tokenizer_kwargs={"max_length": 8000},
	model_kwargs={"torch_dtype": torch.float16}
	)

	embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-base")

	service_context = ServiceContext.from_defaults(
	chunk_size=1024,
	llm=llm,
	embed_model=embed_model
	)


	files_uploaded = st.sidebar.file_uploader("Upload the case report in pptx format", type="pptx",accept_multiple_files=True)
	st.sidebar.info("""
	Example pdf reports you can upload here:
	""")

	if st.sidebar.button("Process Document"):
	with st.spinner("Processing Document..."):

	data_dir = './data'
	if not os.path.exists(data_dir):
	os.makedirs(data_dir)

	for pdf in files_uploaded:
	print(f'file named {pdf.name}')
	fname=f'{data_dir}/{pdf.name}'
	with open(fname, 'wb') as f:
	f.write(pdf.read())


	def fmetadata(dummy: str): return {"file_path": ""}

	PptxReader = download_loader("PptxReader")
	loader = SimpleDirectoryReader(input_dir=data_dir, file_extractor={".pptx": PptxReader(),}, file_metadata=fmetadata)

	documents = loader.load_data()
	for doc in documents:
	doc.metadata["file_path"]=""

	print('stored')

	st.session_state.process_doc = True

	st.toast("Document Processsed!")

	#st.session_state.process_doc = True

	OPENAI_API_KEY = "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"
	os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


	if OPENAI_API_KEY:
	pdfs = st.sidebar.file_uploader("Upload the case report in PDF format", type="pdf")
	st.sidebar.info("""
	Example pdf reports you can upload here:
	""")

	if st.sidebar.button("Process Document"):
	with st.spinner("Processing Document..."):
	nodes = process_pdf(pdfs)
	#st.session_state.index = get_vector_index(nodes, vector_store="faiss")
	st.session_state.index = get_vector_index(nodes, vector_store="simple")
	st.session_state.process_doc = True
	st.toast("Document Processsed!")

	#st.session_state.process_doc = True

	if st.session_state.process_doc:
	search_text = st.text_input("Enter your question")
	if st.button("Submit"):
	engine = get_query_engine(st.session_state.index.as_query_engine(similarity_top_k=3))
	start_time = time.time()

	with st.status("Analyzing Report..."):
	st.write("Case search result...")
	response = generate_insight(engine, search_text)
	st.session_state["end_time"] = "{:.2f}".format((time.time() - start_time))

	st.toast("Report Analysis Complete!")

	if st.session_state.end_time:
	st.write("Report Analysis Time: ", st.session_state.end_time, "s")