# AISandbox/qa/qa.py
from typing import Text, Union

import streamlit as st
from openai.error import OpenAIError

from .utils import *

# whether the file uploader should accept more than one file at a time
multiple_files = True


def clear_submit():
    """
    Toggles the file_submitted internal session state variable to False.
    """
    st.session_state["file_submitted"] = False


def set_openai_api_key(api_key: Text) -> bool:
    """Sets the internal OpenAI API key to the given value.

    Args:
        api_key (Text): OpenAI API key

    Returns:
        bool: True if the key was accepted and stored, False otherwise.
    """
    # classic OpenAI secret keys start with "sk-" and are 51 characters long
    if not (api_key.startswith("sk-") and len(api_key) == 51):
        st.error("Invalid OpenAI API key! Please provide a valid key.")
        return False

    st.session_state["OPENAI_API_KEY"] = api_key
    st.session_state["api_key_configured"] = True
    return True
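

# NOTE: the format check in set_openai_api_key can pass revoked or stale keys.
# A stricter (hypothetical) check could make a cheap authenticated round-trip,
# assuming the pre-1.0 openai client this module already relies on:
#
# import openai
#
# def openai_key_is_usable(api_key: Text) -> bool:
#     """Probes the OpenAI API with the given key; returns False on auth failures."""
#     openai.api_key = api_key
#     try:
#         openai.Model.list()  # lightweight authenticated endpoint
#         return True
#     except OpenAIError:
#         return False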


def file_to_doc(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
    """Converts an uploaded file to a document using specialized parsers.

    Returns None (after surfacing an error in the UI) for unsupported file types.
    """
    if file.name.endswith(".pdf"):
        doc = parse_pdf(file)
    elif file.name.endswith(".docx"):
        doc = parse_docx(file)
    elif file.name.endswith((".txt", ".py", ".json", ".html", ".css", ".md")):
        doc = parse_txt(file)
    else:
        st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt, .py, .json, .html, .css, .md]")
        doc = None
    return doc
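

# The endswith chain above could also be written as an extension-to-parser
# dispatch table; a sketch, reusing the same parse_* helpers from .utils:
#
# PARSERS = {".pdf": parse_pdf, ".docx": parse_docx}
# PARSERS.update({ext: parse_txt for ext in (".txt", ".py", ".json", ".html", ".css", ".md")})
#
# def file_to_doc_v2(file):
#     suffix = "." + file.name.rsplit(".", 1)[-1].lower()
#     parser = PARSERS.get(suffix)
#     return parser(file) if parser is not None else None
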
# this function can be used to define a single doc processing pipeline
# def document_embedding_pipeline(file:Union[PDFFile, DocxFile, TxtFile, CodeFile]) -> None:
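#
# A sketch of what that pipeline could look like, assuming the same parsing,
# chunking and embedding helpers already imported from .utils:
#
# def document_embedding_pipeline(file: Union[PDFFile, DocxFile, TxtFile, CodeFile]):
#     """Parses a single file, chunks it into documents and embeds them."""
#     doc = file_to_doc(file)
#     docs = text_to_docs(text=tuple(doc), file_name=file.name)
#     return embed_docs(tuple(docs))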


def qa_main():
    st.markdown("<h2>This app allows you to chat with files!</h2>", unsafe_allow_html=True)
    st.write("Just upload a document and start chatting with a version of GPT-4 that has read the file!")

    index = None
    doc = None
    upload_document_greenlight = False
    uploaded_processed_document_greenlight = False

    # OpenAI API Key - TODO: consider adding a key valid for everyone
    # st.header("Configure OpenAI API Key")
    # st.warning('Please enter your OpenAI API Key!', icon='⚠️')
    # uncomment the following lines to add a user-specific key
    # user_secret = st.text_input(
    #     "Insert your OpenAI API key here ([get your API key](https://platform.openai.com/account/api-keys)).",
    #     type="password",
    #     placeholder="Paste your OpenAI API key here (sk-...)",
    #     help="You can get your API key from https://platform.openai.com/account/api-keys.",
    #     value=st.session_state.get("OPENAI_API_KEY", ""),
    # )
    user_secret = st.secrets["OPENAI_API_KEY"]
    if user_secret:
        if set_openai_api_key(user_secret):
            st.success("OpenAI API key successfully accessed!", icon="✅")
            upload_document_greenlight = True

    if upload_document_greenlight:
        # File that needs to be queried
        st.header("Upload a file")
        uploaded_file = st.file_uploader(
            "Upload a pdf, docx, or txt file (scanned documents not supported)",
            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
            help="Scanned documents are not supported yet 🥲",
            on_change=clear_submit,
            accept_multiple_files=multiple_files,
        )
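        # NOTE: with accept_multiple_files=True st.file_uploader returns a list,
        # while in single-file mode it returns one UploadedFile (or None). A
        # defensive normalization covering both modes could look like (sketch):
        #
        # if not isinstance(uploaded_file, list):
        #     uploaded_file = [uploaded_file] if uploaded_file is not None else []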
        # reading the uploaded files
        text = []
        if len(uploaded_file) != 0:
            # toggle internal file submission state to True
            st.session_state["file_submitted"] = True
            for file in uploaded_file:
                # parse the file using custom parsers
                file_doc = file_to_doc(file)
                if file_doc is None:
                    # unsupported type: an error was already shown, skip this file
                    continue
                # converts the files into a list of documents
                file_text = text_to_docs(text=tuple(file_doc), file_name=file.name)
                text.extend(file_text)
            # embeds the documents using the OpenAI API
            try:
                with st.spinner("Indexing the document... This might take a while!"):
                    index = embed_docs(tuple(text))
                    st.session_state["api_key_configured"] = True
            except OpenAIError as e:
                st.error(f"OpenAI error encountered: {e._message}")
            uploaded_processed_document_greenlight = True
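
        # Every rerun of the script re-embeds the documents. If embed_docs is not
        # already cached inside .utils, a Streamlit-level cache could be layered
        # on top (sketch; the tuple(...) argument above already keeps the input
        # hashable):
        #
        # @st.cache_resource(show_spinner=False)
        # def cached_embed(docs):
        #     return embed_docs(docs)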

    if uploaded_processed_document_greenlight:
        if "messages" not in st.session_state:
            st.session_state["messages"] = []

        # replay the conversation so far
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])

        if prompt := st.chat_input("Ask the document something..."):
            st.session_state.messages.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                # retrieving the most relevant sources
                sources = search_docs(index, prompt)
                # get_answer returns the full text; iterating over it character
                # by character renders the answer progressively, as if streamed
                full_response = ""
                for answer_bit in get_answer(sources, prompt)["output_text"]:
                    full_response += answer_bit
                    message_placeholder.markdown(full_response + "▌")
                message_placeholder.markdown(full_response)
                # non-streaming alternative:
                # answer = get_answer(sources, prompt)
                # message_placeholder.markdown(answer["output_text"])
                # st.session_state.messages.append({"role": "assistant", "content": answer["output_text"]})
            st.session_state.messages.append({"role": "assistant", "content": full_response})


# This might be useful to add memory to the chatbot, harnessing a more low-level approach:
# llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
# memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
# retriever = your_vector_store.as_retriever()
# # Create the multipurpose chain
# qachat = ConversationalRetrievalChain.from_llm(
#     llm=ChatOpenAI(temperature=0),
#     memory=memory,
#     retriever=retriever,
#     return_source_documents=True,
# )
# qachat("Ask your question here...")
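#
# (That sketch would additionally need the classic LangChain imports, e.g.:
#  from langchain.chat_models import ChatOpenAI
#  from langchain.memory import ConversationBufferMemory
#  from langchain.chains import ConversationalRetrievalChain)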