| | import streamlit as st
|
| | from transformers import pipeline
|
| | import fitz
|
| |
|
def _pick_device():
    """Return the pipeline device index: 0 (first CUDA GPU) or -1 (CPU)."""
    try:
        import torch
    except ImportError:
        # No PyTorch available to probe for a GPU; fall back to the CPU.
        return -1
    return 0 if torch.cuda.is_available() else -1


def _load_pipelines():
    """Build the question-answering and text-generation pipelines.

    Returns:
        A ``(qa, text_gen)`` tuple of Hugging Face pipelines, both placed on
        the device chosen by ``_pick_device``.
    """
    device = _pick_device()
    qa_pipeline = pipeline(
        "question-answering",
        model="deepset/roberta-base-squad2",
        device=device,
    )
    text_gen_pipeline = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        device=device,
    )
    return qa_pipeline, text_gen_pipeline


# Streamlit re-executes this script on every interaction; without caching the
# models would be rebuilt each time. Older Streamlit releases have no
# ``cache_resource``, in which case the loader is simply called directly.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is not None:
    _load_pipelines = _cache_resource(_load_pipelines)

qa, text_gen = _load_pipelines()
|
| |
|
| |
|
| |
|
def extract_PDF(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: Binary file-like object whose ``read()`` yields the PDF bytes.

    Returns:
        The text of all pages concatenated in page order, with nothing
        inserted between pages.
    """
    pdf_bytes = file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as document:
        page_texts = [page.get_text() for page in document]
    return "".join(page_texts)
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
st.title("Chatbot with Huggingface")

st.subheader("Upload file")
pdf_file = st.file_uploader("Upload", type="pdf")

# Streamlit re-runs this script on every interaction, so anything that must
# survive between runs is kept in session_state.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

if "context" not in st.session_state:
    st.session_state.context = None

if pdf_file is not None:
    # Identify the upload by name and size so that replacing the PDF triggers
    # a fresh extraction instead of silently keeping the previous document.
    uploaded_id = (pdf_file.name, pdf_file.size)
    is_new_upload = st.session_state.get("context_source") != uploaded_id
    if st.session_state.context is None or is_new_upload:
        st.session_state.context = extract_PDF(pdf_file)
        st.session_state.context_source = uploaded_id
        # Earlier answers were about the previous document; start over.
        st.session_state.chat_history = []
|
| |
|
| |
|
| |
|
| |
|
# Number of context characters handed to the generator in each prompt.
CONTEXT_WINDOW_CHARS = 1500

if st.session_state.context:
    st.subheader("Chat with the PDF")

    question = st.text_input("You", key="user_input")

    if question:
        context = st.session_state.context

        # The text box keeps its value across reruns, so only answer a
        # (question, document) pair the first time it is seen. Otherwise every
        # rerun would re-run both models and append a duplicate entry.
        request_key = (question, hash(context))
        if st.session_state.get("last_request") != request_key:
            # The extractive QA model locates the most likely answer span in
            # the whole document; its position picks which part of the text
            # the generator gets to see.
            result = qa(question=question, context=context)

            # Centre the window on the answer start, clamped to the document
            # bounds. Short documents, or answers near the beginning, still
            # use the leading CONTEXT_WINDOW_CHARS characters.
            window_start = max(
                0,
                min(
                    result["start"] - CONTEXT_WINDOW_CHARS // 2,
                    len(context) - CONTEXT_WINDOW_CHARS,
                ),
            )
            context_chunk = context[
                window_start:window_start + CONTEXT_WINDOW_CHARS
            ]
            prompt = f"Context: {context_chunk}\nQuestion: {question}\nAnswer:"

            generated = text_gen(prompt, max_length=100)[0]['generated_text']

            st.session_state.chat_history.append(
                {"user": question, "bot": generated}
            )
            st.session_state.last_request = request_key

    # Render the whole conversation so far, oldest exchange first.
    for chat in st.session_state.chat_history:
        st.markdown(f"**You:** {chat['user']}")
        st.markdown(f"**Bot:** {chat['bot']}")

else:
    st.info("Please upload PDF to begin")
|
| |
|