|
import streamlit as st |
|
from pdfminer.high_level import extract_text |
|
import docx |
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
|
# Page header.
st.title("JAI")

# The conversation history is kept in Streamlit's session state. It is
# (re)created as an empty list when it does not exist yet, or when the user
# ticks the "clear previous conversations" checkbox.
history_missing = "previous_conversations" not in st.session_state
clear_requested = st.checkbox("Korábbi Beszélgetések Törlése")

if history_missing or clear_requested:
    st.session_state.previous_conversations = []
|
|
|
|
|
# Tokenizer and model must come from the same checkpoint: the token ids the
# tokenizer produces index directly into the model's embedding table, so a
# BERT vocabulary cannot be paired with a GPT-NeoX based causal LM.
MODEL_NAME = "databricks/dolly-v2-3b"


@st.cache_resource
def load_model_and_tokenizer():
    """Load the tokenizer and causal language model for ``MODEL_NAME``.

    Wrapped in ``st.cache_resource`` so the weights are loaded once and reused
    instead of being loaded again each time Streamlit re-executes the script.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # NOTE(review): trust_remote_code=True allows Python code shipped with the
    # checkpoint repository to run locally. Kept from the original call;
    # confirm it is really required for this checkpoint.
    loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
    return loaded_tokenizer, loaded_model


tokenizer, model = load_model_and_tokenizer()
|
|
|
|
|
def process_document(document_file):
    """Extract the plain text of an uploaded PDF or Word document.

    Args:
        document_file: Uploaded file object exposing a ``type`` attribute with
            its MIME type (the script passes the value returned by
            ``st.file_uploader``), or ``None`` when nothing was uploaded.

    Returns:
        The extracted text. For Word documents every non-empty paragraph is
        stripped and followed by a blank line. An empty string is returned
        when there is no file, the type is unsupported, or the Word file
        cannot be parsed; in the latter two cases an error is shown in the UI.
    """
    if document_file is None:
        return ""

    word_mime_types = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/msword",
    )
    mime_type = document_file.type

    if mime_type == "application/pdf":
        return extract_text(document_file)

    if mime_type in word_mime_types:
        # python-docx only reads the zip-based .docx container. A genuine
        # legacy binary .doc (or a corrupt .docx) fails to open, so report the
        # problem to the user instead of crashing the whole app. The failure
        # can surface as several unrelated exception types, hence the broad
        # catch at this UI boundary.
        try:
            paragraphs = docx.Document(document_file).paragraphs
        except Exception:
            st.error(
                "A Word dokumentum nem olvasható be. A régi .doc formátum nem támogatott; "
                "kérlek mentsd el DOCX formátumban, és próbáld újra!"
            )
            return ""
        return "".join(
            paragraph.text.strip() + "\n\n"
            for paragraph in paragraphs
            if paragraph.text
        )

    st.error("A fájltípus nem támogatott. Kérlek válassz ki egy PDF, DOCX vagy DOC fájlt!")
    return ""
|
|
|
|
|
def generate_response(input_text, max_new_tokens=256):
    """Generate a text continuation for ``input_text`` with the loaded model.

    Uses the module-level ``tokenizer`` and ``model`` objects.

    Args:
        input_text: Prompt text; surrounding whitespace is ignored.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The generated text without the prompt, or an empty string when the
        prompt is blank.
    """
    cleaned_input = input_text.strip()
    if not cleaned_input:
        # Nothing to condition on; skip the model call entirely.
        return ""

    # NOTE(review): prompts longer than 1024 tokens are truncated here; since
    # the caller appends the question last, a long document may push the
    # question out of the prompt — confirm the tokenizer's truncation side.
    inputs = tokenizer(cleaned_input, return_tensors="pt", max_length=1024, truncation=True)

    # A single forward pass only yields next-token logits per position; taking
    # an argmax over the sequence axis of those logits produces position
    # indices, not token ids. generate() runs the actual decoding loop.
    # Only input_ids / attention_mask are forwarded so that extra tokenizer
    # outputs cannot be rejected by the model.
    output_ids = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so only the newly generated answer is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
|
|
|
|
|
# --- Inputs -----------------------------------------------------------------
document_file = st.file_uploader(
    "Húzd ide a dokumentumot vagy kattints a feltöltéshez",
    type=["pdf", "docx", "doc"],
)

# Start from an empty document so the prompt can be assembled even before a
# file has been uploaded (otherwise document_text would be undefined below).
document_text = ""
if document_file is not None:
    document_text = process_document(document_file)

context = st.text_area("Korábbi Beszélgetéshez Tartozó Kontextus", "")
question = st.text_input("Kérdés a Dokumentumból", "")

# --- Prompt -----------------------------------------------------------------
# Prompt order is: context, document text, question. Empty parts are skipped
# so they do not leave stray separators in the prompt.
input_text = " ".join(part for part in (context, document_text, question) if part)

# --- Output -----------------------------------------------------------------
if input_text.strip():
    response = generate_response(input_text)

    st.subheader("Generált Válasz:")
    st.write(response)

    # Record the exchange in the session's conversation history.
    st.session_state.previous_conversations.append(
        {"input_text": input_text, "response": response}
    )

    # Echo the exact prompt and the response. These stay inside the branch
    # because `response` only exists when something was generated.
    st.subheader("Input Text:")
    st.write(input_text)

    st.subheader("Response:")
    st.write(response)
|
|