File size: 2,874 Bytes
65bd869 d151635 65bd869 884190d 877c158 884190d d151635 884190d 877c158 884190d 941e747 884190d 877c158 884190d 941e747 877c158 884190d 65bd869 884190d 65bd869 884190d 65bd869 884190d 877c158 941e747 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import fitz # PyMuPDF for PDF text extraction
# Streamlit UI setup
st.set_page_config(page_title="Text Simplifier", layout="centered")
st.title("🧠 Academic Text Simplifier")

# Model selection: maps the label shown in the dropdown to the Hugging Face
# Hub checkpoint id that is handed to load_model().
# The third entry points at a Pegasus checkpoint, so its label says Pegasus
# (it was previously labelled "BART").
# NOTE(review): the T5 checkpoint id says "common_gen", not simplification —
# confirm this is the intended model for the "Simplification finetuned" label.
model_options = {
    "Mistral (Instruction-tuned)": "mistralai/Mistral-7B-Instruct-v0.1",
    "T5 (Simplification finetuned)": "mrm8488/t5-base-finetuned-common_gen",
    "Pegasus (Paraphrasing/Simplification)": "tuner007/pegasus_paraphrase",
}
model_choice = st.selectbox("Choose a simplification model:", list(model_options))
model_name = model_options[model_choice]
@st.cache_resource(show_spinner=True)
def load_model(name):
    """Build the Hugging Face pipeline for the checkpoint ``name``.

    The pipeline task is chosen from the checkpoint id:

    * ids containing ``t5`` get a ``text2text-generation`` pipeline built
      from an explicitly loaded slow tokenizer and seq2seq model;
    * ids containing ``pegasus`` get a ``text2text-generation`` pipeline
      created directly from the id;
    * everything else gets a ``text-generation`` pipeline.

    Args:
        name: Hugging Face Hub checkpoint id.

    Returns:
        The constructed ``transformers`` pipeline object.
    """
    lowered = name.lower()

    if "t5" in lowered:
        # T5 is loaded piecewise so the slow tokenizer can be requested.
        t5_tokenizer = AutoTokenizer.from_pretrained(name, use_fast=False)
        t5_model = AutoModelForSeq2SeqLM.from_pretrained(name)
        return pipeline(
            "text2text-generation", model=t5_model, tokenizer=t5_tokenizer
        )

    task = "text2text-generation" if "pegasus" in lowered else "text-generation"
    return pipeline(task, model=name)
# Build the pipeline for the model picked in the dropdown; simplify_text()
# reads this module-level `simplifier`.
simplifier = load_model(model_name)
def simplify_text(text):
    """Rewrite ``text`` in simpler language using the selected model.

    Reads the module-level ``model_name`` (to pick the prompt format and
    generation arguments) and ``simplifier`` (the pipeline to call).

    Args:
        text: The passage to simplify.

    Returns:
        The generated text; ``"(No output)"`` when ``text`` is empty or
        whitespace-only, or when the pipeline returns no text; or a string
        starting with ``"Error simplifying text:"`` if generation raised.
    """
    # Nothing to send to the model (e.g. a PDF with no extractable text).
    if not text or not text.strip():
        return "(No output)"

    try:
        name = model_name.lower()

        if "t5" in name:
            prompt = f"simplify: {text}"
        elif "mistral" in name or "instruct" in name:
            prompt = f"Rewrite the following text using simpler vocabulary and structure:\n{text}"
        elif "pegasus" in name:
            prompt = f"paraphrase: {text}"
        else:
            prompt = text

        # Mirrors load_model(): only T5 and Pegasus are served by a
        # text2text-generation pipeline; everything else is text-generation.
        if "t5" in name or "pegasus" in name:
            generation_kwargs = {
                "max_length": 256,
                "min_length": 30,
                "do_sample": False,
                # Clip inputs longer than the model's maximum instead of
                # failing inside the encoder.
                "truncation": True,
            }
        else:
            generation_kwargs = {
                # For causal models max_length would count the prompt tokens
                # too, so bound only the newly generated tokens.
                "max_new_tokens": 256,
                "do_sample": False,
                # Without this the pipeline echoes the prompt in its output.
                "return_full_text": False,
            }

        output = simplifier(prompt, **generation_kwargs)[0]
        return (
            output.get("summary_text")
            or output.get("generated_text")
            or "(No output)"
        )
    except Exception as e:
        # UI boundary: report the failure as text rather than crash the app.
        return f"Error simplifying text: {e}"
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF.

    Args:
        pdf_file: Object whose ``read()`` returns the PDF's bytes.

    Returns:
        The text of each page, in page order, joined with newlines.
    """
    # The context manager closes the document once the text has been
    # collected, instead of leaving it open until garbage collection.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
# Input options
option = st.radio("Choose input type:", ("Text Input", "Upload PDF"))

if option == "Text Input":
    user_text = st.text_area("Enter your complex academic text here:", height=200)
    if st.button("Simplify Text") and user_text:
        simplified_text = simplify_text(user_text)
        st.text_area("🔽 Simplified Text:", simplified_text, height=200)
elif option == "Upload PDF":
    uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
    if uploaded_file and st.button("Simplify Extracted Text"):
        try:
            extracted_text = extract_text_from_pdf(uploaded_file)
        except Exception as exc:
            # UI boundary: a corrupt or unreadable upload should produce a
            # message in the page, not an unhandled traceback.
            st.error(f"Could not read the PDF: {exc}")
        else:
            if extracted_text.strip():
                # Only the first 2000 characters are sent to the model.
                simplified_text = simplify_text(extracted_text[:2000])  # limit for performance
                st.text_area("🔽 Simplified Text from PDF:", simplified_text, height=200)
            else:
                # Avoid sending an empty prompt to the model when the PDF
                # yields no text.
                st.warning("No extractable text was found in this PDF.")

st.markdown("---")
st.markdown("Made with ❤️ by Harshitha")
|