import streamlit as st
from transformers import GPT2TokenizerFast, AutoModelForCausalLM
from arabert.preprocess import ArabertPreprocessor

# Load the summarization model, the AraGPT2 tokenizer, and the AraBERT preprocessor
model_name = "malmarjeh/gpt2"
tokenizer = GPT2TokenizerFast.from_pretrained("aubmindlab/aragpt2-base")
model = AutoModelForCausalLM.from_pretrained(model_name)
preprocessor = ArabertPreprocessor(model_name=model_name)

# Streamlit UI
st.title('Arabic Text Summarizer')
text = st.text_area("Paste your Arabic text here:")

if st.button('Summarize'):
    if text:
        # Preprocess the input and wrap it in the prompt format the model expects
        processed_text = preprocessor.preprocess(text)
        formatted_text = '\n النص: ' + processed_text + ' \n الملخص: \n '

        # Register a pad token and encode the prompt, padded/truncated to exactly 150 tokens
        tokenizer.add_special_tokens({'pad_token': '<pad>'})
        tokens = tokenizer.batch_encode_plus(
            [formatted_text],
            return_tensors='pt',
            padding='max_length',
            truncation=True,
            max_length=150,
        )

        # Generate the summary with beam search
        output = model.generate(
            input_ids=tokens['input_ids'],
            repetition_penalty=2.0,
            num_beams=5,
            max_length=600,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            bos_token_id=tokenizer.bos_token_id,
        )

        # Skip the 150 prompt tokens and decode only the generated summary
        result = tokenizer.decode(output[0][150:], skip_special_tokens=True).strip()

        st.subheader("Original Text")
        st.write(text)

        st.subheader("Summarized Text")
        st.write(result)
    else:
        st.warning("Please enter Arabic text to summarize.")
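
# To launch the app locally (assuming this script is saved as app.py; the file name is not
# specified in the original), run:
#   streamlit run app.py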