"""Gradio text-in/text-out demo that summarizes text with a legal-domain model.

At import time this module downloads the NLTK ``punkt`` data, loads a
Hugging Face tokenizer and causal-LM checkpoint, wraps them in the
``summarizer`` package's ``Summarizer`` and builds the Gradio interface
``iface``.  Running the file as a script launches that interface locally.
"""

import os
import re

import gradio as gr
import nltk
import numpy as np
import pdf2image
import pdfkit
import pytesseract as pt
import yake
from fpdf import FPDF
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from summarizer import Summarizer, TransformerSummarizer
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForCausalLM,
    AutoModelForPreTraining,
    AutoTokenizer,
    pipelines,
)

# Fetch the Punkt tokenizer data for the nltk tokenizers imported above.
nltk.download('punkt')

# Checkpoint that is actually loaded below, so the startup log line is truthful.
# Other checkpoints previously tried in this script:
#   'distilbert-base-uncased', 'nlpaueb/legal-bert-base-uncased',
#   'laxya007/gpt2_legal', 'facebook/bart-large-cnn'
model_name = "laxya007/gpt2_BSA_Legal_Initiproject_OE_OS_BRM"

# The summarizer package's documented recipe for custom models enables
# ``output_hidden_states`` on the config: it builds sentence embeddings from
# the model's hidden states, which a custom model only returns when asked to.
model_config = AutoConfig.from_pretrained(model_name)
model_config.output_hidden_states = True

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, config=model_config)
bert_legal_model = Summarizer(custom_model=model, custom_tokenizer=tokenizer)
print('Using model {}\n'.format(model_name))


def lincoln(input_text: str) -> str:
    """Return a summary of *input_text* produced by ``bert_legal_model``.

    The summarizer is called with ``min_length=8`` and ``ratio=0.05``; per the
    summarizer package's documentation these are the minimum sentence length
    considered and the fraction of sentences to keep.

    Args:
        input_text: Raw text submitted through the Gradio text box.

    Returns:
        The summary text, which Gradio renders in the output text box.
    """
    # The summary must be returned; otherwise Gradio receives ``None`` and the
    # output box stays empty.
    return bert_legal_model(input_text, min_length=8, ratio=0.05)


# Single text input -> single text output, served by ``lincoln``.
iface = gr.Interface(
    fn=lincoln,
    inputs="text",
    outputs="text",
)

if __name__ == "__main__":
    iface.launch(share=False)