"""Gradio demo that summarizes pasted court-opinion text with a MemSum model."""

import re

import gradio as gr
from memsum import MemSum

# Paths to the model checkpoint and vocabulary, relative to the working
# directory the app is started from.
model_path = "model/MemSum_Final/model.pt"
summarizer = MemSum(model_path, "model/glove/vocabulary_200dim.pkl")

# Any run of whitespace (spaces, tabs, newlines, ...).
_WHITESPACE_RUN = re.compile(r'\s+')
# One whitespace character preceded by sentence-ending punctuation and
# followed by an uppercase ASCII letter.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s(?=[A-Z])')


def preprocess(text):
    """Normalize pasted text before sentence splitting.

    Removes every ``'- '`` sequence and then collapses each run of
    whitespace into a single space.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string. Leading/trailing spaces are not stripped.
    """
    # NOTE(review): the '- ' removal runs before whitespace is collapsed, so
    # a hyphen directly followed by a newline is NOT rejoined -- confirm
    # whether that is intended before changing the order.
    text = text.replace('- ', '')
    return _WHITESPACE_RUN.sub(' ', text)


def sent_tokenize(txt):
    """Split *txt* into sentences with a lightweight regex heuristic.

    A split happens at a single whitespace character that follows ``.``,
    ``!`` or ``?`` and precedes an uppercase ASCII letter. The whitespace
    itself is dropped; the punctuation stays with the preceding sentence.

    Args:
        txt: Text to split (expected to be whitespace-normalized).

    Returns:
        A list of sentence strings; ``[txt]`` when no boundary is found.
    """
    return _SENTENCE_BOUNDARY.split(txt)


def summarize(text):
    """Summarize *text* and return the result as one string.

    The text is preprocessed, split into sentences and handed to the
    module-level ``summarizer``; the items it returns are joined with
    blank lines.

    Args:
        text: Raw text from the input textbox. May be empty or ``None``.

    Returns:
        The summary, or ``""`` when there is nothing to summarize.
    """
    # Guard against an empty textbox so the model is never called with a
    # list that contains only an empty sentence.
    if not text or not text.strip():
        return ""

    sentences = [s for s in sent_tokenize(preprocess(text)) if s.strip()]
    # Preprocessing can reduce the input to nothing (e.g. the text "- ").
    if not sentences:
        return ""

    return "\n\n".join(summarizer.summarize(sentences))


# Widgets are created up front and handed to gr.Interface below. They are
# deliberately not named `input`/`output` to avoid shadowing the built-in
# `input`.
input_box = gr.Textbox(label="Input text", placeholder="Court opinion text goes here...")
output_box = gr.Textbox(label="Summary", placeholder="Output summary will appear here...")

with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Legal MemSum Demo
    Copy an opinion text from your favorite source and paste it into the textbox below.
    """)
    gr.Interface(fn=summarize, inputs=input_box, outputs=output_box)
    gr.Markdown(
    """
    Note: The version hosted here on Huggingface uses a different tokenizer than in our Github repository. We recommend using that https://github.com/bauerem/legal_memsum .
    Please reach out to us if you are interested in a modified version of this.
    """)

# Launched at module level, exactly as before, so running the file starts
# the app.
demo.launch()