File size: 1,287 Bytes
90fa1fd
 
f91afe7
90fa1fd
 
 
 
d75d460
 
 
 
f91afe7
d75d460
90fa1fd
 
 
 
 
d75d460
90fa1fd
 
 
d75d460
2cd599d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48f6d03
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
from memsum import MemSum
import re

# Files the summarizer is built from: the model checkpoint and the pickled
# vocabulary stored under model/glove (paths are relative to the working dir).
model_path = "model/MemSum_Final/model.pt"
vocabulary_path = "model/glove/vocabulary_200dim.pkl"

# Constructed once at import time and reused by every summarize() call.
summarizer = MemSum(model_path, vocabulary_path)

def preprocess(text):
    """Flatten pasted text into a single line of space-separated words.

    Two steps are applied in order:

    1. every literal ``'- '`` (hyphen followed by a space) is removed;
    2. every run of whitespace, newlines included, becomes one space.

    Leading and trailing whitespace is collapsed to a single space, not
    stripped.

    NOTE(review): because step 1 runs before step 2, a hyphen that is
    directly followed by a newline is left in place -- confirm whether
    that is intended.
    """
    without_hyphen_space = text.replace('- ', '')
    single_line = re.sub(r'[\n\s]+', ' ', without_hyphen_space)
    return single_line

# A sentence boundary is one whitespace character that follows '.', '!' or
# '?' and precedes an ASCII capital letter. Compiled once at import time.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s(?=[A-Z])')


def sent_tokenize(txt):
    """Split *txt* into sentences with a simple punctuation heuristic.

    The text is cut at each whitespace character that is preceded by
    ``.``, ``!`` or ``?`` and followed by an uppercase ASCII letter. The
    whitespace itself is dropped; the punctuation stays with the sentence
    it ends.

    Args:
        txt: Text to split.

    Returns:
        A list of sentence strings. Text without any boundary (including
        the empty string) comes back as a one-element list.
    """
    return _SENTENCE_BOUNDARY.split(txt)

def summarize(text):
    """Summarize *text* and return the result as display-ready text.

    The input is cleaned with ``preprocess``, split into sentences with
    ``sent_tokenize`` and passed to the module-level ``summarizer``. The
    strings it returns are joined with blank lines between them.

    Args:
        text: Raw text submitted by the user.

    Returns:
        The summary as one string, or ``""`` when *text* is ``None``,
        empty, or contains only whitespace.
    """
    # Without this guard an empty submission would reach the model as a
    # single empty "sentence".
    if text is None or not text.strip():
        return ""

    sentences = sent_tokenize(preprocess(text))
    return "\n\n".join(summarizer.summarize(sentences))


# Textboxes are created up front and handed to gr.Interface below. They are
# named *_box so the builtin ``input`` is not shadowed at module level.
input_box = gr.Textbox(label="Input text", placeholder="Court opinion text goes here...")
output_box = gr.Textbox(label="Summary", placeholder="Output summary will appear here...")

with gr.Blocks() as demo:

    # Page heading and a one-line usage hint.
    gr.Markdown(
        """
        # Legal MemSum Demo
        Copy an opinion text from your favorite source and paste it into the textbox below.
        """)

    # Connects the two textboxes to summarize().
    gr.Interface(fn=summarize, inputs=input_box, outputs=output_box)

    # Footer note pointing to the GitHub repository.
    gr.Markdown(
            """
            Note: The version hosted here on Huggingface uses a different tokenizer than in our Github repository. We recommend using that
                https://github.com/bauerem/legal_memsum . <br>
                Please reach out to us if you are interested in a modified version of this.
            """)

# Start the web server only when executed as a script, so importing this
# module (e.g. to reuse ``demo`` or ``summarize``) does not launch the app.
if __name__ == "__main__":
    demo.launch()