Update app.py (change to MT5Tokenizer; add button boxes and bold to button text)
Browse files
app.py
CHANGED
@@ -1,40 +1,55 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
from transformers import
|
4 |
|
5 |
# Load your fine-tuned mT5 model
|
6 |
model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
|
7 |
-
tokenizer =
|
8 |
-
model =
|
9 |
|
10 |
def correct_htr(raw_htr_text):
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
15 |
|
16 |
def summarize_text(legal_text):
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
21 |
|
22 |
def answer_question(legal_text, question):
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
28 |
|
29 |
-
# Create the Gradio Blocks interface
|
30 |
with gr.Blocks() as demo:
|
31 |
gr.Markdown("# mT5 Legal Assistant")
|
32 |
gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
|
33 |
|
34 |
-
# Adding external link buttons
|
35 |
with gr.Row():
|
36 |
-
gr.HTML('<
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
with gr.Tab("Correct HTR"):
|
40 |
gr.Markdown("### Correct Raw HTR Text")
|
@@ -70,4 +85,6 @@ with gr.Blocks() as demo:
|
|
70 |
# Launch the Gradio interface
|
71 |
demo.launch()
|
72 |
|
|
|
|
|
73 |
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
from transformers import MT5Tokenizer, MT5ForConditionalGeneration
|
4 |
|
5 |
# Load your fine-tuned mT5 model
|
6 |
model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
|
7 |
+
tokenizer = MT5Tokenizer.from_pretrained(model_name)
|
8 |
+
model = MT5ForConditionalGeneration.from_pretrained(model_name)
|
9 |
|
10 |
def correct_htr(raw_htr_text: str) -> str:
    """Correct raw HTR text with the fine-tuned mT5 model.

    Args:
        raw_htr_text: Raw HTR transcription to correct. The tokenized
            input is truncated to 512 tokens.

    Returns:
        The decoded model output (generation is capped at 128 tokens),
        an empty string when the input is empty or whitespace-only, or
        the exception message if anything fails along the way.
    """
    try:
        # Nothing to correct: skip the beam search instead of returning
        # whatever the model happens to emit for an empty prompt.
        if not raw_htr_text or not raw_htr_text.strip():
            return ""
        inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
        outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as the returned text rather
        # than letting the exception propagate to the caller.
        return str(e)
|
18 |
|
19 |
def summarize_text(legal_text: str) -> str:
    """Summarize a legal text with the fine-tuned mT5 model.

    The text is prefixed with ``"summarize: "`` before tokenization, and
    the tokenized input is truncated to 512 tokens.

    Args:
        legal_text: Text to summarize.

    Returns:
        The decoded summary (generation is capped at 150 tokens), an
        empty string when the input is empty or whitespace-only, or the
        exception message if anything fails along the way.
    """
    try:
        # Without this guard the model would generate from the bare
        # "summarize: " prefix and return text unrelated to any input.
        if not legal_text or not legal_text.strip():
            return ""
        inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
        outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as the returned text rather
        # than letting the exception propagate to the caller.
        return str(e)
|
27 |
|
28 |
def answer_question(legal_text: str, question: str) -> str:
    """Answer a question about a legal text with the fine-tuned mT5 model.

    The prompt is built as ``"question: {question} context: {legal_text}"``
    and the tokenized input is truncated to 512 tokens.

    Args:
        legal_text: Context passage the answer should be drawn from.
        question: Question to ask about ``legal_text``.

    Returns:
        The decoded answer (generation is capped at 150 tokens), an
        empty string when either argument is empty or whitespace-only,
        or the exception message if anything fails along the way.
    """
    try:
        # Both parts are required: a missing value would otherwise be
        # formatted into the prompt (None becomes the text "None") and
        # the model would answer a prompt the user never wrote.
        if not legal_text or not legal_text.strip():
            return ""
        if not question or not question.strip():
            return ""
        formatted_input = f"question: {question} context: {legal_text}"
        inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
        outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as the returned text rather
        # than letting the exception propagate to the caller.
        return str(e)
|
37 |
|
38 |
+
# Create the Gradio Blocks interface with boxed clickable buttons and bold text
|
39 |
with gr.Blocks() as demo:
|
40 |
gr.Markdown("# mT5 Legal Assistant")
|
41 |
gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
|
42 |
|
43 |
+
# Adding external link buttons with a box around them and bold text
|
44 |
with gr.Row():
|
45 |
+
gr.HTML('''<div style="border: 2px solid black; padding: 10px; display: inline-block;">
|
46 |
+
<a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
|
47 |
+
<button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
|
48 |
+
</a>
|
49 |
+
<a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
|
50 |
+
<button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
|
51 |
+
</a>
|
52 |
+
</div>''')
|
53 |
|
54 |
with gr.Tab("Correct HTR"):
|
55 |
gr.Markdown("### Correct Raw HTR Text")
|
|
|
85 |
# Launch the Gradio interface
|
86 |
demo.launch()
|
87 |
|
88 |
+
|
89 |
+
|
90 |
|