Update app.py (added two clickable buttons at the top of each tab page)
Add Gradio Interface and Launch
# After your function definitions (correct_htr, summarize_text, answer_question), you need to define your Gradio interface and call demo.launch() to run the app.
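A minimal sketch of that pattern is shown below. It assumes correct_htr() is already defined above, the widget labels are illustrative only, and the full interface used in this commit follows in the diff further down.

import gradio as gr

# Minimal sketch: wrap one existing function in a Blocks interface and launch it.
with gr.Blocks() as demo:
    raw_input = gr.Textbox(lines=5, label="Raw HTR text")
    corrected = gr.Textbox(lines=5, label="Corrected text")
    # Wire the button to the already-defined correct_htr() function
    gr.Button("Correct HTR").click(correct_htr, inputs=raw_input, outputs=corrected)

demo.launch()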
Add two clickable buttons
# To add two clickable buttons for accessing external links (the Admiralty Court Legal Glossary and HCA 13/70 Ground Truth) on the left-hand side of each tab window, you can use Gradio's gr.HTML() to embed HTML link buttons in the interface.
# Unfortunately, Gradio does not currently support traditional sidebars, but we can still place these buttons or links at the top of the interface (before the main content) for easy access.
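In isolation, the link-button row looks like the sketch below. The link_button() helper is purely illustrative and is not part of app.py; the two URLs are the ones used in this commit.

import gradio as gr

GLOSSARY_URL = "http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary"
GROUND_TRUTH_URL = "https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt"

def link_button(label, url):
    # Illustrative helper: gr.HTML renders an anchor wrapping a plain HTML <button>,
    # so clicking it opens the URL in a new tab without any Python callback.
    return gr.HTML(f'<a href="{url}" target="_blank"><button>{label}</button></a>')

with gr.Blocks() as demo:
    with gr.Row():  # a Row keeps the two buttons side by side above the tabs
        link_button("Admiralty Court Legal Glossary", GLOSSARY_URL)
        link_button("HCA 13/70 Ground Truth (1654-55)", GROUND_TRUTH_URL)

demo.launch()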
app.py
CHANGED
@@ -8,49 +8,66 @@ tokenizer = T5Tokenizer.from_pretrained(model_name)
 model = T5ForConditionalGeneration.from_pretrained(model_name)
 
 def correct_htr(raw_htr_text):
-    # Tokenize the input text
     inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
-    print("Tokenized Inputs for HTR Correction:", inputs)  # Debugging
-
-    # Generate corrected text with max_length and beam search
     outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
-    print("Generated Output (Tokens) for HTR Correction:", outputs)  # Debugging
-
-    # Decode the output, skipping special tokens
     corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Decoded Output for HTR Correction:", corrected_text)  # Debugging
-
     return corrected_text
 
 def summarize_text(legal_text):
-    # Tokenize the input text with the summarization prompt
     inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
-    print("Tokenized Inputs for Summarization:", inputs)  # Debugging
-
-    # Generate summary with beam search for better results
     outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
-    print("Generated Summary (Tokens):", outputs)  # Debugging
-
-    # Decode the output, skipping special tokens
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Decoded Summary:", summary)  # Debugging
-
     return summary
 
 def answer_question(legal_text, question):
-    # Format input for question-answering
     formatted_input = f"question: {question} context: {legal_text}"
     inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
-    print("Tokenized Inputs for Question Answering:", inputs)  # Debugging
-
-    # Generate answer using beam search
     outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
-    print("Generated Answer (Tokens):", outputs)  # Debugging
-
-    # Decode the output, skipping special tokens
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Decoded Answer:", answer)  # Debugging
-
     return answer
 
+# Create the Gradio Blocks interface
+with gr.Blocks() as demo:
+    gr.Markdown("# mT5 Legal Assistant")
+    gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
+
+    # Adding external link buttons at the top
+    with gr.Row():
+        gr.HTML('<a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank"><button>Admiralty Court Legal Glossary</button></a>')
+        gr.HTML('<a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank"><button>HCA 13/70 Ground Truth (1654-55)</button></a>')
+
+    with gr.Tab("Correct HTR"):
+        gr.Markdown("### Correct Raw HTR Text")
+        raw_htr_input = gr.Textbox(lines=5, placeholder="Enter raw HTR text here...")
+        corrected_output = gr.Textbox(lines=5, placeholder="Corrected HTR text")
+        correct_button = gr.Button("Correct HTR")
+        clear_button = gr.Button("Clear")
+
+        correct_button.click(correct_htr, inputs=raw_htr_input, outputs=corrected_output)
+        clear_button.click(lambda: ("", ""), outputs=[raw_htr_input, corrected_output])
+
+    with gr.Tab("Summarize Legal Text"):
+        gr.Markdown("### Summarize Legal Text")
+        legal_text_input = gr.Textbox(lines=10, placeholder="Enter legal text to summarize...")
+        summary_output = gr.Textbox(lines=5, placeholder="Summary of legal text")
+        summarize_button = gr.Button("Summarize Text")
+        clear_button = gr.Button("Clear")
+
+        summarize_button.click(summarize_text, inputs=legal_text_input, outputs=summary_output)
+        clear_button.click(lambda: ("", ""), outputs=[legal_text_input, summary_output])
+
+    with gr.Tab("Answer Legal Question"):
+        gr.Markdown("### Answer a Question Based on Legal Text")
+        legal_text_input_q = gr.Textbox(lines=10, placeholder="Enter legal text...")
+        question_input = gr.Textbox(lines=2, placeholder="Enter your question...")
+        answer_output = gr.Textbox(lines=5, placeholder="Answer to your question")
+        answer_button = gr.Button("Get Answer")
+        clear_button = gr.Button("Clear")
+
+        answer_button.click(answer_question, inputs=[legal_text_input_q, question_input], outputs=answer_output)
+        clear_button.click(lambda: ("", "", ""), outputs=[legal_text_input_q, question_input, answer_output])
+
+# Launch the Gradio interface
+demo.launch()