cswamy committed
Commit bd69f73
1 Parent(s): b2e544e

initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+ mt5_amzn_enes_reviews_summarization.pth filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,49 @@
+ import torch
+ import gradio as gr
+
+ from model import create_mt5_small
+
+ # Set up model and tokenizer
+ model, tokenizer = create_mt5_small()
+
+ # Load fine-tuned weights into the model (CPU only)
+ model.load_state_dict(
+     torch.load(
+         f="mt5_amzn_enes_reviews_summarization.pth",
+         map_location=torch.device("cpu")
+     ))
+ model.eval()
+
+ # Predict function
+ def predict(text: str):
+     # Tokenize the input and generate a short summary
+     inputs = tokenizer(text,
+                        max_length=512,
+                        truncation=True,
+                        return_tensors='pt')
+     output_tokens = model.generate(inputs['input_ids'],
+                                    attention_mask=inputs['attention_mask'],
+                                    max_length=30)
+     output_text = tokenizer.batch_decode(output_tokens,
+                                          skip_special_tokens=True)
+
+     return output_text[0]
+
+ # Create examples list
+ examples_list = ["The ball hit the splice a lot and sent a fizzing sensation up the handle and into the bottom hand, so I adapted at each session by playing softer and softer, later and later. I found it very difficult to get down the pitch and meet the ball as it landed and so persuaded myself to play back more. It occurred to me that a better player would manage the shimmy down the pitch with more skill and faster footwork, and that the good sweepers would have to take him on in the way that Kevin Pietersen managed so successfully on occasions.",
+                  "Todo muy bien, cumple con lo esperado. Lo único malo es que: se calienta un poco y la batería no dura 8h. A una persona le ha parecido esto útil"]
+
+ # Create gradio app
+ title = "Summarizer for English and Spanish inputs"
+ description = "mT5-small model fine-tuned for summarization of English or Spanish text on the Amazon reviews dataset."
+
+ demo = gr.Interface(fn=predict,
+                     inputs=gr.Textbox(label="Input",
+                                       placeholder="Enter sentences here in English or Spanish..."),
+                     outputs="text",
+                     examples=examples_list,
+                     title=title,
+                     description=description)
+
+ # Launch gradio
+ demo.launch()
model.py ADDED
@@ -0,0 +1,11 @@
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+
+ def create_mt5_small():
+     """
+     Initializes the mt5-small model and tokenizer from the Hugging Face Hub.
+     """
+     checkpoint = 'google/mt5-small'
+     tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+     model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+
+     return model, tokenizer
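
For reference, a minimal sketch of exercising create_mt5_small() together with the fine-tuned checkpoint outside the Gradio app; it assumes mt5_amzn_enes_reviews_summarization.pth sits in the working directory, and the review text is made up:

import torch
from model import create_mt5_small

# Build the base model/tokenizer and load the fine-tuned weights on CPU
model, tokenizer = create_mt5_small()
model.load_state_dict(torch.load("mt5_amzn_enes_reviews_summarization.pth",
                                 map_location=torch.device("cpu")))
model.eval()

# Tokenize a single (made-up) review and generate a short summary
inputs = tokenizer("Great value for the price, but the battery barely lasts a day.",
                   max_length=512, truncation=True, return_tensors="pt")
tokens = model.generate(inputs["input_ids"],
                        attention_mask=inputs["attention_mask"],
                        max_length=30)
print(tokenizer.batch_decode(tokens, skip_special_tokens=True)[0])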
mt5_amzn_enes_reviews_summarization.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5cd5e76f4bed56cd7dc3e669fee65d3b8db01af16091b58645b0a56ef86e9449
+ size 1200799301
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch==1.12.0
+ gradio==3.44.1
+ transformers==4.33.1
+ sentencepiece==0.1.99
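
Setup note: with these pinned versions installed (pip install -r requirements.txt), running python app.py should start the Gradio demo locally; the first run downloads the google/mt5-small base weights from the Hugging Face Hub before the fine-tuned checkpoint is loaded.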