MrGanesh committed on
Commit
940bf03
•
1 Parent(s): 6f80e3e

Update app.py

Browse files
Files changed (1)
  1. app.py +46 -54
app.py CHANGED
@@ -1,54 +1,46 @@
- import streamlit
- import pandas as pd
- #import torch
- from transformers import pipeline
- import streamlit as st
-
- def app():
-     st.title("Patent Document Summarization 🤓")
-
-     st.markdown("This is a Web application that Summarizes Patent Text 😎")
-     upload_file = st.file_uploader('Upload a file containing Text data')
-     button = st.button("Summarize")
-
-     st.cache(allow_output_mutation=True)
-     def model():
-         summarizer = pipeline("summarization", model="google/bigbird-pegasus-large-bigpatent")
-         return summarizer
-     summarizer = model()
-
-     def text_summarizer(text):
-         a = summarizer(text, max_length=450, min_length=150, do_sample=False)
-         return a[0]['summary_text']
-
-
-     # Check to see if a file has been uploaded
-     if upload_file is not None and button:
-         st.success("Summarizing Text, Please wait...")
-         # If it has then do the following:
-
-         # Read the file to a dataframe using pandas
-         df = pd.read_csv(upload_file)
-
-         # Create a section for the dataframe header
-
-         df1 = df.copy()
-         df1['summarized_text'] = df1['Dialog'].apply(text_summarizer)
-
-         df2 = df1[['Name','summarized_text']]
-         st.write(df2.head(5))
-
-         @st.cache
-         def convert_df(dataframe):
-             return dataframe.to_csv().encode('utf-8')
-
-         csv = convert_df(df2)
-         st.download_button(label="Download CSV", data=csv, file_name='summarized_output.csv', mime='text/csv')
-
-
-
-
-
-
- if __name__ == "__main__":
-     app()
 
+ import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ tokenizer = AutoTokenizer.from_pretrained("Armandoliv/t5-small-summarizer-scitldr")
+
+ model = AutoModelForSeq2SeqLM.from_pretrained("Armandoliv/t5-small-summarizer-scitldr")
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ model = model.to(device)
+
+ def main_summarizer(text):
+     max_input_length = 1024
+     preprocess_text = text.strip().replace("\n", " ").replace("’", "'").strip()
+     tokenized_text = tokenizer.encode(preprocess_text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
+
+     summary_ids = model.generate(
+         tokenized_text,
+         max_length=256,
+         num_beams=8,
+         repetition_penalty=3.0,
+         length_penalty=2.5,
+         early_stopping=False
+     )
+
+     output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+
+     return output
+
+ inputs = [gr.Textbox(lines=10, placeholder="Text Here...", label="Input")]
+ outputs = gr.Text(label="Summary")
+ title = "Text summarisation app"
+ description = "This demo uses AI models to summarize long text.\nIt focuses on scientific texts."
+
+ io = gr.Interface(fn=main_summarizer, inputs=inputs, outputs=outputs, title=title, description=description,
+
+     css=""".gr-button-primary { background: -webkit-linear-gradient(
+         90deg, #355764 0%, #55a8a1 100% ) !important; background: #355764;
+         background: linear-gradient(
+         90deg, #355764 0%, #55a8a1 100% ) !important;
+         background: -moz-linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important;
+         background: -webkit-linear-gradient(
+         90deg, #355764 0%, #55a8a1 100% ) !important;
+         color: white !important}"""
+ )
+
+ io.launch()
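
A quick way to sanity-check the updated app outside the Gradio UI is to run the same tokenizer and generation settings in a short standalone script. The sketch below is a minimal example, assuming torch and transformers are installed and the Armandoliv/t5-small-summarizer-scitldr checkpoint downloads successfully; the sample abstract is illustrative only.

# Standalone check mirroring the generation settings committed in app.py.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

checkpoint = "Armandoliv/t5-small-summarizer-scitldr"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Illustrative input; any scientific abstract can go here.
text = (
    "We study abstractive summarization of scientific papers and show that a "
    "small T5 model fine-tuned on SciTLDR can produce one-sentence summaries."
)

inputs = tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=1024).to(device)
summary_ids = model.generate(
    inputs,
    max_length=256,
    num_beams=8,
    repetition_penalty=3.0,
    length_penalty=2.5,
    early_stopping=False,
)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))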