MrGanesh committed on
Commit
940bf03
•
1 Parent(s): 6f80e3e

Update app.py

Browse files
Files changed (1)
  1. app.py +46 -54
app.py CHANGED
@@ -1,54 +1,46 @@
- import streamlit
- import pandas as pd
- #import torch
- from transformers import pipeline
- import streamlit as st
-
- def app():
-     st.title("Patent Document Summarization 🤓")
-
-     st.markdown("This is a Web application that Summarizes Patent Text 😎")
-     upload_file = st.file_uploader('Upload a file containing Text data')
-     button = st.button("Summarize")
-
-     st.cache(allow_output_mutation=True)
-     def model():
-         summarizer = pipeline("summarization", model="google/bigbird-pegasus-large-bigpatent")
-         return summarizer
-     summarizer = model()
-
-     def text_summarizer(text):
-         a = summarizer(text, max_length=450, min_length=150, do_sample=False)
-         return a[0]['summary_text']
-
-
-     # Check to see if a file has been uploaded
-     if upload_file is not None and button:
-         st.success("Summarizing Text, Please wait...")
-         # If it has then do the following:
-
-         # Read the file to a dataframe using pandas
-         df = pd.read_csv(upload_file)
-
-         # Create a section for the dataframe header
-
-         df1 = df.copy()
-         df1['summarized_text'] = df1['Dialog'].apply(text_summarizer)
-
-         df2 = df1[['Name','summarized_text']]
-         st.write(df2.head(5))
-
-         @st.cache
-         def convert_df(dataframe):
-             return dataframe.to_csv().encode('utf-8')
-
-         csv = convert_df(df2)
-         st.download_button(label="Download CSV", data=csv, file_name='summarized_output.csv', mime='text/csv')
-
-
-
-
-
-
- if __name__ == "__main__":
-     app()
 
+ import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ tokenizer = AutoTokenizer.from_pretrained("Armandoliv/t5-small-summarizer-scitldr")
+
+ model = AutoModelForSeq2SeqLM.from_pretrained("Armandoliv/t5-small-summarizer-scitldr")
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ model = model.to(device)
+
+ def main_summarizer(text):
+     max_input_length = 1024
+     preprocess_text = text.strip().replace("\n", " ").replace("’", "'").strip()
+     tokenized_text = tokenizer.encode(preprocess_text, return_tensors="pt", truncation=True, max_length=max_input_length).to(device)
+
+     summary_ids = model.generate(
+         tokenized_text,
+         max_length=256,
+         num_beams=8,
+         repetition_penalty=3.0,
+         length_penalty=2.5,
+         early_stopping=False
+     )
+
+     output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+
+     return output
+
+ inputs = [gr.Textbox(lines=10, placeholder="Text Here...", label="Input")]
+ outputs = gr.Text(label="Summary")
+ title = "Text summarisation app"
+ description = "This demo uses AI models to summarize long text.\nIt focuses on scientific texts."
+
+ io = gr.Interface(fn=main_summarizer, inputs=inputs, outputs=outputs, title=title, description=description,
+
+     css=""".gr-button-primary { background: -webkit-linear-gradient(
+         90deg, #355764 0%, #55a8a1 100% ) !important; background: #355764;
+         background: linear-gradient(
+         90deg, #355764 0%, #55a8a1 100% ) !important;
+         background: -moz-linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important;
+         background: -webkit-linear-gradient(
+         90deg, #355764 0%, #55a8a1 100% ) !important;
+         color: white !important}"""
+ )
+
+ io.launch()
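
A quick way to sanity-check the updated app outside the Gradio UI is to run the same tokenizer and generation settings in a short standalone script. The sketch below is a minimal example, assuming torch and transformers are installed and the Armandoliv/t5-small-summarizer-scitldr checkpoint downloads successfully; the sample abstract is illustrative only.

# Standalone check mirroring the generation settings committed in app.py.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

checkpoint = "Armandoliv/t5-small-summarizer-scitldr"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Illustrative input; any scientific abstract can go here.
text = (
    "We study abstractive summarization of scientific papers and show that a "
    "small T5 model fine-tuned on SciTLDR can produce one-sentence summaries."
)

inputs = tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=1024).to(device)
summary_ids = model.generate(
    inputs,
    max_length=256,
    num_beams=8,
    repetition_penalty=3.0,
    length_penalty=2.5,
    early_stopping=False,
)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))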