astro21 committed on
Commit
272d571
1 Parent(s): d379fcc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -10
app.py CHANGED
@@ -1,33 +1,48 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import os
 
4
 
5
  # Load the text summarization pipeline
6
  summarizer = pipeline("summarization", model="astro21/bart-cls")
7
 
8
  chunk_counter = 0
9
 
 
10
  def summarize_text(input_text):
11
- global chunk_counter # Use a global variable to keep track of the chunk number
12
- chunk_counter = 0 # Initialize the chunk counter
13
 
14
- # Split the input text into chunks with a maximum size of 512
15
- max_chunk_size = 512
16
- chunks = [input_text[i:i+max_chunk_size] for i in range(0, len(input_text), max_chunk_size)]
17
 
18
  summarized_chunks = []
 
 
 
19
  for chunk in chunks:
20
  chunk_counter += 1
21
- # Summarize each chunk
22
  summarized_chunk = summarizer(chunk, max_length=128, min_length=64, do_sample=False)[0]['summary_text']
23
  summarized_chunks.append(f"Chunk {chunk_counter}:\n{summarized_chunk}")
 
 
 
24
 
25
- # Concatenate the summaries
26
  summarized_text = "\n".join(summarized_chunks)
27
- return summarized_text
 
 
 
 
 
 
 
 
 
28
 
29
  def read_file(file):
30
- with open(file.name, 'r') as file_:
 
31
  content = file_.read()
32
  return content
33
 
@@ -37,8 +52,18 @@ def summarize_text_file(file):
37
  content = read_file(file)
38
  return summarize_text(content)
39
 
 
40
  input_type = gr.inputs.File("text")
41
 
42
- demo = gr.Interface(fn=summarize_text_file, inputs=input_type, outputs="text", live=True)
 
 
 
 
 
 
 
 
 
43
 
44
  demo.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import os
4
+ import pandas as pd
5
 
6
  # Load the text summarization pipeline
7
  summarizer = pipeline("summarization", model="astro21/bart-cls")
8
 
9
  chunk_counter = 0
10
 
11
+
12
  def summarize_text(input_text):
13
+ global chunk_counter
14
+ chunk_counter = 0
15
 
16
+ max_chunk_size = 1024
17
+ chunks = [input_text[i:i + max_chunk_size] for i in range(0, len(input_text), max_chunk_size)]
 
18
 
19
  summarized_chunks = []
20
+ chunk_lengths = []
21
+ summarized_chunks_only = []
22
+
23
  for chunk in chunks:
24
  chunk_counter += 1
 
25
  summarized_chunk = summarizer(chunk, max_length=128, min_length=64, do_sample=False)[0]['summary_text']
26
  summarized_chunks.append(f"Chunk {chunk_counter}:\n{summarized_chunk}")
27
+ summarized_chunks_only.append(summarized_chunk)
28
+
29
+ chunk_lengths.append(len(chunk))
30
 
 
31
  summarized_text = "\n".join(summarized_chunks)
32
+ summarized_text_only = "\n".join(summarized_chunks_only)
33
+
34
+ # Save the merged summary to a file
35
+ with open("summarized.txt", "w") as output_file:
36
+ output_file.write(summarized_text_only)
37
+
38
+ chunk_df = pd.DataFrame({'Chunk Number': range(1, chunk_counter + 1), 'Chunk Length': chunk_lengths})
39
+
40
+ return summarized_text, chunk_df, "summarized.txt"
41
+
42
 
43
  def read_file(file):
44
+ print(file[0].name)
45
+ with open(file[0].name, 'r') as file_:
46
  content = file_.read()
47
  return content
48
 
 
52
  content = read_file(file)
53
  return summarize_text(content)
54
 
55
+
56
  input_type = gr.inputs.File("text")
57
 
58
+ # Name the outputs using the label parameter and provide a download option
59
+ demo = gr.Interface(fn=summarize_text_file, inputs=input_type,
60
+ outputs=[gr.Textbox(label="Summarized Text"),
61
+ gr.Dataframe(label="Chunk Information", type="pandas"),
62
+ gr.File(label="Download Summarized Text", type="file", live=False)],
63
+ title = "Text Summarization",
64
+ description = "Summarize text using BART",
65
+ theme = "huggingface",
66
+ allow_flagging="never",
67
+ live=True)
68
 
69
  demo.launch()