wjjessen committed
Commit af44c43
1 Parent(s): b2d65e0

update code

Files changed (1)
  1. app.py +232 -90
app.py CHANGED
@@ -6,21 +6,26 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from PyPDF2 import PdfReader
 import re
 import streamlit as st
+from streamlit_tags import st_tags
 import sys
 import time
 import torch
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
 from transformers import pipeline

-# notes
+# Notes
 # https://huggingface.co/docs/transformers/pad_truncation
+# https://stackoverflow.com/questions/76431655/langchain-pypdfloader
+# https://dev.to/eteimz/understanding-langchains-recursivecharactertextsplitter-2846


 # file loader and preprocessor
-def file_preprocessing(file, skipfirst, skiplast):
+def file_preprocessing(
+    file, skipfirst, skiplast, chunk_size, chunk_overlap, exclude_words
+):
     loader = PyMuPDFLoader(file)
     pages = loader.load_and_split()
-    # skip page(s)
+    # Skip user-specified page(s)
     if (skipfirst == 1) & (skiplast == 0):
         del pages[0]
     elif (skipfirst == 0) & (skiplast == 1):
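For illustration, a minimal standalone sketch of the sentence filtering and chunking that the new `file_preprocessing()` signature enables: sentences containing excluded words are dropped, then the text is chunked with caller-supplied sizes. The sample text and exclude list below are made up, and the 800/80 values mirror the BART chunk settings added later in this diff.

```python
# Illustrative sketch; sample_text and exclude_words are made up.
import re

from langchain.text_splitter import RecursiveCharacterTextSplitter

sample_text = (
    "Figure 1 shows the study design. The intervention improved recall. "
    "Download the appendix for details."
)
exclude_words = ["Figure", "Download"]  # hypothetical exclusions

# Split into sentences and drop any sentence containing an excluded word
sentences = re.split(r"(?<=[.!?])\s+", sample_text)
kept = [s for s in sentences if not any(w in s for w in exclude_words)]
final_input_text = " ".join(kept)  # -> "The intervention improved recall."

# Chunk the filtered text with caller-supplied sizes
splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,  # mirrors the BART settings later in the diff (T5 uses 1000/100)
    chunk_overlap=80,
    length_function=len,
    separators=["\n\n", "\n", " ", ""],
)
text_chunks = splitter.split_text(final_input_text)
print(len(text_chunks), text_chunks[0])
```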
@@ -30,104 +35,156 @@ def file_preprocessing(file, skipfirst, skiplast):
         del pages[-1]
     else:
         pages = pages
-    # https://stackoverflow.com/questions/76431655/langchain-pypdfloader
-    content = ""
+    input_text = ""
     for page in pages:
-        content = content + page.page_content
-    content = re.sub("-\n", "", content)
-    print("\n###### New article ######\n")
-    print("Input text:\n")
-    print(content)
-    print("\nChunking...")
+        input_text = input_text + page.page_content
+    input_text = re.sub("-\n", "", input_text)
+    input_text = re.sub(r"\n", " ", input_text)
+    # Initialize a list to store valid sentences
+    valid_sentences = []
+    # Split the input_text into sentences
+    sentences = re.split(r"(?<=[.!?])\s+", input_text)
+    # Iterate through each sentence
+    for sentence in sentences:
+        # Check if any exclude_word is present in the sentence
+        if any(word in sentence for word in exclude_words):
+            continue  # Skip sentences with exclude_words
+        valid_sentences.append(sentence)
+    final_input_text = " ".join(valid_sentences)
+    print("\n############## New article ##############\n")
+    print("Cleaned and formatted input text:\n")
+    print(final_input_text)
+    print("\nExcluded words: " + str(exclude_words))
+    print("\nChunking input text...\n")
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,  # number of characters
-        chunk_overlap=100,
+        chunk_size=chunk_size,  # Number of characters
+        chunk_overlap=chunk_overlap,
         length_function=len,
-        separators=["\n\n", "\n", " ", ""],  # default list
+        separators=["\n\n", "\n", " ", ""],  # Default list
     )
-    # https://dev.to/eteimz/understanding-langchains-recursivecharactertextsplitter-2846
-    texts = text_splitter.split_text(content)
-    print("Number of tokens: " + str(len(texts)))
-    print("\nFirst three tokens:\n")
-    print(texts[0])
-    print("")
-    print(texts[1])
-    print("")
-    print(texts[2])
-    print("")
-    final_texts = ""
-    for text in texts:
-        final_texts = final_texts + text
-    return texts, final_texts
+    text_chunks = text_splitter.split_text(final_input_text)
+    print("Number of chunks: " + str(len(text_chunks)), end="")
+    chunks = ""
+    for text in text_chunks:
+        chunks = chunks + "\n\n" + text
+    print(chunks)
+    return final_input_text, text_chunks


-# function to count words in the input
-def preproc_count(filepath, skipfirst, skiplast):
-    texts, input_text = file_preprocessing(filepath, skipfirst, skiplast)
-    input_text = input_text.replace("-", "")
-    text_length = len(re.findall(r"\w+", input_text))
-    print("Input word count: " f"{text_length:,}")
-    return texts, input_text, text_length
-
-
-# function to covert (bart) summary to sentence case
-def convert_to_sentence_case(text):
-    sentences = re.split(r"(?<=[.!?])\s+", text)
-    formatted_sentences = [sentence.capitalize() for sentence in sentences]
-    return " ".join(formatted_sentences)
+# Function to count words in the input
+def preprocessing_word_count(
+    filepath, skipfirst, skiplast, chunk_size, chunk_overlap, exclude_words
+):
+    final_input_text, text_chunks = file_preprocessing(
+        filepath, skipfirst, skiplast, chunk_size, chunk_overlap, exclude_words
+    )
+    text_length = len(re.findall(r"\w+", final_input_text))
+    print("\nInput word count: " f"{text_length:,}")
+    print("Chunk size: " f"{chunk_size:,}")
+    print("Chunk overlap: %s" % chunk_overlap)
+    return final_input_text, text_chunks, text_length


-# llm pipeline
-def llm_pipeline(tokenizer, base_model, input_text, model_source):
-    pipe_sum = pipeline(
-        "summarization",
+# LLM pipeline for summarization
+def llm_pipeline(
+    tokenizer, base_model, final_input_text, model_source, minimum_token_number
+):
+    summarizer = pipeline(
+        task="summarization",
         model=base_model,
         tokenizer=tokenizer,
-        max_length=300,
-        min_length=200,
         truncation=True,
     )
     print("Model source: %s" % (model_source))
-    print("Summarizing...")
-    result = pipe_sum(input_text)
+    print("Summarizing...\n")
+    result = summarizer(
+        final_input_text,
+        min_length=minimum_token_number,
+        max_length=tokenizer.model_max_length,
+    )
     summary = result[0]["summary_text"]
-    print("Summarization finished\n")
     print("Summary text:\n")
     print(summary)
-    print("")
     return summary


-# function to count words in the summary
-def postproc_count(summary):
+# Function to count words in the summary
+def postprocessing_word_count(summary):
     text_length = len(re.findall(r"\w+", summary))
-    print("Summary word count: " f"{text_length:,}")
+    print("\nSummary word count: " f"{text_length:,}")
     return text_length


-# function to clean summary text
+# Function to clean bart summary text
 def clean_summary_text(summary):
-    # remove whitespace
-    summary_clean_1 = summary.strip()
-    # remove spaces before punctuation (bart)
-    summary_clean_2 = re.sub(r'\s([,.():;?!"](?:\s|$))', r"\1", summary_clean_1)
-    # convert to sentence case
-    summary_clean_3 = convert_to_sentence_case(summary_clean_2)
-    return summary_clean_3
+    # Remove next line
+    summary_cleaned_1 = re.sub(r"\n\s+", "", summary)
+    # Remove whitespace
+    summary_cleaned_2 = summary_cleaned_1.strip()
+    # Remove any spaces before punctuation (bart)
+    summary_cleaned_3 = re.sub(r"\s+([.,;:)!?](?:\s|$))", r"\1", summary_cleaned_2)
+    # Remove any spaces after "("
+    summary_cleaned_4 = re.sub(r"\(\s", r"(", summary_cleaned_3)
+    # Remove any spaces betweeen the closing parenthesis and other puncuation
+    summary_cleaned_5 = re.sub(r"(\))\s+([,.:;?!])", r"\1\2", summary_cleaned_4)
+    return summary_cleaned_5
+
+
+# Function to covert bart summary to sentence case
+def convert_to_sentence_case(summary):
+    # Split the paragraph into sentences based on '.', '!', or '?'
+    sentences = re.split(r"(?<=[.!?])\s+", summary)
+    # Convert to sentence case and join the sentences back together
+    formatted_sentences = [sentence.capitalize() for sentence in sentences]
+    return " ".join(formatted_sentences)
+
+
+def remove_duplicate_sentences(summary):
+    # Split the paragraph into sentences
+    sentences = re.split(r"(?<=[.!?])\s+", summary)
+    # Initialize a set to store unique sentences
+    unique_sentences = set()
+    # Initialize a list to store valid sentences
+    valid_sentences = []
+    # Iterate through each sentence
+    for sentence in sentences:
+        # Check if the sentence is unique
+        if sentence not in unique_sentences:
+            unique_sentences.add(sentence)
+            valid_sentences.append(sentence)
+    # Join the remaining valid sentences to create the final_summary
+    final_summary = " ".join(valid_sentences)
+    return final_summary
+
+
+# Function to remove incomplete last sentence from summary
+def remove_incomplete_last_sentence(summary):
+    # Split the paragraph into sentences based on '.', '!', or '?'
+    sentences = re.split(r"(?<=[.!?])\s+", summary)
+    # Check if the last sentence lacks punctuation at the end
+    if (
+        sentences
+        and sentences[-1].strip()
+        and not sentences[-1].strip().endswith((".", "!", "?"))
+    ):
+        # Remove the last sentence from the paragraph
+        sentences.pop()
+    # Join the sentences back together
+    return " ".join(sentences)


 @st.cache_data(ttl=60 * 60)
-# function to display the PDF
+# Function to display the PDF
 def displayPDF(file):
     with open(file, "rb") as f:
         base64_pdf = base64.b64encode(f.read()).decode("utf-8")
-    # embed pdf in html
+    # Embed pdf in html
     pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
-    # display file
+    # Display file
     st.markdown(pdf_display, unsafe_allow_html=True)


-# streamlit code
+# Streamlit code
 st.set_page_config(layout="wide")

@@ -152,14 +209,15 @@ def main():
     selected_model = st.radio(
         "Select a model to use:",
         model_names,
-        help="Defauls to T5-Small; for most articles it summarizes better than BART",
     )
     if selected_model == "BART":
+        chunk_size = 800
+        chunk_overlap = 80
         checkpoint = "ccdv/lsg-bart-base-16384-pubmed"
         tokenizer = AutoTokenizer.from_pretrained(
             checkpoint,
             truncation=True,
-            model_max_length=1000,
+            model_max_length=512,
             trust_remote_code=True,
         )
         if model_source == "Download model":
@@ -171,12 +229,14 @@ def main():
         else:
             base_model = "model_cache/models--ccdv--lsg-bart-base-16384-pubmed/snapshots/4072bc1a7a94e2b4fd860a5fdf1b71d0487dcf15"
     else:
+        chunk_size = 1000
+        chunk_overlap = 100
         checkpoint = "MBZUAI/LaMini-Flan-T5-77M"
         tokenizer = AutoTokenizer.from_pretrained(
             checkpoint,
             truncation=True,
             legacy=False,
-            model_max_length=1000,
+            model_max_length=512,
         )
         if model_source == "Download model":
             base_model = AutoModelForSeq2SeqLM.from_pretrained(
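For illustration, a sketch of what the lowered `model_max_length=512` means in practice: with `truncation=True`, encoded inputs are capped at 512 tokens before they reach the model. The long input string here is a made-up placeholder for extracted PDF text.

```python
# Illustrative sketch; the input string is a placeholder, the checkpoint matches the diff.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "MBZUAI/LaMini-Flan-T5-77M",
    legacy=False,
    model_max_length=512,
)
long_text = "one sentence from the article. " * 2000  # pretend extracted article text
input_ids = tokenizer.encode(long_text, add_special_tokens=True, truncation=True)
print(len(input_ids))  # capped at tokenizer.model_max_length, i.e. 512
```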
@@ -201,64 +261,142 @@ def main():
         "Model class: [BART](https://huggingface.co/docs/transformers/main/en/model_doc/bart)"
         "&nbsp;&nbsp;|&nbsp;&nbsp;Model: [lsg-bart-base-16384-pubmed](https://huggingface.co/ccdv/lsg-bart-base-16384-pubmed)"
     )
+    exclude_words = st_tags(
+        label="Enter words/phrases to exclude from the summary:",
+        text="Press enter to add words/phrases",
+    )
+    col1, col2, col3 = st.columns([1, 1, 5])
+    with col1:
+        minimum_token_number = st.number_input(
+            "Minimum number of tokens",
+            value=200,
+            step=25,
+            min_value=0,
+            max_value=512,
+            help="Use a larger number of tokens to increase summary length",
+        )
+    with col3:
+        st.subheader("Notes")
+        st.write(
+            "To remove content from the summary, copy and paste the word(s) and/or phrase(s) to exclude into the box above and summarize again."
+        )
+        st.write(
+            "To lengthen or shorten the summary, increase or decrease the minimum number of tokens to the left and summarize again."
+        )
     if st.button("Summarize"):
         col1, col2 = st.columns(2)
         filepath = "data/" + uploaded_file.name
         with open(filepath, "wb") as temp_file:
             temp_file.write(uploaded_file.read())
         with col1:
-            texts, input_text, preproc_text_length = preproc_count(
-                filepath, skipfirst, skiplast
+            (
+                final_input_text,
+                text_chunks,
+                preprocessing_text_length,
+            ) = preprocessing_word_count(
+                filepath,
+                skipfirst,
+                skiplast,
+                chunk_size,
+                chunk_overlap,
+                exclude_words,
             )
             st.info(
                 "Uploaded PDF&nbsp;&nbsp;|&nbsp;&nbsp;Number of words: "
-                f"{preproc_text_length:,}"
+                f"{preprocessing_text_length:,}"
             )
             pdf_viewer = displayPDF(filepath)
         with col2:
             start = time.time()
             with st.spinner("Summarizing..."):
                 summary = llm_pipeline(
-                    tokenizer, base_model, input_text, model_source
+                    tokenizer,
+                    base_model,
+                    final_input_text,
+                    model_source,
+                    minimum_token_number,
                 )
-            postproc_text_length = postproc_count(summary)
+            # Count summary words
+            postprocessing_text_length = postprocessing_word_count(summary)
             end = time.time()
             duration = end - start
             print("Duration: " f"{duration:.0f}" + " seconds")
             st.info(
                 "PDF Summary&nbsp;&nbsp;|&nbsp;&nbsp;Number of words: "
-                f"{postproc_text_length:,}"
+                f"{postprocessing_text_length:,}"
                 + "&nbsp;&nbsp;|&nbsp;&nbsp;Summarization time: "
                 f"{duration:.0f}" + " seconds"
             )
             if selected_model == "BART":
+                # Use regex to clean the unformatted bart summary
                 summary_cleaned = clean_summary_text(summary)
-                st.success(summary_cleaned)
-                with st.expander("Raw output"):
+                # Convert to sentence case
+                summary_cleaned_sentence_case = convert_to_sentence_case(
+                    summary_cleaned
+                )
+                # Remove duplicate sentences
+                summary_cleaned_sentence_case_dedup = remove_duplicate_sentences(
+                    summary_cleaned_sentence_case
+                )
+                # Remove incomplete last sentence
+                summary_cleaned_final = remove_incomplete_last_sentence(
+                    summary_cleaned_sentence_case_dedup
+                )
+                st.success(summary_cleaned_final)
+                with st.expander("Unformatted output"):
                     st.write(summary)
             else:
-                st.success(summary)
+                # Remove duplicate sentences
+                summary_dedup = remove_duplicate_sentences(summary)
+                # Remove incomplete last sentence
+                summary_final = remove_incomplete_last_sentence(summary_dedup)
+                st.success(summary_final)
+                with st.expander("Unformatted output"):
+                    st.write(summary)
         col1 = st.columns(1)
         url = "https://dev.to/eteimz/understanding-langchains-recursivecharactertextsplitter-2846"
         st.info("Additional information")
-        st.write("\n[RecursiveCharacterTextSplitter](%s) parameters used:" % url)
-        st.write("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;chunk_size=1000")
+        input_ids = tokenizer.encode(
+            final_input_text, add_special_tokens=True, truncation=True
+        )
         st.write(
-            "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;chunk_overlap=100"
+            "Maximum number of tokens generated for inputs into the model: %s"
+            % f"{len(input_ids):,}"
+        )
+        st.write("First 10 tokens:")
+        first_10_tokens = input_ids[:10]
+        first_10_tokens_text = tokenizer.convert_ids_to_tokens(first_10_tokens)
+        st.write(first_10_tokens_text)
+
+        st.write("[RecursiveCharacterTextSplitter](%s) parameters used:" % url)
+        st.write(
+            "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;chunk_size=%s"
+            % chunk_size
+        )
+        st.write(
+            "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;chunk_overlap=%s"
+            % chunk_overlap
         )
         st.write(
             "&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;length_function=len"
         )
+        st.write("\n")
+        st.write("Number of input text chunks: " + str(len(text_chunks)))
+        st.write("")
+        st.write("First three chunks:")
+        st.write("\n")
+        st.write(text_chunks[0])
         st.write("")
-        st.write("Number of tokens generated: " + str(len(texts)))
+        st.write(text_chunks[1])
         st.write("")
-        st.write("First three tokens:")
-        st.write("----")
-        st.write(texts[0])
-        st.write("----")
-        st.write(texts[1])
-        st.write("----")
-        st.write(texts[2])
+        st.write(text_chunks[2])
+        st.write("\n")
+
+        st.write(
+            "Extracted and cleaned text, less sentences containing excluded words:"
+        )
+        st.write("")
+        st.write(final_input_text)


     st.markdown(
@@ -273,6 +411,10 @@ div[class*="stMarkdown"] > div[data-testid="stMarkdownContainer"] > p {
 div[class*="stCheckbox"] > label[data-baseweb="checkbox"] {
     margin-bottom: -15px;
 }
+div[class*="stNumberInput"] > label > div[data-testid="stMarkdownContainer"] > p {
+    font-size: 1rem;
+    font-weight: 400;
+}
 body > a {
     text-decoration: underline;
 }
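For illustration, a minimal sketch of the call shape the reworked `llm_pipeline()` uses: a `summarization` pipeline whose `min_length` comes from the new UI control and whose `max_length` is taken from the tokenizer. The placeholder input text and the choice of the T5 checkpoint here are assumptions for the sketch.

```python
# Illustrative sketch; the input text is a placeholder and the T5 checkpoint is assumed.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

checkpoint = "MBZUAI/LaMini-Flan-T5-77M"
tokenizer = AutoTokenizer.from_pretrained(checkpoint, legacy=False, model_max_length=512)
base_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

summarizer = pipeline(
    task="summarization",
    model=base_model,
    tokenizer=tokenizer,
    truncation=True,
)
result = summarizer(
    "Cleaned text extracted from the uploaded PDF would go here...",
    min_length=200,  # the UI default for "Minimum number of tokens"
    max_length=tokenizer.model_max_length,
)
print(result[0]["summary_text"])
```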
 