pleonova committed
Commit 559f5c9
Parent(s): 1d7e9d0

Remove extra spaces

Files changed (1)
  1. models.py +0 -7
models.py CHANGED
@@ -15,14 +15,12 @@ def create_nest_sentences(document:str, token_max_length = 1024):
   for sentence in re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', document.replace("\n", ' ')):
     tokens_in_sentence = tokenizer(str(sentence), truncation=False, padding=False)[0] # hugging face transformer tokenizer
     length += len(tokens_in_sentence)
-
     if length < token_max_length:
       sent.append(sentence)
     else:
       nested.append(sent)
       sent = [sentence]
       length = 0
-
   if sent:
     nested.append(sent)
   return nested
@@ -42,8 +40,6 @@ def keyword_gen(kw_model, sequence:str):
     top_n=10)
   return keywords
 
-
-
 # Reference: https://huggingface.co/facebook/bart-large-mnli
 @st.cache_resource
 def load_summary_model():
@@ -69,7 +65,6 @@ def summarizer_gen(summarizer, sequence:str, maximum_tokens:int, minimum_tokens:
     no_repeat_ngram_size=3)
   return output[0].get('summary_text')
 
-
 # # Reference: https://www.datatrigger.org/post/nlp_hugging_face/
 # # Custom summarization pipeline (to handle long articles)
 # def summarize(text, minimum_length_of_summary = 100):
@@ -80,7 +75,6 @@ def summarizer_gen(summarizer, sequence:str, maximum_tokens:int, minimum_tokens:
 #   # Untokenize
 #   return([tokenizer_bart.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids][0])
 
-
 # Reference: https://huggingface.co/spaces/team-zero-shot-nli/zero-shot-nli/blob/main/utils.py
 @st.cache_resource
 def load_model():
@@ -93,4 +87,3 @@ def load_model():
 def classifier_zero(classifier, sequence:str, labels:list, multi_class:bool):
   outputs = classifier(sequence, labels, multi_label=multi_class)
   return outputs['labels'], outputs['scores']
-
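For orientation, a minimal usage sketch of the create_nest_sentences helper touched by the first hunk: it splits a document into sentences with a regex, then greedily groups them so that each group's running tokenizer length stays under token_max_length. The sample document is invented for illustration, and the sketch assumes models.py and its dependencies (transformers, streamlit, keybert) are importable.

from models import create_nest_sentences

# Invented sample text; any multi-sentence string works.
doc = ("Transformers are attention-based models. They handle long-range "
       "dependencies well. Each chunk printed below was built under the token budget.")

# Returns a list of sentence lists; each inner list was assembled while
# the running token count (per the tokenizer inside the helper) was
# still below token_max_length.
nested = create_nest_sentences(doc, token_max_length=1024)
for i, chunk in enumerate(nested):
    print(i, " ".join(chunk))

One quirk worth knowing: when a chunk overflows, length is reset to 0 rather than to the token count of the sentence carried into the new chunk, so later chunks can run slightly over the stated budget.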
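Similarly, a hedged sketch of the zero-shot path at the bottom of the diff: classifier_zero forwards its arguments to a classifier callable and reads back 'labels' and 'scores', which matches the Hugging Face zero-shot-classification pipeline API. The assumption that load_model() (cached via @st.cache_resource) returns such a pipeline follows the team-zero-shot-nli reference above it; the sequence and candidate labels are invented.

from models import load_model, classifier_zero

# Assumption: load_model() returns a transformers zero-shot-classification
# pipeline (e.g. built on facebook/bart-large-mnli), as the references in
# models.py suggest.
classifier = load_model()

ranked_labels, scores = classifier_zero(
    classifier,
    sequence="The quarterly report beat revenue expectations.",
    labels=["finance", "sports", "weather"],  # invented candidate labels
    multi_class=True,  # forwarded to the pipeline as multi_label
)
print(dict(zip(ranked_labels, scores)))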