efederici committed on
Commit
5441fe4
1 Parent(s): 32cd6fe

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -87,7 +87,7 @@ def is_subset(text1, text2):
87
  def cleaning(text, tags):
88
  return [tag for tag in tags if is_subset(text, tag)]
89
 
90
- def get_texts(self, text, max_len):
91
  texts = list(filter(lambda x : x != '', text.split('\n\n')))
92
  lengths = [len(tokenizer.encode(paragraph)) for paragraph in texts]
93
  output = []
@@ -99,7 +99,7 @@ def get_texts(self, text, max_len):
99
  output.append(par)
100
  return output
101
 
102
- def get_tags(self, text, generate_kwargs):
103
  input_text = 'summarize: ' + text.strip().replace('\n', ' ')
104
  tokenized_text = tokenizer.encode(input_text, return_tensors="pt")
105
  with torch.no_grad():
@@ -115,7 +115,7 @@ def get_tags(self, text, generate_kwargs):
115
 
116
  return list(set(itertools.chain(*output)))
117
 
118
- def tag(self, text, max_len, generate_kwargs):
119
  texts = get_texts(text, max_len)
120
  all_tags = [get_tags(text, generate_kwargs) for text in texts]
121
  flatten_tags = itertools.chain(*all_tags)
 
87
  def cleaning(text, tags):
88
  return [tag for tag in tags if is_subset(text, tag)]
89
 
90
+ def get_texts(text, max_len):
91
  texts = list(filter(lambda x : x != '', text.split('\n\n')))
92
  lengths = [len(tokenizer.encode(paragraph)) for paragraph in texts]
93
  output = []
 
99
  output.append(par)
100
  return output
101
 
102
+ def get_tags(text, generate_kwargs):
103
  input_text = 'summarize: ' + text.strip().replace('\n', ' ')
104
  tokenized_text = tokenizer.encode(input_text, return_tensors="pt")
105
  with torch.no_grad():
 
115
 
116
  return list(set(itertools.chain(*output)))
117
 
118
+ def tag(text, max_len, generate_kwargs):
119
  texts = get_texts(text, max_len)
120
  all_tags = [get_tags(text, generate_kwargs) for text in texts]
121
  flatten_tags = itertools.chain(*all_tags)