AlbertoFH98 committed
Commit: f6c9588
Parent: 121ee4b

Update utils.py

Files changed (1)
  1. utils.py +6 -4
utils.py CHANGED
@@ -61,7 +61,10 @@ class TogetherLLM(LLM):
         new_transcription_aux = []
         for text in transcription.split('\n\n'):
             if text not in new_transcription_aux:
-                new_transcription_aux.append(text)
+                is_substring = any(transcription_aux.replace('"', '').lower() in text.replace('"', '').lower()\
+                                   for transcription_aux in new_transcription_aux)
+                if not is_substring:
+                    new_transcription_aux.append(text)
         return '\n\n'.join(new_transcription_aux)
 
     def _call(
@@ -76,7 +79,6 @@ class TogetherLLM(LLM):
         # -- Extract transcription
         together.api_key = self.together_api_key
         cleaned_prompt = self.clean_duplicates(prompt)
-        print(cleaned_prompt)
         resultado = re.search(regex_transcription, cleaned_prompt, re.DOTALL)
 
         resultado = re.sub(regex_init_transcription, "", resultado.group(1).strip()).replace('\"', '')
@@ -88,7 +90,7 @@ class TogetherLLM(LLM):
             transcription_cleaned = re.sub(regex_init_transcription, "", transcription.strip()).replace('\"', '')
             transcription_cleaned = re.sub(r'\W+', ' ', transcription_cleaned).strip().lower()
             for resultado_aux in resultado_alpha_num:
-                if resultado_aux in transcription_cleaned or transcription_cleaned in resultado_aux:
+                if resultado_aux in transcription_cleaned:
                     init_transcription = re.search(regex_init_transcription, transcription).group(0)
                     new_transcription.append(init_transcription + '\"' + resultado_aux + '\"')
         # -- Merge with original transcription
@@ -96,13 +98,13 @@ class TogetherLLM(LLM):
         new_cleaned_prompt = re.sub(regex_transcription, f"""CONTEXTO:
         {new_transcription}
         PREGUNTA:""", cleaned_prompt, re.DOTALL)
-        print(new_cleaned_prompt)
         output = together.Complete.create(new_cleaned_prompt,
                                           model=self.model,
                                           max_tokens=self.max_tokens,
                                           temperature=self.temperature,
                                           )
         text = output['output']['choices'][0]['text']
+        text = self.clean_duplicates(text)
         return text
 
 # -- Python function to setup basic features: translator, SpaCy pipeline and LLM model
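For reference, a minimal standalone sketch of the new deduplication behaviour introduced in clean_duplicates. The function name clean_duplicates_sketch and the sample transcript are hypothetical and only illustrate the diff above: a block is now skipped not just when it is an exact repeat, but also when a previously kept block already appears inside it once quotes are stripped and case is ignored.

def clean_duplicates_sketch(transcription: str) -> str:
    # Hypothetical free-function version of TogetherLLM.clean_duplicates after this commit.
    kept = []
    for text in transcription.split('\n\n'):
        if text not in kept:
            # New in this commit: drop a block when any previously kept block
            # is already contained in it (quotes removed, case-insensitive).
            is_substring = any(
                prev.replace('"', '').lower() in text.replace('"', '').lower()
                for prev in kept
            )
            if not is_substring:
                kept.append(text)
    return '\n\n'.join(kept)

# Example (hypothetical transcript): the second block repeats the first and is dropped.
sample = 'Locutor 1: "Hola a todos"\n\nLocutor 1: "HOLA A TODOS" y bienvenidos\n\nLocutor 2: "Gracias"'
print(clean_duplicates_sketch(sample))
# Locutor 1: "Hola a todos"
#
# Locutor 2: "Gracias"

The same helper is what the commit now applies to the model output as well (text = self.clean_duplicates(text)), so repeated passages in the completion are collapsed before the text is returned.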