AlbertoFH98 committed
Commit: f6c9588
Parent: 121ee4b

Update utils.py

Files changed (1)
  1. utils.py +6 -4
utils.py CHANGED
@@ -61,7 +61,10 @@ class TogetherLLM(LLM):
         new_transcription_aux = []
         for text in transcription.split('\n\n'):
             if text not in new_transcription_aux:
-                new_transcription_aux.append(text)
+                is_substring = any(transcription_aux.replace('"', '').lower() in text.replace('"', '').lower()\
+                                   for transcription_aux in new_transcription_aux)
+                if not is_substring:
+                    new_transcription_aux.append(text)
         return '\n\n'.join(new_transcription_aux)
 
     def _call(
@@ -76,7 +79,6 @@ class TogetherLLM(LLM):
         # -- Extract transcription
         together.api_key = self.together_api_key
         cleaned_prompt = self.clean_duplicates(prompt)
-        print(cleaned_prompt)
         resultado = re.search(regex_transcription, cleaned_prompt, re.DOTALL)
 
         resultado = re.sub(regex_init_transcription, "", resultado.group(1).strip()).replace('\"', '')
@@ -88,7 +90,7 @@ class TogetherLLM(LLM):
             transcription_cleaned = re.sub(regex_init_transcription, "", transcription.strip()).replace('\"', '')
             transcription_cleaned = re.sub(r'\W+', ' ', transcription_cleaned).strip().lower()
             for resultado_aux in resultado_alpha_num:
-                if resultado_aux in transcription_cleaned or transcription_cleaned in resultado_aux:
+                if resultado_aux in transcription_cleaned:
                     init_transcription = re.search(regex_init_transcription, transcription).group(0)
                     new_transcription.append(init_transcription + '\"' + resultado_aux + '\"')
         # -- Merge with original transcription
@@ -96,13 +98,13 @@ class TogetherLLM(LLM):
         new_cleaned_prompt = re.sub(regex_transcription, f"""CONTEXTO:
         {new_transcription}
         PREGUNTA:""", cleaned_prompt, re.DOTALL)
-        print(new_cleaned_prompt)
         output = together.Complete.create(new_cleaned_prompt,
                                           model=self.model,
                                           max_tokens=self.max_tokens,
                                           temperature=self.temperature,
                                           )
         text = output['output']['choices'][0]['text']
+        text = self.clean_duplicates(text)
         return text
 
 # -- Python function to setup basic features: translator, SpaCy pipeline and LLM model
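For reference, a minimal standalone sketch of the new deduplication behaviour introduced in clean_duplicates. The function name clean_duplicates_sketch and the sample transcript are hypothetical and only illustrate the diff above: a block is now skipped not just when it is an exact repeat, but also when a previously kept block already appears inside it once quotes are stripped and case is ignored.

def clean_duplicates_sketch(transcription: str) -> str:
    # Hypothetical free-function version of TogetherLLM.clean_duplicates after this commit.
    kept = []
    for text in transcription.split('\n\n'):
        if text not in kept:
            # New in this commit: drop a block when any previously kept block
            # is already contained in it (quotes removed, case-insensitive).
            is_substring = any(
                prev.replace('"', '').lower() in text.replace('"', '').lower()
                for prev in kept
            )
            if not is_substring:
                kept.append(text)
    return '\n\n'.join(kept)

# Example (hypothetical transcript): the second block repeats the first and is dropped.
sample = 'Locutor 1: "Hola a todos"\n\nLocutor 1: "HOLA A TODOS" y bienvenidos\n\nLocutor 2: "Gracias"'
print(clean_duplicates_sketch(sample))
# Locutor 1: "Hola a todos"
#
# Locutor 2: "Gracias"

The same helper is what the commit now applies to the model output as well (text = self.clean_duplicates(text)), so repeated passages in the completion are collapsed before the text is returned.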