Spaces:

storresbusquets
/

llm-demo1

Runtime error

App Files Community

storresbusquets committed on Sep 18, 2023

Commit

31eb124

•

1 Parent(s): 24a4fff

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -44

app.py CHANGED Viewed

@@ -86,49 +86,34 @@ class GradioInference:
         progress(0.40, desc="Summarizing")
         # Perform summarization on the transcription
-        # transcription_summary = self.summarizer(
-        #     results["text"], max_length=150, min_length=30, do_sample=False
-        # )
-        #### Prueba
-        # WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
-        # input_ids_sum = self.tokenizer(
-        #     [WHITESPACE_HANDLER(results["text"])],
-        #     return_tensors="pt",
-        #     padding="max_length",
-        #     truncation=True,
-        #     max_length=512
-        # )["input_ids"]
-        # output_ids_sum = self.model.generate(
-        #     input_ids=input_ids_sum,
-        #     max_length=130,
-        #     no_repeat_ngram_size=2,
-        #     num_beams=4
-        # )[0]
-        # summary = self.tokenizer.decode(
-        #     output_ids_sum,
-        #     skip_special_tokens=True,
-        #     clean_up_tokenization_spaces=False
-        # )
-        #### Fin prueba
-        ### Prueba con LLM ###
-        template = """
-        [INST] <<SYS>>
-        You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
-        <</SYS>>
-        {text}[/INST]
-        """
-        prompt = PromptTemplate(template=template, input_variables=["text"])
-        llm_chain = LLMChain(prompt=prompt, llm=self.llm)
-        summary2 = llm_chain.run(results["text"])
-        ### Fin prueba LLM ###
         progress(0.60, desc="Extracting Keywords")
@@ -171,8 +156,7 @@ class GradioInference:
         if lang == "english" or lang == "none":
             return (
                 results["text"],
-                summary2,
-                # transcription_summary[0]["summary_text"],
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
@@ -180,7 +164,7 @@ class GradioInference:
         else:
             return (
                 results["text"],
-                summary2,
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
@@ -229,7 +213,7 @@ class GradioInference:
             results["text"], max_length=150, min_length=30, do_sample=False
         )
-        #### Prueba
         WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
         input_ids_sum = self.tokenizer(
@@ -252,7 +236,7 @@ class GradioInference:
             skip_special_tokens=True,
             clean_up_tokenization_spaces=False
         )
-        #### Fin prueba
         progress(0.50, desc="Extracting Keywords")

         progress(0.40, desc="Summarizing")
         # Perform summarization on the transcription
+        transcription_summary = self.summarizer(
+            results["text"], max_length=150, min_length=30, do_sample=False
+        )
+        #### Resumen multilingue
+        WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
+        input_ids_sum = self.tokenizer(
+            [WHITESPACE_HANDLER(results["text"])],
+            return_tensors="pt",
+            padding="max_length",
+            truncation=True,
+            max_length=512
+        )["input_ids"]
+        output_ids_sum = self.model.generate(
+            input_ids=input_ids_sum,
+            max_length=130,
+            no_repeat_ngram_size=2,
+            num_beams=4
+        )[0]
+        summary = self.tokenizer.decode(
+            output_ids_sum,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
+        #### Fin resumen multilingue
         progress(0.60, desc="Extracting Keywords")
         if lang == "english" or lang == "none":
             return (
                 results["text"],
+                transcription_summary[0]["summary_text"],
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
         else:
             return (
                 results["text"],
+                summary,
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
             results["text"], max_length=150, min_length=30, do_sample=False
         )
+        #### Resumen multilingue
         WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
         input_ids_sum = self.tokenizer(
             skip_special_tokens=True,
             clean_up_tokenization_spaces=False
         )
+        #### Fin resumen multilingue
         progress(0.50, desc="Extracting Keywords")