k4d3
/

toolkit

Safetensors

Model card Files Files and versions Community

k4d3 committed on Oct 2, 2024

Commit

b54f417

1 Parent(s): ee41534

removed perplexity stuff

Browse files

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

Files changed (1) hide show

joy +19 -46

joy CHANGED Viewed

@@ -330,20 +330,14 @@ class JoyCaptionModel:
         caption_type: str,
         caption_tone: str,
         caption_length: str | int,
-        custom_prompt: str | None = None
-    ) -> Tuple[str, float, float]:
         """
-        Process the input image and generate a caption.
-        Args:
-            input_image (Image.Image): The input image to caption.
-            caption_type (str): The type of caption to generate.
-            caption_tone (str): The tone of the caption.
-            caption_length (str | int): The desired length of the caption.
-            custom_prompt (str | None): A custom prompt for caption generation.
         Returns:
-            Tuple[str, float, float]: A tuple containing the generated caption, its entropy, and its perplexity.
         """
         torch.cuda.empty_cache()
@@ -370,11 +364,7 @@ class JoyCaptionModel:
         token_ids = generate_ids[0].tolist()
         entropy = self._calculate_entropy(token_ids)
-        # Calculate perplexity
-        loss = self._calculate_perplexity(generate_ids, input_ids)
-        perplexity = math.exp(-loss)
-        return caption.strip(), entropy, perplexity
     def generate_valid_caption(
         self,
@@ -388,7 +378,6 @@ class JoyCaptionModel:
         min_sentence_count: int = 3,
         max_word_repetitions: int = 5,
         min_entropy: float = 1.75,
-        max_perplexity: float = 100.0,
         stop_words: set[str] = STOP_WORDS
     ) -> str:
         """
@@ -400,18 +389,24 @@ class JoyCaptionModel:
             caption_tone (str): The tone of the caption.
             caption_length (str | int): The desired length of the caption.
             custom_prompt (str | None): A custom prompt for caption generation.
-            limited_words (Dict[str, int]): Dictionary of words with their maximum allowed occurrences. Default is {"fluffy": 2}.
             min_sentence_count (int): Minimum required number of sentences. Default is 3.
-            max_word_repetitions (int): Maximum allowed repetitions for words longer than 4 characters. Default is 5.
-            min_entropy (float): Minimum required entropy of the caption. Default is 1.75.
-            max_perplexity (float): Maximum allowed perplexity of the caption. Default is 100.0.
-            stop_words (set[str]): Set of stop words to exclude from repetition checks. Default is STOP_WORDS.
         Returns:
             str: A valid caption meeting all specified criteria.
         """
         while True:
-            caption, entropy, perplexity = self.process_image(
                 input_image, caption_type, caption_tone,
                 caption_length, custom_prompt
             )
@@ -435,8 +430,6 @@ class JoyCaptionModel:
                 print(f"Retrying: Only {sentence_count} sentences (min: {min_sentence_count}).\nCaption: {caption!r}")
             elif entropy < min_entropy:
                 print(f"Retrying: Low entropy ({entropy:.2f} < {min_entropy}).\nCaption: {caption!r}")
-            elif perplexity > max_perplexity:
-                print(f"Retrying: High perplexity ({perplexity:.2f} > {max_perplexity}).\nCaption: {caption!r}")
             else:
                 return caption
@@ -597,26 +590,6 @@ class JoyCaptionModel:
         return entropy
-    def _calculate_perplexity(self, generate_ids, input_ids):
-        """
-        Calculate the perplexity of the generated caption.
-        Args:
-            generate_ids (torch.Tensor): Generated token IDs.
-            input_ids (torch.Tensor): Input token IDs.
-        Returns:
-            float: Perplexity of the generated caption.
-        """
-        with torch.no_grad():
-            outputs = self.text_model(
-                input_ids=input_ids,
-                labels=generate_ids,
-                output_hidden_states=True,
-            )
-            loss = outputs.loss
-        return loss.item()
 def main():
     """
@@ -738,7 +711,7 @@ def main():
                     args, image_path, tagset_normalizer
                 )
-            print(f"\nCaptioning {image_path}...\nCustom prompt: {custom_prompt}")
             caption = joy_caption_model.generate_valid_caption(
                 input_image,

         caption_type: str,
         caption_tone: str,
         caption_length: str | int,
+        custom_prompt: str | None = None,
+    ) -> Tuple[str, float]:
         """
+        Process an input image and generate a caption based on specified parameters.
+        Also calculates the entropy of the generated caption.
         Returns:
+            Tuple[str, float]: The generated caption and its entropy.
         """
         torch.cuda.empty_cache()
         token_ids = generate_ids[0].tolist()
         entropy = self._calculate_entropy(token_ids)
+        return caption.strip(), entropy
     def generate_valid_caption(
         self,
         min_sentence_count: int = 3,
         max_word_repetitions: int = 5,
         min_entropy: float = 1.75,
         stop_words: set[str] = STOP_WORDS
     ) -> str:
         """
             caption_tone (str): The tone of the caption.
             caption_length (str | int): The desired length of the caption.
             custom_prompt (str | None): A custom prompt for caption generation.
+            limited_words (Dict[str, int]): Dictionary of words with their maximum allowed occurrences. Default is {"fluffy": 1}.
             min_sentence_count (int): Minimum required number of sentences. Default is 3.
+            max_word_repetitions (int): Maximum allowed repetitions for words longer than 4 characters. Default is 5.
+            min_entropy (float): Minimum required entropy of the caption. Default is 1.75.
         Returns:
             str: A valid caption meeting all specified criteria.
+        The method retries caption generation if:
+        - The caption contains only special characters
+        - The caption does not end with a period, exclamation mark, or question mark
+        - Any word in limited_words appears more than its specified maximum times
+        - Any word longer than 4 characters is repeated more than max_word_repetitions times
+        - The caption contains fewer than min_sentence_count sentences
+        - The entropy of the caption is below min_entropy
         """
         while True:
+            caption, entropy = self.process_image(
                 input_image, caption_type, caption_tone,
                 caption_length, custom_prompt
             )
                 print(f"Retrying: Only {sentence_count} sentences (min: {min_sentence_count}).\nCaption: {caption!r}")
             elif entropy < min_entropy:
                 print(f"Retrying: Low entropy ({entropy:.2f} < {min_entropy}).\nCaption: {caption!r}")
             else:
                 return caption
         return entropy
 def main():
     """
                     args, image_path, tagset_normalizer
                 )
+            print(f"\nCustom prompt: {custom_prompt}")
             caption = joy_caption_model.generate_valid_caption(
                 input_image,