gaborcselle
/

font-identifier

@@ -1,7 +1,6 @@
 # Generate sample data with 800x400 images of fonts in /System/Library/Fonts
 # 50 images per font, 1 font per image
 import os
 from PIL import Image, ImageDraw, ImageFont
 import nltk
@@ -14,12 +13,16 @@ nltk.download('brown')
 # Sample text for prose and code
 prose_text = " ".join(brown.words(categories='news')[:50]) # First 50 words from news category
-font_dir = '/System/Library/Fonts/'
 output_dir = './font_images'
 os.makedirs(output_dir, exist_ok=True)
 all_brown_words = sorted(set(brown.words(categories='news')))
 def wrap_text(text, line_length=10):
     """
     Wraps the provided text every 'line_length' words.
@@ -35,27 +38,38 @@ def random_code_text(base_code, num_lines=15):  # Increase number of lines
     lines = base_code.split("\n")
     return "\n".join(random.sample(lines, min(num_lines, len(lines))))
-for font_file in os.listdir(font_dir):
-    if font_file.endswith('.ttf'):
-        font_path = os.path.join(font_dir, font_file)
-        font_name = font_file.split('.')[0]
-        print(font_name)
-        j = 0
-        for i in range(50):  # Generate 50 images per font
-            prose_sample = random_prose_text(all_brown_words)
-            for text in [prose_sample]:
-                img = Image.new('RGB', (800, 400), color="white")  # Canvas size
-                draw = ImageDraw.Draw(img)
                 font_size = random.choice(range(32, 128))  # Increased minimum font size
                 font = ImageFont.truetype(font_path, font_size)
-                # Random offsets, but ensuring that text isn't too far off the canvas
-                offset_x = random.randint(-20, 10)
-                offset_y = random.randint(-20, 10)
-                draw.text((offset_x, offset_y), text, fill="black", font=font)
-                j += 1
-                output_file = os.path.join(output_dir, f"{font_name}_{j}.png")
-                img.save(output_file)

 # Generate sample data: 800x400 images of allowlisted fonts found in the macOS system font directories
 # 10 images per font file (reduced from 50 for speed), 1 font per image
 import os
 from PIL import Image, ImageDraw, ImageFont
 import nltk
 # Sample text for prose and code
 prose_text = " ".join(brown.words(categories='news')[:50]) # First 50 words from news category
+# Note: this only works on macOS, where these are the default system font directories
+font_dirs = ['/System/Library/Fonts/', '/System/Library/Fonts/Supplemental/']
 output_dir = './font_images'
 os.makedirs(output_dir, exist_ok=True)
 all_brown_words = sorted(set(brown.words(categories='news')))
+# This is a list of fonts that we want to use for our sample data
+FONT_ALLOWLIST = ["Arial", "Avenir", "Courier", "Helvetica", "Georgia", "Tahoma", "Times New Roman", "Verdana"]
 def wrap_text(text, line_length=10):
     """
     Wraps the provided text every 'line_length' words.
     lines = base_code.split("\n")
     return "\n".join(random.sample(lines, min(num_lines, len(lines))))
+for font_dir in font_dirs:  # Render sample images for every allowlisted .ttf/.ttc font file in each directory
+    for font_file in os.listdir(font_dir):
+        if font_file.endswith('.ttf') or font_file.endswith('.ttc'):
+            font_path = os.path.join(font_dir, font_file)
+            font_name = font_file.split('.')[0]  # File name up to the first '.', compared verbatim against the allowlist
+            if font_name not in FONT_ALLOWLIST:
+                continue
+            # Print the font path and name so progress is visible while the script runs
+            print(font_path, font_name)
+            if font_file.endswith('.ttc'):
+                # .ttc files bundle several faces in one file; index=0 asks for the first one (size is random in 32..127)
+                font = ImageFont.truetype(font_path, random.choice(range(32, 128)), index=0)
+            else:
+                # .ttf files contain a single face, so no index is passed
                 font_size = random.choice(range(32, 128))  # Random size in 32..127; NOTE(review): picked once per font file, so every image of this font shares it - confirm intended
                 font = ImageFont.truetype(font_path, font_size)
+            # Counter used as the numeric suffix of the image filename (restarts for each font file)
+            j = 0
+            for i in range(10):  # Generate 10 images per font file (reduced from 50 for now to make things faster)
+                prose_sample = random_prose_text(all_brown_words)
+                for text in [prose_sample]:  # Single-element list: only prose samples are rendered here
+                    img = Image.new('RGB', (800, 400), color="white")  # White 800x400 canvas
+                    draw = ImageDraw.Draw(img)
+                    # Random offsets between -20 and 10 (inclusive), so the text starts at or slightly beyond the top-left corner
+                    offset_x = random.randint(-20, 10)
+                    offset_y = random.randint(-20, 10)
+                    draw.text((offset_x, offset_y), text, fill="black", font=font)
+                    j += 1  # Incremented before saving, so filenames start at _1
+                    output_file = os.path.join(output_dir, f"{font_name}_{j}.png")
+                    img.save(output_file)