shoni committed on
Commit
1c5f81a
•
1 Parent(s): 53a9d53

fix comicsans detector save

Browse files
.ipynb_checkpoints/README-checkpoint.md DELETED
@@ -1,69 +0,0 @@
1
- # Comic Sans Detector
2
-
3
- This repository contains a fine-tuned ResNet-18 model, specifically trained to detect whether an image contains Comic Sans font. It is a fine-tuning of a previously fine-tuned font classification model, based on the ResNet-18 foundation model.
4
-
5
- ## Repository Contents
6
-
7
- - **`comic-detector.ipynb`**: A notebook that demonstrates the training and evaluation process for the Comic Sans detector using the fine-tuned ResNet-18 model.
8
- - **`image-format-generalizer.ipynb`**: A utility notebook for preparing and normalizing image datasets, ensuring consistent formatting across `/data` folders.
9
-
10
- ## Dataset Structure (Not Included)
11
-
12
- The dataset used for training and evaluation should follow this structure:
13
- ```
14
- /data
15
- ├── comic/
16
- │ ├── image1.jpg
17
- │ ├── image2.png
18
- │ └── ...
19
- ├── not-comic/
20
- │ ├── image1.jpg
21
- │ ├── image2.png
22
- │ └── ...
23
- ```
24
- - **`comic/`**: Contains images labeled as featuring Comic Sans font.
25
- - **`not-comic/`**: Contains images labeled as not featuring Comic Sans font.
26
-
27
- ⚠️ The dataset itself is not included in this repository. You must prepare and structure your dataset as described.
28
-
29
- ## How to Use
30
-
31
- ### 1. Clone the Repository
32
- ```bash
33
- git clone https://huggingface.co/your-username/comic-sans-detector
34
- cd comic-sans-detector
35
- ```
36
-
37
- ### 2. Prepare the Dataset
38
- Ensure your dataset is properly structured under a `/data` directory with `comic/` and `not-comic/` folders.
39
-
40
- ### 3. Run the Training Notebook
41
- Open `comic-detector.ipynb` in Jupyter Notebook or an equivalent environment to retrain the model or evaluate it.
42
-
43
- ### 4. Format Images (Optional)
44
- If your dataset images are not in a consistent format, use `image-format-generalizer.ipynb` to preprocess them.
45
-
46
- ## Model Usage
47
-
48
- The fine-tuned model can be deployed directly via the Hugging Face Inference API. Once uploaded, the model can be used to classify whether an image contains Comic Sans font.
49
-
50
- Example API usage (replace `your-username/comic-sans-detector` with your repository name):
51
- ```python
52
- from transformers import pipeline
53
-
54
- classifier = pipeline("image-classification", model="your-username/comic-sans-detector")
55
- result = classifier("path/to/image.jpg")
56
- print(result)
57
- ```
58
-
59
- ## Fine-Tuning Process
60
-
61
- This model was fine-tuned on a previously fine-tuned font classification model, which itself was based on the ResNet-18 foundation model. The fine-tuning process was conducted using a custom dataset with two classes: `comic` and `not-comic`.
62
-
63
- ## Acknowledgments
64
-
65
- This project is based on the original font identifier repository by [gaborcselle](https://huggingface.co/gaborcselle/font-identifier).
66
-
67
- ## License
68
-
69
- Include your preferred license here (e.g., MIT, Apache 2.0, etc.).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/comic-detector-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/image-format-generalizer-checkpoint.ipynb DELETED
@@ -1,84 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "id": "d3fca531-0f68-4951-b168-db8ad2d25971",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from PIL import Image, UnidentifiedImageError\n",
11
- "import os\n",
12
- "import pillow_avif # AVIF support for Pillow\n",
13
- "\n",
14
- "# Define paths to folders\n",
15
- "data_dir = \"./data\"\n",
16
- "folders = [\"comic\", \"not-comic\"] # Both folders to process\n",
17
- "output_format = \"png\" # Target image format\n",
18
- "\n",
19
- "# Function to clean, convert, and rename images\n",
20
- "def process_images(data_dir, folders, output_format):\n",
21
- " for folder in folders:\n",
22
- " folder_path = os.path.join(data_dir, folder)\n",
23
- " print(f\"Processing folder: {folder_path}\")\n",
24
- "\n",
25
- " # Ensure the folder exists\n",
26
- " if not os.path.exists(folder_path):\n",
27
- " print(f\"Folder {folder_path} does not exist. Skipping.\")\n",
28
- " continue\n",
29
- "\n",
30
- " # Sort files to preserve order and avoid overwriting\n",
31
- " image_count = 1 # Start numbering images\n",
32
- " for filename in sorted(os.listdir(folder_path)):\n",
33
- " file_path = os.path.join(folder_path, filename)\n",
34
- " if \"test_sample.png\" in filename: # Ignore test_sample.png\n",
35
- " continue\n",
36
- "\n",
37
- " try:\n",
38
- " # Open and convert image (supports AVIF and others)\n",
39
- " with Image.open(file_path) as img:\n",
40
- " img = img.convert(\"RGB\") # Ensure compatible format\n",
41
- " new_filename = f\"{str(image_count).zfill(4)}.{output_format}\"\n",
42
- " new_file_path = os.path.join(folder_path, new_filename)\n",
43
- "\n",
44
- " # Save as new file\n",
45
- " img.save(new_file_path, format=output_format.upper())\n",
46
- " print(f\"Converted: {filename} -> {new_filename}\")\n",
47
- "\n",
48
- " # Remove old file if different\n",
49
- " if file_path != new_file_path:\n",
50
- " os.remove(file_path)\n",
51
- "\n",
52
- " image_count += 1\n",
53
- "\n",
54
- " except (UnidentifiedImageError, IOError) as e:\n",
55
- " print(f\"Invalid or unreadable file: {filename} ({e}). Deleting.\")\n",
56
- " os.remove(file_path) # Delete invalid files\n",
57
- "\n",
58
- "# Run the processing function\n",
59
- "process_images(data_dir, folders, output_format)\n"
60
- ]
61
- }
62
- ],
63
- "metadata": {
64
- "kernelspec": {
65
- "display_name": "Python 3 (ipykernel)",
66
- "language": "python",
67
- "name": "python3"
68
- },
69
- "language_info": {
70
- "codemirror_mode": {
71
- "name": "ipython",
72
- "version": 3
73
- },
74
- "file_extension": ".py",
75
- "mimetype": "text/x-python",
76
- "name": "python",
77
- "nbconvert_exporter": "python",
78
- "pygments_lexer": "ipython3",
79
- "version": "3.12.5"
80
- }
81
- },
82
- "nbformat": 4,
83
- "nbformat_minor": 5
84
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.jupyter/desktop-workspaces/default-37a8.jupyterlab-workspace DELETED
@@ -1 +0,0 @@
1
- {"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":1,"widgets":["notebook:image-format-generalizer.ipynb","notebook:comic-detector.ipynb"]},"current":"notebook:comic-detector.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"visible":true,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"],"widgetStates":{"jp-running-sessions":{"sizes":[0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666,0.16666666666666666],"expansionStates":[false,false,false,false,false,false]},"extensionmanager.main-view":{"sizes":[0.3333333333333333,0.3333333333333333,0.3333333333333333],"expansionStates":[false,false,false]}}},"right":{"collapsed":true,"visible":true,"widgets":["jp-property-inspector","debugger-sidebar"],"widgetStates":{"jp-debugger-sidebar":{"sizes":[0.2,0.2,0.2,0.2,0.2],"expansionStates":[false,false,false,false,false]}}},"relativeSizes":[0.2622779519331243,0.7377220480668757,0],"top":{"simpleVisibility":true}},"docmanager:recents":{"opened":[{"path":"","contentType":"directory","root":"~/Desktop/comic-sans-detector-clean"},{"path":"comic-detector.ipynb","contentType":"notebook","factory":"Notebook","root":"~/Desktop/comic-sans-detector-clean"},{"path":"image-format-generalizer.ipynb","contentType":"notebook","factory":"Notebook","root":"~/Desktop/comic-sans-detector-clean"},{"path":"README.md","contentType":"file","factory":"Editor","root":"~/Desktop/comic-sans-detector"}],"closed":[{"path":"README.md","contentType":"file","factory":"Editor","root":"~/Desktop/comic-sans-detector"}]},"notebook:image-format-generalizer.ipynb":{"data":{"path":"image-format-generalizer.ipynb","factory":"Notebook"}},"notebook:comic-detector.ipynb":{"data":{"path":"comic-detector.ipynb","factory":"Notebook"}},"file-browser-filebrowser:cwd":{"path":""}},"metadata":{"id":"default"}}
 
 
comic-detector.ipynb → comic-detector-new.ipynb RENAMED
The diff for this file is too large to render. See raw diff
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gaborcselle/font-identifier",
3
  "architectures": [
4
  "ResNetForImageClassification"
5
  ],
@@ -20,106 +20,12 @@
20
  512
21
  ],
22
  "id2label": {
23
- "0": "Agbalumo-Regular",
24
- "1": "AlfaSlabOne-Regular",
25
- "2": "ArchitectsDaughter-Regular",
26
- "3": "Arial",
27
- "4": "Arial Black",
28
- "5": "Arial Bold",
29
- "6": "Arial Bold Italic",
30
- "7": "Avenir",
31
- "8": "Bangers-Regular",
32
- "9": "BlackOpsOne-Regular",
33
- "10": "Courier",
34
- "11": "Georgia",
35
- "12": "Helvetica",
36
- "13": "IBMPlexSans-Regular",
37
- "14": "Inter-Regular",
38
- "15": "KaushanScript-Regular",
39
- "16": "Lato-Regular",
40
- "17": "Lobster-Regular",
41
- "18": "Lora-Regular",
42
- "19": "Merriweather-Regular",
43
- "20": "Niconne-Regular",
44
- "21": "OpenSans-Bold",
45
- "22": "OpenSans-Italic",
46
- "23": "OpenSans-Light",
47
- "24": "Pacifico-Regular",
48
- "25": "PixelifySans-Regular",
49
- "26": "PlayfairDisplay-Regular",
50
- "27": "Poppins-Regular",
51
- "28": "Rakkas-Regular",
52
- "29": "Roboto-Regular",
53
- "30": "RobotoMono-Regular",
54
- "31": "RobotoSlab-Regular",
55
- "32": "Rubik-Regular",
56
- "33": "SpaceMono-Regular",
57
- "34": "Tahoma",
58
- "35": "Tahoma Bold",
59
- "36": "Times New Roman",
60
- "37": "Times New Roman Bold",
61
- "38": "Times New Roman Bold Italic",
62
- "39": "Times New Roman Italic",
63
- "40": "TitilliumWeb-Regular",
64
- "41": "Trebuchet MS",
65
- "42": "Trebuchet MS Bold",
66
- "43": "Trebuchet MS Bold Italic",
67
- "44": "Trebuchet MS Italic",
68
- "45": "Verdana",
69
- "46": "Verdana Bold",
70
- "47": "Verdana Bold Italic",
71
- "48": "Verdana Italic"
72
  },
73
  "label2id": {
74
- "Agbalumo-Regular": "0",
75
- "AlfaSlabOne-Regular": "1",
76
- "ArchitectsDaughter-Regular": "2",
77
- "Arial": "3",
78
- "Arial Black": "4",
79
- "Arial Bold": "5",
80
- "Arial Bold Italic": "6",
81
- "Avenir": "7",
82
- "Bangers-Regular": "8",
83
- "BlackOpsOne-Regular": "9",
84
- "Courier": "10",
85
- "Georgia": "11",
86
- "Helvetica": "12",
87
- "IBMPlexSans-Regular": "13",
88
- "Inter-Regular": "14",
89
- "KaushanScript-Regular": "15",
90
- "Lato-Regular": "16",
91
- "Lobster-Regular": "17",
92
- "Lora-Regular": "18",
93
- "Merriweather-Regular": "19",
94
- "Niconne-Regular": "20",
95
- "OpenSans-Bold": "21",
96
- "OpenSans-Italic": "22",
97
- "OpenSans-Light": "23",
98
- "Pacifico-Regular": "24",
99
- "PixelifySans-Regular": "25",
100
- "PlayfairDisplay-Regular": "26",
101
- "Poppins-Regular": "27",
102
- "Rakkas-Regular": "28",
103
- "Roboto-Regular": "29",
104
- "RobotoMono-Regular": "30",
105
- "RobotoSlab-Regular": "31",
106
- "Rubik-Regular": "32",
107
- "SpaceMono-Regular": "33",
108
- "Tahoma": "34",
109
- "Tahoma Bold": "35",
110
- "Times New Roman": "36",
111
- "Times New Roman Bold": "37",
112
- "Times New Roman Bold Italic": "38",
113
- "Times New Roman Italic": "39",
114
- "TitilliumWeb-Regular": "40",
115
- "Trebuchet MS": "41",
116
- "Trebuchet MS Bold": "42",
117
- "Trebuchet MS Bold Italic": "43",
118
- "Trebuchet MS Italic": "44",
119
- "Verdana": "45",
120
- "Verdana Bold": "46",
121
- "Verdana Bold Italic": "47",
122
- "Verdana Italic": "48"
123
  },
124
  "layer_type": "basic",
125
  "model_type": "resnet",
 
1
  {
2
+ "_name_or_path": "comic-sans-detector",
3
  "architectures": [
4
  "ResNetForImageClassification"
5
  ],
 
20
  512
21
  ],
22
  "id2label": {
23
+ "0": "comic",
24
+ "1": "not-comic"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  },
26
  "label2id": {
27
+ "comic": 0,
28
+ "not-comic": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  },
30
  "layer_type": "basic",
31
  "model_type": "resnet",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb04f92ff671597997d664f096dc4d23fc3883a89c0d04db44acfbad76c696da
3
  size 44764336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:333a0316a4a8f816ee65c4dd897952a21f97ccddaefe96c08cbbc124aafe28e6
3
  size 44764336