diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..e5f2b96ca838d33c2ab1b0b5f07338416d51a407 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +outputs/proper_input_template.jpg filter=lfs diff=lfs merge=lfs -text +temp_grid.jpg filter=lfs diff=lfs merge=lfs -text +ui/logo.png filter=lfs diff=lfs merge=lfs -text diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..88153ef6caf7264cdbba075c0979069a77910e51 --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,3 @@ +[server] +fileWatcherType = "none" +headless = true diff --git a/DEPLOY.md b/DEPLOY.md new file mode 100644 index 0000000000000000000000000000000000000000..5935fded3712abf81bb105817bebe9cb81eea321 --- /dev/null +++ b/DEPLOY.md @@ -0,0 +1,48 @@ +# How to Deploy Type.ai to Hugging Face Spaces + +This guide will help you deploy your handwriting font generator for free on Hugging Face Spaces. + +## Prerequisites +- A [Hugging Face Account](https://huggingface.co/join) (Free) + +## Step-by-Step Deployment + +### 1. Create a New Space +1. Go to [huggingface.co/spaces](https://huggingface.co/spaces). +2. Click **"Create new Space"**. +3. **Space Name**: Enter `type-ai` (or similar). +4. **License**: `MIT` (optional). +5. **SDK**: Select **Streamlit**. +6. **Hardware**: Keep the default **CPU Basic (Free)**. + > *Note: This builds slowly but works. If it fails, you can request a restart.* + > *For faster font generation, T4 GPU (small cost) is better, but CPU works for free.* +7. Click **"Create Space"**. + +### 2. Upload Your Code +You will see a page with instructions. We will upload files via the web interface (easiest method). + +1. On your Space page, click the **"Files"** tab. +2. 
Click **"Add file"** > **"Upload files"**. +3. Drag and drop **ALL** the files from your `Type.ai` folder into the upload area. + - **Crucial Files**: + - `app.py` + - `requirements.txt` (I just updated this for you) + - `packages.txt` (I just created this for you - installs Tesseract) + - `src/` folder (drag the whole folder) + - `models/` folder (if you have local models, otherwise the code downloads them) + - `ui/` folder +4. In the "Commit message" box, type "Initial deploy". +5. Click **"Commit changes to main"**. + +### 3. Watch it Build +1. Click the **"App"** tab. +2. You will see "Building". +3. Hugging Face will automatically: + - Install Python libraries from `requirements.txt`. + - Install Tesseract OCR from `packages.txt`. +4. This process may take **5-10 minutes** the first time. + +### Troubleshooting +- **"Runtime Error"**: Check the **"Logs"** tab. +- **Tesseract not found**: Ensure `packages.txt` exists in the root directory. +- **Memory Error**: The free tier has 16GB RAM, which is usually enough. If it crashes loading the model, we can optimize the code to load lighter models. diff --git a/README.md b/README.md index 51d5051d35db53d10617d8b65ba6e98266dd90d5..f945ff220600c0cb7ad60997b917d4cfee333c1e 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,100 @@ ---- -title: TypeAI -emoji: đ -colorFrom: red -colorTo: red -sdk: docker -app_port: 8501 -tags: -- streamlit -pinned: false -short_description: Streamlit template space ---- - -# Welcome to Streamlit! - -Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart: - -If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community -forums](https://discuss.streamlit.io). 
+--- +title: Type AI +emoji: âī¸ +colorFrom: blue +colorTo: purple +sdk: streamlit +sdk_version: 1.25.0 +python_version: "3.10" +app_file: app.py +pinned: false +--- + +# Type.ai - AI Handwriting to Font Generator + +Convert your handwriting into a custom TrueType font file using AI-powered character detection. + +## Features + +- **AI Character Detection**: Uses advanced contour analysis and OCR to extract characters +- **Style Analysis**: Captures stroke width, slant, and roughness of your handwriting +- **Vector Conversion**: Converts bitmap characters to smooth vector paths +- **Font Generation**: Creates installable TTF font files +- **Web UI**: Modern drag-and-drop interface +- **REST API**: Integrate into your own applications + +## Quick Start + +### 1. Install Dependencies + +```bash +cd Type.ai +pip install -r requirements.txt +``` + +### 2. Command Line Usage + +```bash +# Basic usage +python main.py handwriting.jpg --output MyFont.ttf --name "MyHandwriting" + +# With debug output +python main.py sample.png -o CustomFont.ttf -n "Custom Font" --debug +``` + +### 3. Web Interface + +```bash +# Start the server +python server.py + +# Open in browser +# http://localhost:8000/ui/ +``` + +## Tips for Best Results + +1. **Write clearly** with consistent size +2. **Include all letters** A-Z (uppercase and/or lowercase) +3. **Use dark ink** on white paper +4. **Good lighting** - avoid shadows +5. 
**High resolution** scan or photo + +## Project Structure + +``` +Type.ai/ +âââ src/ +â âââ preprocessing.py # Image enhancement +â âââ segmentation.py # Character detection +â âââ style_extractor.py # Style analysis +â âââ vectorizer.py # Bitmap to vector +â âââ font_generator.py # TTF creation +â âââ pipeline.py # Main orchestrator +âââ ui/ +â âââ index.html # Web interface +âââ main.py # CLI +âââ server.py # FastAPI server +âââ requirements.txt # Dependencies +``` + +## API Endpoints + +| Endpoint | Method | Description | +|----------|--------|-------------| +| `/api/upload` | POST | Upload handwriting image | +| `/api/status/{job_id}` | GET | Check processing status | +| `/api/download/{job_id}` | GET | Download generated font | +| `/api/preview/{job_id}` | GET | Get segmentation preview | + +## Requirements + +- Python 3.8+ +- PyTorch 1.9+ +- OpenCV +- fonttools +- Tesseract OCR (optional, for character recognition) + +## License + +MIT diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..2337702311f54544816ad5b76e42edace81e93b4 --- /dev/null +++ b/app.py @@ -0,0 +1,161 @@ +""" +Type.ai - Web Interface +----------------------- +A clean UI to generate handwriting fonts. +Now supports Phase 1 (Grid) and Phase 2 (Sentence). 
+""" +import streamlit as st +import os +import cv2 +import numpy as np +import sys +from pathlib import Path + +# Fix imports +sys.path.insert(0, str(Path(__file__).parent / 'src')) +from process_grid import process_grid_final +from process_sentence import process_sentence + +def main(): + # Use logo as favicon/page icon + logo_path = "ui/logo.png" + st.set_page_config( + page_title="Type.ai", + page_icon=logo_path if os.path.exists(logo_path) else "âī¸", + layout="centered" + ) + + # --- LOGO & HEADER --- + logo_path = "ui/logo.png" + if os.path.exists(logo_path): + st.image(logo_path, width=200) + else: + st.title("âī¸ Type.ai") + + st.header("Turn your handwriting into a font.") + + tab1, tab2 = st.tabs(["đ Grid Mode (Standard)", "đ Sentence Mode (Experimental)"]) + + # --- TAB 1: GRID MODE --- + with tab1: + st.subheader("Method 1: The Precision Grid") + st.markdown("Use this for 100% accuracy using the template.") + + col1, col2 = st.columns([1, 2]) + template_path = "outputs/proper_input_template.jpg" + + # Ensure template exists + if not os.path.exists(template_path): + st.warning("Template not found. Please run cleanup or generate it.") + + with col1: + if os.path.exists(template_path): + st.image(template_path, caption="Grid Template", width=150) + + with col2: + st.write("1. Download blank template.\n2. Fill with **Blue Pen**.\n3. 
Upload.") + if os.path.exists(template_path): + with open(template_path, "rb") as f: + st.download_button( + label="đĨ Download Blank Template", + data=f, + file_name="TypeAI_Template.jpg", + mime="image/jpeg" + ) + + uploaded_grid = st.file_uploader("Upload Grid Scan", type=["jpg", "png", "jpeg"], key="grid_up") + + if uploaded_grid is not None: + st.image(uploaded_grid, caption="Grid Upload", width=300) + + font_name = st.text_input("Name your font:", value="MyHandwriting", key="grid_font_name") + + if st.button("⨠Generate Grid Font"): + with st.spinner("Processing Grid..."): + try: + temp_path = "temp_grid.jpg" + with open(temp_path, "wb") as f: + f.write(uploaded_grid.getbuffer()) + + # Clean filename + safe_name = "".join(x for x in font_name if x.isalnum() or x in " -_") + if not safe_name: safe_name = "MyHandwriting" + + output_filename = f"{safe_name.replace(' ', '_')}.ttf" + output_path = os.path.join("outputs", output_filename) + + process_grid_final(temp_path, output_path, font_family_name=font_name) + + st.success(f"Font '{font_name}' Generated!") + + with open(output_path, "rb") as f: + st.download_button(f"Download {output_filename}", f, output_filename) + except Exception as e: + st.error(f"Error: {e}") + + # --- TAB 2: SENTENCE MODE --- + with tab2: + st.subheader("Method 2: Natural Sentence (Phase 2)") + st.markdown("Upload a photo of any handwritten sentence to detect letters automatically.") + + st.info("âšī¸ **First Run Note:** This mode uses AI models (TrOCR) which will download on the first run. Please be patient.") + + try: + import torch + if torch.cuda.is_available(): + gpu_name = torch.cuda.get_device_name(0) + st.success(f"đ Acceleration Enabled: Using {gpu_name}") + else: + st.warning("â ī¸ No GPU Detected. 
Running on CPU (this might be slow).") + except ImportError: + pass + + uploaded_sent = st.file_uploader("Upload Sentence Image", type=["jpg", "png", "jpeg"], key="sent_up") + + if uploaded_sent is not None: + st.image(uploaded_sent, caption="Sentence Upload", width=300) + if st.button("đ§ Analyze & Generate"): + with st.spinner("Analyzing handwriting (Segmentation + OCR)..."): + try: + temp_path = "temp_sentence.jpg" + with open(temp_path, "wb") as f: + f.write(uploaded_sent.getbuffer()) + + output_path = "outputs/SentenceFont.ttf" + result = process_sentence(temp_path, output_path) + + if result and result[0] and os.path.exists(result[0]): + font_path, text_content = result + st.success("Sentence Font Generated!") + + # Generate Preview + if text_content: + st.subheader("Preview: Your Text in Your Font") + from font_renderer import render_font_preview + preview_path = "outputs/preview.png" + render_font_preview(font_path, text_content, preview_path) + if os.path.exists(preview_path): + st.image(preview_path, caption="Generated Preview", use_container_width=True) + else: + st.warning("Could not generate preview image.") + else: + st.warning("Font generated, but no text content reconstructed for preview.") + + st.balloons() + with open(output_path, "rb") as f: + st.download_button("Download SentenceFont.ttf", f, "SentenceFont.ttf") + + # Debug View + if os.path.exists("outputs/debug_alphabet.png"): + with st.expander("đī¸ Debug: what the AI saw"): + st.image("outputs/debug_alphabet.png", caption="Detected Characters") + else: + st.warning("No font generated. 
Maybe no characters were confidently detected?") + + except Exception as e: + st.error(f"Error: {e}") + import traceback + st.text(traceback.format_exc()) # Debug info + +if __name__ == "__main__": + main() diff --git a/models/easyocr/craft_mlt_25k.pth b/models/easyocr/craft_mlt_25k.pth new file mode 100644 index 0000000000000000000000000000000000000000..88871234c9270456cdf0137652a48b929d0ddf72 --- /dev/null +++ b/models/easyocr/craft_mlt_25k.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5efbfb48b4081100544e75e1e2b57f8de3d84f213004b14b85fd4b3748db17 +size 83152330 diff --git a/models/easyocr/english_g2.pth b/models/easyocr/english_g2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a26a8bacb812296f7e0abb62154d33a4931b6093 --- /dev/null +++ b/models/easyocr/english_g2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2272681d9d67a04e2dff396b6e95077bc19001f8f6d3593c307b9852e1c29e8 +size 15143997 diff --git a/outputs/MyHandwriting.ttf b/outputs/MyHandwriting.ttf new file mode 100644 index 0000000000000000000000000000000000000000..9aebe7465bdf09895871ea297341a7324d716cdf Binary files /dev/null and b/outputs/MyHandwriting.ttf differ diff --git a/outputs/SentenceFont.ttf b/outputs/SentenceFont.ttf new file mode 100644 index 0000000000000000000000000000000000000000..c592878b4962a71393e880faa1a940a8968d7200 Binary files /dev/null and b/outputs/SentenceFont.ttf differ diff --git a/outputs/debug/char_a_7_1.png b/outputs/debug/char_a_7_1.png new file mode 100644 index 0000000000000000000000000000000000000000..55980de2b6e5cd1ef35925d07ff953263a03b2be Binary files /dev/null and b/outputs/debug/char_a_7_1.png differ diff --git a/outputs/debug/char_b_2_0.png b/outputs/debug/char_b_2_0.png new file mode 100644 index 0000000000000000000000000000000000000000..9a705b0751687fb6ee45573aaa7188d3076403dd Binary files /dev/null and b/outputs/debug/char_b_2_0.png differ diff --git 
a/outputs/debug/char_c_1_3.png b/outputs/debug/char_c_1_3.png new file mode 100644 index 0000000000000000000000000000000000000000..f649fbee1a9c084d1292f1ccf714534e8447d7ec Binary files /dev/null and b/outputs/debug/char_c_1_3.png differ diff --git a/outputs/debug/char_d_8_0.png b/outputs/debug/char_d_8_0.png new file mode 100644 index 0000000000000000000000000000000000000000..400b02e18c66a9298dd18768ede183be0880150f Binary files /dev/null and b/outputs/debug/char_d_8_0.png differ diff --git a/outputs/debug/char_e_0_2.png b/outputs/debug/char_e_0_2.png new file mode 100644 index 0000000000000000000000000000000000000000..606e294cb363d3300f5ad86773e9d33d14c31334 Binary files /dev/null and b/outputs/debug/char_e_0_2.png differ diff --git a/outputs/debug/char_e_5_2.png b/outputs/debug/char_e_5_2.png new file mode 100644 index 0000000000000000000000000000000000000000..6d220dab9edf160f200206b15311d6cb05df3d9a Binary files /dev/null and b/outputs/debug/char_e_5_2.png differ diff --git a/outputs/debug/char_e_6_2.png b/outputs/debug/char_e_6_2.png new file mode 100644 index 0000000000000000000000000000000000000000..67d4ddc0fdcc7a562e24241950591ac00a69efae Binary files /dev/null and b/outputs/debug/char_e_6_2.png differ diff --git a/outputs/debug/char_f_3_0.png b/outputs/debug/char_f_3_0.png new file mode 100644 index 0000000000000000000000000000000000000000..a40e7e76e3db7824255d4a11bfa473787e25c6e9 Binary files /dev/null and b/outputs/debug/char_f_3_0.png differ diff --git a/outputs/debug/char_g_8_2.png b/outputs/debug/char_g_8_2.png new file mode 100644 index 0000000000000000000000000000000000000000..c1a4887c71338e15e34f3b79304ea62633d8cef3 Binary files /dev/null and b/outputs/debug/char_g_8_2.png differ diff --git a/outputs/debug/char_h_0_1.png b/outputs/debug/char_h_0_1.png new file mode 100644 index 0000000000000000000000000000000000000000..2329541e775963bd8d7650924cb92e95a3c40af1 Binary files /dev/null and b/outputs/debug/char_h_0_1.png differ diff --git 
a/outputs/debug/char_h_6_1.png b/outputs/debug/char_h_6_1.png new file mode 100644 index 0000000000000000000000000000000000000000..d1f12fb5bddb8f2036ea253f14fb678271386441 Binary files /dev/null and b/outputs/debug/char_h_6_1.png differ diff --git a/outputs/debug/char_i_1_2.png b/outputs/debug/char_i_1_2.png new file mode 100644 index 0000000000000000000000000000000000000000..9a1874e82608cb75f810c9da7ccb3a337853d73f Binary files /dev/null and b/outputs/debug/char_i_1_2.png differ diff --git a/outputs/debug/char_j_4_0.png b/outputs/debug/char_j_4_0.png new file mode 100644 index 0000000000000000000000000000000000000000..7b700f3feb9097c88e0f134e98ada4e0c5a47e4d Binary files /dev/null and b/outputs/debug/char_j_4_0.png differ diff --git a/outputs/debug/char_k_1_4.png b/outputs/debug/char_k_1_4.png new file mode 100644 index 0000000000000000000000000000000000000000..33ea2ce189b5cfef008471165afb92c9d48ec9ac Binary files /dev/null and b/outputs/debug/char_k_1_4.png differ diff --git a/outputs/debug/char_l_7_0.png b/outputs/debug/char_l_7_0.png new file mode 100644 index 0000000000000000000000000000000000000000..2fb121fd102e0e305d6564cb39d49eda58f3aae6 Binary files /dev/null and b/outputs/debug/char_l_7_0.png differ diff --git a/outputs/debug/char_m_4_2.png b/outputs/debug/char_m_4_2.png new file mode 100644 index 0000000000000000000000000000000000000000..ec54479d326748b444ab173fcb849d5a806e24cf Binary files /dev/null and b/outputs/debug/char_m_4_2.png differ diff --git a/outputs/debug/char_n_2_4.png b/outputs/debug/char_n_2_4.png new file mode 100644 index 0000000000000000000000000000000000000000..823b5238181d92de59176c8ad540d38a587feea3 Binary files /dev/null and b/outputs/debug/char_n_2_4.png differ diff --git a/outputs/debug/char_o_2_2.png b/outputs/debug/char_o_2_2.png new file mode 100644 index 0000000000000000000000000000000000000000..011c17b015ab5ab927e036453cf68b784ea12164 Binary files /dev/null and b/outputs/debug/char_o_2_2.png differ diff --git 
a/outputs/debug/char_o_3_1.png b/outputs/debug/char_o_3_1.png new file mode 100644 index 0000000000000000000000000000000000000000..2dcb112a9f081c411a59a859a51b6f225f8342dd Binary files /dev/null and b/outputs/debug/char_o_3_1.png differ diff --git a/outputs/debug/char_o_5_0.png b/outputs/debug/char_o_5_0.png new file mode 100644 index 0000000000000000000000000000000000000000..eae168a63b97ba580d17cfcf09af772a31abe95e Binary files /dev/null and b/outputs/debug/char_o_5_0.png differ diff --git a/outputs/debug/char_o_8_1.png b/outputs/debug/char_o_8_1.png new file mode 100644 index 0000000000000000000000000000000000000000..31b8097b3a3691dbe051bf0d2469717cbbda167b Binary files /dev/null and b/outputs/debug/char_o_8_1.png differ diff --git a/outputs/debug/char_p_4_3.png b/outputs/debug/char_p_4_3.png new file mode 100644 index 0000000000000000000000000000000000000000..fb8b6fe0a05e1270d4b8f5dfdcae27ba0a7ac295 Binary files /dev/null and b/outputs/debug/char_p_4_3.png differ diff --git a/outputs/debug/char_q_1_0.png b/outputs/debug/char_q_1_0.png new file mode 100644 index 0000000000000000000000000000000000000000..a65b54cc7521d8deedaf456e1832d29bcd58cd55 Binary files /dev/null and b/outputs/debug/char_q_1_0.png differ diff --git a/outputs/debug/char_r_2_1.png b/outputs/debug/char_r_2_1.png new file mode 100644 index 0000000000000000000000000000000000000000..75c0ec5ead8f24275846c3be2aec9b0f0fa6c185 Binary files /dev/null and b/outputs/debug/char_r_2_1.png differ diff --git a/outputs/debug/char_r_5_3.png b/outputs/debug/char_r_5_3.png new file mode 100644 index 0000000000000000000000000000000000000000..7cf75a7eee874663a4f66b90f87fd7e8584235a5 Binary files /dev/null and b/outputs/debug/char_r_5_3.png differ diff --git a/outputs/debug/char_s_4_4.png b/outputs/debug/char_s_4_4.png new file mode 100644 index 0000000000000000000000000000000000000000..c66ab5a30202f2ad2132866587b42122025ea8b6 Binary files /dev/null and b/outputs/debug/char_s_4_4.png differ diff --git 
a/outputs/debug/char_t_0_0.png b/outputs/debug/char_t_0_0.png new file mode 100644 index 0000000000000000000000000000000000000000..99c53f621cbc6061435b5fb7c599e4fa49c05aa4 Binary files /dev/null and b/outputs/debug/char_t_0_0.png differ diff --git a/outputs/debug/char_t_6_0.png b/outputs/debug/char_t_6_0.png new file mode 100644 index 0000000000000000000000000000000000000000..1b0f8aa49a4894c626447962ffde2bae75defb46 Binary files /dev/null and b/outputs/debug/char_t_6_0.png differ diff --git a/outputs/debug/char_u_1_1.png b/outputs/debug/char_u_1_1.png new file mode 100644 index 0000000000000000000000000000000000000000..61e474411bee3b441a3b3c31dc20dd806b218cc4 Binary files /dev/null and b/outputs/debug/char_u_1_1.png differ diff --git a/outputs/debug/char_u_4_1.png b/outputs/debug/char_u_4_1.png new file mode 100644 index 0000000000000000000000000000000000000000..8ad80ea4e6f80dae08b215e21de419d9413d937f Binary files /dev/null and b/outputs/debug/char_u_4_1.png differ diff --git a/outputs/debug/char_v_5_1.png b/outputs/debug/char_v_5_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e9179571d9704dc85aed45855e94fefa2fa9dad4 Binary files /dev/null and b/outputs/debug/char_v_5_1.png differ diff --git a/outputs/debug/char_w_2_3.png b/outputs/debug/char_w_2_3.png new file mode 100644 index 0000000000000000000000000000000000000000..94121d857127b5f59829b9bf8ebb41c758981e16 Binary files /dev/null and b/outputs/debug/char_w_2_3.png differ diff --git a/outputs/debug/char_x_3_2.png b/outputs/debug/char_x_3_2.png new file mode 100644 index 0000000000000000000000000000000000000000..4e3998637cad283eb97f1f3308e96555614350dc Binary files /dev/null and b/outputs/debug/char_x_3_2.png differ diff --git a/outputs/debug/char_y_7_4.png b/outputs/debug/char_y_7_4.png new file mode 100644 index 0000000000000000000000000000000000000000..51eceb770105963866fb600818f9ae5db9e2850d Binary files /dev/null and b/outputs/debug/char_y_7_4.png differ diff --git 
a/outputs/debug/char_z_7_3.png b/outputs/debug/char_z_7_3.png new file mode 100644 index 0000000000000000000000000000000000000000..d29a34b39def7b7648f576c413c31b15e7f4098a Binary files /dev/null and b/outputs/debug/char_z_7_3.png differ diff --git a/outputs/debug/word_0.png b/outputs/debug/word_0.png new file mode 100644 index 0000000000000000000000000000000000000000..397e87c4725a66a728a16e055fe9901af3e49d47 Binary files /dev/null and b/outputs/debug/word_0.png differ diff --git a/outputs/debug/word_1.png b/outputs/debug/word_1.png new file mode 100644 index 0000000000000000000000000000000000000000..21abbce7eaffac9bd5a0a2c47b50f90f877f0e97 Binary files /dev/null and b/outputs/debug/word_1.png differ diff --git a/outputs/debug/word_2.png b/outputs/debug/word_2.png new file mode 100644 index 0000000000000000000000000000000000000000..791fd4114dff5fcd777d3c970ba708fad344acd5 Binary files /dev/null and b/outputs/debug/word_2.png differ diff --git a/outputs/debug/word_3.png b/outputs/debug/word_3.png new file mode 100644 index 0000000000000000000000000000000000000000..40e3de8fdde7970f4095269988b8ec41bbacbe7a Binary files /dev/null and b/outputs/debug/word_3.png differ diff --git a/outputs/debug/word_4.png b/outputs/debug/word_4.png new file mode 100644 index 0000000000000000000000000000000000000000..da210257c0edea0c654ea307926c15f0d9159140 Binary files /dev/null and b/outputs/debug/word_4.png differ diff --git a/outputs/debug/word_5.png b/outputs/debug/word_5.png new file mode 100644 index 0000000000000000000000000000000000000000..660130532bad4d085c81942400762cc4fe48885b Binary files /dev/null and b/outputs/debug/word_5.png differ diff --git a/outputs/debug/word_6.png b/outputs/debug/word_6.png new file mode 100644 index 0000000000000000000000000000000000000000..cd61cac9b5a920fc690d71f9538dc82c0a955e25 Binary files /dev/null and b/outputs/debug/word_6.png differ diff --git a/outputs/debug/word_7.png b/outputs/debug/word_7.png new file mode 100644 index 
0000000000000000000000000000000000000000..e9eb3097712052f97b67cc4504b6c5894a3f97ff Binary files /dev/null and b/outputs/debug/word_7.png differ diff --git a/outputs/debug/word_8.png b/outputs/debug/word_8.png new file mode 100644 index 0000000000000000000000000000000000000000..94bd6404715b9313d5bd6955a9e0a8604a535a7b Binary files /dev/null and b/outputs/debug/word_8.png differ diff --git a/outputs/debug_alphabet.png b/outputs/debug_alphabet.png new file mode 100644 index 0000000000000000000000000000000000000000..bc743500da84cfb10e12d1b4829ef4f71c2ac542 Binary files /dev/null and b/outputs/debug_alphabet.png differ diff --git a/outputs/preview.png b/outputs/preview.png new file mode 100644 index 0000000000000000000000000000000000000000..8c5a39bc86d8954fe0ba731485c1d366ca84df46 Binary files /dev/null and b/outputs/preview.png differ diff --git a/outputs/proper_input_template.jpg b/outputs/proper_input_template.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5aad5ff51f53cf9e4846c8a876665139e2aa2189 --- /dev/null +++ b/outputs/proper_input_template.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e852791e7c298241b72a73cd80f4734180b186fa62f0066dc71e529a5fd3e5 +size 497125 diff --git a/packages.txt b/packages.txt new file mode 100644 index 0000000000000000000000000000000000000000..66f8b91d676ce2c1de43c5e3a6284015be3af0e1 --- /dev/null +++ b/packages.txt @@ -0,0 +1 @@ +tesseract-ocr diff --git a/process_grid.py b/process_grid.py new file mode 100644 index 0000000000000000000000000000000000000000..9b225b6083163ee1e6b2ff510089db4364b3147e --- /dev/null +++ b/process_grid.py @@ -0,0 +1,84 @@ +""" +Standard Grid Processor (Final / Perfect). +Uses Spectral Separation + Correct Polarity (Black Ink). 
+""" +import cv2 +import numpy as np +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent / 'src')) +from vectorizer import CharacterVectorizer +from font_generator import generate_font + +def get_page_transform(img): + # Simplified alignment - Find page if possible, else full image + h, w = img.shape[:2] + return np.array([[0,0], [2480,0], [2480,3508], [0,3508]], dtype="float32") # Fallback to full A4 + +def process_grid_final(image_path, output_font="outputs/PerfectAIFont.ttf", font_family_name="PerfectAIFont"): + print(f"Processing: {image_path}") + img = cv2.imread(image_path) + if img is None: + raise ValueError("Cannot open image") + + # Resize to A4 + img = cv2.resize(img, (2480, 3508)) + + # 1. AI Extraction (Blue Ink -> White Mask) + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + s = hsv[:, :, 1] + _, mask = cv2.threshold(s, 50, 255, cv2.THRESH_BINARY) + + # 2. Polarity Fix (White Mask -> Black Ink on White BG) + # This matches "Option B" which was confirmed correct + final_mask = cv2.bitwise_not(mask) + + # Clean noise + kernel = np.ones((3,3), np.uint8) + final_mask = cv2.morphologyEx(final_mask, cv2.MORPH_OPEN, kernel) # Remove small noise + + # 3. 
Grid Extraction + margin_x, margin_y = 100, 300 + cols = 8 + cell_w = (2480 - 2 * margin_x) // cols + cell_h = int(cell_w * 1.2) + + chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!?.,:;" + + current_x, current_y = margin_x, margin_y + vectorizer = CharacterVectorizer() + char_paths = {} + + print("Vectorizing...") + for i, char in enumerate(chars): + x1, y1 = int(current_x), int(current_y) + x2, y2 = int(x1 + cell_w), int(y1 + cell_h) + + pad = 20 + roi = final_mask[y1+pad:y2-pad, x1+pad:x2-pad] + + # Check original mask for content (White ink) + roi_check = mask[y1+pad:y2-pad, x1+pad:x2-pad] + + if cv2.countNonZero(roi_check) > 50: + path, _ = vectorizer.vectorize(roi) + if path: + char_paths[char] = path + print(f" {char}: Found") + + current_x += cell_w + if (i + 1) % cols == 0: + current_x = margin_x + current_y += cell_h + + generate_font(char_paths, output_font, font_family_name) + print(f"Success! Saved to {output_font}") + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("image", help="Input image") + parser.add_argument("-o", "--output", default="outputs/PerfectAIFont.ttf") + args = parser.parse_args() + process_grid_final(args.image, args.output) diff --git a/process_sentence.py b/process_sentence.py new file mode 100644 index 0000000000000000000000000000000000000000..8074d4003bf552d35f29b8828ba4ca1973b7b91e --- /dev/null +++ b/process_sentence.py @@ -0,0 +1,252 @@ +""" +Sentence Processor - Phase 2 +Robust implementation using projection-based segmentation + TrOCR labeling. 
+""" +import sys +import os +import cv2 +import numpy as np +from pathlib import Path +from collections import defaultdict + +sys.path.insert(0, str(Path(__file__).resolve().parent / 'src')) +from segmenter import TextSegmenter +from recognizer import HandwritingRecognizer +from vectorizer import CharacterVectorizer +from font_generator import generate_font + + +def score_character_image(img): + """ + Score a character image for quality. + Higher is better. + """ + if img is None or img.size == 0: + return 0 + + h, w = img.shape[:2] + + # Aspect ratio score (prefer ~0.5-1.0) + aspect = w / max(h, 1) + aspect_score = 1.0 - abs(aspect - 0.7) * 0.5 + aspect_score = max(0, min(1, aspect_score)) + + # Size score (prefer medium-sized) + area = h * w + size_score = min(area / 2000, 1.0) # Normalize to reasonable size + + # Ink density (prefer 10-50% ink) + if len(img.shape) == 3: + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + else: + gray = img + _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + ink_ratio = np.sum(binary > 0) / max(binary.size, 1) + ink_score = 1.0 - abs(ink_ratio - 0.3) * 2 + ink_score = max(0, min(1, ink_score)) + + return aspect_score * 0.3 + size_score * 0.3 + ink_score * 0.4 + + +def select_best_characters(char_images): + """ + Select best instance of each character. + """ + best_chars = {} + for label, imgs in char_images.items(): + if not imgs: + continue + + # Score all instances + scored = [(score_character_image(img), img) for img in imgs] + scored.sort(key=lambda x: x[0], reverse=True) + + # Pick best + best_chars[label] = scored[0][1] + + return best_chars + + +def process_sentence(image_path, output_font): + """ + Process a sentence image to generate a font. + Uses projection-based segmentation + TrOCR for labeling. 
+ """ + print(f"Processing sentence: {image_path}") + + # Ensure output directory exists + os.makedirs(os.path.dirname(output_font) if os.path.dirname(output_font) else "outputs", exist_ok=True) + os.makedirs("outputs/debug", exist_ok=True) + + # Initialize Models + print("Loading models...") + segmenter = TextSegmenter() + recognizer = HandwritingRecognizer() + vectorizer = CharacterVectorizer() + + # 1. Segment Words + print("Step 1: Segmenting words...") + word_images = segmenter.segment_lines_words(image_path) + print(f" Found {len(word_images)} words") + + if not word_images: + print("ERROR: No words detected!") + return None, "" + + # 2. Process each word + print("Step 2: Processing words...") + char_images = defaultdict(list) + full_text_parts = [] + + for word_idx, word_img in enumerate(word_images): + try: + # Save for debugging + cv2.imwrite(f"outputs/debug/word_{word_idx}.png", word_img) + + # Recognize the word + temp_path = "temp_word.png" + cv2.imwrite(temp_path, word_img) + word_text = recognizer.recognize(temp_path) + word_text = word_text.strip().lower() + + print(f" Word {word_idx}: '{word_text}'") + + if not word_text: + continue + + # Segment characters using projection + char_crops = segmenter.segment_characters(word_img) + + if char_crops is None: + char_crops = [] + + print(f" Segmented into {len(char_crops)} parts, text has {len(word_text)} chars") + + # Try to align segments with recognized text + if len(char_crops) == len(word_text): + # Perfect match! 
1:1 alignment + for char_idx, (crop, char) in enumerate(zip(char_crops, word_text)): + if char.isalpha(): + char_images[char].append(crop) + cv2.imwrite(f"outputs/debug/char_{char}_{word_idx}_{char_idx}.png", crop) + full_text_parts.append(word_text) + + elif len(char_crops) > 0 and len(word_text) > 0: + # Mismatch - try force split + num_chars = len(word_text) + force_split = segmenter.segment_connected_characters(word_img, num_chars) + + if len(force_split) == num_chars: + for char_idx, (crop, char) in enumerate(zip(force_split, word_text)): + if char.isalpha(): + char_images[char].append(crop) + cv2.imwrite(f"outputs/debug/char_{char}_{word_idx}_{char_idx}.png", crop) + full_text_parts.append(word_text) + else: + # Last resort: equal width split + h, w = word_img.shape[:2] + char_width = w // num_chars + for char_idx, char in enumerate(word_text): + x_start = char_idx * char_width + x_end = (char_idx + 1) * char_width if char_idx < num_chars - 1 else w + crop = word_img[:, x_start:x_end] + if char.isalpha() and crop.size > 0: + char_images[char].append(crop) + full_text_parts.append(word_text) + + full_text_parts.append(" ") + + except Exception as e: + print(f" Error processing word {word_idx}: {e}") + import traceback + traceback.print_exc() + continue + + # 3. Select best instances + print("Step 3: Selecting best character instances...") + best_chars = select_best_characters(char_images) + + print(f" Unique characters: {len(best_chars)}") + print(f" Alphabet: {sorted(best_chars.keys())}") + + if not best_chars: + print("ERROR: No characters extracted!") + return None, "" + + # 4. 
Vectorize + print("Step 4: Vectorizing...") + char_paths = {} + for label, img in best_chars.items(): + try: + path, _ = vectorizer.vectorize(img) + if path: + char_paths[label] = path + print(f" {label}: OK") + else: + print(f" {label}: Empty path") + except Exception as e: + print(f" {label}: Error - {e}") + + if not char_paths: + print("ERROR: No characters vectorized!") + return None, "" + + # 5. Generate font + print("Step 5: Generating font...") + try: + generate_font(char_paths, output_font, "SentenceFont") + print(f" SUCCESS! Saved to {output_font}") + except Exception as e: + print(f" ERROR generating font: {e}") + import traceback + traceback.print_exc() + return None, "" + + # 6. Create debug visualization + print("Step 6: Creating debug visualization...") + try: + debug_h = 64 + debug_imgs = [] + for lbl in sorted(best_chars.keys()): + img = best_chars[lbl] + h, w = img.shape[:2] + if h > 0: + scale = debug_h / h + new_w = max(1, int(w * scale)) + resized = cv2.resize(img, (new_w, debug_h)) + debug_imgs.append(resized) + + if debug_imgs: + total_w = sum(img.shape[1] for img in debug_imgs) + collage = np.ones((debug_h, total_w, 3), dtype=np.uint8) * 255 + curr_x = 0 + for img in debug_imgs: + h, w = img.shape[:2] + if len(img.shape) == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + collage[:, curr_x:curr_x+w] = img[:, :w] + curr_x += w + + cv2.imwrite("outputs/debug_alphabet.png", collage) + print(" Debug image saved to outputs/debug_alphabet.png") + except Exception as e: + print(f" Debug visualization error: {e}") + + full_text = "".join(full_text_parts).strip() + print(f"\nFull text: '{full_text}'") + print("Done!") + + return output_font, full_text + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + parser.add_argument("image", help="Input sentence image") + parser.add_argument("-o", "--output", default="outputs/SentenceFont.ttf") + args = parser.parse_args() + + if os.path.exists(args.image): + 
process_sentence(args.image, args.output) + else: + print(f"Image not found: {args.image}") diff --git a/requirements.txt b/requirements.txt index 28d994e22f8dd432b51df193562052e315ad95f7..34cab8b3675171a3a75b451e6edde4996dce3a54 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,30 @@ -altair -pandas -streamlit \ No newline at end of file +# Type.ai - AI Handwriting to Font Generator +streamlit>=1.10.0 +# Core Dependencies + +# Deep Learning +torch>=1.9.0 +torchvision>=0.10.0 + +# Image Processing +opencv-python-headless>=4.5.0 +Pillow>=8.0.0 +numpy>=1.20.0 + +# Font Generation +fonttools>=4.25.0 + +# OCR for character recognition +pytesseract>=0.3.8 + +# Web Server +fastapi>=0.68.0 +uvicorn>=0.15.0 +python-multipart>=0.0.5 + +# Utilities +gdown>=4.4.0 +tqdm>=4.62.0 + +# Optional: CUDA for GPU acceleration (install separately) +# pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..15f279420f7ec2b4646bf80f7c4c47684a8f71a3 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,6 @@ +# Type.ai - AI Handwriting to Font Generator +""" +Core modules for handwriting analysis and font generation. 
+""" + +__version__ = "1.0.0" diff --git a/src/__pycache__/__init__.cpython-311.pyc b/src/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..83890a4762f42bd8bf37ac0d06eed7f62fa80f24 Binary files /dev/null and b/src/__pycache__/__init__.cpython-311.pyc differ diff --git a/src/__pycache__/font_generator.cpython-311.pyc b/src/__pycache__/font_generator.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..274b5860e849f5c95df0575969600597b2bdf5ed Binary files /dev/null and b/src/__pycache__/font_generator.cpython-311.pyc differ diff --git a/src/__pycache__/font_renderer.cpython-311.pyc b/src/__pycache__/font_renderer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..769b9278953cfe9e0e7d7953c1d7ae8f3b52b26c Binary files /dev/null and b/src/__pycache__/font_renderer.cpython-311.pyc differ diff --git a/src/__pycache__/pipeline.cpython-311.pyc b/src/__pycache__/pipeline.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab8258738a792cf80b8bf2eff5f1454bcfcc3033 Binary files /dev/null and b/src/__pycache__/pipeline.cpython-311.pyc differ diff --git a/src/__pycache__/preprocessing.cpython-311.pyc b/src/__pycache__/preprocessing.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c085f1a44bdb78944e04523238ec126704ce2927 Binary files /dev/null and b/src/__pycache__/preprocessing.cpython-311.pyc differ diff --git a/src/__pycache__/recognizer.cpython-311.pyc b/src/__pycache__/recognizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c957ebc3b344b495e7ca5f5bf759c5065890cbf0 Binary files /dev/null and b/src/__pycache__/recognizer.cpython-311.pyc differ diff --git a/src/__pycache__/segmentation.cpython-311.pyc b/src/__pycache__/segmentation.cpython-311.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..f594e7e872ae69a7c9ddaf8e4b560dc8780fea43 Binary files /dev/null and b/src/__pycache__/segmentation.cpython-311.pyc differ diff --git a/src/__pycache__/segmenter.cpython-311.pyc b/src/__pycache__/segmenter.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d22f190331ea8836c777f2fccf871f020d25b183 Binary files /dev/null and b/src/__pycache__/segmenter.cpython-311.pyc differ diff --git a/src/__pycache__/style_extractor.cpython-311.pyc b/src/__pycache__/style_extractor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..77c33da04139333f7a2983af29b7c6d215cc6596 Binary files /dev/null and b/src/__pycache__/style_extractor.cpython-311.pyc differ diff --git a/src/__pycache__/trocr_recognizer.cpython-311.pyc b/src/__pycache__/trocr_recognizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eae67ca3eaee20954f5f82291558c483f45fabc2 Binary files /dev/null and b/src/__pycache__/trocr_recognizer.cpython-311.pyc differ diff --git a/src/__pycache__/vectorizer.cpython-311.pyc b/src/__pycache__/vectorizer.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad198421a2d08f2d2010467888b578cd62b2f3d0 Binary files /dev/null and b/src/__pycache__/vectorizer.cpython-311.pyc differ diff --git a/src/alphabet_processor.py b/src/alphabet_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..6ddfbda8fe534676be343af21477ff5b910a3fd6 --- /dev/null +++ b/src/alphabet_processor.py @@ -0,0 +1,185 @@ +""" +Alphabet Template Processor for Type.ai + +Special segmentation for alphabet template images where letters +are written in order (A-Z, a-z). Uses positional labeling instead +of OCR for 100% accuracy. 
"""
Alphabet Template Processor for Type.ai

Special segmentation for alphabet template images where letters
are written in order (A-Z, a-z). Uses positional labeling instead
of OCR for 100% accuracy.
"""

import cv2
import numpy as np
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional
import os


@dataclass
class Character:
    """Represents a detected character."""
    image: np.ndarray
    bbox: Tuple[int, int, int, int]  # (x, y, w, h)
    label: str = ""
    confidence: float = 0.0


class AlphabetTemplateProcessor:
    """
    Processes alphabet template images where letters are in order.
    No OCR needed - uses positional labeling.
    """

    # Standard alphabet order
    UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    LOWERCASE = "abcdefghijklmnopqrstuvwxyz"

    def __init__(self, min_char_area: int = 100, max_char_area: int = 50000):
        self.min_char_area = min_char_area
        self.max_char_area = max_char_area

    def process(self, binary_image: np.ndarray,
                include_lowercase: bool = True) -> List[Character]:
        """
        Process an alphabet template image.

        Letters are assumed to be written in rows, left-to-right,
        top-to-bottom: uppercase A-Z first, then (optionally) lowercase
        a-z. Labels are assigned purely by position, so confidence is 1.0.

        Args:
            binary_image: Preprocessed binary image.
            include_lowercase: Whether to expect lowercase letters too.

        Returns:
            List of Character objects with positionally assigned labels.
        """
        # Normalize polarity so that ink is white on black.
        if np.mean(binary_image) > 127:
            ink = cv2.bitwise_not(binary_image)
        else:
            ink = binary_image.copy()

        # Close tiny gaps so each letter forms a single contour.
        closed = cv2.morphologyEx(ink, cv2.MORPH_CLOSE,
                                  np.ones((2, 2), np.uint8))

        contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)

        img_h, img_w = binary_image.shape[0], binary_image.shape[1]
        regions = []
        for cnt in contours:
            # Area filter discards specks and page-sized blobs.
            if not (self.min_char_area < cv2.contourArea(cnt) < self.max_char_area):
                continue
            x, y, w, h = cv2.boundingRect(cnt)

            # Crop from the ORIGINAL binary image with a small margin.
            pad = 5
            left = max(0, x - pad)
            top = max(0, y - pad)
            right = min(img_w, x + w + pad)
            bottom = min(img_h, y + h + pad)

            regions.append({
                'image': binary_image[top:bottom, left:right].copy(),
                'bbox': (x, y, w, h),
                'center_x': x + w // 2,
                'center_y': y + h // 2,
            })

        if not regions:
            return []

        rows = self._group_into_rows(regions)

        # Expected label sequence follows the template's writing order.
        if include_lowercase:
            labels = self.UPPERCASE + self.LOWERCASE
        else:
            labels = self.UPPERCASE

        characters: List[Character] = []
        idx = 0
        for row in rows:
            # Within a row, reading order is left to right.
            for region in sorted(row, key=lambda r: r['center_x']):
                label = labels[idx] if idx < len(labels) else f"char_{idx}"
                characters.append(Character(
                    image=region['image'],
                    bbox=region['bbox'],
                    label=label,
                    confidence=1.0,  # positional labeling is deterministic
                ))
                idx += 1

        return characters

    def _group_into_rows(self, regions: List[dict]) -> List[List[dict]]:
        """Cluster character regions into rows by vertical proximity."""
        if not regions:
            return []

        by_y = sorted(regions, key=lambda r: r['center_y'])

        # Rows are separated when centers differ by more than 1.5x the
        # average glyph height.
        threshold = np.mean([r['bbox'][3] for r in regions]) * 1.5

        rows: List[List[dict]] = [[by_y[0]]]
        anchor_y = by_y[0]['center_y']
        for region in by_y[1:]:
            if abs(region['center_y'] - anchor_y) < threshold:
                rows[-1].append(region)
            else:
                rows.append([region])
                anchor_y = region['center_y']

        return rows

    def get_character_dict(self, characters: List[Character]) -> Dict[str, np.ndarray]:
        """Map each labeled character to its bitmap."""
        return {c.label: c.image for c in characters if c.label}
"""
Font Generator Module for Type.ai

Generates valid TrueType Font (TTF) files from vectorized character paths.
Uses fontTools with proper glyph construction via pen drawing.
"""

import os
from typing import Dict, List, Tuple, Optional
from pathlib import Path
import re
import time
import datetime

from fontTools.fontBuilder import FontBuilder
from fontTools.pens.t2CharStringPen import T2CharStringPen
from fontTools.pens.ttGlyphPen import TTGlyphPen
from fontTools.ttLib import TTFont, newTable


class FontGenerator:
    """
    Generates valid TTF font files from character bitmaps.
    """

    def __init__(self, font_name: str = "CustomHandwriting",
                 units_per_em: int = 2048):
        self.font_name = font_name
        self.units_per_em = units_per_em
        self.ascender = 1800
        self.descender = -500

    def generate_font(self, character_paths: Dict[str, str],
                      output_path: str,
                      font_weight: str = "Regular") -> str:
        """
        Generate a TTF font file from SVG path strings.

        Args:
            character_paths: Map of single character -> SVG path data string.
            output_path: Destination path for the .ttf file.
            font_weight: Style name recorded in the name table.

        Returns:
            The output path of the saved font.

        Raises:
            ValueError: If no usable character paths were supplied.
        """
        # Filter to single characters with non-empty path data.
        valid_chars = {
            char: path
            for char, path in character_paths.items()
            if len(char) == 1 and path and path.strip()
        }
        if not valid_chars:
            raise ValueError("No valid character paths provided")

        # Build glyph order plus a reverse map (glyph name -> char) once,
        # instead of linearly searching valid_chars for every glyph when
        # building outlines and metrics below.
        glyph_to_char: Dict[str, str] = {}
        glyph_order = ['.notdef', 'space']
        for char in sorted(valid_chars.keys()):
            glyph_name = self._char_to_glyph_name(char)
            glyph_to_char.setdefault(glyph_name, char)
            if glyph_name not in glyph_order:
                glyph_order.append(glyph_name)

        # Build cmap; always include a usable space.
        cmap = {32: 'space'}
        for char in valid_chars:
            cmap[ord(char)] = self._char_to_glyph_name(char)

        # Pen-drawn glyph outlines and matching horizontal metrics.
        glyphs = {}
        hmtx = {}
        for glyph_name in glyph_order:
            if glyph_name == '.notdef':
                glyphs[glyph_name] = self._create_notdef_glyph()
                hmtx[glyph_name] = (500, 50)
            elif glyph_name == 'space':
                glyphs[glyph_name] = self._create_space_glyph()
                hmtx[glyph_name] = (250, 0)
            else:
                char = glyph_to_char.get(glyph_name)
                if char is not None:
                    glyphs[glyph_name] = self._svg_to_glyph(valid_chars[char])
                    hmtx[glyph_name] = (self._calc_width(valid_chars[char]), 50)
                else:
                    glyphs[glyph_name] = self._create_space_glyph()
                    hmtx[glyph_name] = (500, 50)

        # Assemble the font with FontBuilder.
        fb = FontBuilder(self.units_per_em, isTTF=True)
        fb.setupGlyphOrder(glyph_order)
        fb.setupCharacterMap(cmap)
        fb.setupGlyf(glyphs)
        fb.setupHorizontalMetrics(hmtx)

        fb.setupHorizontalHeader(
            ascent=self.ascender,
            descent=self.descender
        )

        fb.setupNameTable({
            "familyName": self.font_name,
            "styleName": font_weight,
        })

        # Windows-specific fixes: explicit OS/2 metrics so the font renders
        # correctly in Windows applications.
        fb.setupOS2(
            sTypoAscender=self.ascender,
            sTypoDescender=self.descender,
            sTypoLineGap=90,
            usWinAscent=2000,
            usWinDescent=500,
            sxHeight=900,
            sCapHeight=1400,
            usWeightClass=400,
            usWidthClass=5,
            fsSelection=0b01000000,  # Regular
            version=4
        )

        fb.setupPost()
        fb.setupHead(unitsPerEm=self.units_per_em)

        # Add dummy DSIG (Digital Signature) table for Windows compatibility.
        # Windows requires this table (even if empty) to recognize OpenType
        # in some contexts.
        fb.font['DSIG'] = newTable("DSIG")
        fb.font['DSIG'].ulVersion = 1
        fb.font['DSIG'].usFlag = 0
        fb.font['DSIG'].usNumSigs = 0
        fb.font['DSIG'].signatureRecords = []

        fb.save(output_path)

        # Fix timestamps by reopening the saved font.
        self._fix_timestamps(output_path)

        return output_path

    def _fix_timestamps(self, font_path: str):
        """Rewrite head.created/modified as seconds since the 1904 epoch."""
        try:
            font = TTFont(font_path)
            epoch_1904 = datetime.datetime(1904, 1, 1)
            now = datetime.datetime.now()
            timestamp = int((now - epoch_1904).total_seconds())

            font['head'].created = timestamp
            font['head'].modified = timestamp

            font.save(font_path)
        except Exception as e:
            # Best-effort: a bad timestamp does not invalidate the font.
            print(f"Warning: Could not fix timestamps: {e}")

    def _char_to_glyph_name(self, char: str) -> str:
        """Map a character to a conventional PostScript glyph name."""
        if char.isalpha():
            return char
        elif char.isdigit():
            names = ['zero', 'one', 'two', 'three', 'four',
                     'five', 'six', 'seven', 'eight', 'nine']
            return names[int(char)]
        elif char == ' ':
            return 'space'
        elif char == '.':
            return 'period'
        elif char == ',':
            return 'comma'
        elif char == '!':
            return 'exclam'
        elif char == '?':
            return 'question'
        else:
            # Fallback: uniXXXX naming for everything else.
            return f'uni{ord(char):04X}'

    def _create_notdef_glyph(self):
        """Draw the .notdef box (outer rectangle with an inner hole)."""
        pen = TTGlyphPen(None)
        pen.moveTo((100, 0))
        pen.lineTo((100, 700))
        pen.lineTo((400, 700))
        pen.lineTo((400, 0))
        pen.closePath()
        pen.moveTo((150, 50))
        pen.lineTo((350, 50))
        pen.lineTo((350, 650))
        pen.lineTo((150, 650))
        pen.closePath()
        return pen.glyph()

    def _create_space_glyph(self):
        """Return an empty glyph (no outline)."""
        pen = TTGlyphPen(None)
        return pen.glyph()

    def _svg_to_glyph(self, svg_path: str):
        """
        Convert an SVG path string (M/L/Q/C/Z, absolute or relative)
        into a TrueType glyph. Cubic curves are approximated with a
        single quadratic segment through the midpoint of the two
        control points. Returns .notdef if the path draws nothing.
        """
        pen = TTGlyphPen(None)
        if not svg_path or not svg_path.strip():
            return self._create_notdef_glyph()

        tokens = re.findall(r'[MLQCZmlqcz]|[-+]?\d*\.?\d+', svg_path)
        if not tokens:
            return self._create_notdef_glyph()

        i = 0
        has_content = False
        current_x, current_y = 0, 0

        while i < len(tokens):
            cmd = tokens[i] if tokens[i] in 'MLQCZmlqcz' else None

            if cmd in ('M', 'm'):
                if i + 2 < len(tokens):
                    try:
                        x = float(tokens[i + 1])
                        y = float(tokens[i + 2])
                        if cmd == 'm':  # Relative
                            x += current_x
                            y += current_y
                        pen.moveTo((int(x), int(y)))
                        current_x, current_y = x, y
                        has_content = True
                    except ValueError:
                        pass
                    i += 3
                else:
                    i += 1
            elif cmd in ('L', 'l'):
                if i + 2 < len(tokens):
                    try:
                        x = float(tokens[i + 1])
                        y = float(tokens[i + 2])
                        if cmd == 'l':  # Relative
                            x += current_x
                            y += current_y
                        pen.lineTo((int(x), int(y)))
                        current_x, current_y = x, y
                    except ValueError:
                        pass
                    i += 3
                else:
                    i += 1
            elif cmd in ('Q', 'q'):
                if i + 4 < len(tokens):
                    try:
                        cx = float(tokens[i + 1])
                        cy = float(tokens[i + 2])
                        x = float(tokens[i + 3])
                        y = float(tokens[i + 4])
                        if cmd == 'q':  # Relative
                            cx += current_x
                            cy += current_y
                            x += current_x
                            y += current_y
                        pen.qCurveTo((int(cx), int(cy)), (int(x), int(y)))
                        current_x, current_y = x, y
                    except ValueError:
                        pass
                    i += 5
                else:
                    i += 1
            elif cmd in ('C', 'c'):
                if i + 6 < len(tokens):
                    try:
                        c1x = float(tokens[i + 1])
                        c1y = float(tokens[i + 2])
                        c2x = float(tokens[i + 3])
                        c2y = float(tokens[i + 4])
                        x = float(tokens[i + 5])
                        y = float(tokens[i + 6])
                        if cmd == 'c':  # Relative
                            c1x += current_x
                            c1y += current_y
                            c2x += current_x
                            c2y += current_y
                            x += current_x
                            y += current_y
                        # Approximate the cubic with a quadratic whose
                        # control point is the midpoint of c1 and c2.
                        mx = (c1x + c2x) / 2
                        my = (c1y + c2y) / 2
                        pen.qCurveTo((int(mx), int(my)), (int(x), int(y)))
                        current_x, current_y = x, y
                    except ValueError:
                        pass
                    i += 7
                else:
                    i += 1
            elif cmd in ('Z', 'z'):
                pen.closePath()
                i += 1
            else:
                i += 1

        if not has_content:
            return self._create_notdef_glyph()
        return pen.glyph()

    def _calc_width(self, svg_path: str) -> int:
        """
        Estimate an advance width from the horizontal extent of the path.

        NOTE(review): assumes x-coordinates sit at even token positions;
        this holds because every command (M/L: 2, Q: 4, C: 6) consumes an
        even number of coordinates.
        """
        x_coords = []
        tokens = re.findall(r'[-+]?\d*\.?\d+', svg_path)
        for i in range(0, len(tokens) - 1, 2):
            try:
                x_coords.append(float(tokens[i]))
            except ValueError:
                pass
        if x_coords:
            width = int(max(x_coords) - min(x_coords) + 100)
            return max(width, 200)
        return 500


def generate_font(character_paths: Dict[str, str],
                  output_path: str,
                  font_name: str = "CustomHandwriting") -> str:
    """Convenience wrapper: build a FontGenerator and generate the font."""
    generator = FontGenerator(font_name)
    return generator.generate_font(character_paths, output_path)
+ """ + try: + # Load font + font = ImageFont.truetype(font_path, font_size) + except Exception as e: + print(f"Error loading font {font_path}: {e}") + return None + + # Calculate size + dummy_img = Image.new('RGB', (1, 1)) + draw = ImageDraw.Draw(dummy_img) + bbox = draw.textbbox((0, 0), text, font=font) + width = bbox[2] - bbox[0] + 40 # Padding + height = bbox[3] - bbox[1] + 40 + + # Draw + image = Image.new('RGB', (width, height), color=(255, 255, 255)) + draw = ImageDraw.Draw(image) + + # Center roughly + draw.text((20, 20), text, font=font, fill=(0, 0, 0)) + + image.save(output_path) + return output_path diff --git a/src/pipeline.py b/src/pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..efb8bfc77849046ce375ab32a85e06e2c7cfe91c --- /dev/null +++ b/src/pipeline.py @@ -0,0 +1,192 @@ +""" +Main Pipeline Orchestrator for Type.ai + +Connects all modules to create complete handwriting-to-font pipeline: +1. Preprocess image +2. Segment characters +3. Extract style features +4. Vectorize characters +5. Generate TTF font file +""" + +import os +from pathlib import Path +from typing import Optional, Dict, List +from datetime import datetime + +from .preprocessing import ImagePreprocessor +from .segmentation import CharacterSegmenter, Character +from .style_extractor import StyleExtractor, StyleFeatures +from .vectorizer import CharacterVectorizer +from .font_generator import FontGenerator + + +class TypeAIPipeline: + """ + Complete pipeline for converting handwriting to fonts. + """ + + def __init__(self, debug: bool = False): + """ + Initialize pipeline. 
class TypeAIPipeline:
    """
    Complete pipeline for converting handwriting to fonts.
    """

    def __init__(self, debug: bool = False):
        """
        Initialize pipeline.

        Args:
            debug: Whether to save debug outputs
        """
        self.debug = debug

        # Initialize all components once; process() reuses them.
        self.preprocessor = ImagePreprocessor()
        self.segmenter = CharacterSegmenter()
        self.style_extractor = StyleExtractor()
        self.vectorizer = CharacterVectorizer()
        self.font_generator = None  # Created per-font

    def process(self,
                input_image_path: str,
                output_font_path: str,
                font_name: str = "CustomHandwriting",
                debug_dir: Optional[str] = None) -> str:
        """
        Process handwriting image and generate font.

        Args:
            input_image_path: Path to handwriting sample image
            output_font_path: Path for output font file (.ttf)
            font_name: Name for the generated font
            debug_dir: Optional directory for debug outputs

        Returns:
            Path to generated font file

        Raises:
            ValueError: If no characters are detected or none vectorize.
        """
        # Setup debug directory
        if self.debug and debug_dir:
            os.makedirs(debug_dir, exist_ok=True)

        print(f"[Type.ai] Processing: {input_image_path}")

        # Step 1: Preprocess image.
        # Fix: use the preprocessor created in __init__ instead of
        # instantiating a second, throwaway ImagePreprocessor.
        print("[1/5] Preprocessing image...")
        binary_image = self.preprocessor.preprocess(input_image_path)

        if self.debug and debug_dir:
            import cv2
            cv2.imwrite(os.path.join(debug_dir, "01_preprocessed.png"), binary_image)

        # Step 2: Segment characters
        print("[2/5] Segmenting characters...")
        characters = self.segmenter.segment(binary_image)
        characters = self.segmenter.classify_characters(characters)

        print(f"    Found {len(characters)} characters")

        if self.debug and debug_dir:
            import cv2
            original = cv2.imread(input_image_path)
            original = self.preprocessor._resize(original)
            # Side effect only: visualize() writes the annotated image.
            self.segmenter.visualize(original, characters,
                                     os.path.join(debug_dir, "02_segmentation.png"))

            # Save individual characters
            char_dir = os.path.join(debug_dir, "characters")
            os.makedirs(char_dir, exist_ok=True)
            for i, char in enumerate(characters):
                cv2.imwrite(os.path.join(char_dir, f"{i:02d}_{char.label}.png"),
                            char.image)

        if not characters:
            raise ValueError("No characters detected in the image!")

        # Step 3: Extract style
        print("[3/5] Extracting style features...")
        char_images = [c.image for c in characters]
        style = self.style_extractor.extract_style(char_images)

        print(f"    Style: stroke={style.stroke_width:.1f}px, " +
              f"slant={style.slant_angle:.1f}°, roughness={style.roughness:.2f}")

        if self.debug and debug_dir:
            # Save style info
            with open(os.path.join(debug_dir, "03_style.txt"), 'w') as f:
                for key, val in style.to_dict().items():
                    f.write(f"{key}: {val}\n")

        # Step 4: Vectorize characters
        print("[4/5] Vectorizing characters...")
        char_dict = self.segmenter.get_character_dict(characters)

        svg_paths = {}
        for label, bitmap in char_dict.items():
            svg_path, dims = self.vectorizer.vectorize(bitmap)
            if svg_path:
                svg_paths[label] = svg_path

        print(f"    Vectorized {len(svg_paths)} characters")

        if self.debug and debug_dir:
            svg_dir = os.path.join(debug_dir, "svg")
            os.makedirs(svg_dir, exist_ok=True)
            for label, path in svg_paths.items():
                self.vectorizer.create_svg_file(
                    path, 1000, 1000,
                    os.path.join(svg_dir, f"{label}.svg")
                )

        if not svg_paths:
            raise ValueError("No characters could be vectorized!")

        # Step 5: Generate font
        print("[5/5] Generating font file...")
        self.font_generator = FontGenerator(font_name)
        font_path = self.font_generator.generate_font(svg_paths, output_font_path)

        print(f"\nFont generated successfully: {font_path}")
        print(f"    Characters: {', '.join(sorted(svg_paths.keys()))}")

        return font_path

    def get_detected_characters(self, image_path: str) -> List[Character]:
        """
        Get detected characters without generating font.

        Useful for preview/editing before final generation.
        """
        binary = self.preprocessor.preprocess(image_path)
        characters = self.segmenter.segment(binary)
        characters = self.segmenter.classify_characters(characters)
        return characters

    def get_style_features(self, image_path: str) -> StyleFeatures:
        """
        Get style features from handwriting image.
        """
        binary = self.preprocessor.preprocess(image_path)
        characters = self.segmenter.segment(binary)
        char_images = [c.image for c in characters]
        return self.style_extractor.extract_style(char_images)


def process_handwriting(input_path: str,
                        output_path: str,
                        font_name: str = "CustomHandwriting",
                        debug: bool = False) -> str:
    """
    Convenience function for handwriting-to-font conversion.

    Args:
        input_path: Path to handwriting image
        output_path: Path for output TTF file
        font_name: Name for the font
        debug: Save debug outputs

    Returns:
        Path to generated font
    """
    pipeline = TypeAIPipeline(debug=debug)

    debug_dir = None
    if debug:
        debug_dir = str(Path(output_path).parent / "debug")

    return pipeline.process(input_path, output_path, font_name, debug_dir)
class ImagePreprocessor:
    """
    Preprocesses handwriting images for optimal character extraction.
    """

    def __init__(self, target_height: int = 800):
        """
        Initialize preprocessor.

        Args:
            target_height: Target height for resizing (maintains aspect ratio)
        """
        self.target_height = target_height

    def preprocess(self, image_path: str) -> np.ndarray:
        """
        Complete preprocessing pipeline.

        Args:
            image_path: Path to input handwriting image

        Returns:
            Preprocessed binary image as numpy array

        Raises:
            ValueError: If the image cannot be loaded.
        """
        image = cv2.imread(str(image_path))
        if image is None:
            raise ValueError(f"Could not load image: {image_path}")
        return self._preprocess_array(image)

    def _preprocess_array(self, image: np.ndarray) -> np.ndarray:
        """Run the full pipeline on an already-loaded BGR/gray image."""
        # Resize to standard height
        image = self._resize(image)

        # Convert to grayscale
        gray = self._to_grayscale(image)

        # Enhance contrast
        enhanced = self._enhance_contrast(gray)

        # Deskew
        deskewed = self._deskew(enhanced)

        # Denoise
        denoised = self._denoise(deskewed)

        # Binarize
        return self._binarize(denoised)

    def _resize(self, image: np.ndarray) -> np.ndarray:
        """Resize image to target height maintaining aspect ratio."""
        h, w = image.shape[:2]
        if h == 0:
            return image

        scale = self.target_height / h
        new_w = int(w * scale)
        return cv2.resize(image, (new_w, self.target_height), interpolation=cv2.INTER_AREA)

    def _to_grayscale(self, image: np.ndarray) -> np.ndarray:
        """Convert to grayscale if needed."""
        if len(image.shape) == 3:
            return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return image

    def _enhance_contrast(self, image: np.ndarray) -> np.ndarray:
        """Enhance contrast using CLAHE."""
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        return clahe.apply(image)

    def _deskew(self, image: np.ndarray) -> np.ndarray:
        """Detect and correct skew using Hough Transform.

        NOTE: expects a single-channel (grayscale) image; `image.shape`
        is unpacked as (h, w) below.
        """
        # Edge detection
        edges = cv2.Canny(image, 50, 150, apertureSize=3)

        # Detect lines
        lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

        if lines is None:
            return image

        # Calculate angles near horizontal
        angles = []
        for rho, theta in lines[:, 0]:
            angle = np.degrees(theta) - 90
            if -45 < angle < 45:
                angles.append(angle)

        if not angles:
            return image

        # Use median angle for robustness against outlier lines
        median_angle = np.median(angles)

        # Only deskew if angle is significant
        if abs(median_angle) < 0.5:
            return image

        # Rotate image around its center
        h, w = image.shape
        center = (w // 2, h // 2)
        rotation_matrix = cv2.getRotationMatrix2D(center, median_angle, 1.0)
        rotated = cv2.warpAffine(image, rotation_matrix, (w, h),
                                 flags=cv2.INTER_CUBIC,
                                 borderMode=cv2.BORDER_REPLICATE)
        return rotated

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """Remove noise using Non-local Means Denoising."""
        return cv2.fastNlMeansDenoising(image, None, h=10,
                                        templateWindowSize=7,
                                        searchWindowSize=21)

    def _binarize(self, image: np.ndarray) -> np.ndarray:
        """Convert to binary using adaptive thresholding."""
        return cv2.adaptiveThreshold(
            image, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            blockSize=15, C=10
        )

    def get_original_and_binary(self, image_path: str) -> Tuple[np.ndarray, np.ndarray]:
        """
        Get both original (resized) and binary versions.

        Fix: the image is now loaded from disk once and preprocessed from
        the in-memory array, instead of reading the file a second time via
        preprocess(image_path).

        Returns:
            Tuple of (original_color, binary)

        Raises:
            ValueError: If the image cannot be loaded.
        """
        image = cv2.imread(str(image_path))
        if image is None:
            raise ValueError(f"Could not load image: {image_path}")

        original = self._resize(image)
        binary = self._preprocess_array(image)

        return original, binary


def preprocess_image(image_path: str, target_height: int = 800) -> np.ndarray:
    """Convenience function for preprocessing."""
    preprocessor = ImagePreprocessor(target_height)
    return preprocessor.preprocess(image_path)
"""
TrOCR Recognizer Module
Uses Microsoft's TrOCR (Transformer OCR) to recognize handwritten characters.
"""
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch
import warnings

# Suppress warnings
warnings.filterwarnings("ignore")


class HandwritingRecognizer:
    """Wraps a TrOCR processor/model pair for handwriting recognition."""

    def __init__(self, model_name="microsoft/trocr-small-handwritten"):
        import os
        local_path = "models/trocr"
        # Prefer a locally cached checkpoint; fall back to the Hub.
        if os.path.isdir(local_path):
            print(f"Loading Local TrOCR model from {local_path}...")
            model_to_load = local_path
        else:
            print(f"Loading TrOCR model ({model_name}) from Hub...")
            model_to_load = model_name

        self.processor = TrOCRProcessor.from_pretrained(model_to_load, use_fast=False)
        self.model = VisionEncoderDecoderModel.from_pretrained(model_to_load)
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        print(f"Model loaded on {self.device}")

    def recognize(self, image_path_or_obj, return_attention=False):
        """
        Recognize text in an image.

        Args:
            image_path_or_obj: Path to image or PIL Image object.
            return_attention: If True, returns (text, attention_map)

        Returns:
            Recognized string (or tuple if return_attention=True)
        """
        # Accept either a filesystem path or an in-memory PIL image.
        if isinstance(image_path_or_obj, str):
            image = Image.open(image_path_or_obj).convert("RGB")
        else:
            image = image_path_or_obj.convert("RGB")

        pixel_values = (self.processor(images=image, return_tensors="pt")
                        .pixel_values
                        .to(self.device))

        # Generation options; attentions only when the caller asked for them.
        gen_kwargs = {"max_length": 64}
        if return_attention:
            gen_kwargs["output_attentions"] = True
            gen_kwargs["return_dict_in_generate"] = True

        outputs = self.model.generate(pixel_values, **gen_kwargs)

        if not return_attention:
            return self.processor.batch_decode(outputs, skip_special_tokens=True)[0]

        text = self.processor.batch_decode(outputs.sequences,
                                           skip_special_tokens=True)[0]
        # Cross attentions layout: tuple over generated steps, each a tuple
        # over decoder layers, each a Tensor(batch, num_heads, 1, src_len).
        # Aggregation across heads/layers is left to the caller.
        return text, outputs.cross_attentions


if __name__ == "__main__":
    # Smoke test: loading the model is the expensive part.
    rec = HandwritingRecognizer()
    print("Recognizer Ready.")
Map each character to its portion of the word +""" + +import cv2 +import numpy as np +from dataclasses import dataclass +from typing import List, Dict, Tuple, Optional +from pathlib import Path +import os + +# CRAFT model path (user provided) +CRAFT_MODEL_PATH = r"C:\Users\np080\Downloads\craft_mlt_25k.pth" +CRAFT_AVAILABLE = False + +try: + import torch + if os.path.exists(CRAFT_MODEL_PATH): + CRAFT_AVAILABLE = True + print(f"CRAFT model found: {CRAFT_MODEL_PATH}") +except ImportError: + pass + +# Tesseract OCR +TESSERACT_AVAILABLE = False +try: + import pytesseract + if os.name == 'nt': + tesseract_paths = [ + r'C:\Program Files\Tesseract-OCR\tesseract.exe', + r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe', + ] + for path in tesseract_paths: + if os.path.exists(path): + pytesseract.pytesseract.tesseract_cmd = path + TESSERACT_AVAILABLE = True + print(f"Tesseract found: {path}") + break + else: + TESSERACT_AVAILABLE = True +except ImportError: + print("Warning: pytesseract not installed") + + +@dataclass +class Character: + """Represents a detected character.""" + image: np.ndarray + bbox: Tuple[int, int, int, int] # (x, y, w, h) + label: str = "" + confidence: float = 0.0 + + +class CharacterSegmenter: + """ + Segments individual characters from cursive handwriting. + """ + + def __init__(self, min_char_area: int = 50, max_char_area: int = 100000): + self.min_char_area = min_char_area + self.max_char_area = max_char_area + + def segment(self, binary_image: np.ndarray) -> List[Character]: + """ + Segment characters from cursive handwriting. + + Steps: + 1. Find word-level contours + 2. OCR each word to get text + 3. 
Split word into individual character images + """ + # Invert if needed + if np.mean(binary_image) > 127: + inverted = cv2.bitwise_not(binary_image) + else: + inverted = binary_image.copy() + + # Clean up + kernel = np.ones((2, 2), np.uint8) + cleaned = cv2.morphologyEx(inverted, cv2.MORPH_CLOSE, kernel) + + # Find word-level contours + contours, _ = cv2.findContours(cleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + all_characters = [] + + for contour in contours: + area = cv2.contourArea(contour) + if area < self.min_char_area or area > self.max_char_area: + continue + + x, y, w, h = cv2.boundingRect(contour) + + # Extract word region + padding = 10 + x1 = max(0, x - padding) + y1 = max(0, y - padding) + x2 = min(binary_image.shape[1], x + w + padding) + y2 = min(binary_image.shape[0], y + h + padding) + + word_img = binary_image[y1:y2, x1:x2].copy() + + # OCR the word to get full text + word_text = self._ocr_word(word_img) + + if word_text and len(word_text) > 0: + # Split word into individual character images + char_images = self._split_word_to_chars(word_img, word_text) + + # Create Character objects + char_width = w // max(len(word_text), 1) + for i, (char_label, char_img) in enumerate(char_images): + char_x = x + i * char_width + all_characters.append(Character( + image=char_img, + bbox=(char_x, y, char_width, h), + label=char_label, + confidence=0.8 + )) + else: + # Fallback: use as single character + all_characters.append(Character( + image=word_img, + bbox=(x, y, w, h), + label="", + confidence=0.0 + )) + + # Sort by position + all_characters = self._sort_characters(all_characters) + + return all_characters + + def _ocr_word(self, word_image: np.ndarray) -> str: + """OCR a word image to get the full text.""" + if not TESSERACT_AVAILABLE: + return "" + + try: + # Prepare image + h, w = word_image.shape[:2] + if h < 40: + scale = 60 / h + word_image = cv2.resize(word_image, None, fx=scale, fy=scale, + interpolation=cv2.INTER_CUBIC) + + # Add border + 
bordered = cv2.copyMakeBorder(word_image, 15, 15, 15, 15, + cv2.BORDER_CONSTANT, value=255) + + # OCR - use word mode (PSM 8) not single char mode + config = '--psm 8 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' + result = pytesseract.image_to_string(bordered, config=config).strip() + + # Clean result - only keep letters + cleaned = ''.join(c for c in result if c.isalpha()) + return cleaned + + except Exception as e: + print(f"OCR error: {e}") + return "" + + def _split_word_to_chars(self, word_image: np.ndarray, + word_text: str) -> List[Tuple[str, np.ndarray]]: + """ + Split a word image into individual character images. + Uses vertical projection profile to find natural split points. + """ + if not word_text: + return [] + + h, w = word_image.shape[:2] + num_chars = len(word_text) + + if num_chars == 1: + return [(word_text[0], word_image)] + + # Try to use vertical projection to find splits + char_images = self._projection_split(word_image, num_chars) + + if len(char_images) != num_chars: + # Fallback: equal width splits + char_images = self._equal_split(word_image, num_chars) + + # Pair with labels + result = [] + for i, char_img in enumerate(char_images): + if i < len(word_text): + result.append((word_text[i], char_img)) + + return result + + def _projection_split(self, word_image: np.ndarray, + num_chars: int) -> List[np.ndarray]: + """Split using vertical projection profile.""" + h, w = word_image.shape[:2] + + # Invert for projection (white text on black) + if np.mean(word_image) > 127: + inverted = cv2.bitwise_not(word_image) + else: + inverted = word_image + + # Vertical projection - sum of white pixels in each column + projection = np.sum(inverted, axis=0) + + # Smooth the projection + kernel_size = max(3, w // 30) + if kernel_size % 2 == 0: + kernel_size += 1 + smoothed = cv2.GaussianBlur(projection.reshape(1, -1).astype(np.float32), + (kernel_size, 1), 0).flatten() + + # Find local minima as split points + 
min_indices = [] + threshold = np.mean(smoothed) * 0.3 + + for i in range(1, len(smoothed) - 1): + if smoothed[i] < smoothed[i-1] and smoothed[i] < smoothed[i+1]: + if smoothed[i] < threshold: + min_indices.append(i) + + # If we found good splits, use them + if len(min_indices) >= num_chars - 1: + # Take the best num_chars-1 splits + split_points = sorted(min_indices)[:num_chars-1] + split_points = [0] + sorted(split_points) + [w] + + char_images = [] + for i in range(len(split_points) - 1): + x1 = split_points[i] + x2 = split_points[i + 1] + if x2 > x1: + char_img = word_image[:, x1:x2] + char_images.append(char_img) + + if len(char_images) == num_chars: + return char_images + + # Fallback to equal splits + return self._equal_split(word_image, num_chars) + + def _equal_split(self, word_image: np.ndarray, + num_chars: int) -> List[np.ndarray]: + """Split image into equal-width parts.""" + h, w = word_image.shape[:2] + char_width = max(1, w // num_chars) + + char_images = [] + for i in range(num_chars): + x1 = i * char_width + x2 = min(x1 + char_width, w) if i < num_chars - 1 else w + if x2 > x1: + char_images.append(word_image[:, x1:x2]) + + return char_images + + def _sort_characters(self, characters: List[Character]) -> List[Character]: + """Sort characters in reading order (top-to-bottom, left-to-right).""" + if not characters: + return characters + + # Group by lines + line_threshold = 30 + lines = [] + current_line = [] + + sorted_by_y = sorted(characters, key=lambda c: c.bbox[1]) + + for char in sorted_by_y: + if not current_line: + current_line.append(char) + else: + last_y = current_line[-1].bbox[1] + if abs(char.bbox[1] - last_y) < line_threshold: + current_line.append(char) + else: + lines.append(current_line) + current_line = [char] + + if current_line: + lines.append(current_line) + + # Sort each line by x + sorted_chars = [] + for line in lines: + line_sorted = sorted(line, key=lambda c: c.bbox[0]) + sorted_chars.extend(line_sorted) + + return 
sorted_chars + + def classify_characters(self, characters: List[Character]) -> List[Character]: + """ + Characters are already classified during segmentation. + This method handles any that weren't labeled. + """ + unlabeled = [c for c in characters if not c.label] + + if unlabeled: + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + for i, char in enumerate(unlabeled): + if i < len(alphabet): + char.label = alphabet[i] + else: + char.label = f"char_{i}" + char.confidence = 0.3 + + return characters + + def get_character_dict(self, characters: List[Character]) -> Dict[str, np.ndarray]: + """Get unique characters as dict.""" + char_dict = {} + for char in characters: + if char.label and char.label not in char_dict: + char_dict[char.label] = char.image + return char_dict + + def visualize(self, original_image: np.ndarray, + characters: List[Character], + output_path: Optional[str] = None) -> np.ndarray: + """Visualize detected characters with bounding boxes and labels.""" + if len(original_image.shape) == 2: + vis = cv2.cvtColor(original_image, cv2.COLOR_GRAY2BGR) + else: + vis = original_image.copy() + + for char in characters: + x, y, w, h = char.bbox + cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2) + if char.label: + cv2.putText(vis, char.label, (x, y - 5), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1) + + if output_path: + cv2.imwrite(str(output_path), vis) + + return vis + + +def segment_characters(binary_image: np.ndarray) -> List[Character]: + """Convenience function for segmentation.""" + segmenter = CharacterSegmenter() + characters = segmenter.segment(binary_image) + characters = segmenter.classify_characters(characters) + return characters diff --git a/src/segmenter.py b/src/segmenter.py new file mode 100644 index 0000000000000000000000000000000000000000..8709d8a41b2e668ac9db4526b199e3581973b69e --- /dev/null +++ b/src/segmenter.py @@ -0,0 +1,148 @@ +""" +Segmenter Module +Uses EasyOCR (CRAFT) to detect lines and words. 
+Then uses projection profiles for character splitting. +""" +import cv2 +import numpy as np +import easyocr +from typing import List, Tuple + +class TextSegmenter: + def __init__(self): + print("Loading EasyOCR (CRAFT) model...") + import os + local_path = os.path.join(os.getcwd(), "models", "easyocr") + + # Check if local models exist, otherwise use default + if os.path.exists(local_path) and os.listdir(local_path): + print(f"Using local EasyOCR assets from: {local_path}") + self.reader = easyocr.Reader(['en'], gpu=True, model_storage_directory=local_path) + else: + print("Using default EasyOCR assets (loading/downloading)...") + self.reader = easyocr.Reader(['en'], gpu=True) + + def segment_lines_words(self, image_path: str) -> List[np.ndarray]: + """ + Detect word bounding boxes and return cropped word images. + """ + img = cv2.imread(image_path) + if img is None: + raise ValueError("Image not found") + + # EasyOCR returns coordinates in [ [x,y]... ] format + # detail=1 gives boxes, text, confidence + results = self.reader.readtext(image_path) + + word_images = [] + for (bbox, text, prob) in results: + # bbox = [[x1,y1], [x2,y2], [x3,y3], [x4,y4]] + pt1 = np.array(bbox[0]) + pt3 = np.array(bbox[2]) + + x_min = int(min(pt1[0], pt3[0])) + x_max = int(max(pt1[0], pt3[0])) + y_min = int(min(pt1[1], pt3[1])) + y_max = int(max(pt1[1], pt3[1])) + + # Crop + crop = img[y_min:y_max, x_min:x_max] + if crop.size > 0: + word_images.append(crop) + + return word_images + + def segment_characters(self, word_img: np.ndarray) -> List[np.ndarray]: + """ + Segment a word image into characters using vertical projection. + (Simple baseline for unconnected script). + """ + gray = cv2.cvtColor(word_img, cv2.COLOR_BGR2GRAY) + _, bin_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + + # Vertical projection directly on binary image + # Erosion was causing thin strokes to disappear and split characters. 
+ proj = np.sum(bin_img, axis=0) + + # If proj > 0, we have ink + # Look for gaps (proj == 0 or very low) + h, w = bin_img.shape + threshold = 0 # Strict zero for eroded gaps + + cuts = [] + in_char = False + start = 0 + + chars = [] + + for x in range(w): + val = proj[x] + if not in_char and val > threshold: + in_char = True + start = x + elif in_char and val <= threshold: + in_char = False + end = x + # Use the ORIGINAL image for the crop + char_crop = word_img[:, max(0, start-1):min(w, end+1)] + if char_crop.shape[1] > 5: + chars.append(char_crop) + + # Catch last char + if in_char: + char_crop = word_img[:, max(0, start-1):w] + if char_crop.shape[1] > 5: + chars.append(char_crop) + + return chars + + def segment_connected_characters(self, img: np.ndarray, num_chars: int) -> List[np.ndarray]: + """ + Force split a connected word image into N characters. + Uses equidistant heuristic refined by ink minima. + """ + if num_chars <= 1: + return [img] + + h, w = img.shape[:2] + if w < num_chars: # Too thin + return [img] + + # Vertical Projection + if len(img.shape) == 3: + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + else: + gray = img + + _, bin_img = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + proj = np.sum(bin_img, axis=0) + + # Ideal width + char_w = w / num_chars + cuts = [] + + for i in range(1, num_chars): + ideal_cut = int(i * char_w) + # Search local window for ink minimum (valley) + search_range = int(char_w * 0.3) # 30% wiggle room + start_search = max(0, ideal_cut - search_range) + end_search = min(w - 1, ideal_cut + search_range) + + # Find index of min projection in this window + window_proj = proj[start_search:end_search] + if len(window_proj) > 0: + # Add start_search to get global index + best_cut = start_search + np.argmin(window_proj) + cuts.append(best_cut) + else: + cuts.append(ideal_cut) + + # Perform cuts + char_imgs = [] + last_cut = 0 + for cut in cuts: + char_imgs.append(img[:, last_cut:cut]) + last_cut = cut + 
char_imgs.append(img[:, last_cut:]) # Last char + + return char_imgs diff --git a/src/style_extractor.py b/src/style_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..113929eb557f80d89abc84d1c9ffebc575e114db --- /dev/null +++ b/src/style_extractor.py @@ -0,0 +1,294 @@ +""" +Style Extractor Module for Type.ai + +Extracts handwriting style features from character samples: +- Stroke width/thickness +- Slant angle +- Letter proportions +- Roughness (hand-drawn quality) +- Baseline variation + +This module provides style analysis to help generate consistent +missing characters. +""" + +import cv2 +import numpy as np +from dataclasses import dataclass +from typing import List, Dict, Optional, Tuple +import math + + +@dataclass +class StyleFeatures: + """Extracted handwriting style features.""" + stroke_width: float # Average stroke width in pixels + slant_angle: float # Slant angle in degrees (-45 to +45) + aspect_ratio: float # Average width/height ratio + roughness: float # 0-1, higher = more hand-drawn look + baseline_var: float # Baseline variation 0-1 + avg_height: float # Average character height + avg_width: float # Average character width + letter_spacing: float # Typical spacing between letters + + def to_dict(self) -> dict: + return { + 'stroke_width': self.stroke_width, + 'slant_angle': self.slant_angle, + 'aspect_ratio': self.aspect_ratio, + 'roughness': self.roughness, + 'baseline_var': self.baseline_var, + 'avg_height': self.avg_height, + 'avg_width': self.avg_width, + 'letter_spacing': self.letter_spacing + } + + +class StyleExtractor: + """ + Extracts handwriting style features from character images. + """ + + def __init__(self): + pass + + def extract_style(self, char_images: List[np.ndarray]) -> StyleFeatures: + """ + Analyze multiple character images to extract overall style. 
+ + Args: + char_images: List of character bitmap images + + Returns: + StyleFeatures with extracted properties + """ + if not char_images: + return self._default_style() + + stroke_widths = [] + slant_angles = [] + aspect_ratios = [] + roughness_values = [] + heights = [] + widths = [] + + for img in char_images: + if img is None or img.size == 0: + continue + + # Ensure grayscale + if len(img.shape) == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + h, w = img.shape[:2] + heights.append(h) + widths.append(w) + + # Measure stroke width + sw = self._measure_stroke_width(img) + if sw > 0: + stroke_widths.append(sw) + + # Measure slant + slant = self._measure_slant(img) + slant_angles.append(slant) + + # Measure aspect ratio + if h > 0: + aspect_ratios.append(w / h) + + # Measure roughness + rough = self._measure_roughness(img) + roughness_values.append(rough) + + return StyleFeatures( + stroke_width=np.mean(stroke_widths) if stroke_widths else 3.0, + slant_angle=np.median(slant_angles) if slant_angles else 0.0, + aspect_ratio=np.mean(aspect_ratios) if aspect_ratios else 0.6, + roughness=np.mean(roughness_values) if roughness_values else 0.3, + baseline_var=0.1, # Computed from full layout + avg_height=np.mean(heights) if heights else 50, + avg_width=np.mean(widths) if widths else 30, + letter_spacing=np.mean(widths) * 0.2 if widths else 10 + ) + + def _measure_stroke_width(self, img: np.ndarray) -> float: + """Measure average stroke width using distance transform.""" + try: + # Binarize + _, binary = cv2.threshold(img, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + + # Distance transform + dist = cv2.distanceTransform(binary, cv2.DIST_L2, 3) + + # Average non-zero distance + non_zero = dist[dist > 0] + if len(non_zero) > 0: + return float(np.mean(non_zero) * 2) # Diameter + + return float(np.max(dist) * 2) + except Exception: + return 3.0 + + def _measure_slant(self, img: np.ndarray) -> float: + """Measure slant angle using contour analysis.""" + try: + 
_, binary = cv2.threshold(img, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + + contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_SIMPLE) + + if not contours: + return 0.0 + + # Use largest contour + largest = max(contours, key=cv2.contourArea) + + # Fit ellipse if enough points + if len(largest) >= 5: + ellipse = cv2.fitEllipse(largest) + angle = ellipse[2] + # Normalize to -45 to +45 + if angle > 90: + angle -= 180 + return float(angle) + + return 0.0 + except Exception: + return 0.0 + + def _measure_roughness(self, img: np.ndarray) -> float: + """Measure edge roughness (hand-drawn quality).""" + try: + _, binary = cv2.threshold(img, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_SIMPLE) + + if not contours: + return 0.3 + + largest = max(contours, key=cv2.contourArea) + area = cv2.contourArea(largest) + perimeter = cv2.arcLength(largest, True) + + if area <= 0 or perimeter <= 0: + return 0.3 + + # Circularity: 4*pi*area / perimeter^2 + circularity = (4 * np.pi * area) / (perimeter ** 2) + + # Invert: higher = rougher + roughness = max(0, min(1, 1 - circularity)) + return float(roughness) + except Exception: + return 0.3 + + def _default_style(self) -> StyleFeatures: + """Return default style when no samples available.""" + return StyleFeatures( + stroke_width=3.0, + slant_angle=0.0, + aspect_ratio=0.6, + roughness=0.3, + baseline_var=0.1, + avg_height=50, + avg_width=30, + letter_spacing=10 + ) + + def apply_style_to_template(self, template: np.ndarray, + style: StyleFeatures) -> np.ndarray: + """ + Apply extracted style features to a template character. 
+ + Args: + template: Base character template image + style: Style features to apply + + Returns: + Styled character image + """ + result = template.copy() + + # Apply slant + if abs(style.slant_angle) > 1: + result = self._apply_slant(result, style.slant_angle) + + # Apply stroke width adjustment + result = self._apply_stroke_width(result, style.stroke_width) + + # Apply roughness + if style.roughness > 0.2: + result = self._apply_roughness(result, style.roughness) + + return result + + def _apply_slant(self, img: np.ndarray, angle: float) -> np.ndarray: + """Apply slant transformation.""" + h, w = img.shape[:2] + + # Shear transformation + shear = math.tan(math.radians(angle / 2)) + M = np.float32([[1, shear, 0], [0, 1, 0]]) + + new_w = int(w + abs(shear) * h) + result = cv2.warpAffine(img, M, (new_w, h), + borderMode=cv2.BORDER_CONSTANT, + borderValue=255) + return result + + def _apply_stroke_width(self, img: np.ndarray, + target_width: float) -> np.ndarray: + """Adjust stroke width through dilation/erosion.""" + # Measure current width + current = self._measure_stroke_width(img) + + if current <= 0: + return img + + diff = target_width - current + + if abs(diff) < 0.5: + return img + + # Binarize + _, binary = cv2.threshold(img, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + + kernel_size = max(1, int(abs(diff))) + kernel = np.ones((kernel_size, kernel_size), np.uint8) + + if diff > 0: + # Make strokes thicker + result = cv2.dilate(binary, kernel, iterations=1) + else: + # Make strokes thinner + result = cv2.erode(binary, kernel, iterations=1) + + return cv2.bitwise_not(result) + + def _apply_roughness(self, img: np.ndarray, roughness: float) -> np.ndarray: + """Add hand-drawn roughness to edges.""" + # Add slight noise + noise_level = int(roughness * 10) + if noise_level > 0: + noise = np.random.randint(-noise_level, noise_level + 1, + img.shape, dtype=np.int16) + result = np.clip(img.astype(np.int16) + noise, 0, 255).astype(np.uint8) + else: + result = 
img.copy() + + # Slight blur for natural look + if roughness > 0.3: + result = cv2.GaussianBlur(result, (3, 3), 0.5) + + return result + + +def extract_style(char_images: List[np.ndarray]) -> StyleFeatures: + """Convenience function for style extraction.""" + extractor = StyleExtractor() + return extractor.extract_style(char_images) diff --git a/src/trocr_recognizer.py b/src/trocr_recognizer.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b5fc7497f08f488968bb3c72ea24609dff450e --- /dev/null +++ b/src/trocr_recognizer.py @@ -0,0 +1,176 @@ +""" +TrOCR Handwriting Recognition Module for Type.ai + +Uses Microsoft's TrOCR model for state-of-the-art handwriting recognition. +Pretrained model is downloaded automatically from Hugging Face. +""" + +import os +import cv2 +import numpy as np +from PIL import Image +from typing import List, Optional +import torch + +# Check if TrOCR is available +TROCR_AVAILABLE = False +try: + from transformers import TrOCRProcessor, VisionEncoderDecoderModel + TROCR_AVAILABLE = True + print("TrOCR available") +except ImportError: + print("TrOCR not available - install with: pip install transformers[torch]") + + +class TrOCRRecognizer: + """ + Handwriting recognition using Microsoft TrOCR. + + Uses the handwritten-specific model trained on IAM dataset. + """ + + # Model options: + # - "microsoft/trocr-small-handwritten" - Smaller, faster + # - "microsoft/trocr-base-handwritten" - Better quality + # - "microsoft/trocr-large-handwritten" - Best quality, requires more memory + + def __init__(self, model_name: str = "microsoft/trocr-small-handwritten"): + """ + Initialize TrOCR recognizer. + + Args: + model_name: Hugging Face model name + """ + if not TROCR_AVAILABLE: + raise RuntimeError("TrOCR not available. 
Install with: pip install transformers[torch]") + + self.model_name = model_name + self.processor = None + self.model = None + self.device = "cuda" if torch.cuda.is_available() else "cpu" + + self._load_model() + + def _load_model(self): + """Load the TrOCR model (downloads if not cached).""" + print(f"Loading TrOCR model: {self.model_name}") + print(f"Using device: {self.device}") + + self.processor = TrOCRProcessor.from_pretrained(self.model_name) + self.model = VisionEncoderDecoderModel.from_pretrained(self.model_name) + self.model.to(self.device) + self.model.eval() + + print("TrOCR model loaded successfully!") + + def recognize(self, image: np.ndarray) -> str: + """ + Recognize text from a single image. + + Args: + image: OpenCV image (BGR or grayscale) + + Returns: + Recognized text string + """ + # Convert to PIL Image + if len(image.shape) == 3: + # BGR to RGB + rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + else: + # Grayscale to RGB + rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) + + pil_image = Image.fromarray(rgb) + + # Process + pixel_values = self.processor(pil_image, return_tensors="pt").pixel_values + pixel_values = pixel_values.to(self.device) + + # Generate + with torch.no_grad(): + generated_ids = self.model.generate(pixel_values) + + # Decode + text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return text.strip() + + def recognize_character(self, char_image: np.ndarray) -> str: + """ + Recognize a single character from an image. + + Args: + char_image: Image of a single character + + Returns: + Single character or empty string + """ + text = self.recognize(char_image) + + # Return first character if any + if text: + # Filter to only alphanumeric + for c in text: + if c.isalnum(): + return c + + return "" + + def recognize_batch(self, images: List[np.ndarray]) -> List[str]: + """ + Recognize multiple images. 
+ + Args: + images: List of character images + + Returns: + List of recognized characters + """ + results = [] + for img in images: + try: + result = self.recognize_character(img) + results.append(result) + except Exception as e: + print(f"Recognition error: {e}") + results.append("") + + return results + + +# Global instance for convenience +_recognizer: Optional[TrOCRRecognizer] = None + + +def get_recognizer(model_name: str = "microsoft/trocr-small-handwritten") -> TrOCRRecognizer: + """Get or create TrOCR recognizer instance.""" + global _recognizer + if _recognizer is None: + _recognizer = TrOCRRecognizer(model_name) + return _recognizer + + +def recognize_handwriting(image: np.ndarray) -> str: + """Convenience function for handwriting recognition.""" + recognizer = get_recognizer() + return recognizer.recognize(image) + + +def recognize_character(char_image: np.ndarray) -> str: + """Convenience function for single character recognition.""" + recognizer = get_recognizer() + return recognizer.recognize_character(char_image) + + +if __name__ == "__main__": + # Test the model + print("Testing TrOCR...") + + # Create a simple test image + test_img = np.ones((100, 300, 3), dtype=np.uint8) * 255 + cv2.putText(test_img, "Hello", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 0), 3) + + recognizer = TrOCRRecognizer() + result = recognizer.recognize(test_img) + print(f"Recognized: '{result}'") diff --git a/src/vectorizer.py b/src/vectorizer.py new file mode 100644 index 0000000000000000000000000000000000000000..bc87ad52d27610fbff5716b12313c8299878dd99 --- /dev/null +++ b/src/vectorizer.py @@ -0,0 +1,263 @@ +""" +Vectorizer Module for Type.ai + +Converts bitmap character images to vector paths (SVG format) +for use in font generation. + +Uses OpenCV contour-based approach (potrace-like) for reliable +cross-platform vectorization. 
+""" + +import cv2 +import numpy as np +from typing import List, Tuple, Optional +from pathlib import Path + + +class CharacterVectorizer: + """ + Converts bitmap characters to SVG path data. + """ + + def __init__(self, simplify_tolerance: float = 0.8): + """ + Initialize vectorizer. + + Args: + simplify_tolerance: Path simplification tolerance (higher = simpler) + """ + self.simplify_tolerance = simplify_tolerance + self.min_contour_area = 30 # Filter noise + + def vectorize(self, bitmap: np.ndarray, + normalize: bool = True) -> Tuple[str, Tuple[int, int]]: + """ + Convert bitmap to SVG path. + + Args: + bitmap: Binary character image + normalize: Whether to normalize coordinates + + Returns: + Tuple of (SVG path data string, (width, height)) + """ + # Prepare bitmap + processed = self._prepare_bitmap(bitmap) + h, w = processed.shape[:2] + + # Find contours + contours, hierarchy = cv2.findContours( + processed, cv2.RETR_TREE, cv2.CHAIN_APPROX_TC89_L1 + ) + + if not contours: + return "", (w, h) + + # Convert contours to SVG paths + svg_path = self._contours_to_svg(contours, hierarchy, w, h, normalize) + + return svg_path, (w, h) + + def _prepare_bitmap(self, bitmap: np.ndarray) -> np.ndarray: + """Prepare bitmap for vectorization with enhanced preprocessing.""" + # Ensure we have a proper image + if bitmap is None or bitmap.size == 0: + return np.zeros((10, 10), dtype=np.uint8) + + # Upscale small images for better contour detection + h, w = bitmap.shape[:2] + if h < 150: + scale = 150 / h + bitmap = cv2.resize(bitmap, None, fx=scale, fy=scale, + interpolation=cv2.INTER_CUBIC) + + # Ensure grayscale + if len(bitmap.shape) == 3: + gray = cv2.cvtColor(bitmap, cv2.COLOR_BGR2GRAY) + else: + gray = bitmap.copy() + + # Denoise with slight blur + gray = cv2.GaussianBlur(gray, (3, 3), 0) + + # Threshold to binary + _, binary = cv2.threshold(gray, 0, 255, + cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + + # Remove small noise (opening) + kernel_small = np.ones((2, 2), 
np.uint8) + binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel_small) + + # Close small gaps in strokes + kernel = np.ones((3, 3), np.uint8) + binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel) + + return binary + + def _contours_to_svg(self, contours: List, hierarchy: np.ndarray, + width: int, height: int, + normalize: bool) -> str: + """Convert OpenCV contours to SVG path string with correct winding.""" + paths = [] + + # Hierarchy structure: [Next, Previous, First_Child, Parent] + # We need to determine depth to know if it's a hole or shape + + if hierarchy is None or len(hierarchy) == 0: + return "" + + # Hierarchy is nested list [[...countours...]] + hier_list = hierarchy[0] + + for i, contour in enumerate(contours): + if len(contour) < 3: + continue + + # Filter out noise (small contours) + if cv2.contourArea(contour) < self.min_contour_area: + continue + + # Simplify contour + epsilon = self.simplify_tolerance + simplified = cv2.approxPolyDP(contour, epsilon, True) + + if len(simplified) < 3: + continue + + # Determine depth + # A simple way is to count parents + parent_idx = hier_list[i][3] + depth = 0 + curr_idx = parent_idx + while curr_idx != -1: + depth += 1 + curr_idx = hier_list[curr_idx][3] + + # Enforce winding + # Image Space (Y-down): + # Outer (Depth Even) -> Should be CCW (Standard Math Positive? or Negative?) + # Let's use the standard convention: + # TTF requires Outer=CW, Inner=CCW (in Y-up). + # Mapping Y-down to Y-up flips the winding. + # So Image Space: Outer=CCW, Inner=CW. 
+ # + # In OpenCV (Y-down): + # CCW usually gives Negative Area (Green's theorem with Y-down) + # CW usually gives Positive Area + # Let's enforce: + # Outer (Even) -> Negative Area + # Inner (Odd) -> Positive Area + + area = cv2.contourArea(simplified, oriented=True) + + to_flip = False + if depth % 2 == 0: # Outer + if area > 0: # Is CW, want CCW + to_flip = True + else: # Inner + if area < 0: # Is CCW, want CW + to_flip = True + + if to_flip: + simplified = np.flip(simplified, axis=0) + + # Convert to SVG path + path = self._contour_to_path(simplified, width, height, normalize) + if path: + paths.append(path) + + return " ".join(paths) + + def _contour_to_path(self, contour: np.ndarray, + width: int, height: int, + normalize: bool) -> str: + """Convert single contour to SVG path.""" + if len(contour) < 3: + return "" + + points = contour.reshape(-1, 2) + + # Optional normalization for font coordinates + if normalize: + # Scale to 2048 range (font units) based on HEIGHT to preserve Aspect Ratio + # We assume the input bitmap represents the full line height (or close to it) + # mapping it to the full EM square. + scale = 2048 / height if height > 0 else 1 + scale_x = scale + scale_y = scale + + # Center horizontally if needed? + # For now, just placing it at x=0 (left aligned) is standard for glyph constr + # but we might want to ensure it doesn't drift. 
+ + points = points.astype(float) + points[:, 0] *= scale_x + # Flip Y axis (font coordinates are bottom-up) + points[:, 1] = 2048 - (points[:, 1] * scale_y) + + # Build path string using lines for accuracy + parts = [] + + # Move to first point + x, y = points[0] + parts.append(f"M {x:.0f} {y:.0f}") + + # Use line segments for accuracy (beziers can distort small shapes) + for i in range(1, len(points)): + x, y = points[i] + parts.append(f"L {x:.0f} {y:.0f}") + + # Close path + parts.append("Z") + + return " ".join(parts) + + def vectorize_batch(self, char_images: dict) -> dict: + """ + Vectorize multiple characters. + + Args: + char_images: Dict mapping character labels to bitmap images + + Returns: + Dict mapping character labels to (svg_path, dimensions) tuples + """ + results = {} + + for label, bitmap in char_images.items(): + try: + svg_path, dims = self.vectorize(bitmap) + if svg_path: + results[label] = (svg_path, dims) + except Exception as e: + print(f"Error vectorizing '{label}': {e}") + + return results + + def create_svg_file(self, svg_path: str, width: int, height: int, + output_path: str, fill_color: str = "black"): + """ + Create complete SVG file from path data. 
+ + Args: + svg_path: SVG path data + width: Canvas width + height: Canvas height + output_path: Output file path + fill_color: Fill color for the path + """ + svg_content = f''' +''' + + with open(output_path, 'w') as f: + f.write(svg_content) + + +def vectorize_character(bitmap: np.ndarray) -> Tuple[str, Tuple[int, int]]: + """Convenience function for single character vectorization.""" + vectorizer = CharacterVectorizer() + return vectorizer.vectorize(bitmap) diff --git a/temp_grid.jpg b/temp_grid.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b5f877c8dc227310dc57d8d37f606690ab80f928 --- /dev/null +++ b/temp_grid.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d50d92071e817f862c6cff3bf9b35bb56e5c07b96d534677ddfb78a5d592dd +size 181493 diff --git a/temp_sentence.jpg b/temp_sentence.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0b2c9d24d82e1a122a809235385d40070756e966 Binary files /dev/null and b/temp_sentence.jpg differ diff --git a/temp_word.png b/temp_word.png new file mode 100644 index 0000000000000000000000000000000000000000..94bd6404715b9313d5bd6955a9e0a8604a535a7b Binary files /dev/null and b/temp_word.png differ diff --git a/ui/index.html b/ui/index.html new file mode 100644 index 0000000000000000000000000000000000000000..827e0c3c042dfec44e8062d8b1f668bb6bb7cc66 --- /dev/null +++ b/ui/index.html @@ -0,0 +1,525 @@ + + +
+ + +Convert your handwriting into a custom font
+or click to browse • JPG, PNG, BMP supported
+ +Analyzing your handwriting
+