Spaces:

akhaliq
/

anycoder

Running

App Files Files Community

akhaliq HF Staff committed 7 days ago

Commit

a2a2a54

1 Parent(s): 82a5d4c

update image text extract

Browse files

Files changed (3) hide show

README.md +18 -0
app.py +47 -3
requirements.txt +4 -1

README.md CHANGED Viewed

@@ -21,6 +21,7 @@ AnyCoder is an AI-powered code generator that helps you create applications by d
 - **Multi-Model Support**: Choose from various AI models including DeepSeek, ERNIE-4.5-VL, MiniMax, and Qwen
 - **Image-to-Code**: Upload UI design images and get corresponding HTML/CSS code (ERNIE-4.5-VL model)
 - **Live Preview**: See your generated code in action with the built-in sandbox
 - **Web Search Integration**: Enable real-time web search to get the latest information and best practices
 - **Chat History**: Keep track of your conversations and generated code
@@ -75,6 +76,23 @@ The web search feature uses Tavily to provide real-time information when generat
 When enabled, the AI will search the web for the latest information, best practices, and technologies related to your request.
 ## Available Models
 - **DeepSeek V3**: Advanced code generation model

 - **Multi-Model Support**: Choose from various AI models including DeepSeek, ERNIE-4.5-VL, MiniMax, and Qwen
 - **Image-to-Code**: Upload UI design images and get corresponding HTML/CSS code (ERNIE-4.5-VL model)
+- **Image Text Extraction**: Upload images and extract text using OCR for processing
 - **Live Preview**: See your generated code in action with the built-in sandbox
 - **Web Search Integration**: Enable real-time web search to get the latest information and best practices
 - **Chat History**: Keep track of your conversations and generated code
 When enabled, the AI will search the web for the latest information, best practices, and technologies related to your request.
+## Image Text Extraction
+The application supports extracting text from images using OCR (Optical Character Recognition). This feature allows you to:
+1. Upload image files (JPG, PNG, BMP, TIFF, GIF, WebP) through the file input
+2. Automatically extract text from the images using Tesseract OCR
+3. Include the extracted text in your prompts for code generation
+### Setting up OCR
+To use the image text extraction feature, you need to install Tesseract OCR on your system. See `install_tesseract.md` for detailed installation instructions.
+**Example usage:**
+- Upload an image containing text (like a screenshot, document, or handwritten notes)
+- The application will extract the text and include it in your prompt
+- You can then ask the AI to process, summarize, or work with the extracted text
 ## Available Models
 - **DeepSeek V3**: Advanced code generation model

app.py CHANGED Viewed

@@ -6,6 +6,10 @@ import base64
 import mimetypes
 import PyPDF2
 import docx
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -116,6 +120,10 @@ DEMO_LIST = [
     {
         "title": "UI from Image",
         "description": "Upload an image of a UI design and I'll generate the HTML/CSS code for it"
     }
 ]
@@ -372,6 +380,38 @@ def demo_card_click(e: gr.EventData):
         # Return the first demo description as fallback
         return DEMO_LIST[0]['description']
 def extract_text_from_file(file_path):
     if not file_path:
         return ""
@@ -391,6 +431,8 @@ def extract_text_from_file(file_path):
         elif ext == ".docx":
             doc = docx.Document(file_path)
             return "\n".join([para.text for para in doc.paragraphs])
         else:
             return ""
     except Exception as e:
@@ -471,7 +513,7 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
         gr.Markdown("# AnyCoder\nAI-Powered Code Generator")
         gr.Markdown("""Describe your app or UI in plain English. Optionally upload a UI image (for ERNIE model). Click Generate to get code and preview.""")
         gr.Markdown("**Tip:** For best search results about people or entities, include details like profession, company, or location. Example: 'John Smith software engineer at Google.'")
-        gr.Markdown("**Tip:** You can attach a file (PDF, TXT, DOCX, CSV, MD) to use as reference for your prompt, e.g. 'Summarize this PDF.'")
         input = gr.Textbox(
             label="Describe your application",
             placeholder="e.g., Create a todo app with add, delete, and mark as complete functionality",
@@ -482,8 +524,8 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
             visible=False
         )
         file_input = gr.File(
-            label="Attach a file (PDF, TXT, DOCX, CSV, MD)",
-            file_types=[".pdf", ".txt", ".md", ".csv", ".docx"],
             visible=True
         )
         with gr.Row():
@@ -503,6 +545,8 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
         else:
             gr.Markdown("✅ **Web Search Available**: Toggle above to enable real-time search")
         gr.Markdown("### Quick Examples")
         for i, demo_item in enumerate(DEMO_LIST[:5]):
             demo_card = gr.Button(

 import mimetypes
 import PyPDF2
 import docx
+import cv2
+import numpy as np
+from PIL import Image
+import pytesseract
 import gradio as gr
 from huggingface_hub import InferenceClient
     {
         "title": "UI from Image",
         "description": "Upload an image of a UI design and I'll generate the HTML/CSS code for it"
+    },
+    {
+        "title": "Extract Text from Image",
+        "description": "Upload an image containing text and I'll extract and process the text content"
     }
 ]
         # Return the first demo description as fallback
         return DEMO_LIST[0]['description']
+def extract_text_from_image(image_path):
+    """Extract text from an image file using Tesseract OCR.
+
+    The image is loaded with OpenCV, converted to grayscale, binarized with
+    an Otsu threshold, and then passed to pytesseract.
+
+    Args:
+        image_path: Path of the image file, passed directly to ``cv2.imread``.
+
+    Returns:
+        The recognized text with surrounding whitespace stripped. This
+        function does not raise for ordinary failures; it returns a
+        human-readable string instead:
+
+        * an "Error: Tesseract OCR is not installed..." message when the
+          Tesseract version check fails,
+        * "Error: Could not read image file" when ``cv2.imread`` returns
+          ``None``,
+        * "No text found in image" when the OCR result is empty after
+          stripping,
+        * "Error extracting text from image: ..." for any other exception.
+    """
+    try:
+        # Check if tesseract is available; any failure of the version probe
+        # is reported to the caller as "not installed".
+        try:
+            pytesseract.get_tesseract_version()
+        except Exception:
+            return "Error: Tesseract OCR is not installed. Please install Tesseract to extract text from images. See install_tesseract.md for instructions."
+        # Read image using OpenCV; a None result is treated as unreadable.
+        image = cv2.imread(image_path)
+        if image is None:
+            return "Error: Could not read image file"
+        # Convert to RGB (OpenCV uses BGR)
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        # Preprocess image for better OCR results
+        # Convert to grayscale
+        gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)
+        # Apply thresholding to get binary image. THRESH_OTSU is combined
+        # with THRESH_BINARY; presumably the literal 0 threshold is then
+        # superseded by the Otsu-selected value (confirm in OpenCV docs).
+        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        # Extract text using pytesseract with page segmentation mode 6
+        # (NOTE(review): per Tesseract docs this assumes a single uniform
+        # block of text - confirm it suits the expected inputs).
+        text = pytesseract.image_to_string(binary, config='--psm 6')
+        # Empty OCR output is replaced by a placeholder message.
+        return text.strip() if text.strip() else "No text found in image"
+    except Exception as e:
+        # Catch-all: every other failure is returned as an error string.
+        return f"Error extracting text from image: {e}"
 def extract_text_from_file(file_path):
     if not file_path:
         return ""
         elif ext == ".docx":
             doc = docx.Document(file_path)
             return "\n".join([para.text for para in doc.paragraphs])
+        elif ext.lower() in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"]:
+            return extract_text_from_image(file_path)
         else:
             return ""
     except Exception as e:
         gr.Markdown("# AnyCoder\nAI-Powered Code Generator")
         gr.Markdown("""Describe your app or UI in plain English. Optionally upload a UI image (for ERNIE model). Click Generate to get code and preview.""")
         gr.Markdown("**Tip:** For best search results about people or entities, include details like profession, company, or location. Example: 'John Smith software engineer at Google.'")
+        gr.Markdown("**Tip:** You can attach a file (PDF, TXT, DOCX, CSV, MD, Images) to use as reference for your prompt, e.g. 'Summarize this PDF' or 'Extract text from this image'.")
         input = gr.Textbox(
             label="Describe your application",
             placeholder="e.g., Create a todo app with add, delete, and mark as complete functionality",
             visible=False
         )
         file_input = gr.File(
+            label="Attach a file (PDF, TXT, DOCX, CSV, MD, Images)",
+            file_types=[".pdf", ".txt", ".md", ".csv", ".docx", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"],
             visible=True
         )
         with gr.Row():
         else:
             gr.Markdown("✅ **Web Search Available**: Toggle above to enable real-time search")
+        gr.Markdown("📷 **Image Text Extraction**: Upload images to extract text using OCR (requires Tesseract installation)")
         gr.Markdown("### Quick Examples")
         for i, demo_item in enumerate(DEMO_LIST[:5]):
             demo_card = gr.Button(

requirements.txt CHANGED Viewed

@@ -2,4 +2,7 @@ git+https://github.com/huggingface/huggingface_hub.git
 gradio[oauth]
 tavily-python
 PyPDF2
-python-docx

 gradio[oauth]
 tavily-python
 PyPDF2
+python-docx
+pytesseract
+Pillow
+opencv-python