Spaces:
Running
Running
update image text extract
Browse files- README.md +18 -0
- app.py +47 -3
- requirements.txt +4 -1
README.md
CHANGED
@@ -21,6 +21,7 @@ AnyCoder is an AI-powered code generator that helps you create applications by d
|
|
21 |
|
22 |
- **Multi-Model Support**: Choose from various AI models including DeepSeek, ERNIE-4.5-VL, MiniMax, and Qwen
|
23 |
- **Image-to-Code**: Upload UI design images and get corresponding HTML/CSS code (ERNIE-4.5-VL model)
|
|
|
24 |
- **Live Preview**: See your generated code in action with the built-in sandbox
|
25 |
- **Web Search Integration**: Enable real-time web search to get the latest information and best practices
|
26 |
- **Chat History**: Keep track of your conversations and generated code
|
@@ -75,6 +76,23 @@ The web search feature uses Tavily to provide real-time information when generat
|
|
75 |
|
76 |
When enabled, the AI will search the web for the latest information, best practices, and technologies related to your request.
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
## Available Models
|
79 |
|
80 |
- **DeepSeek V3**: Advanced code generation model
|
|
|
21 |
|
22 |
- **Multi-Model Support**: Choose from various AI models including DeepSeek, ERNIE-4.5-VL, MiniMax, and Qwen
|
23 |
- **Image-to-Code**: Upload UI design images and get corresponding HTML/CSS code (ERNIE-4.5-VL model)
|
24 |
+
- **Image Text Extraction**: Upload images and extract text using OCR for processing
|
25 |
- **Live Preview**: See your generated code in action with the built-in sandbox
|
26 |
- **Web Search Integration**: Enable real-time web search to get the latest information and best practices
|
27 |
- **Chat History**: Keep track of your conversations and generated code
|
|
|
76 |
|
77 |
When enabled, the AI will search the web for the latest information, best practices, and technologies related to your request.
|
78 |
|
79 |
+
## Image Text Extraction
|
80 |
+
|
81 |
+
The application supports extracting text from images using OCR (Optical Character Recognition). This feature allows you to:
|
82 |
+
|
83 |
+
1. Upload image files (JPG, PNG, BMP, TIFF, GIF, WebP) through the file input
|
84 |
+
2. Automatically extract text from the images using Tesseract OCR
|
85 |
+
3. Include the extracted text in your prompts for code generation
|
86 |
+
|
87 |
+
### Setting up OCR
|
88 |
+
|
89 |
+
To use the image text extraction feature, you need to install Tesseract OCR on your system. See `install_tesseract.md` for detailed installation instructions.
|
90 |
+
|
91 |
+
**Example usage:**
|
92 |
+
- Upload an image containing text (like a screenshot, document, or handwritten notes; Tesseract works best on clear printed text, so handwriting may not be recognized reliably)
|
93 |
+
- The application will extract the text and include it in your prompt
|
94 |
+
- You can then ask the AI to process, summarize, or work with the extracted text
|
95 |
+
|
96 |
## Available Models
|
97 |
|
98 |
- **DeepSeek V3**: Advanced code generation model
|
app.py
CHANGED
@@ -6,6 +6,10 @@ import base64
|
|
6 |
import mimetypes
|
7 |
import PyPDF2
|
8 |
import docx
|
|
|
|
|
|
|
|
|
9 |
|
10 |
import gradio as gr
|
11 |
from huggingface_hub import InferenceClient
|
@@ -116,6 +120,10 @@ DEMO_LIST = [
|
|
116 |
{
|
117 |
"title": "UI from Image",
|
118 |
"description": "Upload an image of a UI design and I'll generate the HTML/CSS code for it"
|
|
|
|
|
|
|
|
|
119 |
}
|
120 |
]
|
121 |
|
@@ -372,6 +380,38 @@ def demo_card_click(e: gr.EventData):
|
|
372 |
# Return the first demo description as fallback
|
373 |
return DEMO_LIST[0]['description']
|
374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
def extract_text_from_file(file_path):
|
376 |
if not file_path:
|
377 |
return ""
|
@@ -391,6 +431,8 @@ def extract_text_from_file(file_path):
|
|
391 |
elif ext == ".docx":
|
392 |
doc = docx.Document(file_path)
|
393 |
return "\n".join([para.text for para in doc.paragraphs])
|
|
|
|
|
394 |
else:
|
395 |
return ""
|
396 |
except Exception as e:
|
@@ -471,7 +513,7 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
|
|
471 |
gr.Markdown("# AnyCoder\nAI-Powered Code Generator")
|
472 |
gr.Markdown("""Describe your app or UI in plain English. Optionally upload a UI image (for ERNIE model). Click Generate to get code and preview.""")
|
473 |
gr.Markdown("**Tip:** For best search results about people or entities, include details like profession, company, or location. Example: 'John Smith software engineer at Google.'")
|
474 |
-
gr.Markdown("**Tip:** You can attach a file (PDF, TXT, DOCX, CSV, MD) to use as reference for your prompt, e.g. 'Summarize this PDF.
|
475 |
input = gr.Textbox(
|
476 |
label="Describe your application",
|
477 |
placeholder="e.g., Create a todo app with add, delete, and mark as complete functionality",
|
@@ -482,8 +524,8 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
|
|
482 |
visible=False
|
483 |
)
|
484 |
file_input = gr.File(
|
485 |
-
label="Attach a file (PDF, TXT, DOCX, CSV, MD)",
|
486 |
-
file_types=[".pdf", ".txt", ".md", ".csv", ".docx"],
|
487 |
visible=True
|
488 |
)
|
489 |
with gr.Row():
|
@@ -503,6 +545,8 @@ with gr.Blocks(theme=gr.themes.Base(), title="AnyCoder - AI Code Generator") as
|
|
503 |
else:
|
504 |
gr.Markdown("✅ **Web Search Available**: Toggle above to enable real-time search")
|
505 |
|
|
|
|
|
506 |
gr.Markdown("### Quick Examples")
|
507 |
for i, demo_item in enumerate(DEMO_LIST[:5]):
|
508 |
demo_card = gr.Button(
|
|
|
6 |
import mimetypes
|
7 |
import PyPDF2
|
8 |
import docx
|
9 |
+
import cv2
|
10 |
+
import numpy as np
|
11 |
+
from PIL import Image
|
12 |
+
import pytesseract
|
13 |
|
14 |
import gradio as gr
|
15 |
from huggingface_hub import InferenceClient
|
|
|
120 |
{
|
121 |
"title": "UI from Image",
|
122 |
"description": "Upload an image of a UI design and I'll generate the HTML/CSS code for it"
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"title": "Extract Text from Image",
|
126 |
+
"description": "Upload an image containing text and I'll extract and process the text content"
|
127 |
}
|
128 |
]
|
129 |
|
|
|
380 |
# Return the first demo description as fallback
|
381 |
return DEMO_LIST[0]['description']
|
382 |
|
383 |
+
def _load_grayscale_image(image_path):
    """Decode *image_path* into a single-channel grayscale array.

    OpenCV is tried first. ``cv2.imread`` returns ``None`` (it does not raise)
    for files it cannot decode, which includes formats its build has no codec
    for (e.g. GIF on older OpenCV releases). In that case Pillow is used as a
    fallback; for multi-frame images Pillow yields the first frame.

    Returns:
        A 2-D ``uint8`` NumPy array, or ``None`` if neither library can read
        the file.
    """
    image = cv2.imread(image_path)
    if image is not None:
        # imread's default flag always yields 3-channel BGR, so a single
        # BGR -> GRAY conversion is enough (no intermediate RGB step needed).
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    try:
        with Image.open(image_path) as pil_image:
            return np.array(pil_image.convert("L"))
    except (OSError, ValueError):
        # Missing file, unknown format (UnidentifiedImageError is an OSError),
        # or truncated data: report "unreadable" to the caller.
        return None


def extract_text_from_image(image_path):
    """Extract text from an image file using Tesseract OCR.

    The image is converted to grayscale and binarized with Otsu thresholding
    before being passed to Tesseract with ``--psm 6``.

    Args:
        image_path: Path to the image file on disk.

    Returns:
        The stripped OCR text, ``"No text found in image"`` when OCR yields
        only whitespace, or a human-readable string starting with ``"Error"``
        when the path is empty, Tesseract is unavailable, the image cannot be
        read, or OCR fails. This function does not raise; failures are
        reported through the returned string.
    """
    # Nothing to decode: answer immediately instead of probing Tesseract.
    if not image_path:
        return "Error: Could not read image file"

    try:
        # Probe for the Tesseract binary up front so the user gets an
        # actionable message rather than a low-level OCR failure.
        try:
            pytesseract.get_tesseract_version()
        except Exception:
            return "Error: Tesseract OCR is not installed. Please install Tesseract to extract text from images. See install_tesseract.md for instructions."

        gray = _load_grayscale_image(image_path)
        if gray is None:
            return "Error: Could not read image file"

        # Otsu picks the threshold automatically; the explicit 0 is ignored.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        text = pytesseract.image_to_string(binary, config='--psm 6').strip()
        return text or "No text found in image"

    except Exception as e:
        # Best-effort boundary: callers embed the result in a prompt, so any
        # failure is surfaced as text instead of propagating.
        return f"Error extracting text from image: {e}"
|
414 |
+
|
415 |
def extract_text_from_file(file_path):
|
416 |
if not file_path:
|
417 |
return ""
|
|
|
431 |
elif ext == ".docx":
|
432 |
doc = docx.Document(file_path)
|
433 |
return "\n".join([para.text for para in doc.paragraphs])
|
434 |
+
elif ext.lower() in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"]:
|
435 |
+
return extract_text_from_image(file_path)
|
436 |
else:
|
437 |
return ""
|
438 |
except Exception as e:
|
|
|
513 |
gr.Markdown("# AnyCoder\nAI-Powered Code Generator")
|
514 |
gr.Markdown("""Describe your app or UI in plain English. Optionally upload a UI image (for ERNIE model). Click Generate to get code and preview.""")
|
515 |
gr.Markdown("**Tip:** For best search results about people or entities, include details like profession, company, or location. Example: 'John Smith software engineer at Google.'")
|
516 |
+
gr.Markdown("**Tip:** You can attach a file (PDF, TXT, DOCX, CSV, MD, Images) to use as reference for your prompt, e.g. 'Summarize this PDF' or 'Extract text from this image'.")
|
517 |
input = gr.Textbox(
|
518 |
label="Describe your application",
|
519 |
placeholder="e.g., Create a todo app with add, delete, and mark as complete functionality",
|
|
|
524 |
visible=False
|
525 |
)
|
526 |
file_input = gr.File(
|
527 |
+
label="Attach a file (PDF, TXT, DOCX, CSV, MD, Images)",
|
528 |
+
file_types=[".pdf", ".txt", ".md", ".csv", ".docx", ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"],
|
529 |
visible=True
|
530 |
)
|
531 |
with gr.Row():
|
|
|
545 |
else:
|
546 |
gr.Markdown("✅ **Web Search Available**: Toggle above to enable real-time search")
|
547 |
|
548 |
+
gr.Markdown("📷 **Image Text Extraction**: Upload images to extract text using OCR (requires Tesseract installation)")
|
549 |
+
|
550 |
gr.Markdown("### Quick Examples")
|
551 |
for i, demo_item in enumerate(DEMO_LIST[:5]):
|
552 |
demo_card = gr.Button(
|
requirements.txt
CHANGED
@@ -2,4 +2,7 @@ git+https://github.com/huggingface/huggingface_hub.git
|
|
2 |
gradio[oauth]
|
3 |
tavily-python
|
4 |
PyPDF2
|
5 |
-
python-docx
|
|
|
|
|
|
|
|
2 |
gradio[oauth]
|
3 |
tavily-python
|
4 |
PyPDF2
|
5 |
+
python-docx
|
6 |
+
pytesseract
|
7 |
+
Pillow
|
8 |
+
opencv-python
|