Bapt120 committed on
Commit
bc711a3
·
verified ·
1 Parent(s): 17e7b7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -6
app.py CHANGED
@@ -168,8 +168,8 @@ def extract_text_from_image(image, temperature=0.2, stream=False):
168
  yield cleaned_text
169
 
170
 
171
- def process_input(file_input, temperature, page_num):
172
- """Process uploaded file (image or PDF) and extract text with streaming."""
173
  if file_input is None:
174
  yield "Please upload an image or PDF first.", "", "", None, gr.update()
175
  return
@@ -197,8 +197,8 @@ def process_input(file_input, temperature, page_num):
197
  return
198
 
199
  try:
200
- # Extract text using LightOnOCR with streaming
201
- for extracted_text in extract_text_from_image(image_to_process, temperature, stream=True):
202
  yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
203
 
204
  except Exception as e:
@@ -228,12 +228,18 @@ def update_slider(file_input):
228
  # Create Gradio interface
229
  with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
230
  gr.Markdown(f"""
 
 
 
 
 
 
231
  # 📖 Image/PDF to Text Extraction with LightOnOCR
232
 
233
  **💡 How to use:**
234
  1. Upload an image or PDF
235
  2. For PDFs: select which page to extract (1-20)
236
- 3. Adjust temperature if needed (0.0 for deterministic, higher for more varied output)
237
  4. Click "Extract Text"
238
 
239
  **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
@@ -277,6 +283,11 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
277
  label="Temperature",
278
  info="0.0 = deterministic, Higher = more varied"
279
  )
 
 
 
 
 
280
  submit_btn = gr.Button("Extract Text", variant="primary")
281
  clear_btn = gr.Button("Clear", variant="secondary")
282
 
@@ -299,7 +310,7 @@ with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft(
299
  # Event handlers
300
  submit_btn.click(
301
  fn=process_input,
302
- inputs=[file_input, temperature, num_pages],
303
  outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
304
  )
305
 
 
168
  yield cleaned_text
169
 
170
 
171
+ def process_input(file_input, temperature, page_num, enable_streaming):
172
+ """Process uploaded file (image or PDF) and extract text with optional streaming."""
173
  if file_input is None:
174
  yield "Please upload an image or PDF first.", "", "", None, gr.update()
175
  return
 
197
  return
198
 
199
  try:
200
+ # Extract text using LightOnOCR with optional streaming
201
+ for extracted_text in extract_text_from_image(image_to_process, temperature, stream=enable_streaming):
202
  yield extracted_text, extracted_text, page_info, image_to_process, gr.update()
203
 
204
  except Exception as e:
 
228
  # Create Gradio interface
229
  with gr.Blocks(title="📖 Image/PDF OCR with LightOnOCR", theme=gr.themes.Soft()) as demo:
230
  gr.Markdown(f"""
231
+ # ⚠️ **HEADS UP: This space is now on CPU and runs very slowly.**
232
+
233
+ For much faster results, check out the [GPU version here](https://huggingface.co/spaces/lightonai/LightOnOCR-1B-Demo-zero).
234
+
235
+ ---
236
+
237
  # 📖 Image/PDF to Text Extraction with LightOnOCR
238
 
239
  **💡 How to use:**
240
  1. Upload an image or PDF
241
  2. For PDFs: select which page to extract (1-20)
242
+ 3. Adjust temperature if needed
243
  4. Click "Extract Text"
244
 
245
  **Note:** The Markdown rendering for tables may not always be perfect. Check the raw output for complex tables!
 
283
  label="Temperature",
284
  info="0.0 = deterministic, Higher = more varied"
285
  )
286
+ enable_streaming = gr.Checkbox(
287
+ label="Enable Streaming",
288
+ value=False,
289
+ info="Show text progressively as it's generated"
290
+ )
291
  submit_btn = gr.Button("Extract Text", variant="primary")
292
  clear_btn = gr.Button("Clear", variant="secondary")
293
 
 
310
  # Event handlers
311
  submit_btn.click(
312
  fn=process_input,
313
+ inputs=[file_input, temperature, num_pages, enable_streaming],
314
  outputs=[output_text, raw_output, page_info, rendered_image, num_pages]
315
  )
316