MohamedRashad committed on
Commit
71311e8
1 Parent(s): e076d40

Add model description to app.py

Browse files
Files changed (2) hide show
  1. app.py +10 -0
  2. requirements.txt +1 -0
app.py CHANGED
@@ -55,8 +55,18 @@ def extract_text_from_pdf(pdf_path, progress=gr.Progress()):
55
 
56
  return "\n".join(texts)
57
 
 
 
 
 
 
 
 
 
 
58
  with gr.Blocks(title="Arabic Small Nougat") as demo:
59
  gr.HTML("<h1 style='text-align: center'>Arabic End-to-End Structured OCR for textbooks</h1>")
 
60
 
61
  with gr.Tab("Extract Text from Image"):
62
  with gr.Row():
 
55
 
56
  return "\n".join(texts)
57
 
58
+ model_description = """
59
+ This is a demo for the Arabic Small Nougat model. It is an end-to-end OCR model that can extract text from images and PDFs.
60
+
61
+ - The model is trained on the [Khatt dataset](https://huggingface.co/datasets/Fakhraddin/khatt) and a custom-made dataset.
62
+ - The model is a fine-tune of the [facebook/nougat-small](https://huggingface.co/facebook/nougat-small) model.
63
+
64
+ **Note**: The model is a prototype in my book and may not work well on all types of images and PDFs. **Check the output carefully before using it for any serious work.**
65
+ """
66
+
67
  with gr.Blocks(title="Arabic Small Nougat") as demo:
68
  gr.HTML("<h1 style='text-align: center'>Arabic End-to-End Structured OCR for textbooks</h1>")
69
+ gr.Markdown(model_description)
70
 
71
  with gr.Tab("Extract Text from Image"):
72
  with gr.Row():
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  pdf2image
 
2
  transformers
3
  gradio
 
1
  pdf2image
2
+ torch
3
  transformers
4
  gradio