Synced repo using 'sync_with_huggingface' GitHub Action
Browse files
- gradio_app.py +10 -6
- requirements.txt +1 -1
 
    	
        gradio_app.py
    CHANGED
    
    | 
         @@ -98,12 +98,14 @@ with gr.Blocks(title="Marker") as demo: 
     | 
|
| 98 | 
         
             
                        in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
         
     | 
| 99 | 
         
             
                        in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
         
     | 
| 100 | 
         | 
| 101 | 
         
            -
                        page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f" 
     | 
| 102 | 
         
             
                        output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
         
     | 
| 103 | 
         | 
| 104 | 
         
             
                        force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
         
     | 
| 105 | 
         
             
                        debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
         
     | 
| 106 | 
         
            -
                         
     | 
| 
         | 
|
| 
         | 
|
| 107 | 
         
             
                    with gr.Column():
         
     | 
| 108 | 
         
             
                        result_md = gr.Markdown(label="Result markdown", visible=False)
         
     | 
| 109 | 
         
             
                        result_json = gr.JSON(label="Result json", visible=False)
         
     | 
| 
         @@ -154,17 +156,19 @@ with gr.Blocks(title="Marker") as demo: 
     | 
|
| 154 | 
         
             
                    page_range_txt.change(
         
     | 
| 155 | 
         
             
                        fn=check_page_range,
         
     | 
| 156 | 
         
             
                        inputs=[page_range_txt, in_file],
         
     | 
| 157 | 
         
            -
                        outputs=[page_range_txt,  
     | 
| 158 | 
         
             
                    )
         
     | 
| 159 | 
         | 
| 160 | 
         
             
                    # Run Marker
         
     | 
| 161 | 
         
            -
                    def run_marker_img(filename, page_range, force_ocr, output_format, debug):
         
     | 
| 162 | 
         
             
                        cli_options = {
         
     | 
| 163 | 
         
             
                            "output_format": output_format,
         
     | 
| 164 | 
         
             
                            "page_range": page_range,
         
     | 
| 165 | 
         
             
                            "force_ocr": force_ocr,
         
     | 
| 166 | 
         
             
                            "debug": debug,
         
     | 
| 167 | 
         
             
                            "output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
         
     | 
| 
         | 
|
| 
         | 
|
| 168 | 
         
             
                        }
         
     | 
| 169 | 
         
             
                        config_parser = ConfigParser(cli_options)
         
     | 
| 170 | 
         
             
                        rendered = convert_pdf(
         
     | 
| 
         @@ -213,9 +217,9 @@ with gr.Blocks(title="Marker") as demo: 
     | 
|
| 213 | 
         
             
                                gr_debug_lay
         
     | 
| 214 | 
         
             
                            ]
         
     | 
| 215 | 
         | 
| 216 | 
         
            -
                     
     | 
| 217 | 
         
             
                        fn=run_marker_img,
         
     | 
| 218 | 
         
            -
                        inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb],
         
     | 
| 219 | 
         
             
                        outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
         
     | 
| 220 | 
         
             
                    )
         
     | 
| 221 | 
         | 
| 
         | 
|
| 98 | 
         
             
                        in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
         
     | 
| 99 | 
         
             
                        in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
         
     | 
| 100 | 
         | 
| 101 | 
         
            +
                        page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
         
     | 
| 102 | 
         
             
                        output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
         
     | 
| 103 | 
         | 
| 104 | 
         
             
                        force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
         
     | 
| 105 | 
         
             
                        debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
         
     | 
| 106 | 
         
            +
                        use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
         
     | 
| 107 | 
         
            +
                        strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
         
     | 
| 108 | 
         
            +
                        run_marker_btn = gr.Button("Run Marker", interactive=False)
         
     | 
| 109 | 
         
             
                    with gr.Column():
         
     | 
| 110 | 
         
             
                        result_md = gr.Markdown(label="Result markdown", visible=False)
         
     | 
| 111 | 
         
             
                        result_json = gr.JSON(label="Result json", visible=False)
         
     | 
| 
         | 
|
| 156 | 
         
             
                    page_range_txt.change(
         
     | 
| 157 | 
         
             
                        fn=check_page_range,
         
     | 
| 158 | 
         
             
                        inputs=[page_range_txt, in_file],
         
     | 
| 159 | 
         
            +
                        outputs=[page_range_txt, run_marker_btn]
         
     | 
| 160 | 
         
             
                    )
         
     | 
| 161 | 
         | 
| 162 | 
         
             
                    # Run Marker
         
     | 
| 163 | 
         
            +
                    def run_marker_img(filename, page_range, force_ocr, output_format, debug, use_llm, strip_existing_ocr):
         
     | 
| 164 | 
         
             
                        cli_options = {
         
     | 
| 165 | 
         
             
                            "output_format": output_format,
         
     | 
| 166 | 
         
             
                            "page_range": page_range,
         
     | 
| 167 | 
         
             
                            "force_ocr": force_ocr,
         
     | 
| 168 | 
         
             
                            "debug": debug,
         
     | 
| 169 | 
         
             
                            "output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
         
     | 
| 170 | 
         
            +
                            "use_llm": use_llm,
         
     | 
| 171 | 
         
            +
                            "strip_existing_ocr": strip_existing_ocr
         
     | 
| 172 | 
         
             
                        }
         
     | 
| 173 | 
         
             
                        config_parser = ConfigParser(cli_options)
         
     | 
| 174 | 
         
             
                        rendered = convert_pdf(
         
     | 
| 
         | 
|
| 217 | 
         
             
                                gr_debug_lay
         
     | 
| 218 | 
         
             
                            ]
         
     | 
| 219 | 
         | 
| 220 | 
         
            +
                    run_marker_btn.click(
         
     | 
| 221 | 
         
             
                        fn=run_marker_img,
         
     | 
| 222 | 
         
            +
                        inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb],
         
     | 
| 223 | 
         
             
                        outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
         
     | 
| 224 | 
         
             
                    )
         
     | 
| 225 | 
         | 
    	
        requirements.txt
    CHANGED
    
    | 
         @@ -1,4 +1,4 @@ 
     | 
|
| 1 | 
         
             
            torch==2.5.1
         
     | 
| 2 | 
         
            -
            marker-pdf==1. 
     | 
| 3 | 
         
             
            gradio==5.8.0
         
     | 
| 4 | 
         
             
            huggingface-hub==0.26.3
         
     | 
| 
         | 
|
| 1 | 
         
             
            torch==2.5.1
         
     | 
| 2 | 
         
            +
            marker-pdf==1.2.0
         
     | 
| 3 | 
         
             
            gradio==5.8.0
         
     | 
| 4 | 
         
             
            huggingface-hub==0.26.3
         
     |