rishiraj committed on
Commit
1bdb079
1 Parent(s): ca86eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -87
app.py CHANGED
@@ -2,14 +2,10 @@ import spaces
2
  import gradio as gr
3
  from marker.markdown_extractor import MarkdownExtractorConfig, MarkdownExtractor
4
  from pdf.pdf_extractor import PDFExtractorConfig, PDFExtractor
5
- from gemini.gemini_extractor import GeminiExtractorConfig, GeminiExtractor
6
- from oai.oai_extractor import OAIExtractorConfig, OAIExtractor
7
  from indexify_extractor_sdk import Content
8
 
9
  markdown_extractor = MarkdownExtractor()
10
  pdf_extractor = PDFExtractor()
11
- gemini_extractor = GeminiExtractor()
12
- oai_extractor = OAIExtractor()
13
 
14
  @spaces.GPU
15
  def use_marker(pdf_filepath):
@@ -36,16 +32,16 @@ with gr.Blocks(title="PDF data extraction with Marker & Indexify") as marker_dem
36
  "You can extract from PDF files continuously and try various other extractors locally with "
37
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
38
  )
39
- pdf_file = gr.File(type="filepath")
40
  with gr.Column():
41
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
42
  go_button = gr.Button(value="Run extractor", variant="primary")
43
- model_output_text_box = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box")
44
 
45
  with gr.Row():
46
  gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
47
 
48
- go_button.click(fn=use_marker, inputs=[pdf_file], outputs=[model_output_text_box])
49
 
50
  @spaces.GPU
51
  def use_pdf_extractor(pdf_filepath):
@@ -72,94 +68,18 @@ with gr.Blocks(title="PDF data extraction with PDF Extractor & Indexify") as pdf
72
  "You can extract from PDF files continuously and try various other extractors locally with "
73
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
74
  )
75
- pdf_file = gr.File(type="filepath")
76
  with gr.Column():
77
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
78
  go_button = gr.Button(value="Run extractor", variant="primary")
79
- model_output_text_box = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box")
80
 
81
  with gr.Row():
82
  gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
83
 
84
- go_button.click(fn=use_pdf_extractor, inputs=[pdf_file], outputs=[model_output_text_box])
85
 
86
- @spaces.GPU
87
- def use_gemini(pdf_filepath, key):
88
- if pdf_filepath is None:
89
- raise gr.Error("Please provide some input PDF: upload a PDF file")
90
- with open(pdf_filepath, "rb") as f:
91
- pdf_data = f.read()
92
- content = Content(content_type="application/pdf", data=pdf_data)
93
- config = GeminiExtractorConfig(prompt="Extract all text from the document.", model_name="gemini-1.5-flash", key=key)
94
- result = gemini_extractor.extract(content, config)
95
- return result
96
-
97
- with gr.Blocks(title="PDF data extraction with Gemini & Indexify") as gemini_demo:
98
- gr.HTML("<h1 style='text-align: center'>PDF data extraction with Gemini & <a href='https://getindexify.ai/'>Indexify</a></h1>")
99
- gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
100
- gr.HTML("<h3 style='text-align: center'>If you like this demo, please ⭐ Star us on <a href='https://github.com/tensorlakeai/indexify' target='_blank'>GitHub</a>!</h3>")
101
- gr.HTML("<h4 style='text-align: center'>Here's an example notebook that demonstrates how to build a continuous <a href='https://github.com/tensorlakeai/indexify/blob/main/docs/docs/examples/multimodal_gemini.ipynb' target='_blank'>extraction pipeline</a> with Indexify</h4>")
102
-
103
- with gr.Row():
104
- with gr.Column():
105
- gr.HTML(
106
- "<p><b>Step 1:</b> Upload a PDF file from local storage.</p>"
107
- "<p style='color: #A0A0A0;'>Use this demo for single PDF file only. "
108
- "You can extract from PDF files continuously and try various other extractors locally with "
109
- "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
110
- )
111
- pdf_file = gr.File(type="filepath")
112
- gr.HTML("<p><b>Step 2:</b> Enter your API key.</p>")
113
- key = gr.Textbox(info="Please enter your GEMINI_API_KEY", label="Key:")
114
- with gr.Column():
115
- gr.HTML("<p><b>Step 3:</b> Run the extractor.</p>")
116
- go_button = gr.Button(value="Run extractor", variant="primary")
117
- model_output_text_box = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box")
118
-
119
- with gr.Row():
120
- gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
121
-
122
- go_button.click(fn=use_gemini, inputs=[pdf_file, key], outputs=[model_output_text_box])
123
-
124
- @spaces.GPU
125
- def use_openai(pdf_filepath, key):
126
- if pdf_filepath is None:
127
- raise gr.Error("Please provide some input PDF: upload a PDF file")
128
- with open(pdf_filepath, "rb") as f:
129
- pdf_data = f.read()
130
- content = Content(content_type="application/pdf", data=pdf_data)
131
- config = OAIExtractorConfig(prompt="Extract all text from the document.", model_name="gpt-4o", key=key)
132
- result = oai_extractor.extract(content, config)
133
- return result
134
-
135
- with gr.Blocks(title="PDF data extraction with OpenAI & Indexify") as openai_demo:
136
- gr.HTML("<h1 style='text-align: center'>PDF data extraction with OpenAI & <a href='https://getindexify.ai/'>Indexify</a></h1>")
137
- gr.HTML("<p style='text-align: center'>Indexify is a scalable realtime and continuous indexing and structured extraction engine for unstructured data to build generative AI applications</p>")
138
- gr.HTML("<h3 style='text-align: center'>If you like this demo, please ⭐ Star us on <a href='https://github.com/tensorlakeai/indexify' target='_blank'>GitHub</a>!</h3>")
139
- gr.HTML("<h4 style='text-align: center'>Here's an example notebook that demonstrates how to build a continuous <a href='https://github.com/tensorlakeai/indexify/blob/main/docs/docs/examples/multimodal_openai.ipynb' target='_blank'>extraction pipeline</a> with Indexify</h4>")
140
-
141
- with gr.Row():
142
- with gr.Column():
143
- gr.HTML(
144
- "<p><b>Step 1:</b> Upload a PDF file from local storage.</p>"
145
- "<p style='color: #A0A0A0;'>Use this demo for single PDF file only. "
146
- "You can extract from PDF files continuously and try various other extractors locally with "
147
- "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
148
- )
149
- pdf_file = gr.File(type="filepath")
150
- gr.HTML("<p><b>Step 2:</b> Enter your API key.</p>")
151
- key = gr.Textbox(info="Please enter your OPENAI_API_KEY", label="Key:")
152
- with gr.Column():
153
- gr.HTML("<p><b>Step 3:</b> Run the extractor.</p>")
154
- go_button = gr.Button(value="Run extractor", variant="primary")
155
- model_output_text_box = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box")
156
-
157
- with gr.Row():
158
- gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
159
-
160
- go_button.click(fn=use_openai, inputs=[pdf_file, key], outputs=[model_output_text_box])
161
-
162
- demo = gr.TabbedInterface([marker_demo, pdf_demo, gemini_demo, openai_demo], ["Marker Extractor", "PDF Extractor", "Gemini Extractor", "OpenAI Extractor"], theme=gr.themes.Soft())
163
 
164
  demo.queue()
165
  demo.launch()
 
2
  import gradio as gr
3
  from marker.markdown_extractor import MarkdownExtractorConfig, MarkdownExtractor
4
  from pdf.pdf_extractor import PDFExtractorConfig, PDFExtractor
 
 
5
  from indexify_extractor_sdk import Content
6
 
7
  markdown_extractor = MarkdownExtractor()
8
  pdf_extractor = PDFExtractor()
 
 
9
 
10
  @spaces.GPU
11
  def use_marker(pdf_filepath):
 
32
  "You can extract from PDF files continuously and try various other extractors locally with "
33
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
34
  )
35
+ pdf_file_1 = gr.File(type="filepath")
36
  with gr.Column():
37
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
38
  go_button = gr.Button(value="Run extractor", variant="primary")
39
+ model_output_text_box_1 = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box_1")
40
 
41
  with gr.Row():
42
  gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
43
 
44
+ go_button.click(fn=use_marker, inputs=[pdf_file_1], outputs=[model_output_text_box_1])
45
 
46
  @spaces.GPU
47
  def use_pdf_extractor(pdf_filepath):
 
68
  "You can extract from PDF files continuously and try various other extractors locally with "
69
  "<a href='https://getindexify.ai/'>Indexify</a>.</p>"
70
  )
71
+ pdf_file_2 = gr.File(type="filepath")
72
  with gr.Column():
73
  gr.HTML("<p><b>Step 2:</b> Run the extractor.</p>")
74
  go_button = gr.Button(value="Run extractor", variant="primary")
75
+ model_output_text_box_2 = gr.Textbox(label="Extractor Output", elem_id="model_output_text_box_2")
76
 
77
  with gr.Row():
78
  gr.HTML("<p style='text-align: center'>Developed with 🫶 by <a href='https://getindexify.ai/' target='_blank'>Indexify</a> | a <a href='https://www.tensorlake.ai/' target='_blank'>Tensorlake</a> product</p>")
79
 
80
+ go_button.click(fn=use_pdf_extractor, inputs=[pdf_file_2], outputs=[model_output_text_box_2])
81
 
82
+ demo = gr.TabbedInterface([marker_demo, pdf_demo], ["Marker Extractor", "PDF Extractor"], theme=gr.themes.Soft())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  demo.queue()
85
  demo.launch()