Vik Paruchuri committed on
Commit
1f06427
·
1 Parent(s): 817e4ae

Fix pdftext workers config

Browse files
Files changed (3) hide show
  1. marker_app.py +3 -3
  2. marker_server.py +3 -6
  3. run_marker_app.py +1 -1
marker_app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
 
3
  os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
4
  os.environ["IN_STREAMLIT"] = "true"
5
- os.environ["PDFTEXT_CPU_WORKERS"] = "1"
6
 
7
  import base64
8
  import io
@@ -25,8 +24,10 @@ def load_models():
25
 
26
  def convert_pdf(fname: str, **kwargs) -> (str, Dict[str, Any], dict):
27
  config_parser = ConfigParser(kwargs)
 
 
28
  converter = PdfConverter(
29
- config=config_parser.generate_config_dict(),
30
  artifact_dict=model_dict,
31
  processor_list=config_parser.get_processors(),
32
  renderer=config_parser.get_renderer()
@@ -51,7 +52,6 @@ def img_to_html(img, img_alt):
51
  def markdown_insert_images(markdown, images):
52
  image_tags = re.findall(r'(!\[(?P<image_title>[^\]]*)\]\((?P<image_path>[^\)"\s]+)\s*([^\)]*)\))', markdown)
53
 
54
- print(image_tags)
55
  for image in image_tags:
56
  image_markdown = image[0]
57
  image_alt = image[1]
 
2
 
3
  os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
4
  os.environ["IN_STREAMLIT"] = "true"
 
5
 
6
  import base64
7
  import io
 
24
 
25
  def convert_pdf(fname: str, **kwargs) -> (str, Dict[str, Any], dict):
26
  config_parser = ConfigParser(kwargs)
27
+ config_dict = config_parser.generate_config_dict()
28
+ config_dict["pdftext_workers"] = 1
29
  converter = PdfConverter(
30
+ config=config_dict,
31
  artifact_dict=model_dict,
32
  processor_list=config_parser.get_processors(),
33
  renderer=config_parser.get_renderer()
 
52
  def markdown_insert_images(markdown, images):
53
  image_tags = re.findall(r'(!\[(?P<image_title>[^\]]*)\]\((?P<image_path>[^\)"\s]+)\s*([^\)]*)\))', markdown)
54
 
 
55
  for image in image_tags:
56
  image_markdown = image[0]
57
  image_alt = image[1]
marker_server.py CHANGED
@@ -1,10 +1,5 @@
1
- import argparse
2
- import os
3
-
4
  import click
5
 
6
- os.environ["PDFTEXT_CPU_WORKERS"] = "1"
7
-
8
  import uvicorn
9
  from pydantic import BaseModel, Field
10
  from starlette.responses import HTMLResponse
@@ -83,8 +78,10 @@ async def convert_pdf(
83
  try:
84
  options = params.model_dump()
85
  config_parser = ConfigParser(options)
 
 
86
  converter = PdfConverter(
87
- config=config_parser.generate_config_dict(),
88
  artifact_dict=app_data["models"],
89
  processor_list=config_parser.get_processors(),
90
  renderer=config_parser.get_renderer()
 
 
 
 
1
  import click
2
 
 
 
3
  import uvicorn
4
  from pydantic import BaseModel, Field
5
  from starlette.responses import HTMLResponse
 
78
  try:
79
  options = params.model_dump()
80
  config_parser = ConfigParser(options)
81
+ config_dict = config_parser.generate_config_dict()
82
+ config_dict["pdftext_workers"] = 1
83
  converter = PdfConverter(
84
+ config=config_dict,
85
  artifact_dict=app_data["models"],
86
  processor_list=config_parser.get_processors(),
87
  renderer=config_parser.get_renderer()
run_marker_app.py CHANGED
@@ -6,7 +6,7 @@ def run():
6
  cur_dir = os.path.dirname(os.path.abspath(__file__))
7
  app_path = os.path.join(cur_dir, "marker_app.py")
8
  cmd = ["streamlit", "run", app_path]
9
- subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true", "PDFTEXT_CPU_WORKERS": "1"})
10
 
11
 
12
  if __name__ == "__main__":
 
6
  cur_dir = os.path.dirname(os.path.abspath(__file__))
7
  app_path = os.path.join(cur_dir, "marker_app.py")
8
  cmd = ["streamlit", "run", app_path]
9
+ subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true"})
10
 
11
 
12
  if __name__ == "__main__":