Vik Paruchuri committed on
Commit
487406d
·
1 Parent(s): a800c59

parallel fixes

Browse files
Files changed (3) hide show
  1. marker/convert.py +0 -1
  2. marker_app.py +1 -0
  3. run_marker_app.py +1 -1
marker/convert.py CHANGED
@@ -96,7 +96,6 @@ def convert_single_pdf(
96
 
97
  # OCR pages as needed
98
  pages, ocr_stats = run_ocr(doc, pages, langs, ocr_model, batch_multiplier=batch_multiplier, ocr_all_pages=ocr_all_pages)
99
- flush_cuda_memory()
100
 
101
  out_meta["ocr_stats"] = ocr_stats
102
  if len([b for p in pages for b in p.blocks]) == 0:
 
96
 
97
  # OCR pages as needed
98
  pages, ocr_stats = run_ocr(doc, pages, langs, ocr_model, batch_multiplier=batch_multiplier, ocr_all_pages=ocr_all_pages)
 
99
 
100
  out_meta["ocr_stats"] = ocr_stats
101
  if len([b for p in pages for b in p.blocks]) == 0:
marker_app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
3
  os.environ["IN_STREAMLIT"] = "true"
 
4
 
5
  import base64
6
  import io
 
1
  import os
2
  os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
3
  os.environ["IN_STREAMLIT"] = "true"
4
+ os.environ["PDFTEXT_CPU_WORKERS"] = "1"
5
 
6
  import base64
7
  import io
run_marker_app.py CHANGED
@@ -7,7 +7,7 @@ def run_app():
7
  cur_dir = os.path.dirname(os.path.abspath(__file__))
8
  app_path = os.path.join(cur_dir, "marker_app.py")
9
  cmd = ["streamlit", "run", app_path]
10
- subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true"})
11
 
12
 
13
  if __name__ == "__main__":
 
7
  cur_dir = os.path.dirname(os.path.abspath(__file__))
8
  app_path = os.path.join(cur_dir, "marker_app.py")
9
  cmd = ["streamlit", "run", app_path]
10
+ subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true", "PDFTEXT_CPU_WORKERS": "1"})
11
 
12
 
13
  if __name__ == "__main__":