Vik Paruchuri
committed on
Commit
·
487406d
1
Parent(s):
a800c59
parallel fixes
Browse files
- marker/convert.py +0 -1
- marker_app.py +1 -0
- run_marker_app.py +1 -1
marker/convert.py
CHANGED
|
@@ -96,7 +96,6 @@ def convert_single_pdf(
|
|
| 96 |
|
| 97 |
# OCR pages as needed
|
| 98 |
pages, ocr_stats = run_ocr(doc, pages, langs, ocr_model, batch_multiplier=batch_multiplier, ocr_all_pages=ocr_all_pages)
|
| 99 |
-
flush_cuda_memory()
|
| 100 |
|
| 101 |
out_meta["ocr_stats"] = ocr_stats
|
| 102 |
if len([b for p in pages for b in p.blocks]) == 0:
|
|
|
|
| 96 |
|
| 97 |
# OCR pages as needed
|
| 98 |
pages, ocr_stats = run_ocr(doc, pages, langs, ocr_model, batch_multiplier=batch_multiplier, ocr_all_pages=ocr_all_pages)
|
|
|
|
| 99 |
|
| 100 |
out_meta["ocr_stats"] = ocr_stats
|
| 101 |
if len([b for p in pages for b in p.blocks]) == 0:
|
marker_app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
| 3 |
os.environ["IN_STREAMLIT"] = "true"
|
|
|
|
| 4 |
|
| 5 |
import base64
|
| 6 |
import io
|
|
|
|
| 1 |
import os
|
| 2 |
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
| 3 |
os.environ["IN_STREAMLIT"] = "true"
|
| 4 |
+
os.environ["PDFTEXT_CPU_WORKERS"] = "1"
|
| 5 |
|
| 6 |
import base64
|
| 7 |
import io
|
run_marker_app.py
CHANGED
|
@@ -7,7 +7,7 @@ def run_app():
|
|
| 7 |
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
| 8 |
app_path = os.path.join(cur_dir, "marker_app.py")
|
| 9 |
cmd = ["streamlit", "run", app_path]
|
| 10 |
-
subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true"})
|
| 11 |
|
| 12 |
|
| 13 |
if __name__ == "__main__":
|
|
|
|
| 7 |
cur_dir = os.path.dirname(os.path.abspath(__file__))
|
| 8 |
app_path = os.path.join(cur_dir, "marker_app.py")
|
| 9 |
cmd = ["streamlit", "run", app_path]
|
| 10 |
+
subprocess.run(cmd, env={**os.environ, "IN_STREAMLIT": "true", "PDFTEXT_CPU_WORKERS": "1"})
|
| 11 |
|
| 12 |
|
| 13 |
if __name__ == "__main__":
|