Vik Paruchuri committed on
Commit
45ef16a
·
1 Parent(s): aa38742

Improve force ocr, enable parallel factor below 1

Browse files
.github/workflows/tests.yml CHANGED
@@ -17,7 +17,9 @@ jobs:
17
  with:
18
  python-version: 3.11
19
  - name: Install system dependencies
20
- run: cat scripts/install/apt-requirements.txt | xargs sudo apt-get install -y
 
 
21
  - name: Show tessdata folders
22
  run: ls /usr/share/tesseract-ocr/
23
  - name: Install python dependencies
 
17
  with:
18
  python-version: 3.11
19
  - name: Install system dependencies
20
+ run: |
21
+ sudo apt-get update
22
+ cat scripts/install/apt-requirements.txt | xargs sudo apt-get install -y
23
  - name: Show tessdata folders
24
  run: ls /usr/share/tesseract-ocr/
25
  - name: Install python dependencies
README.md CHANGED
@@ -51,7 +51,7 @@ PDF is a tricky format, so marker will not always work perfectly. Here are some
51
  - Marker will convert fewer equations to latex than nougat. This is because it has to first detect equations, then convert them without hallucination.
52
  - Whitespace and indentations are not always respected.
53
  - Not all lines/spans will be joined properly.
54
- - Only languages similar to English (Spanish, French, German, Russian, etc) are supported. Languages with different character sets (Chinese, Japanese, Korean, etc) are not.
55
  - This works best on digital PDFs that won't require a lot of OCR. It's optimized for speed, and limited OCR is used to fix errors.
56
 
57
  # Installation
@@ -88,6 +88,7 @@ First, clone the repo:
88
  - Install python requirements
89
  - `poetry install`
90
  - `poetry shell` to activate your poetry venv
 
91
 
92
  # Usage
93
 
 
51
  - Marker will convert fewer equations to latex than nougat. This is because it has to first detect equations, then convert them without hallucination.
52
  - Whitespace and indentations are not always respected.
53
  - Not all lines/spans will be joined properly.
54
+ - Languages similar to English (Spanish, French, German, Russian, etc) have the best support. There is provisional support for Chinese, Japanese, Korean, and Hindi, but it may not work as well.
55
  - This works best on digital PDFs that won't require a lot of OCR. It's optimized for speed, and limited OCR is used to fix errors.
56
 
57
  # Installation
 
88
  - Install python requirements
89
  - `poetry install`
90
  - `poetry shell` to activate your poetry venv
91
+ - On ARM Macs (M1+), make sure to set the `TORCH_DEVICE` setting to `mps` (more details below) for a speedup
92
 
93
  # Usage
94
 
marker/convert.py CHANGED
@@ -92,7 +92,7 @@ def convert_single_pdf(
92
  tess_lang,
93
  spell_lang,
94
  max_pages=max_pages,
95
- parallel=parallel_factor * settings.OCR_PARALLEL_WORKERS
96
  )
97
 
98
  out_meta["toc"] = toc
@@ -109,7 +109,7 @@ def convert_single_pdf(
109
  doc,
110
  blocks,
111
  layoutlm_model,
112
- batch_size=settings.LAYOUT_BATCH_SIZE * parallel_factor
113
  )
114
 
115
  # Find headers and footers
@@ -125,7 +125,7 @@ def convert_single_pdf(
125
  doc,
126
  blocks,
127
  order_model,
128
- batch_size=settings.ORDERER_BATCH_SIZE * parallel_factor
129
  )
130
 
131
  # Fix code blocks
@@ -148,7 +148,7 @@ def convert_single_pdf(
148
  blocks,
149
  block_types,
150
  nougat_model,
151
- batch_size=settings.NOUGAT_BATCH_SIZE * parallel_factor
152
  )
153
  out_meta["block_stats"]["equations"] = eq_stats
154
 
 
92
  tess_lang,
93
  spell_lang,
94
  max_pages=max_pages,
95
+ parallel=int(parallel_factor * settings.OCR_PARALLEL_WORKERS)
96
  )
97
 
98
  out_meta["toc"] = toc
 
109
  doc,
110
  blocks,
111
  layoutlm_model,
112
+ batch_size=int(settings.LAYOUT_BATCH_SIZE * parallel_factor)
113
  )
114
 
115
  # Find headers and footers
 
125
  doc,
126
  blocks,
127
  order_model,
128
+ batch_size=int(settings.ORDERER_BATCH_SIZE * parallel_factor)
129
  )
130
 
131
  # Fix code blocks
 
148
  blocks,
149
  block_types,
150
  nougat_model,
151
+ batch_size=int(settings.NOUGAT_BATCH_SIZE * parallel_factor)
152
  )
153
  out_meta["block_stats"]["equations"] = eq_stats
154
 
marker/debug/data.py CHANGED
@@ -11,7 +11,7 @@ import io
11
 
12
 
13
  def dump_nougat_debug_data(doc, images, converted_spans):
14
- if not settings.DEBUG_DATA_FOLDER:
15
  return
16
 
17
  if len(images) == 0:
@@ -44,7 +44,7 @@ def dump_nougat_debug_data(doc, images, converted_spans):
44
 
45
 
46
  def dump_bbox_debug_data(doc, blocks: List[Page]):
47
- if not settings.DEBUG_DATA_FOLDER:
48
  return
49
 
50
  # Remove extension from doc name
 
11
 
12
 
13
  def dump_nougat_debug_data(doc, images, converted_spans):
14
+ if not settings.DEBUG_DATA_FOLDER or settings.DEBUG_LEVEL == 0:
15
  return
16
 
17
  if len(images) == 0:
 
44
 
45
 
46
  def dump_bbox_debug_data(doc, blocks: List[Page]):
47
+ if not settings.DEBUG_DATA_FOLDER or settings.DEBUG_LEVEL < 2:
48
  return
49
 
50
  # Remove extension from doc name
marker/ocr/page.py CHANGED
@@ -53,7 +53,8 @@ def ocr_entire_page_ocrmp(page, lang: str, spellchecker: Optional[SpellChecker]
53
  outbytes,
54
  language=lang,
55
  output_type="pdf",
56
- redo_ocr=True,
 
57
  progress_bar=False,
58
  optimize=False,
59
  fast_web_view=1e6,
 
53
  outbytes,
54
  language=lang,
55
  output_type="pdf",
56
+ redo_ocr=None if settings.OCR_ALL_PAGES else True,
57
+ force_ocr=True if settings.OCR_ALL_PAGES else None,
58
  progress_bar=False,
59
  optimize=False,
60
  fast_web_view=1e6,
marker/settings.py CHANGED
@@ -37,6 +37,10 @@ class Settings(BaseSettings):
37
  "French": "fra",
38
  "German": "deu",
39
  "Russian": "rus",
 
 
 
 
40
  }
41
  TESSERACT_TIMEOUT: int = 20 # When to give up on OCR
42
  SPELLCHECK_LANGUAGES: Dict = {
@@ -46,6 +50,10 @@ class Settings(BaseSettings):
46
  "French": "fr",
47
  "German": "de",
48
  "Russian": "ru",
 
 
 
 
49
  }
50
  OCR_ALL_PAGES: bool = False # Run OCR on every page even if text can be extracted
51
  OCR_PARALLEL_WORKERS: int = 2 # How many CPU workers to use for OCR
@@ -101,6 +109,7 @@ class Settings(BaseSettings):
101
  # Debug
102
  DEBUG: bool = False # Enable debug logging
103
  DEBUG_DATA_FOLDER: Optional[str] = None
 
104
 
105
  @computed_field
106
  @property
 
37
  "French": "fra",
38
  "German": "deu",
39
  "Russian": "rus",
40
+ "Chinese": "chi_sim",
41
+ "Japanese": "jpn",
42
+ "Korean": "kor",
43
+ "Hindi": "hin",
44
  }
45
  TESSERACT_TIMEOUT: int = 20 # When to give up on OCR
46
  SPELLCHECK_LANGUAGES: Dict = {
 
50
  "French": "fr",
51
  "German": "de",
52
  "Russian": "ru",
53
+ "Chinese": None,
54
+ "Japanese": None,
55
+ "Korean": None,
56
+ "Hindi": None,
57
  }
58
  OCR_ALL_PAGES: bool = False # Run OCR on every page even if text can be extracted
59
  OCR_PARALLEL_WORKERS: int = 2 # How many CPU workers to use for OCR
 
109
  # Debug
110
  DEBUG: bool = False # Enable debug logging
111
  DEBUG_DATA_FOLDER: Optional[str] = None
112
+ DEBUG_LEVEL: int = 0 # 0 to 2, 2 means log everything
113
 
114
  @computed_field
115
  @property
poetry.lock CHANGED
@@ -968,6 +968,72 @@ files = [
968
  [package.dependencies]
969
  wcwidth = ">=0.2.12,<0.3.0"
970
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
971
  [[package]]
972
  name = "huggingface-hub"
973
  version = "0.19.4"
@@ -5746,4 +5812,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
5746
  [metadata]
5747
  lock-version = "2.0"
5748
  python-versions = ">=3.9,<3.13"
5749
- content-hash = "a5c103deeebe3f7f31384cf69884d893b123c89f30fa9122a2bd4067c1675843"
 
968
  [package.dependencies]
969
  wcwidth = ">=0.2.12,<0.3.0"
970
 
971
+ [[package]]
972
+ name = "grpcio"
973
+ version = "1.60.0"
974
+ description = "HTTP/2-based RPC framework"
975
+ optional = false
976
+ python-versions = ">=3.7"
977
+ files = [
978
+ {file = "grpcio-1.60.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:d020cfa595d1f8f5c6b343530cd3ca16ae5aefdd1e832b777f9f0eb105f5b139"},
979
+ {file = "grpcio-1.60.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b98f43fcdb16172dec5f4b49f2fece4b16a99fd284d81c6bbac1b3b69fcbe0ff"},
980
+ {file = "grpcio-1.60.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:20e7a4f7ded59097c84059d28230907cd97130fa74f4a8bfd1d8e5ba18c81491"},
981
+ {file = "grpcio-1.60.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:452ca5b4afed30e7274445dd9b441a35ece656ec1600b77fff8c216fdf07df43"},
982
+ {file = "grpcio-1.60.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43e636dc2ce9ece583b3e2ca41df5c983f4302eabc6d5f9cd04f0562ee8ec1ae"},
983
+ {file = "grpcio-1.60.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e306b97966369b889985a562ede9d99180def39ad42c8014628dd3cc343f508"},
984
+ {file = "grpcio-1.60.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f897c3b127532e6befdcf961c415c97f320d45614daf84deba0a54e64ea2457b"},
985
+ {file = "grpcio-1.60.0-cp310-cp310-win32.whl", hash = "sha256:b87efe4a380887425bb15f220079aa8336276398dc33fce38c64d278164f963d"},
986
+ {file = "grpcio-1.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:a9c7b71211f066908e518a2ef7a5e211670761651039f0d6a80d8d40054047df"},
987
+ {file = "grpcio-1.60.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:fb464479934778d7cc5baf463d959d361954d6533ad34c3a4f1d267e86ee25fd"},
988
+ {file = "grpcio-1.60.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:4b44d7e39964e808b071714666a812049765b26b3ea48c4434a3b317bac82f14"},
989
+ {file = "grpcio-1.60.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:90bdd76b3f04bdb21de5398b8a7c629676c81dfac290f5f19883857e9371d28c"},
990
+ {file = "grpcio-1.60.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91229d7203f1ef0ab420c9b53fe2ca5c1fbeb34f69b3bc1b5089466237a4a134"},
991
+ {file = "grpcio-1.60.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b36a2c6d4920ba88fa98075fdd58ff94ebeb8acc1215ae07d01a418af4c0253"},
992
+ {file = "grpcio-1.60.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:297eef542156d6b15174a1231c2493ea9ea54af8d016b8ca7d5d9cc65cfcc444"},
993
+ {file = "grpcio-1.60.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:87c9224acba0ad8bacddf427a1c2772e17ce50b3042a789547af27099c5f751d"},
994
+ {file = "grpcio-1.60.0-cp311-cp311-win32.whl", hash = "sha256:95ae3e8e2c1b9bf671817f86f155c5da7d49a2289c5cf27a319458c3e025c320"},
995
+ {file = "grpcio-1.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:467a7d31554892eed2aa6c2d47ded1079fc40ea0b9601d9f79204afa8902274b"},
996
+ {file = "grpcio-1.60.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:a7152fa6e597c20cb97923407cf0934e14224af42c2b8d915f48bc3ad2d9ac18"},
997
+ {file = "grpcio-1.60.0-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:7db16dd4ea1b05ada504f08d0dca1cd9b926bed3770f50e715d087c6f00ad748"},
998
+ {file = "grpcio-1.60.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:b0571a5aef36ba9177e262dc88a9240c866d903a62799e44fd4aae3f9a2ec17e"},
999
+ {file = "grpcio-1.60.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6fd9584bf1bccdfff1512719316efa77be235469e1e3295dce64538c4773840b"},
1000
+ {file = "grpcio-1.60.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6a478581b1a1a8fdf3318ecb5f4d0cda41cacdffe2b527c23707c9c1b8fdb55"},
1001
+ {file = "grpcio-1.60.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:77c8a317f0fd5a0a2be8ed5cbe5341537d5c00bb79b3bb27ba7c5378ba77dbca"},
1002
+ {file = "grpcio-1.60.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1c30bb23a41df95109db130a6cc1b974844300ae2e5d68dd4947aacba5985aa5"},
1003
+ {file = "grpcio-1.60.0-cp312-cp312-win32.whl", hash = "sha256:2aef56e85901c2397bd557c5ba514f84de1f0ae5dd132f5d5fed042858115951"},
1004
+ {file = "grpcio-1.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:e381fe0c2aa6c03b056ad8f52f8efca7be29fb4d9ae2f8873520843b6039612a"},
1005
+ {file = "grpcio-1.60.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:92f88ca1b956eb8427a11bb8b4a0c0b2b03377235fc5102cb05e533b8693a415"},
1006
+ {file = "grpcio-1.60.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:e278eafb406f7e1b1b637c2cf51d3ad45883bb5bd1ca56bc05e4fc135dfdaa65"},
1007
+ {file = "grpcio-1.60.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:a48edde788b99214613e440fce495bbe2b1e142a7f214cce9e0832146c41e324"},
1008
+ {file = "grpcio-1.60.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de2ad69c9a094bf37c1102b5744c9aec6cf74d2b635558b779085d0263166454"},
1009
+ {file = "grpcio-1.60.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:073f959c6f570797272f4ee9464a9997eaf1e98c27cb680225b82b53390d61e6"},
1010
+ {file = "grpcio-1.60.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c826f93050c73e7769806f92e601e0efdb83ec8d7c76ddf45d514fee54e8e619"},
1011
+ {file = "grpcio-1.60.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9e30be89a75ee66aec7f9e60086fadb37ff8c0ba49a022887c28c134341f7179"},
1012
+ {file = "grpcio-1.60.0-cp37-cp37m-win_amd64.whl", hash = "sha256:b0fb2d4801546598ac5cd18e3ec79c1a9af8b8f2a86283c55a5337c5aeca4b1b"},
1013
+ {file = "grpcio-1.60.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:9073513ec380434eb8d21970e1ab3161041de121f4018bbed3146839451a6d8e"},
1014
+ {file = "grpcio-1.60.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:74d7d9fa97809c5b892449b28a65ec2bfa458a4735ddad46074f9f7d9550ad13"},
1015
+ {file = "grpcio-1.60.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:1434ca77d6fed4ea312901122dc8da6c4389738bf5788f43efb19a838ac03ead"},
1016
+ {file = "grpcio-1.60.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e61e76020e0c332a98290323ecfec721c9544f5b739fab925b6e8cbe1944cf19"},
1017
+ {file = "grpcio-1.60.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675997222f2e2f22928fbba640824aebd43791116034f62006e19730715166c0"},
1018
+ {file = "grpcio-1.60.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5208a57eae445ae84a219dfd8b56e04313445d146873117b5fa75f3245bc1390"},
1019
+ {file = "grpcio-1.60.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:428d699c8553c27e98f4d29fdc0f0edc50e9a8a7590bfd294d2edb0da7be3629"},
1020
+ {file = "grpcio-1.60.0-cp38-cp38-win32.whl", hash = "sha256:83f2292ae292ed5a47cdcb9821039ca8e88902923198f2193f13959360c01860"},
1021
+ {file = "grpcio-1.60.0-cp38-cp38-win_amd64.whl", hash = "sha256:705a68a973c4c76db5d369ed573fec3367d7d196673fa86614b33d8c8e9ebb08"},
1022
+ {file = "grpcio-1.60.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:c193109ca4070cdcaa6eff00fdb5a56233dc7610216d58fb81638f89f02e4968"},
1023
+ {file = "grpcio-1.60.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:676e4a44e740deaba0f4d95ba1d8c5c89a2fcc43d02c39f69450b1fa19d39590"},
1024
+ {file = "grpcio-1.60.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:5ff21e000ff2f658430bde5288cb1ac440ff15c0d7d18b5fb222f941b46cb0d2"},
1025
+ {file = "grpcio-1.60.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c86343cf9ff7b2514dd229bdd88ebba760bd8973dac192ae687ff75e39ebfab"},
1026
+ {file = "grpcio-1.60.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fd3b3968ffe7643144580f260f04d39d869fcc2cddb745deef078b09fd2b328"},
1027
+ {file = "grpcio-1.60.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:30943b9530fe3620e3b195c03130396cd0ee3a0d10a66c1bee715d1819001eaf"},
1028
+ {file = "grpcio-1.60.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b10241250cb77657ab315270b064a6c7f1add58af94befa20687e7c8d8603ae6"},
1029
+ {file = "grpcio-1.60.0-cp39-cp39-win32.whl", hash = "sha256:79a050889eb8d57a93ed21d9585bb63fca881666fc709f5d9f7f9372f5e7fd03"},
1030
+ {file = "grpcio-1.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:8a97a681e82bc11a42d4372fe57898d270a2707f36c45c6676e49ce0d5c41353"},
1031
+ {file = "grpcio-1.60.0.tar.gz", hash = "sha256:2199165a1affb666aa24adf0c97436686d0a61bc5fc113c037701fb7c7fceb96"},
1032
+ ]
1033
+
1034
+ [package.extras]
1035
+ protobuf = ["grpcio-tools (>=1.60.0)"]
1036
+
1037
  [[package]]
1038
  name = "huggingface-hub"
1039
  version = "0.19.4"
 
5812
  [metadata]
5813
  lock-version = "2.0"
5814
  python-versions = ">=3.9,<3.13"
5815
+ content-hash = "7d8f07f7b4ab7e802386b76d1add6ade5560636df131e8a7123436817638ad7c"
pyproject.toml CHANGED
@@ -1,9 +1,12 @@
1
  [tool.poetry]
2
  name = "marker"
3
  version = "0.1.0"
4
- description = ""
5
- authors = ["Vik Paruchuri <vik.paruchuri@gmail.com>"]
6
  readme = "README.md"
 
 
 
7
 
8
  [tool.poetry.dependencies]
9
  python = ">=3.9,<3.13"
@@ -29,6 +32,7 @@ ftfy = "^6.1.1"
29
  nltk = "^3.8.1"
30
  ocrmypdf = "^15.4.0"
31
  bitsandbytes = "^0.41.2.post2"
 
32
 
33
  [tool.poetry.group.dev.dependencies]
34
  jupyter = "^1.0.0"
 
1
  [tool.poetry]
2
  name = "marker"
3
  version = "0.1.0"
4
+ description = "Convert PDF to markdown with high speed and accuracy."
5
+ authors = ["Vik Paruchuri <github@vikas.sh>"]
6
  readme = "README.md"
7
+ license = "GPL-3.0-or-later"
8
+ repository = "https://github.com/VikParuchuri/marker"
9
+ keywords = ["pdf", "markdown", "ocr", "nlp"]
10
 
11
  [tool.poetry.dependencies]
12
  python = ">=3.9,<3.13"
 
32
  nltk = "^3.8.1"
33
  ocrmypdf = "^15.4.0"
34
  bitsandbytes = "^0.41.2.post2"
35
+ grpcio = "^1.60.0"
36
 
37
  [tool.poetry.group.dev.dependencies]
38
  jupyter = "^1.0.0"
scripts/install/apt-requirements.txt CHANGED
@@ -7,4 +7,8 @@ tesseract-ocr-deu
7
  tesseract-ocr-por
8
  tesseract-ocr-spa
9
  tesseract-ocr-rus
10
- tesseract-ocr-fra
 
 
 
 
 
7
  tesseract-ocr-por
8
  tesseract-ocr-spa
9
  tesseract-ocr-rus
10
+ tesseract-ocr-fra
11
+ tesseract-ocr-chi-sim
12
+ tesseract-ocr-jpn
13
+ tesseract-ocr-kor
14
+ tesseract-ocr-hin
scripts/markdown_to_pdf.sh CHANGED
@@ -7,4 +7,4 @@ if [ $# -ne 2 ]; then
7
  exit 1
8
  fi
9
 
10
- pandoc $1 $2 --pdf-engine=xelatex --include-in-header=header.tex
 
7
  exit 1
8
  fi
9
 
10
+ pandoc $1 -o $2 --pdf-engine=xelatex --include-in-header=header.tex