Vik Paruchuri committed on
Commit
55f74a0
·
1 Parent(s): 44d1d02

Avoid duplicate use llm flag

Browse files
.github/workflows/scripts.yml CHANGED
@@ -24,4 +24,8 @@ jobs:
24
  - name: Test convert script
25
  run: poetry run marker benchmark_data/pdfs --max_files 1 --workers 1 --page_range 0
26
  - name: Text convert script multiple workers
27
- run: poetry run marker benchmark_data/pdfs --max_files 2 --workers 2 --page_range 0-5
 
 
 
 
 
24
  - name: Test convert script
25
  run: poetry run marker benchmark_data/pdfs --max_files 1 --workers 1 --page_range 0
26
  - name: Text convert script multiple workers
27
+ run: poetry run marker benchmark_data/pdfs --max_files 2 --workers 2 --page_range 0-5
28
+ - name: Test llm option
29
+ run: |
30
+ poetry run marker_single benchmark_data/pdfs/switch_trans.pdf --page_range 0 --use_llm > output.txt || true
31
+ grep -qv "UserWarning" output.txt
marker/config/parser.py CHANGED
@@ -70,6 +70,12 @@ class ConfigParser:
70
  )(fn)
71
 
72
  # we put common options here
 
 
 
 
 
 
73
  fn = click.option(
74
  "--converter_cls",
75
  type=str,
 
70
  )(fn)
71
 
72
  # we put common options here
73
+ fn = click.option(
74
+ "--use_llm",
75
+ is_flag=True,
76
+ default=False,
77
+ help="Use LLM for higher accuracy.",
78
+ )(fn)
79
  fn = click.option(
80
  "--converter_cls",
81
  type=str,
marker/config/printer.py CHANGED
@@ -6,6 +6,8 @@ from marker.config.crawler import crawler
6
 
7
 
8
  class CustomClickPrinter(click.Command):
 
 
9
  def parse_args(self, ctx, args):
10
  display_help = "config" in args and "--help" in args
11
  if display_help:
@@ -47,6 +49,9 @@ class CustomClickPrinter(click.Command):
47
  # Add shared attribute options first
48
  for attr, info in shared_attrs.items():
49
  if info["type"] in attr_types:
 
 
 
50
  ctx.command.params.append(
51
  click.Option(
52
  ["--" + attr],
 
6
 
7
 
8
  class CustomClickPrinter(click.Command):
9
+ force_flags = ["use_llm"]
10
+
11
  def parse_args(self, ctx, args):
12
  display_help = "config" in args and "--help" in args
13
  if display_help:
 
49
  # Add shared attribute options first
50
  for attr, info in shared_attrs.items():
51
  if info["type"] in attr_types:
52
+ if attr in self.force_flags:
53
+ continue
54
+
55
  ctx.command.params.append(
56
  click.Option(
57
  ["--" + attr],
poetry.lock CHANGED
@@ -850,14 +850,14 @@ files = [
850
 
851
  [[package]]
852
  name = "click"
853
- version = "8.1.8"
854
  description = "Composable command line interface toolkit"
855
  optional = false
856
- python-versions = ">=3.7"
857
  groups = ["main", "dev"]
858
  files = [
859
- {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
860
- {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
861
  ]
862
 
863
  [package.dependencies]
@@ -6467,4 +6467,4 @@ full = ["ebooklib", "mammoth", "openpyxl", "python-pptx", "weasyprint"]
6467
  [metadata]
6468
  lock-version = "2.1"
6469
  python-versions = "^3.10"
6470
- content-hash = "9ad983393fbfbb4ade6ce2e8a1ec239afde9e648ce56d7d34dd21eb50cd5d0ca"
 
850
 
851
  [[package]]
852
  name = "click"
853
+ version = "8.2.0"
854
  description = "Composable command line interface toolkit"
855
  optional = false
856
+ python-versions = ">=3.10"
857
  groups = ["main", "dev"]
858
  files = [
859
+ {file = "click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c"},
860
+ {file = "click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d"},
861
  ]
862
 
863
  [package.dependencies]
 
6467
  [metadata]
6468
  lock-version = "2.1"
6469
  python-versions = "^3.10"
6470
+ content-hash = "1f5c00e7588f89650cbe93b1ae34fa7b265d472b0b954e4a81a7b4912c2f3c01"
pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
  [tool.poetry]
2
  name = "marker-pdf"
3
- version = "1.7.0"
4
  description = "Convert documents to markdown with high speed and accuracy."
5
  authors = ["Vik Paruchuri <github@vikas.sh>"]
6
  readme = "README.md"
@@ -30,7 +30,7 @@ surya-ocr = "^0.14.1"
30
  regex = "^2024.4.28"
31
  pdftext = "~0.6.2"
32
  markdownify = "^0.13.1"
33
- click = "^8.1.7"
34
  markdown2 = "^2.5.2"
35
  filetype = "^1.2.0"
36
  scikit-learn = "^1.6.1"
 
1
  [tool.poetry]
2
  name = "marker-pdf"
3
+ version = "1.7.1"
4
  description = "Convert documents to markdown with high speed and accuracy."
5
  authors = ["Vik Paruchuri <github@vikas.sh>"]
6
  readme = "README.md"
 
30
  regex = "^2024.4.28"
31
  pdftext = "~0.6.2"
32
  markdownify = "^0.13.1"
33
+ click = "^8.2.0"
34
  markdown2 = "^2.5.2"
35
  filetype = "^1.2.0"
36
  scikit-learn = "^1.6.1"