Vik Paruchuri committed on
Commit
f7db972
·
2 Parent(s): f8e5b5e e165c1b

Merge in master

Browse files
chunk_convert.py CHANGED
@@ -1,4 +1,4 @@
1
- from marker.scripts import chunk_convert_cli
2
 
3
  if __name__ == "__main__":
4
  chunk_convert_cli()
 
1
+ from marker.scripts.chunk_convert import chunk_convert_cli
2
 
3
  if __name__ == "__main__":
4
  chunk_convert_cli()
convert.py CHANGED
@@ -1,4 +1,4 @@
1
- from marker.scripts import convert_cli
2
 
3
  if __name__ == "__main__":
4
  convert_cli()
 
1
+ from marker.scripts.convert import convert_cli
2
 
3
  if __name__ == "__main__":
4
  convert_cli()
convert_single.py CHANGED
@@ -1,4 +1,4 @@
1
- from marker.scripts import convert_single_cli
2
 
3
  if __name__ == "__main__":
4
  convert_single_cli()
 
1
+ from marker.scripts.convert_single import convert_single_cli
2
 
3
  if __name__ == "__main__":
4
  convert_single_cli()
marker/scripts/__init__.py CHANGED
@@ -1,5 +0,0 @@
1
- from marker.scripts.convert_single import convert_single_cli
2
- from marker.scripts.convert import convert_cli
3
- from marker.scripts.server import server_cli
4
- from marker.scripts.run_streamlit_app import streamlit_app_cli
5
- from marker.scripts.chunk_convert import chunk_convert_cli
 
 
 
 
 
 
marker/scripts/server.py CHANGED
@@ -3,7 +3,6 @@ import traceback
3
  import click
4
  import os
5
 
6
- import uvicorn
7
  from pydantic import BaseModel, Field
8
  from starlette.responses import HTMLResponse
9
 
@@ -163,6 +162,7 @@ async def convert_pdf_upload(
163
  @click.option("--port", type=int, default=8000, help="Port to run the server on")
164
  @click.option("--host", type=str, default="127.0.0.1", help="Host to run the server on")
165
  def server_cli(port: int, host: str):
 
166
  # Run the server
167
  uvicorn.run(
168
  app,
 
3
  import click
4
  import os
5
 
 
6
  from pydantic import BaseModel, Field
7
  from starlette.responses import HTMLResponse
8
 
 
162
  @click.option("--port", type=int, default=8000, help="Port to run the server on")
163
  @click.option("--host", type=str, default="127.0.0.1", help="Host to run the server on")
164
  def server_cli(port: int, host: str):
165
+ import uvicorn
166
  # Run the server
167
  uvicorn.run(
168
  app,
marker/scripts/streamlit_app.py CHANGED
@@ -68,15 +68,12 @@ def markdown_insert_images(markdown, images):
68
  def get_page_image(pdf_file, page_num, dpi=96):
69
  if "pdf" in pdf_file.type:
70
  doc = open_pdf(pdf_file)
71
- renderer = doc.render(
72
- pypdfium2.PdfBitmap.to_pil,
73
- page_indices=[page_num],
74
  scale=dpi / 72,
75
- )
76
- png = list(renderer)[0]
77
- png_image = png.convert("RGB")
78
  else:
79
- png_image = Image.open(in_file).convert("RGB")
80
  return png_image
81
 
82
 
 
68
  def get_page_image(pdf_file, page_num, dpi=96):
69
  if "pdf" in pdf_file.type:
70
  doc = open_pdf(pdf_file)
71
+ page = doc[page_num]
72
+ png_image = page.render(
 
73
  scale=dpi / 72,
74
+ ).to_pil().convert("RGB")
 
 
75
  else:
76
+ png_image = Image.open(pdf_file).convert("RGB")
77
  return png_image
78
 
79
 
marker_app.py CHANGED
@@ -1,4 +1,4 @@
1
- from marker.scripts import streamlit_app_cli
2
 
3
  if __name__ == "__main__":
4
  streamlit_app_cli()
 
1
+ from marker.scripts.run_streamlit_app import streamlit_app_cli
2
 
3
  if __name__ == "__main__":
4
  streamlit_app_cli()
marker_server.py CHANGED
@@ -1,4 +1,4 @@
1
- from marker.scripts import server_cli
2
 
3
  if __name__ == "__main__":
4
  server_cli()
 
1
+ from marker.scripts.server import server_cli
2
 
3
  if __name__ == "__main__":
4
  server_cli()
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -23,13 +23,12 @@ transformers = "^4.45.2"
23
  python-dotenv = "^1.0.0"
24
  torch = "^2.5.1"
25
  tqdm = "^4.66.1"
26
- tabulate = "^0.9.0"
27
  ftfy = "^6.1.1"
28
  texify = "^0.2.1"
29
  rapidfuzz = "^3.8.1"
30
- surya-ocr = "~0.8.3"
31
  regex = "^2024.4.28"
32
- pdftext = "~0.4.1"
33
  markdownify = "^0.13.1"
34
  click = "^8.1.7"
35
  google-generativeai = "^0.8.3"
@@ -49,6 +48,7 @@ pytest-mock = "^3.14.0"
49
  apted = "1.0.3"
50
  distance = "0.1.3"
51
  lxml = "5.3.0"
 
52
 
53
  [tool.poetry.scripts]
54
  marker = "marker.scripts.convert:convert_cli"
 
23
  python-dotenv = "^1.0.0"
24
  torch = "^2.5.1"
25
  tqdm = "^4.66.1"
 
26
  ftfy = "^6.1.1"
27
  texify = "^0.2.1"
28
  rapidfuzz = "^3.8.1"
29
+ surya-ocr = "~0.9.0"
30
  regex = "^2024.4.28"
31
+ pdftext = "~0.5.0"
32
  markdownify = "^0.13.1"
33
  click = "^8.1.7"
34
  google-generativeai = "^0.8.3"
 
48
  apted = "1.0.3"
49
  distance = "0.1.3"
50
  lxml = "5.3.0"
51
+ tabulate = "^0.9.0"
52
 
53
  [tool.poetry.scripts]
54
  marker = "marker.scripts.convert:convert_cli"
signatures/version1/cla.json CHANGED
@@ -111,6 +111,38 @@
111
  "created_at": "2024-12-05T13:13:34Z",
112
  "repoId": 712111618,
113
  "pullRequestNo": 416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
  ]
116
  }
 
111
  "created_at": "2024-12-05T13:13:34Z",
112
  "repoId": 712111618,
113
  "pullRequestNo": 416
114
+ },
115
+ {
116
+ "name": "tarun-menta",
117
+ "id": 66506307,
118
+ "comment_id": 2543907406,
119
+ "created_at": "2024-12-15T15:06:32Z",
120
+ "repoId": 712111618,
121
+ "pullRequestNo": 427
122
+ },
123
+ {
124
+ "name": "ZeyuTeng96",
125
+ "id": 96521059,
126
+ "comment_id": 2567236036,
127
+ "created_at": "2025-01-02T02:36:02Z",
128
+ "repoId": 712111618,
129
+ "pullRequestNo": 452
130
+ },
131
+ {
132
+ "name": "xiaoyao9184",
133
+ "id": 6614349,
134
+ "comment_id": 2571623521,
135
+ "created_at": "2025-01-05T13:15:34Z",
136
+ "repoId": 712111618,
137
+ "pullRequestNo": 463
138
+ },
139
+ {
140
+ "name": "yasyf",
141
+ "id": 709645,
142
+ "comment_id": 2571679069,
143
+ "created_at": "2025-01-05T16:23:12Z",
144
+ "repoId": 712111618,
145
+ "pullRequestNo": 464
146
  }
147
  ]
148
  }