File size: 2,113 Bytes
a5c1c2e
8650951
52f44f9
478b8f8
45ef16a
a5c1c2e
45ef16a
 
 
8650951
 
 
 
467f8c9
e197f1e
8650951
a5c1c2e
 
18e797e
a5c1c2e
 
 
18e797e
a5c1c2e
7a9ab8a
c959776
e0b8544
2ad7f6b
a45c553
5330766
1cc452c
8ebe119
55f74a0
e332feb
78f3a66
71a77dd
d3ec0e6
b34699f
8a97aa0
776aabb
 
 
 
 
 
 
a6f6ed1
c959776
a5c1c2e
 
6fa9fe6
 
853977e
 
 
c1b5598
f7ff7f7
0ad83bb
 
 
f7db972
6c81421
 
a5c1c2e
776aabb
 
 
8650951
5732cdc
 
 
 
5c982c9
5732cdc
8650951
a5c1c2e
 
ff6aa6d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Distribution metadata consumed by the Poetry build backend.
[tool.poetry]
name = "marker-pdf"
version = "1.10.1"
description = "Convert documents to markdown with high speed and accuracy."
license = "GPL-3.0-or-later"
readme = "README.md"
authors = ["Vik Paruchuri <github@vikas.sh>"]
repository = "https://github.com/VikParuchuri/marker"
keywords = [
    "pdf",
    "markdown",
    "ocr",
    "nlp",
]

# The distribution is published as "marker-pdf", but the package directory
# that gets packaged is "marker".
packages = [{ include = "marker" }]

# Additional files under marker/scripts (shell scripts and HTML files) added
# to the build's include list.
include = ["marker/scripts/*.sh", "marker/scripts/*.html"]

# Runtime requirements: installed for every user of the package.
# Caret constraints on 0.x versions (surya-ocr, anthropic) only allow patch
# updates within the same minor series.
[tool.poetry.dependencies]
python = "^3.10"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
transformers = "^4.45.2"
python-dotenv = "^1.0.0"
torch = "^2.7.0"
tqdm = "^4.66.1"
ftfy = "^6.1.1"
rapidfuzz = "^3.8.1"
surya-ocr = "^0.17.0"
regex = "^2024.4.28"
# Tilde constraint: >=0.6.3,<0.7.0 (patch releases only).
pdftext = "~0.6.3"
markdownify = "^1.1.0"
click = "^8.2.0"
markdown2 = "^2.5.2"
filetype = "^1.2.0"
google-genai = "^1.0.0"
anthropic = "^0.46.0"
openai = "^1.65.2"
scikit-learn = "^1.6.1"

# Optional dependencies for documents. These are only installed when the
# "full" extra is requested.
mammoth = { version = "^1.9.0", optional = true }
openpyxl = { version = "^3.1.5", optional = true }
python-pptx = { version = "^1.0.2", optional = true }
ebooklib = { version = "^0.18", optional = true }
weasyprint = { version = "^63.1", optional = true }

# Development-only tooling; not part of the published package's requirements.
[tool.poetry.group.dev.dependencies]
# Git hook runner: needed by contributors, not by users of the package.
pre-commit = "^4.2.0"
jupyter = "^1.0.0"
datasets = "^2.21.0"
streamlit = "^1.37.1"
fastapi = "^0.115.4"
uvicorn = "^0.32.0"
python-multipart = "^0.0.16"
pytest = "^8.3.3"
pytest-mock = "^3.14.0"
# Exact pins (no caret): these three resolve to precisely the listed version.
apted = "1.0.3"
distance = "0.1.3"
lxml = "5.3.0"
tabulate = "^0.9.0"
latex2mathml = "^3.77.0"
playwright = "^1.49.1"

# The "full" extra enables every dependency marked `optional = true` in
# [tool.poetry.dependencies].
[tool.poetry.extras]
full = [
    "mammoth",
    "openpyxl",
    "python-pptx",
    "ebooklib",
    "weasyprint",
]

# Console commands installed with the package. Each value has the form
# "module.path:function" and names the callable the command invokes.
[tool.poetry.scripts]
# Conversion commands.
marker_single = "marker.scripts.convert_single:convert_single_cli"
marker = "marker.scripts.convert:convert_cli"
marker_chunk_convert = "marker.scripts.chunk_convert:chunk_convert_cli"

# Both app commands resolve to functions in marker.scripts.run_streamlit_app.
marker_extract = "marker.scripts.run_streamlit_app:extraction_app_cli"
marker_gui = "marker.scripts.run_streamlit_app:streamlit_app_cli"

# NOTE(review): streamlit, fastapi, uvicorn and python-multipart are declared
# only in the dev group, so the app and server commands presumably need them
# installed separately - confirm.
marker_server = "marker.scripts.server:server_cli"

# PEP 517 build configuration: source builds use poetry-core's backend.
[build-system]
# Bounded so that a future, incompatible major release of poetry-core cannot
# silently change or break how this project is built from source.
requires = ["poetry-core>=1.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"