Vik Paruchuri committed on
Commit
c959776
·
1 Parent(s): fc65ff4

Handle tables

Browse files
README.md CHANGED
@@ -10,7 +10,9 @@ This project converts PDF to Markdown, balancing speed with quality:
10
  ## Install
11
 
12
  - `poetry install`
 
13
  - Set `TESSDATA_PREFIX`
 
14
 
15
 
16
  ## Usage
 
10
  ## Install
11
 
12
  - `poetry install`
13
+ - Install apt requirements
14
  - Set `TESSDATA_PREFIX`
15
+ - Find tessdata folder
16
 
17
 
18
  ## Usage
apt-requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ tesseract-ocr
2
+ libtesseract-dev
convert.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import os
3
+ import ray
4
+ from tqdm import tqdm
5
+
6
+ from marker.convert import convert_single_pdf
7
+ from marker.segmentation import load_layout_model
8
+ from marker.cleaners.equations import load_nougat_model
9
+ from marker.settings import settings
10
+
11
+
12
+ @ray.remote(num_cpus=settings.RAY_CORES_PER_WORKER)
13
+ def process_single_pdf(fname, out_folder, nougat_model, layout_model):
14
+ out_filename = fname.rsplit(".", 1)[0] + ".md"
15
+ out_filename = os.path.join(out_folder, os.path.basename(out_filename))
16
+ if os.path.exists(out_filename):
17
+ return
18
+ try:
19
+ full_text = convert_single_pdf(fname, layout_model, nougat_model)
20
+ with open(out_filename, "w+") as f:
21
+ f.write(full_text)
22
+ except Exception as e:
23
+ print(f"Error converting {fname}: {e}")
24
+
25
+
26
+ if __name__ == "__main__":
27
+ parser = argparse.ArgumentParser(description="Convert multiple pdfs to markdown.")
28
+ parser.add_argument("in_folder", help="Input folder with pdfs.")
29
+ parser.add_argument("out_folder", help="Output folder")
30
+ parser.add_argument("--chunk_idx", type=int, default=0, help="Chunk index to convert")
31
+ parser.add_argument("--num_chunks", type=int, default=1, help="Number of chunks being processed in parallel")
32
+ parser.add_argument("--max", type=int, default=None, help="Maximum number of pdfs to convert")
33
+ parser.add_argument("--workers", type=int, default=5, help="Number of worker processes to use")
34
+
35
+ args = parser.parse_args()
36
+
37
+ in_folder = args.in_folder
38
+ out_folder = args.out_folder
39
+ files = [os.path.join(in_folder, f) for f in os.listdir(in_folder) if f.endswith(".pdf")]
40
+ os.makedirs(out_folder, exist_ok=True)
41
+
42
+ # Handle chunks if we're processing in parallel
43
+ chunk_size = len(files) // args.num_chunks
44
+ start_idx = args.chunk_idx * chunk_size
45
+ end_idx = start_idx + chunk_size
46
+ files_to_convert = files[start_idx:end_idx]
47
+
48
+ # Limit files converted if needed
49
+ if args.max:
50
+ files_to_convert = files_to_convert[:args.max]
51
+
52
+ total_processes = min(len(files), args.workers)
53
+
54
+ ray.init(
55
+ num_cpus=total_processes,
56
+ storage=settings.RAY_CACHE_PATH,
57
+ _temp_dir=settings.RAY_CACHE_PATH,
58
+ dashboard_host=settings.RAY_DASHBOARD_HOST
59
+ )
60
+
61
+ nougat_model = load_nougat_model()
62
+ layoutlm_model = load_layout_model()
63
+
64
+ nougat_ref = ray.put(nougat_model)
65
+ layoutlm_ref = ray.put(layoutlm_model)
66
+
67
+ print(f"Converting {len(files_to_convert)} pdfs with {total_processes} processes, and storing in {out_folder}")
68
+ futures = [process_single_pdf.remote(filename, out_folder, nougat_ref, layoutlm_ref) for filename in files_to_convert]
69
+
70
+ # Run all ray conversion tasks
71
+ progress_bar = tqdm(total=len(futures))
72
+ while len(futures) > 0:
73
+ finished, futures = ray.wait(
74
+ futures, timeout=7.0
75
+ )
76
+ finished_lst = ray.get(finished)
77
+ progress_bar.update(len(finished_lst))
78
+
79
+ # Shutdown ray to free resources
80
+ ray.shutdown()
convert_single.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ from marker.convert import convert_single_pdf
4
+ from marker.segmentation import load_layout_model
5
+ from marker.cleaners.equations import load_nougat_model
6
+
7
+
8
+ if __name__ == "__main__":
9
+ parser = argparse.ArgumentParser()
10
+ parser.add_argument("filename", help="PDF file to parse")
11
+ parser.add_argument("output", help="Output file name")
12
+ args = parser.parse_args()
13
+
14
+ fname = args.filename
15
+ layoutlm_model = load_layout_model()
16
+ nougat_model = load_nougat_model()
17
+ full_text = convert_single_pdf(fname, layoutlm_model, nougat_model)
18
+
19
+ with open(args.output, "w+") as f:
20
+ f.write(full_text)
marker/{code.py → cleaners/code.py} RENAMED
@@ -4,10 +4,10 @@ from typing import List
4
  import fitz as pymupdf
5
 
6
 
7
- def is_code_linelen(lines, thresh=70):
8
  # Decide based on chars per newline threshold
9
  total_alnum_chars = sum(len(re.findall(r'\w', line.prelim_text)) for line in lines)
10
- total_newlines = len(lines) - 1
11
 
12
  if total_alnum_chars == 0:
13
  return False
@@ -30,49 +30,51 @@ def identify_code_blocks(blocks: List[Page]):
30
  else:
31
  font_info += stats
32
  most_common_font = font_info.most_common(1)[0][0]
 
33
  for page in blocks:
34
  try:
35
- common_height = page.get_line_height_stats().most_common(1)[0][0]
36
- common_start = page.get_line_start_stats().most_common(1)[0][0]
37
  except IndexError:
38
  continue
39
 
40
  for block in page.blocks:
41
- if len(block.lines) < 2:
42
- continue
43
  if block.most_common_block_type() != "Text":
 
44
  continue
45
 
46
- is_code = []
47
  line_fonts = []
48
  for line in block.lines:
49
  fonts = [span.font for span in line.spans]
50
  line_fonts += fonts
51
- line_height = line.bbox[3] - line.bbox[1]
52
  line_start = line.bbox[0]
53
- if line_start > common_start:
54
- is_code.append(True)
55
  else:
56
- is_code.append(False)
57
  comment_lines = comment_count([line.prelim_text for line in block.lines])
58
  is_code = [
59
- len(block.lines) > 2,
60
- sum([f != most_common_font for f in line_fonts]) > len(line_fonts) // 1.5, # At least 1/3 of the fonts are not the most common, since code usually uses a different font from the main body text
61
- (
62
- sum(is_code) > len(block.lines) * .2
63
- or
64
- comment_lines > len(block.lines) * .1
65
- ), # 20% of lines are indented or comments
66
  (
67
- is_code_linelen(block.lines)
68
  or
69
- comment_lines > len(block.lines) * .1
70
- ), # 60 chars per newline or less for code, or 20% of lines are comments
71
  ]
72
 
73
- if all(is_code):
 
 
 
 
 
 
74
  block.set_block_type("Code")
75
 
 
 
76
 
77
  def indent_blocks(blocks: List[Page]):
78
  span_counter = 0
 
4
  import fitz as pymupdf
5
 
6
 
7
+ def is_code_linelen(lines, thresh=60):
8
  # Decide based on chars per newline threshold
9
  total_alnum_chars = sum(len(re.findall(r'\w', line.prelim_text)) for line in lines)
10
+ total_newlines = max(len(lines) - 1, 1)
11
 
12
  if total_alnum_chars == 0:
13
  return False
 
30
  else:
31
  font_info += stats
32
  most_common_font = font_info.most_common(1)[0][0]
33
+ last_block = None
34
  for page in blocks:
35
  try:
36
+ min_start = page.get_min_line_start()
 
37
  except IndexError:
38
  continue
39
 
40
  for block in page.blocks:
 
 
41
  if block.most_common_block_type() != "Text":
42
+ last_block = block
43
  continue
44
 
45
+ is_indent = []
46
  line_fonts = []
47
  for line in block.lines:
48
  fonts = [span.font for span in line.spans]
49
  line_fonts += fonts
 
50
  line_start = line.bbox[0]
51
+ if line_start > min_start:
52
+ is_indent.append(True)
53
  else:
54
+ is_indent.append(False)
55
  comment_lines = comment_count([line.prelim_text for line in block.lines])
56
  is_code = [
57
+ len(block.lines) > 3,
58
+ sum([f != most_common_font for f in line_fonts]) > len(line_fonts) * .8, # At least 80% of the fonts are not the most common, since code usually uses a different font from the main body text
59
+ is_code_linelen(block.lines),
 
 
 
 
60
  (
61
+ sum(is_indent) > len(block.lines) * .2
62
  or
63
+ comment_lines > len(block.lines) * .2
64
+ ), # 20% lines indented or 20% of the lines are comments
65
  ]
66
 
67
+ # Check if previous block is code, and this block is indented
68
+ is_code_prev = [
69
+ last_block and last_block.most_common_block_type() == "Code",
70
+ sum(is_indent) >= len(block.lines) * .8 # At least 80% indented
71
+ ]
72
+
73
+ if all(is_code) or all(is_code_prev):
74
  block.set_block_type("Code")
75
 
76
+ last_block = block
77
+
78
 
79
  def indent_blocks(blocks: List[Page]):
80
  span_counter = 0
marker/{equations.py → cleaners/equations.py} RENAMED
File without changes
marker/{headers.py → cleaners/headers.py} RENAMED
File without changes
marker/cleaners/table.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from marker.bbox import merge_boxes
2
+ from marker.schema import Line, Span, Block, Page
3
+ from copy import deepcopy
4
+ from tabulate import tabulate
5
+ from typing import List
6
+ import re
7
+ import textwrap
8
+
9
+
10
+ def merge_table_blocks(blocks: List[Page]):
11
+ last_block = None
12
+ current_lines = []
13
+ current_bbox = None
14
+ for page in blocks:
15
+ new_page_blocks = []
16
+ for block in page.blocks:
17
+ if block.most_common_block_type() != "Table":
18
+ if len(current_lines) > 0:
19
+ new_block = Block(
20
+ lines=deepcopy(current_lines),
21
+ pnum=last_block.pnum,
22
+ bbox=current_bbox
23
+ )
24
+ new_page_blocks.append(new_block)
25
+ current_lines = []
26
+ current_bbox = None
27
+
28
+ new_page_blocks.append(block)
29
+ last_block = block
30
+ continue
31
+
32
+ current_lines.extend(block.lines)
33
+ if current_bbox is None:
34
+ current_bbox = block.bbox
35
+ else:
36
+ current_bbox = merge_boxes(current_bbox, block.bbox)
37
+
38
+ if len(current_lines) > 0:
39
+ new_block = Block(
40
+ lines=deepcopy(current_lines),
41
+ pnum=last_block.pnum,
42
+ bbox=current_bbox
43
+ )
44
+ blocks[-1].blocks.append(new_block)
45
+ current_lines = []
46
+ current_bbox = []
47
+
48
+ page.blocks = new_page_blocks
49
+
50
+
51
+ def create_new_tables(blocks: List[Page]):
52
+ table_idx = 0
53
+ dot_pattern = re.compile(r'(\s*\.\s*){4,}')
54
+ dot_multiline_pattern = re.compile(r'.*(\s*\.\s*){4,}.*', re.DOTALL)
55
+
56
+ for page in blocks:
57
+ for block in page.blocks:
58
+ if block.most_common_block_type() != "Table" or len(block.lines) < 3:
59
+ continue
60
+
61
+ table_rows = []
62
+ y_coord = None
63
+ row = []
64
+ for line in block.lines:
65
+ for span in line.spans:
66
+ if y_coord != span.y_start:
67
+ if len(row) > 0:
68
+ table_rows.append(row)
69
+ row = []
70
+ y_coord = span.y_start
71
+
72
+ text = span.text
73
+ if dot_multiline_pattern.match(text):
74
+ text = dot_pattern.sub(' ', text)
75
+ row.append(text)
76
+ if len(row) > 0:
77
+ table_rows.append(row)
78
+ new_text = tabulate(table_rows, headers="firstrow", tablefmt="pipe")
79
+ new_span = Span(
80
+ bbox=block.bbox,
81
+ span_id=f"{table_idx}_fix_table",
82
+ font="Table",
83
+ color=0,
84
+ block_type="Table",
85
+ text=new_text
86
+ )
87
+ new_line = Line(
88
+ bbox=block.bbox,
89
+ spans=[new_span]
90
+ )
91
+ block.lines = [new_line]
92
+ table_idx += 1
parse.py → marker/convert.py RENAMED
@@ -1,11 +1,9 @@
1
- import argparse
2
-
3
  import fitz as pymupdf
4
  from marker.extract_text import get_text_blocks
5
- from marker.headers import categorize_blocks, filter_header_footer
6
- from marker.equations import replace_equations, load_nougat_model
7
  from marker.segmentation import detect_all_block_types, load_layout_model
8
- from marker.code import identify_code_blocks, indent_blocks
9
  from marker.markdown import merge_spans, merge_lines, get_full_text
10
  from marker.schema import Page, BlockType
11
  from typing import List
@@ -18,35 +16,26 @@ def annotate_spans(blocks: List[Page], block_types: List[BlockType]):
18
  page.add_block_types(page_block_types)
19
 
20
 
21
- if __name__ == "__main__":
22
- parser = argparse.ArgumentParser()
23
- parser.add_argument("filename", help="PDF file to parse")
24
- parser.add_argument("output", help="Output file name")
25
- args = parser.parse_args()
26
-
27
- fname = args.filename
28
  doc = pymupdf.open(fname)
29
  blocks, toc = get_text_blocks(doc)
30
 
31
- layoutlm_model = load_layout_model()
32
  block_types = detect_all_block_types(doc, blocks, layoutlm_model)
33
 
 
 
 
 
34
  filtered = deepcopy(blocks)
35
  annotate_spans(filtered, block_types)
36
  identify_code_blocks(filtered)
37
  indent_blocks(filtered)
38
 
39
- bad_span_ids = categorize_blocks(blocks)
40
- bad_span_ids += filter_header_footer(blocks)
41
-
42
- # Copy to avoid changing original data
43
-
44
  for page in filtered:
45
  for block in page.blocks:
46
  block.filter_spans(bad_span_ids)
47
  block.filter_bad_span_types(block_types[page.pnum])
48
 
49
- nougat_model = load_nougat_model()
50
  filtered = replace_equations(doc, filtered, block_types, nougat_model)
51
 
52
  # Copy to avoid changing original data
@@ -54,5 +43,4 @@ if __name__ == "__main__":
54
  text_blocks = merge_lines(merged_lines, filtered)
55
  full_text = get_full_text(text_blocks)
56
 
57
- with open(args.output, "w+") as f:
58
- f.write(full_text)
 
 
 
1
  import fitz as pymupdf
2
  from marker.extract_text import get_text_blocks
3
+ from marker.cleaners.headers import categorize_blocks, filter_header_footer
4
+ from marker.cleaners.equations import replace_equations, load_nougat_model
5
  from marker.segmentation import detect_all_block_types, load_layout_model
6
+ from marker.cleaners.code import identify_code_blocks, indent_blocks
7
  from marker.markdown import merge_spans, merge_lines, get_full_text
8
  from marker.schema import Page, BlockType
9
  from typing import List
 
16
  page.add_block_types(page_block_types)
17
 
18
 
19
+ def convert_single_pdf(fname: str, layoutlm_model, nougat_model):
 
 
 
 
 
 
20
  doc = pymupdf.open(fname)
21
  blocks, toc = get_text_blocks(doc)
22
 
 
23
  block_types = detect_all_block_types(doc, blocks, layoutlm_model)
24
 
25
+ # Find headers and footers
26
+ bad_span_ids = categorize_blocks(blocks)
27
+ bad_span_ids += filter_header_footer(blocks)
28
+
29
  filtered = deepcopy(blocks)
30
  annotate_spans(filtered, block_types)
31
  identify_code_blocks(filtered)
32
  indent_blocks(filtered)
33
 
 
 
 
 
 
34
  for page in filtered:
35
  for block in page.blocks:
36
  block.filter_spans(bad_span_ids)
37
  block.filter_bad_span_types(block_types[page.pnum])
38
 
 
39
  filtered = replace_equations(doc, filtered, block_types, nougat_model)
40
 
41
  # Copy to avoid changing original data
 
43
  text_blocks = merge_lines(merged_lines, filtered)
44
  full_text = get_full_text(text_blocks)
45
 
46
+ return full_text
 
marker/markdown.py CHANGED
@@ -55,8 +55,6 @@ def merge_spans(blocks):
55
 
56
 
57
  def block_surround(text, block_type):
58
- dot_pattern = re.compile(r'(\s*\.\s*){4,}')
59
- dot_multiline_pattern = re.compile(r'.*(\s*\.\s*){4,}.*', re.DOTALL)
60
  match block_type:
61
  case "Section-header":
62
  if not text.startswith("#"):
@@ -64,8 +62,8 @@ def block_surround(text, block_type):
64
  case "Title":
65
  if not text.startswith("#"):
66
  text = "# " + text.strip() + "\n"
67
- case "Table" if dot_multiline_pattern.match(text):
68
- text = dot_pattern.sub(' ', text)
69
  case "List-item":
70
  pass
71
  case "Code":
@@ -89,7 +87,7 @@ def line_separator(line1, line2, block_type, is_continuation=False):
89
 
90
  if block_type in ["Title", "Section-header"]:
91
  return line1.rstrip() + " " + line2.lstrip()
92
- elif lowercase_pattern1.match(line1) and lowercase_pattern2.match(line2):
93
  return line1.rstrip() + " " + line2.lstrip()
94
  elif is_continuation:
95
  return line1.rstrip() + " " + line2.lstrip()
 
55
 
56
 
57
  def block_surround(text, block_type):
 
 
58
  match block_type:
59
  case "Section-header":
60
  if not text.startswith("#"):
 
62
  case "Title":
63
  if not text.startswith("#"):
64
  text = "# " + text.strip() + "\n"
65
+ case "Table":
66
+ text = "\n" + text + "\n"
67
  case "List-item":
68
  pass
69
  case "Code":
 
87
 
88
  if block_type in ["Title", "Section-header"]:
89
  return line1.rstrip() + " " + line2.lstrip()
90
+ elif lowercase_pattern1.match(line1) and lowercase_pattern2.match(line2) and block_type == "Text":
91
  return line1.rstrip() + " " + line2.lstrip()
92
  elif is_continuation:
93
  return line1.rstrip() + " " + line2.lstrip()
marker/schema.py CHANGED
@@ -26,6 +26,22 @@ class BboxElement(BaseModel):
26
  raise ValueError('bbox must have 4 elements')
27
  return v
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  class BlockType(BboxElement):
31
  block_type: str
@@ -151,6 +167,11 @@ class Page(BaseModel):
151
  start_counts = Counter(starts)
152
  return start_counts
153
 
 
 
 
 
 
154
 
155
  class MergedLine(BboxElement):
156
  text: str
 
26
  raise ValueError('bbox must have 4 elements')
27
  return v
28
 
29
+ @property
30
+ def height(self):
31
+ return self.bbox[3] - self.bbox[1]
32
+
33
+ @property
34
+ def width(self):
35
+ return self.bbox[2] - self.bbox[0]
36
+
37
+ @property
38
+ def x_start(self):
39
+ return self.bbox[0]
40
+
41
+ @property
42
+ def y_start(self):
43
+ return self.bbox[1]
44
+
45
 
46
  class BlockType(BboxElement):
47
  block_type: str
 
167
  start_counts = Counter(starts)
168
  return start_counts
169
 
170
+ def get_min_line_start(self):
171
+ starts = [l.bbox[0] for l in self.get_nonblank_lines() if l.spans[0].block_type == "Text"]
172
+ if len(starts) == 0:
173
+ raise IndexError("No lines found")
174
+ return min(starts)
175
 
176
  class MergedLine(BboxElement):
177
  text: str
marker/segmentation.py CHANGED
@@ -19,6 +19,7 @@ NO_CHUNK_KEYS = ["pixel_values"]
19
 
20
  def load_layout_model():
21
  model = LayoutLMv3ForTokenClassification.from_pretrained("Kwan0/layoutlmv3-base-finetune-DocLayNet-100k").to(settings.TORCH_DEVICE)
 
22
  model.config.id2label = {
23
  0: "Caption",
24
  1: "Footnote",
@@ -33,7 +34,7 @@ def load_layout_model():
33
  10: "Title"
34
  }
35
 
36
- model.config.label2id = d = {v: k for k, v in model.config.id2label.items()}
37
  return model
38
 
39
 
 
19
 
20
  def load_layout_model():
21
  model = LayoutLMv3ForTokenClassification.from_pretrained("Kwan0/layoutlmv3-base-finetune-DocLayNet-100k").to(settings.TORCH_DEVICE)
22
+
23
  model.config.id2label = {
24
  0: "Caption",
25
  1: "Footnote",
 
34
  10: "Title"
35
  }
36
 
37
+ model.config.label2id = {v: k for k, v in model.config.id2label.items()}
38
  return model
39
 
40
 
marker/settings.py CHANGED
@@ -6,18 +6,30 @@ from pydantic_settings import BaseSettings
6
 
7
 
8
  class Settings(BaseSettings):
9
- # Path settings
10
- DPI: int = 400
11
- INVALID_CHARS: List[str] = [chr(0xfffd), "~", chr(65533), "↡"]
12
  TORCH_DEVICE: str = "cpu"
 
 
 
 
13
  TESSDATA_PREFIX: str = ""
14
- BAD_SPAN_TYPES: List[str] = ["Caption", "Footnote", "Page-footer", "Page-header", "Picture"]
 
15
  NOUGAT_MODEL_MAX: int = 1024 # Max inference length for nougat
16
  NOUGAT_HALLUCINATION_WORDS: List[str] = ["[MISSING_PAGE_POST]", "## References\n", "**Figure Captions**\n", "Footnote",
17
  "\par\par\par", "## Chapter", "Fig."]
 
 
 
18
  LAYOUT_MODEL_MAX: int = 512
19
  LAYOUT_CHUNK_OVERLAP: int = 128
20
 
 
 
 
 
 
 
21
  class Config:
22
  env_file = find_dotenv("local.env")
23
 
 
6
 
7
 
8
  class Settings(BaseSettings):
9
+ # General
 
 
10
  TORCH_DEVICE: str = "cpu"
11
+
12
+ # OCR
13
+ INVALID_CHARS: List[str] = [chr(0xfffd), "~", chr(65533), "↡"]
14
+ DPI: int = 400
15
  TESSDATA_PREFIX: str = ""
16
+
17
+ # Nougat Model
18
  NOUGAT_MODEL_MAX: int = 1024 # Max inference length for nougat
19
  NOUGAT_HALLUCINATION_WORDS: List[str] = ["[MISSING_PAGE_POST]", "## References\n", "**Figure Captions**\n", "Footnote",
20
  "\par\par\par", "## Chapter", "Fig."]
21
+
22
+ # Layout Model
23
+ BAD_SPAN_TYPES: List[str] = ["Caption", "Footnote", "Page-footer", "Page-header", "Picture"]
24
  LAYOUT_MODEL_MAX: int = 512
25
  LAYOUT_CHUNK_OVERLAP: int = 128
26
 
27
+ # Ray
28
+ RAY_CACHE_PATH: Optional[str] = None # Where to save ray cache
29
+ RAY_DASHBOARD_HOST: str = "127.0.0.1"
30
+ RAY_CORES_PER_WORKER: int = 1 # How many cpu cores to allocate per worker
31
+
32
+
33
  class Config:
34
  env_file = find_dotenv("local.env")
35
 
poetry.lock CHANGED
@@ -1786,6 +1786,71 @@ docs = ["sphinx"]
1786
  gmpy = ["gmpy2 (>=2.1.0a4)"]
1787
  tests = ["pytest (>=4.6)"]
1788
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1789
  [[package]]
1790
  name = "multidict"
1791
  version = "6.0.4"
@@ -2483,6 +2548,28 @@ files = [
2483
  [package.dependencies]
2484
  wcwidth = "*"
2485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2486
  [[package]]
2487
  name = "psutil"
2488
  version = "5.9.6"
@@ -3348,6 +3435,66 @@ files = [
3348
  [package.extras]
3349
  full = ["numpy"]
3350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3351
  [[package]]
3352
  name = "referencing"
3353
  version = "0.30.2"
@@ -4113,6 +4260,20 @@ files = [
4113
  [package.dependencies]
4114
  mpmath = ">=0.19"
4115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4116
  [[package]]
4117
  name = "terminado"
4118
  version = "0.17.1"
@@ -4616,13 +4777,13 @@ zstd = ["zstandard (>=0.18.0)"]
4616
 
4617
  [[package]]
4618
  name = "wcwidth"
4619
- version = "0.2.8"
4620
  description = "Measures the displayed width of unicode strings in a terminal"
4621
  optional = false
4622
  python-versions = "*"
4623
  files = [
4624
- {file = "wcwidth-0.2.8-py2.py3-none-any.whl", hash = "sha256:77f719e01648ed600dfa5402c347481c0992263b81a027344f3e1ba25493a704"},
4625
- {file = "wcwidth-0.2.8.tar.gz", hash = "sha256:8705c569999ffbb4f6a87c6d1b80f324bd6db952f5eb0b95bc07517f4c1813d4"},
4626
  ]
4627
 
4628
  [[package]]
@@ -4900,4 +5061,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
4900
  [metadata]
4901
  lock-version = "2.0"
4902
  python-versions = ">=3.9,<3.13"
4903
- content-hash = "005924f5f7020a79a158b551eb84385b83c796744fa32b57e2199d84f0c50736"
 
1786
  gmpy = ["gmpy2 (>=2.1.0a4)"]
1787
  tests = ["pytest (>=4.6)"]
1788
 
1789
+ [[package]]
1790
+ name = "msgpack"
1791
+ version = "1.0.7"
1792
+ description = "MessagePack serializer"
1793
+ optional = false
1794
+ python-versions = ">=3.8"
1795
+ files = [
1796
+ {file = "msgpack-1.0.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:04ad6069c86e531682f9e1e71b71c1c3937d6014a7c3e9edd2aa81ad58842862"},
1797
+ {file = "msgpack-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cca1b62fe70d761a282496b96a5e51c44c213e410a964bdffe0928e611368329"},
1798
+ {file = "msgpack-1.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e50ebce52f41370707f1e21a59514e3375e3edd6e1832f5e5235237db933c98b"},
1799
+ {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7b4f35de6a304b5533c238bee86b670b75b03d31b7797929caa7a624b5dda6"},
1800
+ {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28efb066cde83c479dfe5a48141a53bc7e5f13f785b92ddde336c716663039ee"},
1801
+ {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4cb14ce54d9b857be9591ac364cb08dc2d6a5c4318c1182cb1d02274029d590d"},
1802
+ {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b573a43ef7c368ba4ea06050a957c2a7550f729c31f11dd616d2ac4aba99888d"},
1803
+ {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ccf9a39706b604d884d2cb1e27fe973bc55f2890c52f38df742bc1d79ab9f5e1"},
1804
+ {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cb70766519500281815dfd7a87d3a178acf7ce95390544b8c90587d76b227681"},
1805
+ {file = "msgpack-1.0.7-cp310-cp310-win32.whl", hash = "sha256:b610ff0f24e9f11c9ae653c67ff8cc03c075131401b3e5ef4b82570d1728f8a9"},
1806
+ {file = "msgpack-1.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:a40821a89dc373d6427e2b44b572efc36a2778d3f543299e2f24eb1a5de65415"},
1807
+ {file = "msgpack-1.0.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:576eb384292b139821c41995523654ad82d1916da6a60cff129c715a6223ea84"},
1808
+ {file = "msgpack-1.0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:730076207cb816138cf1af7f7237b208340a2c5e749707457d70705715c93b93"},
1809
+ {file = "msgpack-1.0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:85765fdf4b27eb5086f05ac0491090fc76f4f2b28e09d9350c31aac25a5aaff8"},
1810
+ {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3476fae43db72bd11f29a5147ae2f3cb22e2f1a91d575ef130d2bf49afd21c46"},
1811
+ {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d4c80667de2e36970ebf74f42d1088cc9ee7ef5f4e8c35eee1b40eafd33ca5b"},
1812
+ {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b0bf0effb196ed76b7ad883848143427a73c355ae8e569fa538365064188b8e"},
1813
+ {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f9a7c509542db4eceed3dcf21ee5267ab565a83555c9b88a8109dcecc4709002"},
1814
+ {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:84b0daf226913133f899ea9b30618722d45feffa67e4fe867b0b5ae83a34060c"},
1815
+ {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ec79ff6159dffcc30853b2ad612ed572af86c92b5168aa3fc01a67b0fa40665e"},
1816
+ {file = "msgpack-1.0.7-cp311-cp311-win32.whl", hash = "sha256:3e7bf4442b310ff154b7bb9d81eb2c016b7d597e364f97d72b1acc3817a0fdc1"},
1817
+ {file = "msgpack-1.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:3f0c8c6dfa6605ab8ff0611995ee30d4f9fcff89966cf562733b4008a3d60d82"},
1818
+ {file = "msgpack-1.0.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f0936e08e0003f66bfd97e74ee530427707297b0d0361247e9b4f59ab78ddc8b"},
1819
+ {file = "msgpack-1.0.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98bbd754a422a0b123c66a4c341de0474cad4a5c10c164ceed6ea090f3563db4"},
1820
+ {file = "msgpack-1.0.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b291f0ee7961a597cbbcc77709374087fa2a9afe7bdb6a40dbbd9b127e79afee"},
1821
+ {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebbbba226f0a108a7366bf4b59bf0f30a12fd5e75100c630267d94d7f0ad20e5"},
1822
+ {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e2d69948e4132813b8d1131f29f9101bc2c915f26089a6d632001a5c1349672"},
1823
+ {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdf38ba2d393c7911ae989c3bbba510ebbcdf4ecbdbfec36272abe350c454075"},
1824
+ {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:993584fc821c58d5993521bfdcd31a4adf025c7d745bbd4d12ccfecf695af5ba"},
1825
+ {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:52700dc63a4676669b341ba33520f4d6e43d3ca58d422e22ba66d1736b0a6e4c"},
1826
+ {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e45ae4927759289c30ccba8d9fdce62bb414977ba158286b5ddaf8df2cddb5c5"},
1827
+ {file = "msgpack-1.0.7-cp312-cp312-win32.whl", hash = "sha256:27dcd6f46a21c18fa5e5deed92a43d4554e3df8d8ca5a47bf0615d6a5f39dbc9"},
1828
+ {file = "msgpack-1.0.7-cp312-cp312-win_amd64.whl", hash = "sha256:7687e22a31e976a0e7fc99c2f4d11ca45eff652a81eb8c8085e9609298916dcf"},
1829
+ {file = "msgpack-1.0.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5b6ccc0c85916998d788b295765ea0e9cb9aac7e4a8ed71d12e7d8ac31c23c95"},
1830
+ {file = "msgpack-1.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:235a31ec7db685f5c82233bddf9858748b89b8119bf4538d514536c485c15fe0"},
1831
+ {file = "msgpack-1.0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cab3db8bab4b7e635c1c97270d7a4b2a90c070b33cbc00c99ef3f9be03d3e1f7"},
1832
+ {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bfdd914e55e0d2c9e1526de210f6fe8ffe9705f2b1dfcc4aecc92a4cb4b533d"},
1833
+ {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36e17c4592231a7dbd2ed09027823ab295d2791b3b1efb2aee874b10548b7524"},
1834
+ {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38949d30b11ae5f95c3c91917ee7a6b239f5ec276f271f28638dec9156f82cfc"},
1835
+ {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ff1d0899f104f3921d94579a5638847f783c9b04f2d5f229392ca77fba5b82fc"},
1836
+ {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dc43f1ec66eb8440567186ae2f8c447d91e0372d793dfe8c222aec857b81a8cf"},
1837
+ {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dd632777ff3beaaf629f1ab4396caf7ba0bdd075d948a69460d13d44357aca4c"},
1838
+ {file = "msgpack-1.0.7-cp38-cp38-win32.whl", hash = "sha256:4e71bc4416de195d6e9b4ee93ad3f2f6b2ce11d042b4d7a7ee00bbe0358bd0c2"},
1839
+ {file = "msgpack-1.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:8f5b234f567cf76ee489502ceb7165c2a5cecec081db2b37e35332b537f8157c"},
1840
+ {file = "msgpack-1.0.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfef2bb6ef068827bbd021017a107194956918ab43ce4d6dc945ffa13efbc25f"},
1841
+ {file = "msgpack-1.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:484ae3240666ad34cfa31eea7b8c6cd2f1fdaae21d73ce2974211df099a95d81"},
1842
+ {file = "msgpack-1.0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3967e4ad1aa9da62fd53e346ed17d7b2e922cba5ab93bdd46febcac39be636fc"},
1843
+ {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dd178c4c80706546702c59529ffc005681bd6dc2ea234c450661b205445a34d"},
1844
+ {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ffbc252eb0d229aeb2f9ad051200668fc3a9aaa8994e49f0cb2ffe2b7867e7"},
1845
+ {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:822ea70dc4018c7e6223f13affd1c5c30c0f5c12ac1f96cd8e9949acddb48a61"},
1846
+ {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:384d779f0d6f1b110eae74cb0659d9aa6ff35aaf547b3955abf2ab4c901c4819"},
1847
+ {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f64e376cd20d3f030190e8c32e1c64582eba56ac6dc7d5b0b49a9d44021b52fd"},
1848
+ {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5ed82f5a7af3697b1c4786053736f24a0efd0a1b8a130d4c7bfee4b9ded0f08f"},
1849
+ {file = "msgpack-1.0.7-cp39-cp39-win32.whl", hash = "sha256:f26a07a6e877c76a88e3cecac8531908d980d3d5067ff69213653649ec0f60ad"},
1850
+ {file = "msgpack-1.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:1dc93e8e4653bdb5910aed79f11e165c85732067614f180f70534f056da97db3"},
1851
+ {file = "msgpack-1.0.7.tar.gz", hash = "sha256:572efc93db7a4d27e404501975ca6d2d9775705c2d922390d878fcf768d92c87"},
1852
+ ]
1853
+
1854
  [[package]]
1855
  name = "multidict"
1856
  version = "6.0.4"
 
2548
  [package.dependencies]
2549
  wcwidth = "*"
2550
 
2551
+ [[package]]
2552
+ name = "protobuf"
2553
+ version = "4.24.4"
2554
+ description = ""
2555
+ optional = false
2556
+ python-versions = ">=3.7"
2557
+ files = [
2558
+ {file = "protobuf-4.24.4-cp310-abi3-win32.whl", hash = "sha256:ec9912d5cb6714a5710e28e592ee1093d68c5ebfeda61983b3f40331da0b1ebb"},
2559
+ {file = "protobuf-4.24.4-cp310-abi3-win_amd64.whl", hash = "sha256:1badab72aa8a3a2b812eacfede5020472e16c6b2212d737cefd685884c191085"},
2560
+ {file = "protobuf-4.24.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e61a27f362369c2f33248a0ff6896c20dcd47b5d48239cb9720134bef6082e4"},
2561
+ {file = "protobuf-4.24.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:bffa46ad9612e6779d0e51ae586fde768339b791a50610d85eb162daeb23661e"},
2562
+ {file = "protobuf-4.24.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:b493cb590960ff863743b9ff1452c413c2ee12b782f48beca77c8da3e2ffe9d9"},
2563
+ {file = "protobuf-4.24.4-cp37-cp37m-win32.whl", hash = "sha256:dbbed8a56e56cee8d9d522ce844a1379a72a70f453bde6243e3c86c30c2a3d46"},
2564
+ {file = "protobuf-4.24.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6b7d2e1c753715dcfe9d284a25a52d67818dd43c4932574307daf836f0071e37"},
2565
+ {file = "protobuf-4.24.4-cp38-cp38-win32.whl", hash = "sha256:02212557a76cd99574775a81fefeba8738d0f668d6abd0c6b1d3adcc75503dbe"},
2566
+ {file = "protobuf-4.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:2fa3886dfaae6b4c5ed2730d3bf47c7a38a72b3a1f0acb4d4caf68e6874b947b"},
2567
+ {file = "protobuf-4.24.4-cp39-cp39-win32.whl", hash = "sha256:b77272f3e28bb416e2071186cb39efd4abbf696d682cbb5dc731308ad37fa6dd"},
2568
+ {file = "protobuf-4.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:9fee5e8aa20ef1b84123bb9232b3f4a5114d9897ed89b4b8142d81924e05d79b"},
2569
+ {file = "protobuf-4.24.4-py3-none-any.whl", hash = "sha256:80797ce7424f8c8d2f2547e2d42bfbb6c08230ce5832d6c099a37335c9c90a92"},
2570
+ {file = "protobuf-4.24.4.tar.gz", hash = "sha256:5a70731910cd9104762161719c3d883c960151eea077134458503723b60e3667"},
2571
+ ]
2572
+
2573
  [[package]]
2574
  name = "psutil"
2575
  version = "5.9.6"
 
3435
  [package.extras]
3436
  full = ["numpy"]
3437
 
3438
+ [[package]]
3439
+ name = "ray"
3440
+ version = "2.7.1"
3441
+ description = "Ray provides a simple, universal API for building distributed applications."
3442
+ optional = false
3443
+ python-versions = "*"
3444
+ files = [
3445
+ {file = "ray-2.7.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:4a2c98ab42881836894f20408ce40c0fd7fe5da7f0bc69cf22c951ccceda55ed"},
3446
+ {file = "ray-2.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:53800aadfc07152bc8672d5fa91bb4dc17d96b572a9bd436dd00fd2e0d07ef6a"},
3447
+ {file = "ray-2.7.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:17a425b4a2c2098f78fd0ab3831a35a53608d36466453e90c30a6495e9dce354"},
3448
+ {file = "ray-2.7.1-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:9681a8a7bf081e2244360206f3cd80d1a6adb4dc6330a507fd8c78ebe6e57365"},
3449
+ {file = "ray-2.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:148c77050ceab3c90739147bb86ac535e9590046cc36364ae9eb15469ea16fbc"},
3450
+ {file = "ray-2.7.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:0b0e80e26d6899820c12301626a74a209ab29373f46caf5b48c3ae3f99ec1bc7"},
3451
+ {file = "ray-2.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b5d13e910bb3449ef7b25084dcc4f0b9a763d3aa7b2fdd39e3b4d93d8c266951"},
3452
+ {file = "ray-2.7.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:0a6e8a736fe5294a0b0064679e59e393c66942db81fdf95804bdc1495d1f1651"},
3453
+ {file = "ray-2.7.1-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:f4c9f8a813444bd5346756db1a6d6e09a805b28b5fb6831e91b8d1324c12a888"},
3454
+ {file = "ray-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:85a8b0f122e4c14d2ee354fce9651834f7ffc9b60ebdce023a5ba8ca5841a6ee"},
3455
+ {file = "ray-2.7.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:bfa924bbc4042e83a0f31f058f08818418307252fceeee27c4c02bc0d3c02f3f"},
3456
+ {file = "ray-2.7.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:0f5657abb376eddf6b56489082d2f94ab36597a2f25da2849e2f66476b90dcc0"},
3457
+ {file = "ray-2.7.1-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:d548e1c67a512975c4241be64a8df2153ae6c29ee2f5b08834fadcad7dfc94a4"},
3458
+ {file = "ray-2.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:1f4c09a81971cc54d95be55b9b413fd12121a37528b402d1861a8fa0b4e85509"},
3459
+ {file = "ray-2.7.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:1f6d2508d117aac0b880d26a4db65a9f90def2d688709b62e0d039879c3afc7a"},
3460
+ {file = "ray-2.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32a6c0866d559d4e6c623ff220cd0790d2da1f3785073a5d0444b8f0486ff541"},
3461
+ {file = "ray-2.7.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d035642e6033f43551a0c17e2363a392739f01df6b4072c5ed71cf3096936d33"},
3462
+ {file = "ray-2.7.1-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:a366569d1bd220a92af0dbe092821a11d1ff8ad7b00ed4f74b8a5f380e34ccc7"},
3463
+ {file = "ray-2.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:6fe65dc7f83f1c617af3068d84f8c67f3371b1a48776e44ab6af54998891364c"},
3464
+ {file = "ray-2.7.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:3c1501ca56da394e07213efd5be42c2cf0a2eae68d76949d26a3133154d6d9ff"},
3465
+ {file = "ray-2.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57f7e05ad275317158c447680705e046410f68d2a5992e16d07bbc2cc79da2b3"},
3466
+ {file = "ray-2.7.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b5410ae53c765108c65821fc5e5968509579f98a64d275e103408e1b068e8ca8"},
3467
+ {file = "ray-2.7.1-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:1b096abab78b63db6c1a2633f242dd8b3c51e395b574215f3cb8e47f5d7364b9"},
3468
+ {file = "ray-2.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:c03fe26443598bd7ad1c22de4585daec324bc03eabc04d3c2f805d9697a554d6"},
3469
+ ]
3470
+
3471
+ [package.dependencies]
3472
+ aiosignal = "*"
3473
+ click = ">=7.0"
3474
+ filelock = "*"
3475
+ frozenlist = "*"
3476
+ jsonschema = "*"
3477
+ msgpack = ">=1.0.0,<2.0.0"
3478
+ numpy = {version = ">=1.19.3", markers = "python_version >= \"3.9\""}
3479
+ packaging = "*"
3480
+ protobuf = ">=3.15.3,<3.19.5 || >3.19.5"
3481
+ pyyaml = "*"
3482
+ requests = "*"
3483
+
3484
+ [package.extras]
3485
+ air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
3486
+ all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml", "ray-cpp (==2.7.1)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
3487
+ client = ["grpcio (!=1.56.0)"]
3488
+ cpp = ["ray-cpp (==2.7.1)"]
3489
+ data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"]
3490
+ default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"]
3491
+ observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"]
3492
+ rllib = ["dm-tree", "fsspec", "gymnasium (==0.28.1)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"]
3493
+ serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
3494
+ serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
3495
+ train = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"]
3496
+ tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"]
3497
+
3498
  [[package]]
3499
  name = "referencing"
3500
  version = "0.30.2"
 
4260
  [package.dependencies]
4261
  mpmath = ">=0.19"
4262
 
4263
+ [[package]]
4264
+ name = "tabulate"
4265
+ version = "0.9.0"
4266
+ description = "Pretty-print tabular data"
4267
+ optional = false
4268
+ python-versions = ">=3.7"
4269
+ files = [
4270
+ {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
4271
+ {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
4272
+ ]
4273
+
4274
+ [package.extras]
4275
+ widechars = ["wcwidth"]
4276
+
4277
  [[package]]
4278
  name = "terminado"
4279
  version = "0.17.1"
 
4777
 
4778
  [[package]]
4779
  name = "wcwidth"
4780
+ version = "0.2.9"
4781
  description = "Measures the displayed width of unicode strings in a terminal"
4782
  optional = false
4783
  python-versions = "*"
4784
  files = [
4785
+ {file = "wcwidth-0.2.9-py2.py3-none-any.whl", hash = "sha256:9a929bd8380f6cd9571a968a9c8f4353ca58d7cd812a4822bba831f8d685b223"},
4786
+ {file = "wcwidth-0.2.9.tar.gz", hash = "sha256:a675d1a4a2d24ef67096a04b85b02deeecd8e226f57b5e3a72dbb9ed99d27da8"},
4787
  ]
4788
 
4789
  [[package]]
 
5061
  [metadata]
5062
  lock-version = "2.0"
5063
  python-versions = ">=3.9,<3.13"
5064
+ content-hash = "026459e6ec77505270a4430f661e19c2db1f7e49876b64b08b7fdc83729915bd"
pyproject.toml CHANGED
@@ -16,9 +16,13 @@ pydantic = "^2.4.2"
16
  pydantic-settings = "^2.0.3"
17
  nougat-ocr = "^0.1.17"
18
  transformers = "^4.34.1"
19
- torch = "^2.1.0"
20
  numpy = "^1.26.1"
21
  python-dotenv = "^1.0.0"
 
 
 
 
 
22
 
23
  [tool.poetry.group.dev.dependencies]
24
  jupyter = "^1.0.0"
 
16
  pydantic-settings = "^2.0.3"
17
  nougat-ocr = "^0.1.17"
18
  transformers = "^4.34.1"
 
19
  numpy = "^1.26.1"
20
  python-dotenv = "^1.0.0"
21
+ torch = "^2.1.0"
22
+ ray = "^2.7.1"
23
+ tqdm = "^4.66.1"
24
+ tabulate = "^0.9.0"
25
+
26
 
27
  [tool.poetry.group.dev.dependencies]
28
  jupyter = "^1.0.0"