Spaces:

rt4u
/

marker

Sleeping

App Files Community

Vik Paruchuri committed on Oct 31, 2023

Commit

c959776

1 Parent(s): fc65ff4

Handle tables

Browse files

Files changed (15) hide show

README.md +2 -0
apt-requirements.txt +2 -0
convert.py +80 -0
convert_single.py +20 -0
marker/{code.py → cleaners/code.py} +24 -22
marker/{equations.py → cleaners/equations.py} +0 -0
marker/{headers.py → cleaners/headers.py} +0 -0
marker/cleaners/table.py +92 -0
parse.py → marker/convert.py +9 -21
marker/markdown.py +3 -5
marker/schema.py +21 -0
marker/segmentation.py +2 -1
marker/settings.py +16 -4
poetry.lock +165 -4
pyproject.toml +5 -1

README.md CHANGED Viewed

@@ -10,7 +10,9 @@ This project converts PDF to Markdown, balancing speed with quality:
 ## Install
 - `poetry install`
 - Set `TESSDATA_PREFIX`
 ## Usage

 ## Install
 - `poetry install`
+- Install apt requirements
 - Set `TESSDATA_PREFIX`
+  - Find tessdata folder
 ## Usage

apt-requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ tesseract-ocr
2	+ libtesseract-dev

convert.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import argparse
+import os
+import ray
+from tqdm import tqdm
+from marker.convert import convert_single_pdf
+from marker.segmentation import load_layout_model
+from marker.cleaners.equations import load_nougat_model
+from marker.settings import settings
+@ray.remote(num_cpus=settings.RAY_CORES_PER_WORKER)
+def process_single_pdf(fname, out_folder, nougat_model, layout_model):
+    out_filename = fname.rsplit(".", 1)[0] + ".md"
+    out_filename = os.path.join(out_folder, os.path.basename(out_filename))
+    if os.path.exists(out_filename):
+        return
+    try:
+        full_text = convert_single_pdf(fname, layout_model, nougat_model)
+        with open(out_filename, "w+") as f:
+            f.write(full_text)
+    except Exception as e:
+        print(f"Error converting {fname}: {e}")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Convert multiple pdfs to markdown.")
+    parser.add_argument("in_folder", help="Input folder with pdfs.")
+    parser.add_argument("out_folder", help="Output folder")
+    parser.add_argument("--chunk_idx", type=int, default=0, help="Chunk index to convert")
+    parser.add_argument("--num_chunks", type=int, default=1, help="Number of chunks being processed in parallel")
+    parser.add_argument("--max", type=int, default=None, help="Maximum number of pdfs to convert")
+    parser.add_argument("--workers", type=int, default=5, help="Number of worker processes to use")
+    args = parser.parse_args()
+    in_folder = args.in_folder
+    out_folder = args.out_folder
+    files = [os.path.join(in_folder, f) for f in os.listdir(in_folder) if f.endswith(".pdf")]
+    os.makedirs(out_folder, exist_ok=True)
+    # Handle chunks if we're processing in parallel
+    chunk_size = len(files) // args.num_chunks
+    start_idx = args.chunk_idx * chunk_size
+    end_idx = start_idx + chunk_size
+    files_to_convert = files[start_idx:end_idx]
+    # Limit files converted if needed
+    if args.max:
+        files_to_convert = files_to_convert[:args.max]
+    total_processes = min(len(files), args.workers)
+    ray.init(
+        num_cpus=total_processes,
+        storage=settings.RAY_CACHE_PATH,
+        _temp_dir=settings.RAY_CACHE_PATH,
+        dashboard_host=settings.RAY_DASHBOARD_HOST
+    )
+    nougat_model = load_nougat_model()
+    layoutlm_model = load_layout_model()
+    nougat_ref = ray.put(nougat_model)
+    layoutlm_ref = ray.put(layoutlm_model)
+    print(f"Converting {len(files_to_convert)} pdfs with {total_processes} processes, and storing in {out_folder}")
+    futures = [process_single_pdf.remote(filename, out_folder, nougat_ref, layoutlm_ref) for filename in files_to_convert]
+    # Run all ray conversion tasks
+    progress_bar = tqdm(total=len(futures))
+    while len(futures) > 0:
+        finished, futures = ray.wait(
+            futures, timeout=7.0
+        )
+        finished_lst = ray.get(finished)
+        progress_bar.update(len(finished_lst))
+    # Shutdown ray to free resources
+    ray.shutdown()

convert_single.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import argparse
+from marker.convert import convert_single_pdf
+from marker.segmentation import load_layout_model
+from marker.cleaners.equations import load_nougat_model
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("filename", help="PDF file to parse")
+    parser.add_argument("output", help="Output file name")
+    args = parser.parse_args()
+    fname = args.filename
+    layoutlm_model = load_layout_model()
+    nougat_model = load_nougat_model()
+    full_text = convert_single_pdf(fname, layoutlm_model, nougat_model)
+    with open(args.output, "w+") as f:
+        f.write(full_text)

marker/{code.py → cleaners/code.py} RENAMED Viewed

@@ -4,10 +4,10 @@ from typing import List
 import fitz as pymupdf
-def is_code_linelen(lines, thresh=70):
     # Decide based on chars per newline threshold
     total_alnum_chars = sum(len(re.findall(r'\w', line.prelim_text)) for line in lines)
-    total_newlines = len(lines) - 1
     if total_alnum_chars == 0:
         return False
@@ -30,49 +30,51 @@ def identify_code_blocks(blocks: List[Page]):
         else:
             font_info += stats
     most_common_font = font_info.most_common(1)[0][0]
     for page in blocks:
         try:
-            common_height = page.get_line_height_stats().most_common(1)[0][0]
-            common_start = page.get_line_start_stats().most_common(1)[0][0]
         except IndexError:
             continue
         for block in page.blocks:
-            if len(block.lines) < 2:
-                continue
             if block.most_common_block_type() != "Text":
                 continue
-            is_code = []
             line_fonts = []
             for line in block.lines:
                 fonts = [span.font for span in line.spans]
                 line_fonts += fonts
-                line_height = line.bbox[3] - line.bbox[1]
                 line_start = line.bbox[0]
-                if line_start > common_start:
-                    is_code.append(True)
                 else:
-                    is_code.append(False)
             comment_lines = comment_count([line.prelim_text for line in block.lines])
             is_code = [
-                len(block.lines) > 2,
-                sum([f != most_common_font for f in line_fonts]) > len(line_fonts) // 1.5,  # At least 1/3 of the fonts are not the most common, since code usually uses a different font from the main body text
-                (
-                    sum(is_code) > len(block.lines) * .2
-                    or
-                    comment_lines > len(block.lines) * .1
-                 ), # 20% of lines are indented or comments
                 (
-                    is_code_linelen(block.lines)
                     or
-                    comment_lines > len(block.lines) * .1
-                ), # 60 chars per newline or less for code, or 20% of lines are comments
             ]
-            if all(is_code):
                 block.set_block_type("Code")
 def indent_blocks(blocks: List[Page]):
     span_counter = 0

 import fitz as pymupdf
+def is_code_linelen(lines, thresh=60):
     # Decide based on chars per newline threshold
     total_alnum_chars = sum(len(re.findall(r'\w', line.prelim_text)) for line in lines)
+    total_newlines = max(len(lines) - 1, 1)
     if total_alnum_chars == 0:
         return False
         else:
             font_info += stats
     most_common_font = font_info.most_common(1)[0][0]
+    last_block = None
     for page in blocks:
         try:
+            min_start = page.get_min_line_start()
         except IndexError:
             continue
         for block in page.blocks:
             if block.most_common_block_type() != "Text":
+                last_block = block
                 continue
+            is_indent = []
             line_fonts = []
             for line in block.lines:
                 fonts = [span.font for span in line.spans]
                 line_fonts += fonts
                 line_start = line.bbox[0]
+                if line_start > min_start:
+                    is_indent.append(True)
                 else:
+                    is_indent.append(False)
             comment_lines = comment_count([line.prelim_text for line in block.lines])
             is_code = [
+                len(block.lines) > 3,
+                sum([f != most_common_font for f in line_fonts]) > len(line_fonts) * .8,  # At least 80% of the fonts are not the most common, since code usually uses a different font from the main body text
+                is_code_linelen(block.lines),
                 (
+                    sum(is_indent) > len(block.lines) * .2
                     or
+                    comment_lines > len(block.lines) * .2
+                 ), # 20% lines indented or 20% of the lines are comments
             ]
+            # Check if previous block is code, and this block is indented
+            is_code_prev = [
+                last_block and last_block.most_common_block_type() == "Code",
+                sum(is_indent) >= len(block.lines) * .8 # At least 80% indented
+            ]
+            if all(is_code) or all(is_code_prev):
                 block.set_block_type("Code")
+            last_block = block
 def indent_blocks(blocks: List[Page]):
     span_counter = 0

marker/{equations.py → cleaners/equations.py} RENAMED Viewed

File without changes

marker/{headers.py → cleaners/headers.py} RENAMED Viewed

File without changes

marker/cleaners/table.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from marker.bbox import merge_boxes
+from marker.schema import Line, Span, Block, Page
+from copy import deepcopy
+from tabulate import tabulate
+from typing import List
+import re
+import textwrap
+def merge_table_blocks(blocks: List[Page]):
+    last_block = None
+    current_lines = []
+    current_bbox = None
+    for page in blocks:
+        new_page_blocks = []
+        for block in page.blocks:
+            if block.most_common_block_type() != "Table":
+                if len(current_lines) > 0:
+                    new_block = Block(
+                        lines=deepcopy(current_lines),
+                        pnum=last_block.pnum,
+                        bbox=current_bbox
+                    )
+                    new_page_blocks.append(new_block)
+                    current_lines = []
+                    current_bbox = None
+                new_page_blocks.append(block)
+                last_block = block
+                continue
+            current_lines.extend(block.lines)
+            if current_bbox is None:
+                current_bbox = block.bbox
+            else:
+                current_bbox = merge_boxes(current_bbox, block.bbox)
+        if len(current_lines) > 0:
+            new_block = Block(
+                lines=deepcopy(current_lines),
+                pnum=last_block.pnum,
+                bbox=current_bbox
+            )
+            blocks[-1].blocks.append(new_block)
+            current_lines = []
+            current_bbox = []
+        page.blocks = new_page_blocks
+def create_new_tables(blocks: List[Page]):
+    table_idx = 0
+    dot_pattern = re.compile(r'(\s*\.\s*){4,}')
+    dot_multiline_pattern = re.compile(r'.*(\s*\.\s*){4,}.*', re.DOTALL)
+    for page in blocks:
+        for block in page.blocks:
+            if block.most_common_block_type() != "Table" or len(block.lines) < 3:
+                continue
+            table_rows = []
+            y_coord = None
+            row = []
+            for line in block.lines:
+                for span in line.spans:
+                    if y_coord != span.y_start:
+                        if len(row) > 0:
+                            table_rows.append(row)
+                            row = []
+                        y_coord = span.y_start
+                    text = span.text
+                    if dot_multiline_pattern.match(text):
+                        text = dot_pattern.sub(' ', text)
+                    row.append(text)
+            if len(row) > 0:
+                table_rows.append(row)
+            new_text = tabulate(table_rows, headers="firstrow", tablefmt="pipe")
+            new_span = Span(
+                bbox=block.bbox,
+                span_id=f"{table_idx}_fix_table",
+                font="Table",
+                color=0,
+                block_type="Table",
+                text=new_text
+            )
+            new_line = Line(
+                bbox=block.bbox,
+                spans=[new_span]
+            )
+            block.lines = [new_line]
+            table_idx += 1

parse.py → marker/convert.py RENAMED Viewed

@@ -1,11 +1,9 @@
-import argparse
 import fitz as pymupdf
 from marker.extract_text import get_text_blocks
-from marker.headers import categorize_blocks, filter_header_footer
-from marker.equations import replace_equations, load_nougat_model
 from marker.segmentation import detect_all_block_types, load_layout_model
-from marker.code import identify_code_blocks, indent_blocks
 from marker.markdown import merge_spans, merge_lines, get_full_text
 from marker.schema import Page, BlockType
 from typing import List
@@ -18,35 +16,26 @@ def annotate_spans(blocks: List[Page], block_types: List[BlockType]):
         page.add_block_types(page_block_types)
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("filename", help="PDF file to parse")
-    parser.add_argument("output", help="Output file name")
-    args = parser.parse_args()
-    fname = args.filename
     doc = pymupdf.open(fname)
     blocks, toc = get_text_blocks(doc)
-    layoutlm_model = load_layout_model()
     block_types = detect_all_block_types(doc, blocks, layoutlm_model)
     filtered = deepcopy(blocks)
     annotate_spans(filtered, block_types)
     identify_code_blocks(filtered)
     indent_blocks(filtered)
-    bad_span_ids = categorize_blocks(blocks)
-    bad_span_ids += filter_header_footer(blocks)
-    # Copy to avoid changing original data
     for page in filtered:
         for block in page.blocks:
             block.filter_spans(bad_span_ids)
             block.filter_bad_span_types(block_types[page.pnum])
-    nougat_model = load_nougat_model()
     filtered = replace_equations(doc, filtered, block_types, nougat_model)
     # Copy to avoid changing original data
@@ -54,5 +43,4 @@ if __name__ == "__main__":
     text_blocks = merge_lines(merged_lines, filtered)
     full_text = get_full_text(text_blocks)
-    with open(args.output, "w+") as f:
-        f.write(full_text)

 import fitz as pymupdf
 from marker.extract_text import get_text_blocks
+from marker.cleaners.headers import categorize_blocks, filter_header_footer
+from marker.cleaners.equations import replace_equations, load_nougat_model
 from marker.segmentation import detect_all_block_types, load_layout_model
+from marker.cleaners.code import identify_code_blocks, indent_blocks
 from marker.markdown import merge_spans, merge_lines, get_full_text
 from marker.schema import Page, BlockType
 from typing import List
         page.add_block_types(page_block_types)
+def convert_single_pdf(fname: str, layoutlm_model, nougat_model):
     doc = pymupdf.open(fname)
     blocks, toc = get_text_blocks(doc)
     block_types = detect_all_block_types(doc, blocks, layoutlm_model)
+    # Find headers and footers
+    bad_span_ids = categorize_blocks(blocks)
+    bad_span_ids += filter_header_footer(blocks)
     filtered = deepcopy(blocks)
     annotate_spans(filtered, block_types)
     identify_code_blocks(filtered)
     indent_blocks(filtered)
     for page in filtered:
         for block in page.blocks:
             block.filter_spans(bad_span_ids)
             block.filter_bad_span_types(block_types[page.pnum])
     filtered = replace_equations(doc, filtered, block_types, nougat_model)
     # Copy to avoid changing original data
     text_blocks = merge_lines(merged_lines, filtered)
     full_text = get_full_text(text_blocks)
+    return full_text

marker/markdown.py CHANGED Viewed

@@ -55,8 +55,6 @@ def merge_spans(blocks):
 def block_surround(text, block_type):
-    dot_pattern = re.compile(r'(\s*\.\s*){4,}')
-    dot_multiline_pattern = re.compile(r'.*(\s*\.\s*){4,}.*', re.DOTALL)
     match block_type:
         case "Section-header":
             if not text.startswith("#"):
@@ -64,8 +62,8 @@ def block_surround(text, block_type):
         case "Title":
             if not text.startswith("#"):
                 text = "# " + text.strip() + "\n"
-        case "Table" if dot_multiline_pattern.match(text):
-            text = dot_pattern.sub(' ', text)
         case "List-item":
             pass
         case "Code":
@@ -89,7 +87,7 @@ def line_separator(line1, line2, block_type, is_continuation=False):
     if block_type in ["Title", "Section-header"]:
         return line1.rstrip() + " " + line2.lstrip()
-    elif lowercase_pattern1.match(line1) and lowercase_pattern2.match(line2):
         return line1.rstrip() + " " + line2.lstrip()
     elif is_continuation:
         return line1.rstrip() + " " + line2.lstrip()

 def block_surround(text, block_type):
     match block_type:
         case "Section-header":
             if not text.startswith("#"):
         case "Title":
             if not text.startswith("#"):
                 text = "# " + text.strip() + "\n"
+        case "Table":
+            text = "\n" + text + "\n"
         case "List-item":
             pass
         case "Code":
     if block_type in ["Title", "Section-header"]:
         return line1.rstrip() + " " + line2.lstrip()
+    elif lowercase_pattern1.match(line1) and lowercase_pattern2.match(line2) and block_type == "Text":
         return line1.rstrip() + " " + line2.lstrip()
     elif is_continuation:
         return line1.rstrip() + " " + line2.lstrip()

marker/schema.py CHANGED Viewed

@@ -26,6 +26,22 @@ class BboxElement(BaseModel):
             raise ValueError('bbox must have 4 elements')
         return v
 class BlockType(BboxElement):
     block_type: str
@@ -151,6 +167,11 @@ class Page(BaseModel):
         start_counts = Counter(starts)
         return start_counts
 class MergedLine(BboxElement):
     text: str

             raise ValueError('bbox must have 4 elements')
         return v
+    @property
+    def height(self):
+        return self.bbox[3] - self.bbox[1]
+    @property
+    def width(self):
+        return self.bbox[2] - self.bbox[0]
+    @property
+    def x_start(self):
+        return self.bbox[0]
+    @property
+    def y_start(self):
+        return self.bbox[1]
 class BlockType(BboxElement):
     block_type: str
         start_counts = Counter(starts)
         return start_counts
+    def get_min_line_start(self):
+        starts = [l.bbox[0] for l in self.get_nonblank_lines() if l.spans[0].block_type == "Text"]
+        if len(starts) == 0:
+            raise IndexError("No lines found")
+        return min(starts)
 class MergedLine(BboxElement):
     text: str

marker/segmentation.py CHANGED Viewed

@@ -19,6 +19,7 @@ NO_CHUNK_KEYS = ["pixel_values"]
 def load_layout_model():
     model = LayoutLMv3ForTokenClassification.from_pretrained("Kwan0/layoutlmv3-base-finetune-DocLayNet-100k").to(settings.TORCH_DEVICE)
     model.config.id2label = {
         0: "Caption",
         1: "Footnote",
@@ -33,7 +34,7 @@ def load_layout_model():
         10: "Title"
     }
-    model.config.label2id = d = {v: k for k, v in model.config.id2label.items()}
     return model

 def load_layout_model():
     model = LayoutLMv3ForTokenClassification.from_pretrained("Kwan0/layoutlmv3-base-finetune-DocLayNet-100k").to(settings.TORCH_DEVICE)
     model.config.id2label = {
         0: "Caption",
         1: "Footnote",
         10: "Title"
     }
+    model.config.label2id = {v: k for k, v in model.config.id2label.items()}
     return model

marker/settings.py CHANGED Viewed

@@ -6,18 +6,30 @@ from pydantic_settings import BaseSettings
 class Settings(BaseSettings):
-    # Path settings
-    DPI: int = 400
-    INVALID_CHARS: List[str] = [chr(0xfffd), "~", chr(65533), "↵"]
     TORCH_DEVICE: str = "cpu"
     TESSDATA_PREFIX: str = ""
-    BAD_SPAN_TYPES: List[str] = ["Caption", "Footnote", "Page-footer", "Page-header", "Picture"]
     NOUGAT_MODEL_MAX: int = 1024 # Max inference length for nougat
     NOUGAT_HALLUCINATION_WORDS: List[str] = ["[MISSING_PAGE_POST]", "## References\n", "**Figure Captions**\n", "Footnote",
                                   "\par\par\par", "## Chapter", "Fig."]
     LAYOUT_MODEL_MAX: int = 512
     LAYOUT_CHUNK_OVERLAP: int = 128
     class Config:
         env_file = find_dotenv("local.env")

 class Settings(BaseSettings):
+    # General
     TORCH_DEVICE: str = "cpu"
+    # OCR
+    INVALID_CHARS: List[str] = [chr(0xfffd), "~", chr(65533), "↵"]
+    DPI: int = 400
     TESSDATA_PREFIX: str = ""
+    # Nougat Model
     NOUGAT_MODEL_MAX: int = 1024 # Max inference length for nougat
     NOUGAT_HALLUCINATION_WORDS: List[str] = ["[MISSING_PAGE_POST]", "## References\n", "**Figure Captions**\n", "Footnote",
                                   "\par\par\par", "## Chapter", "Fig."]
+    # Layout Model
+    BAD_SPAN_TYPES: List[str] = ["Caption", "Footnote", "Page-footer", "Page-header", "Picture"]
     LAYOUT_MODEL_MAX: int = 512
     LAYOUT_CHUNK_OVERLAP: int = 128
+    # Ray
+    RAY_CACHE_PATH: Optional[str] = None # Where to save ray cache
+    RAY_DASHBOARD_HOST: str = "127.0.0.1"
+    RAY_CORES_PER_WORKER: int = 1 # How many cpu cores to allocate per worker
     class Config:
         env_file = find_dotenv("local.env")

poetry.lock CHANGED Viewed

@@ -1786,6 +1786,71 @@ docs = ["sphinx"]
 gmpy = ["gmpy2 (>=2.1.0a4)"]
 tests = ["pytest (>=4.6)"]
 [[package]]
 name = "multidict"
 version = "6.0.4"
@@ -2483,6 +2548,28 @@ files = [
 [package.dependencies]
 wcwidth = "*"
 [[package]]
 name = "psutil"
 version = "5.9.6"
@@ -3348,6 +3435,66 @@ files = [
 [package.extras]
 full = ["numpy"]
 [[package]]
 name = "referencing"
 version = "0.30.2"
@@ -4113,6 +4260,20 @@ files = [
 [package.dependencies]
 mpmath = ">=0.19"
 [[package]]
 name = "terminado"
 version = "0.17.1"
@@ -4616,13 +4777,13 @@ zstd = ["zstandard (>=0.18.0)"]
 [[package]]
 name = "wcwidth"
-version = "0.2.8"
 description = "Measures the displayed width of unicode strings in a terminal"
 optional = false
 python-versions = "*"
 files = [
-    {file = "wcwidth-0.2.8-py2.py3-none-any.whl", hash = "sha256:77f719e01648ed600dfa5402c347481c0992263b81a027344f3e1ba25493a704"},
-    {file = "wcwidth-0.2.8.tar.gz", hash = "sha256:8705c569999ffbb4f6a87c6d1b80f324bd6db952f5eb0b95bc07517f4c1813d4"},
 ]
 [[package]]
@@ -4900,4 +5061,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13"
-content-hash = "005924f5f7020a79a158b551eb84385b83c796744fa32b57e2199d84f0c50736"

 gmpy = ["gmpy2 (>=2.1.0a4)"]
 tests = ["pytest (>=4.6)"]
+[[package]]
+name = "msgpack"
+version = "1.0.7"
+description = "MessagePack serializer"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "msgpack-1.0.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:04ad6069c86e531682f9e1e71b71c1c3937d6014a7c3e9edd2aa81ad58842862"},
+    {file = "msgpack-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cca1b62fe70d761a282496b96a5e51c44c213e410a964bdffe0928e611368329"},
+    {file = "msgpack-1.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e50ebce52f41370707f1e21a59514e3375e3edd6e1832f5e5235237db933c98b"},
+    {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a7b4f35de6a304b5533c238bee86b670b75b03d31b7797929caa7a624b5dda6"},
+    {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28efb066cde83c479dfe5a48141a53bc7e5f13f785b92ddde336c716663039ee"},
+    {file = "msgpack-1.0.7-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4cb14ce54d9b857be9591ac364cb08dc2d6a5c4318c1182cb1d02274029d590d"},
+    {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b573a43ef7c368ba4ea06050a957c2a7550f729c31f11dd616d2ac4aba99888d"},
+    {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ccf9a39706b604d884d2cb1e27fe973bc55f2890c52f38df742bc1d79ab9f5e1"},
+    {file = "msgpack-1.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cb70766519500281815dfd7a87d3a178acf7ce95390544b8c90587d76b227681"},
+    {file = "msgpack-1.0.7-cp310-cp310-win32.whl", hash = "sha256:b610ff0f24e9f11c9ae653c67ff8cc03c075131401b3e5ef4b82570d1728f8a9"},
+    {file = "msgpack-1.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:a40821a89dc373d6427e2b44b572efc36a2778d3f543299e2f24eb1a5de65415"},
+    {file = "msgpack-1.0.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:576eb384292b139821c41995523654ad82d1916da6a60cff129c715a6223ea84"},
+    {file = "msgpack-1.0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:730076207cb816138cf1af7f7237b208340a2c5e749707457d70705715c93b93"},
+    {file = "msgpack-1.0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:85765fdf4b27eb5086f05ac0491090fc76f4f2b28e09d9350c31aac25a5aaff8"},
+    {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3476fae43db72bd11f29a5147ae2f3cb22e2f1a91d575ef130d2bf49afd21c46"},
+    {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d4c80667de2e36970ebf74f42d1088cc9ee7ef5f4e8c35eee1b40eafd33ca5b"},
+    {file = "msgpack-1.0.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b0bf0effb196ed76b7ad883848143427a73c355ae8e569fa538365064188b8e"},
+    {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f9a7c509542db4eceed3dcf21ee5267ab565a83555c9b88a8109dcecc4709002"},
+    {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:84b0daf226913133f899ea9b30618722d45feffa67e4fe867b0b5ae83a34060c"},
+    {file = "msgpack-1.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ec79ff6159dffcc30853b2ad612ed572af86c92b5168aa3fc01a67b0fa40665e"},
+    {file = "msgpack-1.0.7-cp311-cp311-win32.whl", hash = "sha256:3e7bf4442b310ff154b7bb9d81eb2c016b7d597e364f97d72b1acc3817a0fdc1"},
+    {file = "msgpack-1.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:3f0c8c6dfa6605ab8ff0611995ee30d4f9fcff89966cf562733b4008a3d60d82"},
+    {file = "msgpack-1.0.7-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f0936e08e0003f66bfd97e74ee530427707297b0d0361247e9b4f59ab78ddc8b"},
+    {file = "msgpack-1.0.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98bbd754a422a0b123c66a4c341de0474cad4a5c10c164ceed6ea090f3563db4"},
+    {file = "msgpack-1.0.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b291f0ee7961a597cbbcc77709374087fa2a9afe7bdb6a40dbbd9b127e79afee"},
+    {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebbbba226f0a108a7366bf4b59bf0f30a12fd5e75100c630267d94d7f0ad20e5"},
+    {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e2d69948e4132813b8d1131f29f9101bc2c915f26089a6d632001a5c1349672"},
+    {file = "msgpack-1.0.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdf38ba2d393c7911ae989c3bbba510ebbcdf4ecbdbfec36272abe350c454075"},
+    {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:993584fc821c58d5993521bfdcd31a4adf025c7d745bbd4d12ccfecf695af5ba"},
+    {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:52700dc63a4676669b341ba33520f4d6e43d3ca58d422e22ba66d1736b0a6e4c"},
+    {file = "msgpack-1.0.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e45ae4927759289c30ccba8d9fdce62bb414977ba158286b5ddaf8df2cddb5c5"},
+    {file = "msgpack-1.0.7-cp312-cp312-win32.whl", hash = "sha256:27dcd6f46a21c18fa5e5deed92a43d4554e3df8d8ca5a47bf0615d6a5f39dbc9"},
+    {file = "msgpack-1.0.7-cp312-cp312-win_amd64.whl", hash = "sha256:7687e22a31e976a0e7fc99c2f4d11ca45eff652a81eb8c8085e9609298916dcf"},
+    {file = "msgpack-1.0.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5b6ccc0c85916998d788b295765ea0e9cb9aac7e4a8ed71d12e7d8ac31c23c95"},
+    {file = "msgpack-1.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:235a31ec7db685f5c82233bddf9858748b89b8119bf4538d514536c485c15fe0"},
+    {file = "msgpack-1.0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cab3db8bab4b7e635c1c97270d7a4b2a90c070b33cbc00c99ef3f9be03d3e1f7"},
+    {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bfdd914e55e0d2c9e1526de210f6fe8ffe9705f2b1dfcc4aecc92a4cb4b533d"},
+    {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36e17c4592231a7dbd2ed09027823ab295d2791b3b1efb2aee874b10548b7524"},
+    {file = "msgpack-1.0.7-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38949d30b11ae5f95c3c91917ee7a6b239f5ec276f271f28638dec9156f82cfc"},
+    {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ff1d0899f104f3921d94579a5638847f783c9b04f2d5f229392ca77fba5b82fc"},
+    {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dc43f1ec66eb8440567186ae2f8c447d91e0372d793dfe8c222aec857b81a8cf"},
+    {file = "msgpack-1.0.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dd632777ff3beaaf629f1ab4396caf7ba0bdd075d948a69460d13d44357aca4c"},
+    {file = "msgpack-1.0.7-cp38-cp38-win32.whl", hash = "sha256:4e71bc4416de195d6e9b4ee93ad3f2f6b2ce11d042b4d7a7ee00bbe0358bd0c2"},
+    {file = "msgpack-1.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:8f5b234f567cf76ee489502ceb7165c2a5cecec081db2b37e35332b537f8157c"},
+    {file = "msgpack-1.0.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfef2bb6ef068827bbd021017a107194956918ab43ce4d6dc945ffa13efbc25f"},
+    {file = "msgpack-1.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:484ae3240666ad34cfa31eea7b8c6cd2f1fdaae21d73ce2974211df099a95d81"},
+    {file = "msgpack-1.0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3967e4ad1aa9da62fd53e346ed17d7b2e922cba5ab93bdd46febcac39be636fc"},
+    {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dd178c4c80706546702c59529ffc005681bd6dc2ea234c450661b205445a34d"},
+    {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ffbc252eb0d229aeb2f9ad051200668fc3a9aaa8994e49f0cb2ffe2b7867e7"},
+    {file = "msgpack-1.0.7-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:822ea70dc4018c7e6223f13affd1c5c30c0f5c12ac1f96cd8e9949acddb48a61"},
+    {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:384d779f0d6f1b110eae74cb0659d9aa6ff35aaf547b3955abf2ab4c901c4819"},
+    {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f64e376cd20d3f030190e8c32e1c64582eba56ac6dc7d5b0b49a9d44021b52fd"},
+    {file = "msgpack-1.0.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5ed82f5a7af3697b1c4786053736f24a0efd0a1b8a130d4c7bfee4b9ded0f08f"},
+    {file = "msgpack-1.0.7-cp39-cp39-win32.whl", hash = "sha256:f26a07a6e877c76a88e3cecac8531908d980d3d5067ff69213653649ec0f60ad"},
+    {file = "msgpack-1.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:1dc93e8e4653bdb5910aed79f11e165c85732067614f180f70534f056da97db3"},
+    {file = "msgpack-1.0.7.tar.gz", hash = "sha256:572efc93db7a4d27e404501975ca6d2d9775705c2d922390d878fcf768d92c87"},
+]
 [[package]]
 name = "multidict"
 version = "6.0.4"
 [package.dependencies]
 wcwidth = "*"
+[[package]]
+name = "protobuf"
+version = "4.24.4"
+description = ""
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "protobuf-4.24.4-cp310-abi3-win32.whl", hash = "sha256:ec9912d5cb6714a5710e28e592ee1093d68c5ebfeda61983b3f40331da0b1ebb"},
+    {file = "protobuf-4.24.4-cp310-abi3-win_amd64.whl", hash = "sha256:1badab72aa8a3a2b812eacfede5020472e16c6b2212d737cefd685884c191085"},
+    {file = "protobuf-4.24.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e61a27f362369c2f33248a0ff6896c20dcd47b5d48239cb9720134bef6082e4"},
+    {file = "protobuf-4.24.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:bffa46ad9612e6779d0e51ae586fde768339b791a50610d85eb162daeb23661e"},
+    {file = "protobuf-4.24.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:b493cb590960ff863743b9ff1452c413c2ee12b782f48beca77c8da3e2ffe9d9"},
+    {file = "protobuf-4.24.4-cp37-cp37m-win32.whl", hash = "sha256:dbbed8a56e56cee8d9d522ce844a1379a72a70f453bde6243e3c86c30c2a3d46"},
+    {file = "protobuf-4.24.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6b7d2e1c753715dcfe9d284a25a52d67818dd43c4932574307daf836f0071e37"},
+    {file = "protobuf-4.24.4-cp38-cp38-win32.whl", hash = "sha256:02212557a76cd99574775a81fefeba8738d0f668d6abd0c6b1d3adcc75503dbe"},
+    {file = "protobuf-4.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:2fa3886dfaae6b4c5ed2730d3bf47c7a38a72b3a1f0acb4d4caf68e6874b947b"},
+    {file = "protobuf-4.24.4-cp39-cp39-win32.whl", hash = "sha256:b77272f3e28bb416e2071186cb39efd4abbf696d682cbb5dc731308ad37fa6dd"},
+    {file = "protobuf-4.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:9fee5e8aa20ef1b84123bb9232b3f4a5114d9897ed89b4b8142d81924e05d79b"},
+    {file = "protobuf-4.24.4-py3-none-any.whl", hash = "sha256:80797ce7424f8c8d2f2547e2d42bfbb6c08230ce5832d6c099a37335c9c90a92"},
+    {file = "protobuf-4.24.4.tar.gz", hash = "sha256:5a70731910cd9104762161719c3d883c960151eea077134458503723b60e3667"},
+]
 [[package]]
 name = "psutil"
 version = "5.9.6"
 [package.extras]
 full = ["numpy"]
+[[package]]
+name = "ray"
+version = "2.7.1"
+description = "Ray provides a simple, universal API for building distributed applications."
+optional = false
+python-versions = "*"
+files = [
+    {file = "ray-2.7.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:4a2c98ab42881836894f20408ce40c0fd7fe5da7f0bc69cf22c951ccceda55ed"},
+    {file = "ray-2.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:53800aadfc07152bc8672d5fa91bb4dc17d96b572a9bd436dd00fd2e0d07ef6a"},
+    {file = "ray-2.7.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:17a425b4a2c2098f78fd0ab3831a35a53608d36466453e90c30a6495e9dce354"},
+    {file = "ray-2.7.1-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:9681a8a7bf081e2244360206f3cd80d1a6adb4dc6330a507fd8c78ebe6e57365"},
+    {file = "ray-2.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:148c77050ceab3c90739147bb86ac535e9590046cc36364ae9eb15469ea16fbc"},
+    {file = "ray-2.7.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:0b0e80e26d6899820c12301626a74a209ab29373f46caf5b48c3ae3f99ec1bc7"},
+    {file = "ray-2.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b5d13e910bb3449ef7b25084dcc4f0b9a763d3aa7b2fdd39e3b4d93d8c266951"},
+    {file = "ray-2.7.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:0a6e8a736fe5294a0b0064679e59e393c66942db81fdf95804bdc1495d1f1651"},
+    {file = "ray-2.7.1-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:f4c9f8a813444bd5346756db1a6d6e09a805b28b5fb6831e91b8d1324c12a888"},
+    {file = "ray-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:85a8b0f122e4c14d2ee354fce9651834f7ffc9b60ebdce023a5ba8ca5841a6ee"},
+    {file = "ray-2.7.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:bfa924bbc4042e83a0f31f058f08818418307252fceeee27c4c02bc0d3c02f3f"},
+    {file = "ray-2.7.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:0f5657abb376eddf6b56489082d2f94ab36597a2f25da2849e2f66476b90dcc0"},
+    {file = "ray-2.7.1-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:d548e1c67a512975c4241be64a8df2153ae6c29ee2f5b08834fadcad7dfc94a4"},
+    {file = "ray-2.7.1-cp37-cp37m-win_amd64.whl", hash = "sha256:1f4c09a81971cc54d95be55b9b413fd12121a37528b402d1861a8fa0b4e85509"},
+    {file = "ray-2.7.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:1f6d2508d117aac0b880d26a4db65a9f90def2d688709b62e0d039879c3afc7a"},
+    {file = "ray-2.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32a6c0866d559d4e6c623ff220cd0790d2da1f3785073a5d0444b8f0486ff541"},
+    {file = "ray-2.7.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:d035642e6033f43551a0c17e2363a392739f01df6b4072c5ed71cf3096936d33"},
+    {file = "ray-2.7.1-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:a366569d1bd220a92af0dbe092821a11d1ff8ad7b00ed4f74b8a5f380e34ccc7"},
+    {file = "ray-2.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:6fe65dc7f83f1c617af3068d84f8c67f3371b1a48776e44ab6af54998891364c"},
+    {file = "ray-2.7.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:3c1501ca56da394e07213efd5be42c2cf0a2eae68d76949d26a3133154d6d9ff"},
+    {file = "ray-2.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:57f7e05ad275317158c447680705e046410f68d2a5992e16d07bbc2cc79da2b3"},
+    {file = "ray-2.7.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b5410ae53c765108c65821fc5e5968509579f98a64d275e103408e1b068e8ca8"},
+    {file = "ray-2.7.1-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:1b096abab78b63db6c1a2633f242dd8b3c51e395b574215f3cb8e47f5d7364b9"},
+    {file = "ray-2.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:c03fe26443598bd7ad1c22de4585daec324bc03eabc04d3c2f805d9697a554d6"},
+]
+[package.dependencies]
+aiosignal = "*"
+click = ">=7.0"
+filelock = "*"
+frozenlist = "*"
+jsonschema = "*"
+msgpack = ">=1.0.0,<2.0.0"
+numpy = {version = ">=1.19.3", markers = "python_version >= \"3.9\""}
+packaging = "*"
+protobuf = ">=3.15.3,<3.19.5 || >3.19.5"
+pyyaml = "*"
+requests = "*"
+[package.extras]
+air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
+all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml", "ray-cpp (==2.7.1)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
+client = ["grpcio (!=1.56.0)"]
+cpp = ["ray-cpp (==2.7.1)"]
+data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"]
+default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"]
+observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"]
+rllib = ["dm-tree", "fsspec", "gymnasium (==0.28.1)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"]
+serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
+serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"]
+train = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"]
+tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"]
 [[package]]
 name = "referencing"
 version = "0.30.2"
 [package.dependencies]
 mpmath = ">=0.19"
+[[package]]
+name = "tabulate"
+version = "0.9.0"
+description = "Pretty-print tabular data"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
+    {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
+]
+[package.extras]
+widechars = ["wcwidth"]
 [[package]]
 name = "terminado"
 version = "0.17.1"
 [[package]]
 name = "wcwidth"
+version = "0.2.9"
 description = "Measures the displayed width of unicode strings in a terminal"
 optional = false
 python-versions = "*"
 files = [
+    {file = "wcwidth-0.2.9-py2.py3-none-any.whl", hash = "sha256:9a929bd8380f6cd9571a968a9c8f4353ca58d7cd812a4822bba831f8d685b223"},
+    {file = "wcwidth-0.2.9.tar.gz", hash = "sha256:a675d1a4a2d24ef67096a04b85b02deeecd8e226f57b5e3a72dbb9ed99d27da8"},
 ]
 [[package]]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13"
+content-hash = "026459e6ec77505270a4430f661e19c2db1f7e49876b64b08b7fdc83729915bd"

pyproject.toml CHANGED Viewed

@@ -16,9 +16,13 @@ pydantic = "^2.4.2"
 pydantic-settings = "^2.0.3"
 nougat-ocr = "^0.1.17"
 transformers = "^4.34.1"
-torch = "^2.1.0"
 numpy = "^1.26.1"
 python-dotenv = "^1.0.0"
 [tool.poetry.group.dev.dependencies]
 jupyter = "^1.0.0"

 pydantic-settings = "^2.0.3"
 nougat-ocr = "^0.1.17"
 transformers = "^4.34.1"
 numpy = "^1.26.1"
 python-dotenv = "^1.0.0"
+torch = "^2.1.0"
+ray = "^2.7.1"
+tqdm = "^4.66.1"
+tabulate = "^0.9.0"
 [tool.poetry.group.dev.dependencies]
 jupyter = "^1.0.0"