Vik Paruchuri committed on
Commit
d3ec0e6
·
1 Parent(s): a1a7dcc

Update inline math line merges

Browse files
marker/builders/line.py CHANGED
@@ -113,7 +113,11 @@ class LineBuilder(BaseBuilder):
113
  def __call__(self, document: Document, provider: PdfProvider):
114
  # Disable Inline Detection for documents where layout model doesn't detect any equations
115
  # Also disable if we won't use the inline detections (if we aren't using the LLM or texify)
116
- do_inline_math_detection = document.contained_blocks([BlockTypes.Equation, BlockTypes.TextInlineMath]) and (self.texify_inline_spans or self.use_llm)
 
 
 
 
117
  provider_lines, ocr_lines = self.get_all_lines(document, provider, do_inline_math_detection)
118
  self.merge_blocks(document, provider_lines, ocr_lines)
119
 
@@ -186,7 +190,7 @@ class LineBuilder(BaseBuilder):
186
  if sum(layout_good) > len(document.pages) * self.min_document_ocr_threshold:
187
  layout_good = [True] * len(document.pages)
188
 
189
- run_detection = [not good or do_inline_math_detection for good in layout_good]
190
  page_images = [page.get_image(highres=False, remove_blocks=self.ocr_remove_blocks) for page, good in zip(document.pages, run_detection) if good]
191
 
192
  # Note: run_detection is longer than page_images, since it has a value for each page, not just good ones
@@ -416,13 +420,29 @@ class LineBuilder(BaseBuilder):
416
  best_overlap = np.argmax(overlaps[i])
417
  merge_lines[best_overlap].append(i)
418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  # Handle the merging
420
  already_merged = set()
421
- potential_merges = set(chain.from_iterable(merge_lines.values()))
422
  out_provider_lines = [(i, p) for i, p in enumerate(provider_lines) if i not in potential_merges]
423
- for line_idx in merge_lines:
424
  text_line = text_lines[line_idx]
425
- merge_section = merge_lines[line_idx]
426
  merge_section = [m for m in merge_section if m not in already_merged]
427
  if len(merge_section) == 0:
428
  continue
 
113
  def __call__(self, document: Document, provider: PdfProvider):
114
  # Disable Inline Detection for documents where layout model doesn't detect any equations
115
  # Also disable if we won't use the inline detections (if we aren't using the LLM or texify)
116
+ do_inline_math_detection = all([
117
+ len(document.contained_blocks([BlockTypes.Equation, BlockTypes.TextInlineMath])) > 0,
118
+ (self.texify_inline_spans or self.use_llm)
119
+ ])
120
+
121
  provider_lines, ocr_lines = self.get_all_lines(document, provider, do_inline_math_detection)
122
  self.merge_blocks(document, provider_lines, ocr_lines)
123
 
 
190
  if sum(layout_good) > len(document.pages) * self.min_document_ocr_threshold:
191
  layout_good = [True] * len(document.pages)
192
 
193
+ run_detection = [(not good or do_inline_math_detection) for good in layout_good]
194
  page_images = [page.get_image(highres=False, remove_blocks=self.ocr_remove_blocks) for page, good in zip(document.pages, run_detection) if good]
195
 
196
  # Note: run_detection is longer than page_images, since it has a value for each page, not just good ones
 
420
  best_overlap = np.argmax(overlaps[i])
421
  merge_lines[best_overlap].append(i)
422
 
423
+ # Filter to get rid of detected lines that include multiple provider lines
424
+ filtered_merge_lines = {}
425
+ for line_idx in merge_lines:
426
+ first_line = horizontal_provider_lines[merge_lines[line_idx][0]][1].line.polygon
427
+ all_close = all([
428
+ (
429
+ abs(horizontal_provider_lines[ml][1].line.polygon.y_start - first_line.y_start) < self.inline_math_line_vertical_merge_threshold
430
+ or
431
+ abs(horizontal_provider_lines[ml][1].line.polygon.y_end - first_line.y_end) < self.inline_math_line_vertical_merge_threshold
432
+ )
433
+ for ml in
434
+ merge_lines[line_idx]
435
+ ])
436
+ if all_close:
437
+ filtered_merge_lines[line_idx] = merge_lines[line_idx]
438
+
439
  # Handle the merging
440
  already_merged = set()
441
+ potential_merges = set(chain.from_iterable(filtered_merge_lines.values()))
442
  out_provider_lines = [(i, p) for i, p in enumerate(provider_lines) if i not in potential_merges]
443
+ for line_idx in filtered_merge_lines:
444
  text_line = text_lines[line_idx]
445
+ merge_section = filtered_merge_lines[line_idx]
446
  merge_section = [m for m in merge_section if m not in already_merged]
447
  if len(merge_section) == 0:
448
  continue
marker/processors/debug.py CHANGED
@@ -72,15 +72,20 @@ class DebugProcessor(BaseProcessor):
72
 
73
  line_bboxes = []
74
  span_bboxes = []
 
75
  for child in page.children:
 
 
 
76
  if child.block_type == BlockTypes.Line:
77
  bbox = child.polygon.rescale(page.polygon.size, png_image.size).bbox
78
  line_bboxes.append(bbox)
 
79
  elif child.block_type == BlockTypes.Span:
80
  bbox = child.polygon.rescale(page.polygon.size, png_image.size).bbox
81
  span_bboxes.append(bbox)
82
 
83
- self.render_on_image(line_bboxes, png_image, color="blue", draw_bbox=True, label_font_size=24)
84
  #self.render_on_image(span_bboxes, png_image, color="green", draw_bbox=True, label_font_size=24)
85
 
86
  png_image = self.render_layout_boxes(page, png_image)
 
72
 
73
  line_bboxes = []
74
  span_bboxes = []
75
+ line_ids = []
76
  for child in page.children:
77
+ # Skip any blocks that have been removed
78
+ if child.removed:
79
+ continue
80
  if child.block_type == BlockTypes.Line:
81
  bbox = child.polygon.rescale(page.polygon.size, png_image.size).bbox
82
  line_bboxes.append(bbox)
83
+ line_ids.append(child.block_id)
84
  elif child.block_type == BlockTypes.Span:
85
  bbox = child.polygon.rescale(page.polygon.size, png_image.size).bbox
86
  span_bboxes.append(bbox)
87
 
88
+ self.render_on_image(line_bboxes, png_image, color="blue", draw_bbox=True, label_font_size=24, labels=[str(i) for i in line_ids])
89
  #self.render_on_image(span_bboxes, png_image, color="green", draw_bbox=True, label_font_size=24)
90
 
91
  png_image = self.render_layout_boxes(page, png_image)
marker/processors/line_merge.py CHANGED
@@ -23,7 +23,7 @@ class LineMergeProcessor(BaseProcessor):
23
  intersection_pct_threshold: Annotated[
24
  float,
25
  "The total amount of intersection area concentrated in the max intersection block."
26
- ] = .9
27
 
28
  def __init__(self, config):
29
  super().__init__(config)
@@ -38,7 +38,8 @@ class LineMergeProcessor(BaseProcessor):
38
  continue
39
 
40
  lines = block.contained_blocks(document, (BlockTypes.Line,))
41
- line_bboxes = [l.polygon.bbox for l in lines]
 
42
  intersections = matrix_intersection_area(line_bboxes, line_bboxes)
43
 
44
  merges = []
@@ -49,6 +50,10 @@ class LineMergeProcessor(BaseProcessor):
49
  intersection_pct = intersection_val / max(1, lines[i].polygon.area)
50
  intersection_row = intersections[i]
51
  intersection_row[i] = 0 # Zero out the current idx
 
 
 
 
52
  max_intersection_idx = intersection_row.argmax()
53
  total_intersection = max(1, sum(intersection_row))
54
  max_intersection = intersection_row[max_intersection_idx]
@@ -61,7 +66,9 @@ class LineMergeProcessor(BaseProcessor):
61
  abs(lines[i].polygon.y_end - lines[next_idx].polygon.y_end) <= self.min_merge_ydist,
62
  max_intersection / total_intersection >= self.intersection_pct_threshold
63
  ]):
64
- merge.append(i)
 
 
65
  else:
66
  merges.append(merge)
67
  merge = []
@@ -82,6 +89,7 @@ class LineMergeProcessor(BaseProcessor):
82
  other_line: Line = lines[idx]
83
  line.merge(other_line)
84
  block.structure.remove(other_line.id)
 
85
  merged.add(idx)
86
 
87
  # It is probably math if we are merging provider lines like this
 
23
  intersection_pct_threshold: Annotated[
24
  float,
25
  "The total amount of intersection area concentrated in the max intersection block."
26
+ ] = .7
27
 
28
  def __init__(self, config):
29
  super().__init__(config)
 
38
  continue
39
 
40
  lines = block.contained_blocks(document, (BlockTypes.Line,))
41
+ lines = [l for l in lines if l.polygon.width * 5 > l.polygon.height] # Skip vertical lines
42
+ line_bboxes = [l.polygon.expand(self.min_merge_pct, 0).bbox for l in lines] # Expand horizontally
43
  intersections = matrix_intersection_area(line_bboxes, line_bboxes)
44
 
45
  merges = []
 
50
  intersection_pct = intersection_val / max(1, lines[i].polygon.area)
51
  intersection_row = intersections[i]
52
  intersection_row[i] = 0 # Zero out the current idx
53
+
54
+ # Zero out previous merge segments
55
+ for m in merge:
56
+ intersection_row[m] = 0
57
  max_intersection_idx = intersection_row.argmax()
58
  total_intersection = max(1, sum(intersection_row))
59
  max_intersection = intersection_row[max_intersection_idx]
 
66
  abs(lines[i].polygon.y_end - lines[next_idx].polygon.y_end) <= self.min_merge_ydist,
67
  max_intersection / total_intersection >= self.intersection_pct_threshold
68
  ]):
69
+ if not merge:
70
+ merge.append(i)
71
+ merge.append(next_idx)
72
  else:
73
  merges.append(merge)
74
  merge = []
 
89
  other_line: Line = lines[idx]
90
  line.merge(other_line)
91
  block.structure.remove(other_line.id)
92
+ other_line.removed = True # Mark line as removed
93
  merged.add(idx)
94
 
95
  # It is probably math if we are merging provider lines like this
marker/processors/llm/llm_inlinemath.py CHANGED
@@ -83,11 +83,14 @@ Output:
83
  if not self.redo_inline_math:
84
  return
85
 
 
86
  inline_blocks = [
87
  (page, block)
88
  for page in document.pages
89
  for block in page.contained_blocks(document, self.block_types)
90
  ]
 
 
91
  detected_blocks = [
92
  (page, block)
93
  for page in document.pages
@@ -95,6 +98,7 @@ Output:
95
  if any([b.formats and "math" in b.formats for b in block.contained_blocks(document, (BlockTypes.Line,))])
96
  ]
97
  inference_blocks = inline_blocks + detected_blocks
 
98
  # Don't show progress if there are no blocks to process
99
  total_blocks = len(inference_blocks)
100
  if total_blocks == 0:
 
83
  if not self.redo_inline_math:
84
  return
85
 
86
+ # Get inline math blocks
87
  inline_blocks = [
88
  (page, block)
89
  for page in document.pages
90
  for block in page.contained_blocks(document, self.block_types)
91
  ]
92
+
93
+ # Get other blocks with detected math in them
94
  detected_blocks = [
95
  (page, block)
96
  for page in document.pages
 
98
  if any([b.formats and "math" in b.formats for b in block.contained_blocks(document, (BlockTypes.Line,))])
99
  ]
100
  inference_blocks = inline_blocks + detected_blocks
101
+
102
  # Don't show progress if there are no blocks to process
103
  total_blocks = len(inference_blocks)
104
  if total_blocks == 0:
marker/schema/blocks/base.py CHANGED
@@ -215,7 +215,7 @@ class Block(BaseModel):
215
  blocks = []
216
  for block_id in self.structure:
217
  block = document.get_block(block_id)
218
- if block_types is None or block.block_type in block_types:
219
  blocks.append(block)
220
  blocks += block.contained_blocks(document, block_types)
221
  return blocks
 
215
  blocks = []
216
  for block_id in self.structure:
217
  block = document.get_block(block_id)
218
+ if (block_types is None or block.block_type in block_types) and not block.removed:
219
  blocks.append(block)
220
  blocks += block.contained_blocks(document, block_types)
221
  return blocks
marker/services/claude.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import time
4
+ from io import BytesIO
5
+ from typing import List, Annotated, Union, T
6
+
7
+ import PIL
8
+ from PIL import Image
9
+ import anthropic
10
+ from anthropic import APIError, APIConnectionError, APITimeoutError, RateLimitError
11
+ from pydantic import BaseModel
12
+
13
+ from marker.schema.blocks import Block
14
+ from marker.services import BaseService
15
+
16
class ClaudeService(BaseService):
    """LLM service that sends a prompt plus images to the Anthropic Messages API.

    The model is instructed (through the system prompt) to answer with JSON that
    matches a caller-supplied Pydantic schema; the reply is parsed and validated
    against that schema before being returned.
    """
    claude_model_name: Annotated[
        str,
        "The name of the Claude model to use for the service."
    ] = "claude-3-5-sonnet-20241022"
    claude_api_key: Annotated[
        str,
        "The Claude API key to use for the service."
    ] = None
    max_claude_tokens: Annotated[
        int,
        "The maximum number of tokens to use for a single Claude request."
    ] = 4096

    def img_to_base64(self, img: PIL.Image.Image) -> str:
        """Encode ``img`` as WEBP and return the bytes as a base64 UTF-8 string."""
        image_bytes = BytesIO()
        img.save(image_bytes, format="WEBP")
        return base64.b64encode(image_bytes.getvalue()).decode('utf-8')

    def prepare_images(self, images: Union[Image.Image, List[Image.Image]]) -> List[dict]:
        """Convert one image or a list of images into Messages API image content blocks."""
        if isinstance(images, Image.Image):
            images = [images]

        return [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    # Must agree with the format written by img_to_base64.
                    "media_type": "image/webp",
                    "data": self.img_to_base64(img)
                }
            }
            for img in images
        ]

    def validate_response(self, response_text: str, schema: type[T]) -> T:
        """Parse ``response_text`` as JSON and validate it against ``schema``.

        Args:
            response_text: Raw text returned by the model.
            schema: Pydantic model class the response must conform to.

        Returns:
            An instance of ``schema`` built from the response.

        Raises:
            pydantic.ValidationError: If the text is not valid JSON or does not
                match ``schema``.
        """
        cleaned = response_text.strip()
        # Models often wrap JSON in a Markdown code fence; strip it so that an
        # otherwise valid reply is not rejected.
        if cleaned.startswith("```"):
            cleaned = cleaned.removeprefix("```json").removeprefix("```")
            cleaned = cleaned.removesuffix("```").strip()

        try:
            data = json.loads(cleaned)
        except json.JSONDecodeError:
            # Let Pydantic report the malformed JSON as a ValidationError.
            return schema.model_validate_json(cleaned)
        return schema.model_validate(data)

    def get_client(self):
        """Build an Anthropic client authenticated with ``claude_api_key``."""
        return anthropic.Anthropic(
            api_key=self.claude_api_key,
        )

    def __call__(
        self,
        prompt: str,
        image: PIL.Image.Image | List[PIL.Image.Image],
        block: Block,
        response_schema: type[BaseModel],
        max_retries: int | None = None,
        timeout: int | None = None
    ):
        """Run one inference request and return the validated response.

        Args:
            prompt: User prompt text.
            image: One image or a list of images sent alongside the prompt.
            block: Block the request relates to (not used by this method).
            response_schema: Pydantic model describing the expected JSON reply.
            max_retries: Maximum attempts; falls back to ``self.max_retries``
                (presumably declared on BaseService - confirm).
            timeout: Per-request timeout; falls back to ``self.timeout``
                (presumably declared on BaseService - confirm).

        Returns:
            An instance of ``response_schema`` on success, or an empty dict when
            the request fails or the retry budget is exhausted.
        """
        if max_retries is None:
            max_retries = self.max_retries

        if timeout is None:
            timeout = self.timeout

        if not isinstance(image, list):
            image = [image]

        schema_example = response_schema.model_json_schema()
        system_prompt = f"""
Follow the instructions given by the user prompt. You must provide your response in JSON format matching this schema:

{json.dumps(schema_example, indent=2)}
""".strip()

        client = self.get_client()
        image_data = self.prepare_images(image)

        # The Messages API only accepts "user"/"assistant" roles in `messages`;
        # the system prompt is passed through the top-level `system` parameter.
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    *image_data
                ]
            }
        ]

        tries = 0
        while tries < max_retries:
            try:
                response = client.messages.create(
                    model=self.claude_model_name,
                    max_tokens=self.max_claude_tokens,
                    system=system_prompt,
                    messages=messages,
                    timeout=timeout
                )
                # Extract and validate response
                response_text = response.content[0].text
                return self.validate_response(response_text, response_schema)
            except RateLimitError as e:
                # Rate limit exceeded: back off linearly (3s, 6s, ...) and retry
                tries += 1
                wait_time = tries * 3
                print(f"Rate limit error: {e}. Retrying in {wait_time} seconds... (Attempt {tries}/{max_retries})")
                time.sleep(wait_time)
            except Exception as e:
                # Any other failure (API error, validation error, empty content)
                # is best-effort: report it and fall through to the empty result.
                print(e)
                break

        return {}
poetry.lock CHANGED
@@ -163,6 +163,30 @@ files = [
163
  {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
164
  ]
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  [[package]]
167
  name = "anyio"
168
  version = "4.8.0"
@@ -777,6 +801,17 @@ files = [
777
  {file = "Distance-0.1.3.tar.gz", hash = "sha256:60807584f5b6003f5c521aa73f39f51f631de3be5cccc5a1d67166fcbf0d4551"},
778
  ]
779
 
 
 
 
 
 
 
 
 
 
 
 
780
  [[package]]
781
  name = "exceptiongroup"
782
  version = "1.2.2"
@@ -1450,6 +1485,91 @@ MarkupSafe = ">=2.0"
1450
  [package.extras]
1451
  i18n = ["Babel (>=2.7)"]
1452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1453
  [[package]]
1454
  name = "joblib"
1455
  version = "1.4.2"
@@ -2691,10 +2811,10 @@ files = [
2691
 
2692
  [package.dependencies]
2693
  numpy = [
2694
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
2695
- {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
2696
  {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
2697
  {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
 
 
2698
  ]
2699
 
2700
  [[package]]
@@ -2772,9 +2892,9 @@ files = [
2772
 
2773
  [package.dependencies]
2774
  numpy = [
2775
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
2776
- {version = ">=1.23.2", markers = "python_version == \"3.11\""},
2777
  {version = ">=1.22.4", markers = "python_version < \"3.11\""},
 
 
2778
  ]
2779
  python-dateutil = ">=2.8.2"
2780
  pytz = ">=2020.1"
@@ -5453,4 +5573,4 @@ propcache = ">=0.2.0"
5453
  [metadata]
5454
  lock-version = "2.0"
5455
  python-versions = "^3.10"
5456
- content-hash = "04afa6e305c60db8d9f5d304d67f6c51e0415cee9b3c7d1171750f7dd787135b"
 
163
  {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
164
  ]
165
 
166
+ [[package]]
167
+ name = "anthropic"
168
+ version = "0.46.0"
169
+ description = "The official Python library for the anthropic API"
170
+ optional = false
171
+ python-versions = ">=3.8"
172
+ files = [
173
+ {file = "anthropic-0.46.0-py3-none-any.whl", hash = "sha256:1445ec9be78d2de7ea51b4d5acd3574e414aea97ef903d0ecbb57bec806aaa49"},
174
+ {file = "anthropic-0.46.0.tar.gz", hash = "sha256:eac3d43271d02321a57c3ca68aca84c3d58873e8e72d1433288adee2d46b745b"},
175
+ ]
176
+
177
+ [package.dependencies]
178
+ anyio = ">=3.5.0,<5"
179
+ distro = ">=1.7.0,<2"
180
+ httpx = ">=0.23.0,<1"
181
+ jiter = ">=0.4.0,<1"
182
+ pydantic = ">=1.9.0,<3"
183
+ sniffio = "*"
184
+ typing-extensions = ">=4.10,<5"
185
+
186
+ [package.extras]
187
+ bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"]
188
+ vertex = ["google-auth (>=2,<3)"]
189
+
190
  [[package]]
191
  name = "anyio"
192
  version = "4.8.0"
 
801
  {file = "Distance-0.1.3.tar.gz", hash = "sha256:60807584f5b6003f5c521aa73f39f51f631de3be5cccc5a1d67166fcbf0d4551"},
802
  ]
803
 
804
+ [[package]]
805
+ name = "distro"
806
+ version = "1.9.0"
807
+ description = "Distro - an OS platform information API"
808
+ optional = false
809
+ python-versions = ">=3.6"
810
+ files = [
811
+ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
812
+ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
813
+ ]
814
+
815
  [[package]]
816
  name = "exceptiongroup"
817
  version = "1.2.2"
 
1485
  [package.extras]
1486
  i18n = ["Babel (>=2.7)"]
1487
 
1488
+ [[package]]
1489
+ name = "jiter"
1490
+ version = "0.8.2"
1491
+ description = "Fast iterable JSON parser."
1492
+ optional = false
1493
+ python-versions = ">=3.8"
1494
+ files = [
1495
+ {file = "jiter-0.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b"},
1496
+ {file = "jiter-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393"},
1497
+ {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c826a221851a8dc028eb6d7d6429ba03184fa3c7e83ae01cd6d3bd1d4bd17d"},
1498
+ {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d35c864c2dff13dfd79fb070fc4fc6235d7b9b359efe340e1261deb21b9fcb66"},
1499
+ {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f557c55bc2b7676e74d39d19bcb8775ca295c7a028246175d6a8b431e70835e5"},
1500
+ {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:580ccf358539153db147e40751a0b41688a5ceb275e6f3e93d91c9467f42b2e3"},
1501
+ {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af102d3372e917cffce49b521e4c32c497515119dc7bd8a75665e90a718bbf08"},
1502
+ {file = "jiter-0.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cadcc978f82397d515bb2683fc0d50103acff2a180552654bb92d6045dec2c49"},
1503
+ {file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ba5bdf56969cad2019d4e8ffd3f879b5fdc792624129741d3d83fc832fef8c7d"},
1504
+ {file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3b94a33a241bee9e34b8481cdcaa3d5c2116f575e0226e421bed3f7a6ea71cff"},
1505
+ {file = "jiter-0.8.2-cp310-cp310-win32.whl", hash = "sha256:6e5337bf454abddd91bd048ce0dca5134056fc99ca0205258766db35d0a2ea43"},
1506
+ {file = "jiter-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:4a9220497ca0cb1fe94e3f334f65b9b5102a0b8147646118f020d8ce1de70105"},
1507
+ {file = "jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b"},
1508
+ {file = "jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15"},
1509
+ {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0"},
1510
+ {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f"},
1511
+ {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099"},
1512
+ {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74"},
1513
+ {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586"},
1514
+ {file = "jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc"},
1515
+ {file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88"},
1516
+ {file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6"},
1517
+ {file = "jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44"},
1518
+ {file = "jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855"},
1519
+ {file = "jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f"},
1520
+ {file = "jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44"},
1521
+ {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f"},
1522
+ {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60"},
1523
+ {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57"},
1524
+ {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e"},
1525
+ {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887"},
1526
+ {file = "jiter-0.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d"},
1527
+ {file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152"},
1528
+ {file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29"},
1529
+ {file = "jiter-0.8.2-cp312-cp312-win32.whl", hash = "sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e"},
1530
+ {file = "jiter-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c"},
1531
+ {file = "jiter-0.8.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84"},
1532
+ {file = "jiter-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4"},
1533
+ {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587"},
1534
+ {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c"},
1535
+ {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18"},
1536
+ {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6"},
1537
+ {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef"},
1538
+ {file = "jiter-0.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1"},
1539
+ {file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9"},
1540
+ {file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05"},
1541
+ {file = "jiter-0.8.2-cp313-cp313-win32.whl", hash = "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a"},
1542
+ {file = "jiter-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865"},
1543
+ {file = "jiter-0.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca"},
1544
+ {file = "jiter-0.8.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0"},
1545
+ {file = "jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566"},
1546
+ {file = "jiter-0.8.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9e1fa156ee9454642adb7e7234a383884452532bc9d53d5af2d18d98ada1d79c"},
1547
+ {file = "jiter-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cf5dfa9956d96ff2efb0f8e9c7d055904012c952539a774305aaaf3abdf3d6c"},
1548
+ {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e52bf98c7e727dd44f7c4acb980cb988448faeafed8433c867888268899b298b"},
1549
+ {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a2ecaa3c23e7a7cf86d00eda3390c232f4d533cd9ddea4b04f5d0644faf642c5"},
1550
+ {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08d4c92bf480e19fc3f2717c9ce2aa31dceaa9163839a311424b6862252c943e"},
1551
+ {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99d9a1eded738299ba8e106c6779ce5c3893cffa0e32e4485d680588adae6db8"},
1552
+ {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20be8b7f606df096e08b0b1b4a3c6f0515e8dac296881fe7461dfa0fb5ec817"},
1553
+ {file = "jiter-0.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d33f94615fcaf872f7fd8cd98ac3b429e435c77619777e8a449d9d27e01134d1"},
1554
+ {file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:317b25e98a35ffec5c67efe56a4e9970852632c810d35b34ecdd70cc0e47b3b6"},
1555
+ {file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fc9043259ee430ecd71d178fccabd8c332a3bf1e81e50cae43cc2b28d19e4cb7"},
1556
+ {file = "jiter-0.8.2-cp38-cp38-win32.whl", hash = "sha256:fc5adda618205bd4678b146612ce44c3cbfdee9697951f2c0ffdef1f26d72b63"},
1557
+ {file = "jiter-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:cd646c827b4f85ef4a78e4e58f4f5854fae0caf3db91b59f0d73731448a970c6"},
1558
+ {file = "jiter-0.8.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e41e75344acef3fc59ba4765df29f107f309ca9e8eace5baacabd9217e52a5ee"},
1559
+ {file = "jiter-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f22b16b35d5c1df9dfd58843ab2cd25e6bf15191f5a236bed177afade507bfc"},
1560
+ {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7200b8f7619d36aa51c803fd52020a2dfbea36ffec1b5e22cab11fd34d95a6d"},
1561
+ {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70bf4c43652cc294040dbb62256c83c8718370c8b93dd93d934b9a7bf6c4f53c"},
1562
+ {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9d471356dc16f84ed48768b8ee79f29514295c7295cb41e1133ec0b2b8d637d"},
1563
+ {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:859e8eb3507894093d01929e12e267f83b1d5f6221099d3ec976f0c995cb6bd9"},
1564
+ {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaa58399c01db555346647a907b4ef6d4f584b123943be6ed5588c3f2359c9f4"},
1565
+ {file = "jiter-0.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8f2d5ed877f089862f4c7aacf3a542627c1496f972a34d0474ce85ee7d939c27"},
1566
+ {file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:03c9df035d4f8d647f8c210ddc2ae0728387275340668fb30d2421e17d9a0841"},
1567
+ {file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8bd2a824d08d8977bb2794ea2682f898ad3d8837932e3a74937e93d62ecbb637"},
1568
+ {file = "jiter-0.8.2-cp39-cp39-win32.whl", hash = "sha256:ca29b6371ebc40e496995c94b988a101b9fbbed48a51190a4461fcb0a68b4a36"},
1569
+ {file = "jiter-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1c0dfbd1be3cbefc7510102370d86e35d1d53e5a93d48519688b1bf0f761160a"},
1570
+ {file = "jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d"},
1571
+ ]
1572
+
1573
  [[package]]
1574
  name = "joblib"
1575
  version = "1.4.2"
 
2811
 
2812
  [package.dependencies]
2813
  numpy = [
 
 
2814
  {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
2815
  {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
2816
+ {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
2817
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
2818
  ]
2819
 
2820
  [[package]]
 
2892
 
2893
  [package.dependencies]
2894
  numpy = [
 
 
2895
  {version = ">=1.22.4", markers = "python_version < \"3.11\""},
2896
+ {version = ">=1.23.2", markers = "python_version == \"3.11\""},
2897
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
2898
  ]
2899
  python-dateutil = ">=2.8.2"
2900
  pytz = ">=2020.1"
 
5573
  [metadata]
5574
  lock-version = "2.0"
5575
  python-versions = "^3.10"
5576
+ content-hash = "664bbdbf1226d34671e66f96973588ea266c9e0aefaace7d67da5a7cd00f0eec"
pyproject.toml CHANGED
@@ -35,6 +35,7 @@ markdown2 = "^2.5.2"
35
  filetype = "^1.2.0"
36
  scikit-learn = "^1.6.1"
37
  google-genai = "^1.0.0"
 
38
 
39
  [tool.poetry.group.dev.dependencies]
40
  jupyter = "^1.0.0"
 
35
  filetype = "^1.2.0"
36
  scikit-learn = "^1.6.1"
37
  google-genai = "^1.0.0"
38
+ anthropic = "^0.46.0"
39
 
40
  [tool.poetry.group.dev.dependencies]
41
  jupyter = "^1.0.0"