File size: 478 Bytes
2c69783 1ecee7a 19a4543 70bf91d 632e817 0c603b0 70bf91d 19a4543 70bf91d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
from marker.providers.pdf import PdfProvider
import tempfile
import datasets
def setup_pdf_provider(
    filename='adversarial.pdf',
    config=None,
) -> PdfProvider:
    """Build a ``PdfProvider`` for one PDF from the ``datalab-to/pdfs`` dataset.

    Loads the ``train`` split, finds the row whose ``filename`` column equals
    *filename*, writes that row's ``pdf`` value to a named temporary ``.pdf``
    file, and constructs a ``PdfProvider`` from the temporary file's path.

    Args:
        filename: Value to look up in the dataset's ``filename`` column.
        config: Passed through unchanged as the second ``PdfProvider``
            argument.

    Returns:
        The provider constructed from the temporary file's path.

    Raises:
        ValueError: If no row in the split has the requested filename.
    """
    dataset = datasets.load_dataset("datalab-to/pdfs", split="train")
    try:
        idx = dataset['filename'].index(filename)
    except ValueError:
        # Same exception type as before, but say where the lookup failed.
        raise ValueError(
            f"{filename!r} not found in the 'datalab-to/pdfs' train split"
        ) from None

    # Select the row first: dataset['pdf'][idx] would materialize every PDF
    # in the column just to keep a single one.
    pdf_bytes = dataset[idx]['pdf']

    # NOTE(review): temp_pdf is referenced only inside this function, and
    # NamedTemporaryFile removes the file when the object is closed or
    # collected. Presumably PdfProvider opens the path in its constructor;
    # confirm it never reopens the file by path after this function returns.
    temp_pdf = tempfile.NamedTemporaryFile(suffix=".pdf")
    temp_pdf.write(pdf_bytes)  # default mode is binary, so this must be bytes-like
    temp_pdf.flush()  # make the content visible to readers opening by name

    provider = PdfProvider(temp_pdf.name, config)
    return provider
|