Spaces:
Runtime error
Runtime error
| from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor | |
| import gradio as gr | |
| from haystack.nodes import PreProcessor | |
| from haystack.document_stores.faiss import FAISSDocumentStore | |
| from haystack.nodes import DensePassageRetriever | |
| from haystack.nodes import FARMReader | |
| from haystack.pipelines import ExtractiveQAPipeline | |
# --- Build the extractive QA pipeline (runs once, at import time) -----------
# Stages: PDF -> text documents -> word-based chunks -> FAISS index -> DPR
# embeddings -> retriever + reader pipeline.

# Convert the hard-coded PDF into Haystack documents.
pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
# NOTE(review): the original call was cut off right after `meta`, leaving the
# parenthesis unclosed; `meta=None` is assumed here -- confirm.
converted = pdf_converter.convert(
    file_path="statistics-for-machine-learning.pdf",
    meta=None,
)

# Split the converted text into 200-word chunks with a 10-word overlap.
preprocessor = PreProcessor(
    split_by="word",
    split_length=200,
    split_overlap=10,
)
preprocessed = preprocessor.process(converted)

# Start from an empty store on every launch, then index the fresh chunks.
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)
document_store.delete_all_documents()
document_store.write_documents(preprocessed)

# The retriever must exist before embeddings can be computed for the store.
retriever = DensePassageRetriever(document_store=document_store)
reader = FARMReader(model_name_or_path='deepset/roberta-base-squad2-distilled', use_gpu=False)
document_store.update_embeddings(retriever)

# Positional order is (reader, retriever), as in the original call.
pipeline = ExtractiveQAPipeline(reader, retriever)
# --- Smoke run: ask a few fixed questions and print the top answer ----------
questions = [
    'What is linear regression?',
    'What is machine learning?',
    'What are the steps in machine learning model development and deployment?',
    'What is classification?',
]

# One full pipeline result per question; `answers` stays a module-level list
# because `correct` appends to it later.
answers = [pipeline.run(query=question) for question in questions]

for answer in answers:
    print('Q:', answer['query'])
    candidates = answer['answers']
    if not candidates:
        # Indexing [0] on an empty result list would raise IndexError.
        print('A: (no answer found)')
        print('\n')
        continue
    best = candidates[0]
    print('A:', best.answer)
    print('Context: ', best.context)
    print('score: ', best.score)
    print('\n')
def correct(question: str) -> str:
    """Answer *question* with the module-level QA pipeline.

    Runs ``pipeline`` on the question, records the raw prediction in the
    module-level ``answers`` list (as the original did), and returns the
    first answer formatted as text for display.

    Args:
        question: Natural-language question typed by the user.

    Returns:
        The first answer with its context and score, or a short message when
        the question is blank or the pipeline produced no answers.
    """
    if not question or not question.strip():
        return 'Please enter a question.'

    prediction = pipeline.run(query=question)
    # list.append() returns None, so appending and returning must be separate
    # steps; the original returned the append result and the UI stayed empty.
    # NOTE(review): this list grows with every request -- confirm the history
    # is actually needed.
    answers.append(prediction)

    candidates = prediction['answers']
    if not candidates:
        return 'No answer found.'

    best = candidates[0]
    return f'{best.answer}\n\nContext: {best.context}\nscore: {best.score}'
# --- Gradio UI ---------------------------------------------------------------
# `correct` takes exactly one argument (the question), so the interface must
# expose exactly one input component. The previous File input was passed as
# an extra positional argument and was never indexed: the document is a
# hard-coded PDF processed at start-up.
app_inputs = gr.inputs.Textbox(lines=10, label='Question')

interface = gr.Interface(
    fn=correct,
    inputs=app_inputs,
    # Output components belong to `gr.outputs`, not `gr.inputs`.
    outputs=gr.outputs.Textbox(label='Answer'),
    title='PDF QA system',
)
interface.launch(share=True)