import logging
import os
import re
import heapq
from functools import lru_cache
from string import punctuation
from urllib.parse import unquote

import streamlit as st
import nltk
from nltk.corpus import stopwords
from codetiming import Timer
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
from arabert.preprocess import ArabertPreprocessor
import tokenizers

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

punctuation = punctuation + '\n'
logger = logging.getLogger(__name__)
os.environ["TOKENIZERS_PARALLELISM"] = "false"

logger.info("Loading models...")
reader_time = Timer("loading", text="Time: {:.2f}", logger=logging.info)
reader_time.start()
# Models are loaded lazily inside get_results(), so this timer only brackets module setup.
reader_time.stop()
logger.info("Finished loading the models...")
logger.info(f"Time spent loading: {reader_time.last}")

def get_results(text, model_selected, num_beams, length_penalty):
    logger.info("\n=================================================================")
    logger.info(f"Text: {text}")
    logger.info(f"model_selected: {model_selected}")
    logger.info(f"length_penalty: {length_penalty}")

    reader_time = Timer("summarize", text="Time: {:.2f}", logger=logging.info)
    reader_time.start()

    if model_selected == 'GPT-2':
        number_of_tokens_limit = 80
    else:
        number_of_tokens_limit = 150
    logger.info(f"input length: {len(text.split())}")
    if model_selected == 'arabartsummarization':
        model_name = "abdalrahmanshahrour/arabartsummarization"
        preprocessor = ArabertPreprocessor(model_name="")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        pipeline1 = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
        result = pipeline1(text,
                           pad_token_id=tokenizer.eos_token_id,
                           num_beams=num_beams,
                           repetition_penalty=3.0,
                           max_length=200,
                           length_penalty=length_penalty,
                           no_repeat_ngram_size=3)[0]['generated_text']
        logger.info('arabartsummarization')
    elif model_selected == 'AraBART':
        model_name = "abdalrahmanshahrour/AraBART-summ"
        preprocessor = ArabertPreprocessor(model_name="")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        pipeline1 = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
        result = pipeline1(text,
                           pad_token_id=tokenizer.eos_token_id,
                           num_beams=num_beams,
                           repetition_penalty=3.0,
                           max_length=200,
                           length_penalty=length_penalty,
                           no_repeat_ngram_size=3)[0]['generated_text']
        logger.info('AraBART')
| elif model_selected == "auto-arabic-summarization": | |
| model_name="abdalrahmanshahrour/auto-arabic-summarization" | |
| preprocessor = ArabertPreprocessor(model_name="") | |
| tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| model = AutoModelForSeq2SeqLM.from_pretrained(model_name) | |
| pipeline1 = pipeline("text2text-generation",model=model,tokenizer=tokenizer) | |
| result = pipeline1(text, | |
| pad_token_id= tokenizer.eos_token_id, | |
| num_beams=num_beams, | |
| repetition_penalty=3.0, | |
| max_length=200, | |
| length_penalty=length_penalty, | |
| no_repeat_ngram_size = 3)[0]['generated_text'] | |
| logger.info('auto-arabic-summarization') | |
    else:
        result = "الرجاء اختيار نموذج"  # "Please choose a model"

    reader_time.stop()
    logger.info(f"Time spent summarizing: {reader_time.last}")
    return result

if __name__ == "__main__":
    results_dict = ""
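    # Minimal usage sketch (an assumption, not part of the original app):
    # get_results() could be called directly with one of the supported model
    # names. The sample text and generation settings below are illustrative only.
    #
    # sample_text = "..."  # Arabic article to summarize
    # summary = get_results(sample_text, "AraBART", num_beams=3, length_penalty=1.0)
    # print(summary)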