Spaces:
Build error
Build error
import gradio as gr
import pandas as pd
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModel

# Load the TensorFlow SavedModel stored under the local 'arabert_pretrained' path.
model = tf.saved_model.load('arabert_pretrained')

# Tokenizer fetched by its 'aubmindlab/bert-base-arabert' identifier; it is used
# to encode raw text before it is handed to the model.
arabert_tokenizer = AutoTokenizer.from_pretrained('aubmindlab/bert-base-arabert')
def preprocess_input_data(texts, tokenizer, max_len=120):
    """Tokenize texts into fixed-length input IDs and attention masks.

    Args:
        texts (list[str] | str): Text strings to encode. A single string is
            treated as one text instead of being iterated per character.
        tokenizer: Tokenizer object exposing ``encode_plus`` (for example a
            transformers ``AutoTokenizer``).
        max_len (int, optional): Length every encoded sequence is padded or
            truncated to. Defaults to 120.

    Returns:
        tuple[list, list]: ``(input_ids, attention_mask)``, each holding one
        entry per input text, in input order.
    """
    # Iterating a bare string would tokenize every character as its own text.
    if isinstance(texts, str):
        texts = [texts]

    # padding="max_length" replaces the deprecated pad_to_max_length flag, and
    # truncation=True keeps over-long texts from producing ragged sequences.
    encoded = [
        tokenizer.encode_plus(
            text,
            max_length=max_len,
            padding="max_length",
            truncation=True,
            add_special_tokens=True,
        )
        for text in texts
    ]

    input_ids = [item["input_ids"] for item in encoded]
    attention_mask = [item["attention_mask"] for item in encoded]
    return input_ids, attention_mask
def sentiment_analysis(text):
    """Classify one text and return the thresholded prediction as a string.

    Args:
        text (str): The text to classify.

    Returns:
        str: The labels (``0`` or ``1``) obtained by thresholding the model
        output at 0.5, concatenated into a string of digits.
    """
    import numpy as np

    # Wrap the text in a list so it is encoded as ONE sequence; passing the
    # bare string would make the preprocessor iterate it character by character.
    X_input_ids, _ = preprocess_input_data([text], arabert_tokenizer)

    # NOTE(review): only the input IDs are fed to the model; the attention mask
    # is not used. Confirm this matches the SavedModel's call signature.
    preds = model(X_input_ids)

    labels = np.where(preds < 0.5, 0, 1).flatten()
    return ''.join(str(label) for label in labels)
# Serve the classifier through a text-in / text-out Gradio interface.
iface = gr.Interface(
    fn=sentiment_analysis,
    inputs="text",
    outputs="text",
)
iface.launch()