import gradio as gr
from gradio.components import Text
import joblib
import clean
import nltk
nltk.download('wordnet')
import numpy as np
import language_detection
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

print("all imports worked")
# Load the pre-trained English abuse classifier and its TF-IDF vectorizer
model = joblib.load('model_joblib.pkl')
print("model loaded")
tf = joblib.load('tf_joblib.pkl')
print("tfidf vectorizer loaded")
# Load Hindi abuse detection model
hindi_tokenizer = AutoTokenizer.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
hindi_model = AutoModelForSequenceClassification.from_pretrained("Hate-speech-CNERG/hindi-abusive-MuRIL")
print("Hindi model loaded")
def predict_hindi_text(text):
    # Tokenize, run the Hindi classifier, and return softmax scores
    # (index 0 = not abusive, index 1 = abusive, matching the usage below)
    inputs = hindi_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = hindi_model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    scores = predictions[0].detach().numpy()
    return scores
# Predict whether a sentence is abusive or not
def predict_abusive_lang(text):
    print("original text ", text)
    lang = language_detection.en_hi_detection(text)
    print("language detected ", lang)
    if lang == 'eng':
        cleaned_text = clean.text_cleaning(text)
        print("cleaned text ", cleaned_text)
        vectorized_text = tf.transform([cleaned_text])
        print("tfidf transformation ", vectorized_text)
        prediction = model.predict(vectorized_text)
        print("prediction ", prediction)
        if len(prediction) != 0 and prediction[0] == 0:
            return ["Not Abusive", cleaned_text]
        elif len(prediction) != 0 and prediction[0] == 1:
            return ["Abusive", cleaned_text]
        else:
            return ["Please write something in the comment box..", "No cleaned text"]
    elif lang == 'hi':
        print("using transformers for Hindi text")
        scores = predict_hindi_text(text)
        if scores[1] > scores[0]:  # score for the abusive class is higher
            return ["Abusive", text]
        else:
            return ["Not Abusive", text]
    else:
        return ["Unknown Language", "No cleaned text"]
# Define the Gradio output interfaces
output_interfaces = [
    gr.Textbox(label="Result"),
    gr.Textbox(label="Cleaned text")
]

app = gr.Interface(
    predict_abusive_lang,
    inputs='text',
    outputs=output_interfaces,
    title="Abuse Classifier",
    description="Enter a sentence and the model will predict whether it is abusive or not."
)

# Start the Gradio app
app.launch()
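# The running app can also be queried programmatically. A minimal sketch, assuming
# the `gradio_client` package is installed and the Space is deployed under a
# placeholder ID ("username/abuse-classifier" is hypothetical):
#
#   from gradio_client import Client
#   client = Client("username/abuse-classifier")
#   result = client.predict("some sentence", api_name="/predict")
#   print(result)  # expected: the "Result" and "Cleaned text" outputs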