Spaces:

Tevfik-istanbullu
/

ArabicTextClassification

Build error

Tevfik istanbullu

Update app.py

d59ee08 verified 12 months ago

2.53 kB

	import joblib
	import gradio as gr
	from datasets import Dataset, DatasetDict, load_dataset
	from huggingface_hub import login
	import os

	# Authenticate against the Hugging Face Hub so flagged samples can be pushed.
	# HF_TOKEN is read from the environment; os.getenv returns None when it is unset.
	token = os.getenv('HF_TOKEN')
	# `write_permission` is deliberately not passed: huggingface_hub deprecated the
	# argument and removed it in 1.0, where passing it raises TypeError at startup.
	login(token, add_to_git_credential=True)

	# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
	# code. Only load artifacts that ship with this app and are trusted.
	model = joblib.load('arabic_text_classifier.pkl')
	vectorizer = joblib.load('tfidf_vectorizer.pkl')
	label_encoder = joblib.load('label_encoder.pkl')
	# Class names known to the label encoder; listed in the UI description.
	available_labels = label_encoder.classes_
	def predict_category(text, threshold=0.5):
	    """Classify text and return its category label.

	    The text is vectorized with the module-level ``vectorizer`` and scored by
	    the module-level ``model``. When the highest class probability is below
	    ``threshold``, or when there is no text to score, ``"Other"`` is returned.

	    Args:
	        text: The raw text to classify. ``None``, empty and whitespace-only
	            values are accepted and reported as ``"Other"``.
	        threshold: Minimum top-class probability required to report a
	            category. Defaults to 0.5.

	    Returns:
	        The label decoded by ``label_encoder`` for the predicted class, or
	        the string ``"Other"``.
	    """
	    # An empty or blank input has nothing to classify, so report it as
	    # "Other" without calling the vectorizer or the model.
	    if text is None or not text.strip():
	        return "Other"

	    text_vector = vectorizer.transform([text])
	    probabilities = model.predict_proba(text_vector)[0]
	    if max(probabilities) < threshold:
	        return "Other"

	    # Only ask for the hard prediction once the confidence check has passed.
	    predicted_category = model.predict(text_vector)[0]
	    return label_encoder.inverse_transform([predicted_category])[0]

	def flag_data(text, prediction):
	    """Append one (text, prediction) sample to the crowdsourced Hub dataset.

	    Loads the ``train`` split of the dataset repository, adds the new sample
	    and pushes the result back to the same repository.

	    Args:
	        text: The text that was classified.
	        prediction: The category reported for ``text``.

	    Raises:
	        Exception: Any load error other than ``FileNotFoundError``, and any
	            error raised while adding or pushing, propagates to the caller.
	    """
	    repo_id = "Tevfik34/crowdsourced-text-classification-data"

	    try:
	        dataset = load_dataset(repo_id, split="train")
	    except FileNotFoundError:
	        # Start from scratch only when the dataset is genuinely missing.
	        # A bare `except` here would also swallow network or auth failures
	        # and then push a one-row dataset over the existing data.
	        # NOTE(review): relies on the datasets library raising
	        # FileNotFoundError subclasses for missing/empty repos - confirm
	        # against the installed version.
	        dataset = Dataset.from_dict({"text": [], "prediction": []})

	    # NOTE(review): each value is wrapped in a one-element list, exactly as
	    # before. add_item takes one value per column, so this presumably stores
	    # single-element lists; confirm against the existing dataset schema
	    # before changing it.
	    new_data = {"text": [text], "prediction": [prediction]}
	    dataset = dataset.add_item(new_data)

	    dataset.push_to_hub(repo_id)


	def classify_and_flag(text):
	    """Classify ``text`` and record the sample in the crowdsourced dataset.

	    Recording is best-effort: if ``flag_data`` raises, the error is logged
	    with its traceback and the prediction is still returned.

	    Args:
	        text: The text entered by the user.

	    Returns:
	        The category returned by ``predict_category`` for ``text``.
	    """
	    # Imported locally because the file's import block does not provide it.
	    import logging

	    prediction = predict_category(text)
	    try:
	        flag_data(text, prediction)
	    except Exception:
	        # Storing the sample is secondary to answering the user, so a failed
	        # upload must not turn a successful prediction into an error.
	        logging.getLogger(__name__).exception(
	            "Could not record the classified sample"
	        )
	    return prediction


	# Build the individual UI pieces first, then assemble and start the app.
	text_input = gr.Textbox(lines=5, placeholder="Enter text in Arabic here...", label="Text")
	category_output = gr.Label(label="Predicted Category")

	# The single "{}" placeholder below is filled with the comma-separated labels.
	description_template = """
	This interface allows you to classify Arabic text into different categories using a machine learning model trained on 160,000 real-world text samples.

	Model Overview:
	- The model is based on Logistic Regression.
	- It was trained on a large dataset of 160,000 Arabic text entries, ensuring robustness and accuracy in classifying Arabic text.

	How to use:
	- Enter any Arabic text in the input box.
	- The model will predict the category that the text most likely belongs to.
	- If the model is uncertain, it will classify the text as 'Other'.

	Available Labels:
	The model can predict the following categories:
	- {}

	Try entering some text in Arabic to see how the model works.
	"""
	label_listing = ", ".join(available_labels)

	interface = gr.Interface(
	    fn=classify_and_flag,
	    inputs=text_input,
	    outputs=category_output,
	    title="Arabic Text Classifier",
	    description=description_template.format(label_listing),
	    theme="ParityError/Interstellar",
	)

	interface.launch()