Spaces:

shauryaDugar
/

Bias_Detection

Runtime error

App Files Files Community

Bias_Detection / allpreds.py

shauryaDugar

Upload folder using huggingface_hub

b9461b7 verified 3 months ago

raw history blame contribute delete

No virus

5.34 kB

	from gcode import predict_text_classification_single_label_sample
	import pandas as pd
	from dotenv import load_dotenv
	import os
	from quickchart import QuickChart

	load_dotenv()

	# (display name, internal bias key, environment variable holding the Vertex endpoint ID)
	_BIAS_ENDPOINT_ENV_VARS = (
	    ("Gender Bias", "gender_bias", "GENDER_ENDPOINT_ID"),
	    ("Racial Bias", "racial_bias", "RACIAL_ENDPOINT_ID"),
	    ("Political Bias", "political_bias", "POLITICAL_ENDPOINT_ID"),
	    ("Hate Speech", "hate_speech", "HATE_ENDPOINT_ID"),
	)

	# Maps each bias display name to its internal bias key and its Vertex endpoint ID.
	# The endpoint ID is read from the environment and is None when the variable is unset.
	list_of_biases_and_endpts = {
	    display_name: {"bias_type": bias_key, "endpoint_id": os.environ.get(env_var)}
	    for display_name, bias_key, env_var in _BIAS_ENDPOINT_ENV_VARS
	}

	# For each bias key, the position inside a prediction's 'confidences' list that
	# holds the "biased" confidence score (1 -> index 1, 0 -> index 0).
	order_in_confidence = dict(
	    gender_bias=1,
	    racial_bias=1,
	    political_bias=0,
	    hate_speech=0,
	)



	# make_preds is the top-level entry point: it delegates scoring to predict and
	# chart rendering to generateChart, then packages the results for the UI.
	def make_preds(content, bias_type):
	    """Score *content* for one kind of bias and build the UI outputs.

	    Args:
	        content: The text to analyse.
	        bias_type: A display-name key of ``list_of_biases_and_endpts``
	            (for example ``"Gender Bias"``); an unknown key raises KeyError.

	    Returns:
	        A 3-tuple ``(pos_tokens, bias_scores, html)``:
	        ``pos_tokens`` is a list of ``(text, label)`` tuples for the
	        highlighted-text component, where ``label`` is ``bias_type`` when the
	        chunk's score exceeds 0.5 and ``None`` otherwise, and every chunk is
	        followed by a ``(" ", None)`` separator; ``bias_scores`` maps
	        ``bias_type`` to the bias fraction returned by ``predict``; ``html``
	        is the gauge chart markup returned by ``generateChart``.
	    """
	    endpoint_info = list_of_biases_and_endpts[bias_type]
	    df, bias_percentage = predict(
	        content, endpoint_info["bias_type"], endpoint_info["endpoint_id"]
	    )

	    # Build the highlighted-text data: one labelled tuple per chunk plus a spacer.
	    pos_tokens = []
	    for chunk_text, confidence in zip(df['content'], df['predictions']):
	        label = bias_type if confidence > 0.5 else None
	        pos_tokens.append((chunk_text, label))
	        pos_tokens.append((" ", None))

	    # Data for the bias bar chart.
	    bias_scores = {bias_type: bias_percentage}

	    # QuickChart-based radial gauge chart for the overall bias score.
	    html = generateChart(bias_percentage)

	    return pos_tokens, bias_scores, html



	# The predict function generates the predictions for the user content. It returns
	# a dataframe with 'content' and 'predictions' columns (the latter holds the bias
	# confidence score) together with the overall bias fraction.
	def predict(content, bias_type, endpoint_id):
	    """Score every 20-word chunk of *content* against one bias endpoint.

	    Args:
	        content: The text to analyse.
	        bias_type: Internal bias key; must be a key of ``order_in_confidence``.
	        endpoint_id: Endpoint ID forwarded to
	            ``predict_text_classification_single_label_sample``.

	    Returns:
	        ``(df, bias_percentage)`` where ``df`` has one row per returned
	        prediction with the chunk text in ``'content'`` and the bias
	        confidence in ``'predictions'``, and ``bias_percentage`` is the
	        number of predictions scoring above 0.5 divided by the number of
	        chunks, rounded to two decimals (a fraction, not a 0-100 value).
	        Content with no words yields an empty dataframe and ``0.0``.

	    Side effects:
	        Writes the dataframe to ``preds_<bias_type>.csv`` in the current
	        working directory.
	    """
	    # split the article into 20-word chunks
	    chunks = split_into_20_word_chunks(content)

	    # Loop-invariant lookups, resolved once up front.
	    confidence_index = order_in_confidence[bias_type]
	    project_id = os.environ.get("PROJECT_ID")

	    rows = []
	    biased_count = 0
	    for chunk in chunks:
	        predictions = predict_text_classification_single_label_sample(
	            project=project_id,
	            endpoint_id=endpoint_id,
	            location="us-central1",
	            content=chunk,
	        )
	        for prediction in predictions:
	            score = float(dict(prediction)['confidences'][confidence_index])
	            rows.append({'content': chunk, 'predictions': score})
	            if score > 0.5:
	                biased_count += 1

	    # Build the frame once instead of growing it row by row.
	    df = pd.DataFrame(rows, columns=['content', 'predictions'])

	    # save the dataframe as a csv file
	    df.to_csv(f'preds_{bias_type}.csv')

	    # Guard against empty input, which would otherwise divide by zero.
	    bias_percentage = round(biased_count / len(chunks), 2) if chunks else 0.0
	    return df, bias_percentage

	# this function splits the content into chunks of chunk_size words (20 by default)
	def split_into_20_word_chunks(long_string, chunk_size=20):
	    """Split *long_string* into consecutive chunks of at most *chunk_size* words.

	    Words are separated on any whitespace and re-joined with single spaces,
	    so the original spacing is not preserved. The final chunk may be shorter.

	    Args:
	        long_string: The text to split.
	        chunk_size: Maximum number of words per chunk; defaults to 20.

	    Returns:
	        A list of chunk strings; empty when the input contains no words.

	    Raises:
	        ValueError: If *chunk_size* is less than 1.
	    """
	    if chunk_size < 1:
	        raise ValueError("chunk_size must be a positive integer")

	    words = long_string.split()
	    return [
	        ' '.join(words[start:start + chunk_size])
	        for start in range(0, len(words), chunk_size)
	    ]

	# this function splits the content into sentences
	def split_into_sentences(long_string):
	    """Split *long_string* into sentences ending at '.', '?' or '!'.

	    Each sentence keeps its terminating punctuation mark and is stripped of
	    surrounding whitespace. Text after the last punctuation mark is returned
	    as a final sentence only if it contains non-whitespace characters, so
	    trailing spaces or newlines never produce an empty entry.

	    Args:
	        long_string: The text to split.

	    Returns:
	        A list of sentence strings; empty for empty or whitespace-only input.
	    """
	    punctuation_marks = {'.', '?', '!'}
	    sentences = []
	    start = 0

	    # Slice out each sentence instead of accumulating it character by character.
	    for position, char in enumerate(long_string):
	        if char in punctuation_marks:
	            sentences.append(long_string[start:position + 1].strip())
	            start = position + 1

	    # Strip before the emptiness check so a whitespace-only tail is dropped.
	    remainder = long_string[start:].strip()
	    if remainder:
	        sentences.append(remainder)

	    return sentences


	# generateChart creates the circular bias percentage chart
	# using the QuickChart library, which renders charts as images behind a URL
	def generateChart(bias_percentage):
	    """Return an HTML snippet showing *bias_percentage* as a radial gauge.

	    Args:
	        bias_percentage: Bias score as a fraction; it is multiplied by 100
	            and rounded to zero decimals before being placed in the chart.

	    Returns:
	        A ``<div>`` (max height 360px, scrollable) wrapping an ``<img>`` whose
	        source is the URL produced by ``QuickChart.get_url()``.
	    """
	    chart = QuickChart()
	    chart.width = 500
	    chart.height = 300
	    chart.version = '2'

	    gauge_value = str(round(bias_percentage * 100, 0))

	    # The config is a JavaScript object literal sent as a string; the gauge
	    # value is spliced in between the two halves below.
	    config_head = """{
	type: 'radialGauge',
	data: {
	datasets: [{
	data: ["""
	    config_tail = """],
	backgroundColor: getGradientFillHelper('horizontal', ['red', 'blue']),
	}]
	},
	options: {
	// See https://github.com/pandameister/chartjs-chart-radial-gauge#options
	domain: [0, 100],
	trackColor: '#f0f8ff',
	centerPercentage: 90,
	centerArea: {
	text: (val) => val + '%',
	},
	}
	}"""
	    chart.config = config_head + gauge_value + config_tail

	    url = chart.get_url()
	    container_open = "<div style='max-width:100%; max-height:360px; overflow:auto'>"
	    return f'{container_open}<img src="{url}"/></div>'