import os

import pandas as pd
from dotenv import load_dotenv
from quickchart import QuickChart

from gcode import predict_text_classification_single_label_sample

load_dotenv()

# this dictionary contains the kinds of bias and the Vertex endpoint IDs
list_of_biases_and_endpts = {
    "Gender Bias": {
        "bias_type": "gender_bias",
        "endpoint_id": os.environ.get("GENDER_ENDPOINT_ID"),
    },
    "Racial Bias": {
        "bias_type": "racial_bias",
        "endpoint_id": os.environ.get("RACIAL_ENDPOINT_ID"),
    },
    "Political Bias": {
        "bias_type": "political_bias",
        "endpoint_id": os.environ.get("POLITICAL_ENDPOINT_ID"),
    },
    "Hate Speech": {
        "bias_type": "hate_speech",
        "endpoint_id": os.environ.get("HATE_ENDPOINT_ID"),
    },
}

# this dictionary keeps track of the position of the "biased" confidence score
# (if order = 1, the value at index 1 is the bias confidence;
#  if order = 0, the value at index 0 is the bias confidence)
order_in_confidence = {
    "gender_bias": 1,
    "racial_bias": 1,
    "political_bias": 0,
    "hate_speech": 0,
}


# make_preds is the top-level entry point: it delegates to predict() for the
# scores and to generateChart() for the gauge markup
def make_preds(content, bias_type):
    """Score ``content`` for one kind of bias and build the UI output values.

    Args:
        content: The text to analyse.
        bias_type: A key of ``list_of_biases_and_endpts`` (for example
            ``"Gender Bias"``).

    Returns:
        A tuple ``(pos_tokens, bias_scores, html)`` where

        * ``pos_tokens`` is a list of ``(text, label)`` tuples for the
          highlighted-text component: each chunk is labelled with
          ``bias_type`` when its score is above 0.5, otherwise ``None``,
          and is followed by a ``(" ", None)`` separator;
        * ``bias_scores`` maps ``bias_type`` to the bias percentage
          returned by ``predict`` (used for the bar chart);
        * ``html`` is whatever ``generateChart`` returns for that percentage.

    Raises:
        KeyError: If ``bias_type`` is not a key of
            ``list_of_biases_and_endpts``.
    """
    bias = list_of_biases_and_endpts[bias_type]
    df, bias_percentage = predict(content, bias["bias_type"], bias["endpoint_id"])

    # data for the bar chart of the bias %
    bias_scores = {bias_type: bias_percentage}

    # data for the highlighted-text component; iterate the two columns in
    # lockstep instead of indexing each cell by label
    pos_tokens = []
    for chunk, score in zip(df['content'], df['predictions']):
        pos_tokens.append((chunk, bias_type if score > 0.5 else None))
        pos_tokens.append((" ", None))

    # quickchart is used to create a radial gauge chart
    html = generateChart(bias_percentage)

    # the three values returned here are used by the UI output components
    return pos_tokens, bias_scores, html


# the predict function actually generates the predictions for the user content
# and it returns a dataframe containing
# the 'content' and 'predictions' columns. The predictions column contains the
# bias confidence score.
# predict function also returns the bias percentage
def predict(content, bias_type, endpoint_id):
    """Classify ``content`` chunk by chunk and summarise how much is biased.

    The text is split into 20-word chunks, each chunk is sent to the
    endpoint, and the confidence at position
    ``order_in_confidence[bias_type]`` is recorded for every prediction
    returned. The resulting table is also written to
    ``preds_{bias_type}.csv`` in the current working directory.

    Args:
        content: The text to analyse.
        bias_type: Internal bias name; used to pick the confidence index and
            to name the CSV file.
        endpoint_id: Endpoint ID passed through to
            ``predict_text_classification_single_label_sample``.

    Returns:
        A tuple ``(df, bias_percentage)``: ``df`` has the columns
        ``'content'`` and ``'predictions'``; ``bias_percentage`` is the
        number of predictions scoring above 0.5 divided by the number of
        chunks, rounded to two decimals. It is ``0.0`` when ``content``
        contains no words.
    """
    # split the article into 20 word chunks
    chunks = split_into_20_word_chunks(content)
    project_id = os.environ.get("PROJECT_ID")

    rows = []
    possibly_biased = []

    # for each chunk in the content, create a prediction and record a row
    for chunk in chunks:
        predictions = predict_text_classification_single_label_sample(
            project=project_id,
            endpoint_id=endpoint_id,
            location="us-central1",
            content=chunk,
        )
        for prediction in predictions:
            res = float(dict(prediction)['confidences'][order_in_confidence[bias_type]])
            rows.append({'content': chunk, 'predictions': res})
            if res > 0.5:
                possibly_biased.append(chunk)

    # build the dataframe in one go instead of growing it one row at a time
    df = pd.DataFrame(rows, columns=['content', 'predictions'])

    # save the dataframe as a csv file
    df.to_csv(f'preds_{bias_type}.csv')

    # no chunks means nothing can be biased; avoids dividing by zero
    bias_percentage = round(len(possibly_biased) / len(chunks), 2) if chunks else 0.0
    return df, bias_percentage


# this function splits the content into 20 word chunks
def split_into_20_word_chunks(long_string):
    """Return ``long_string`` split on whitespace into chunks of up to 20 words.

    Words inside a chunk are joined with single spaces; the last chunk may
    be shorter. A string without words yields an empty list.
    """
    chunk_size = 20
    words = long_string.split()
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]


# this function splits the content into sentences
def split_into_sentences(long_string):
    """Return the sentences of ``long_string``, split after '.', '?' or '!'.

    Each sentence keeps its terminating punctuation mark and has the
    surrounding whitespace stripped. Text after the last punctuation mark is
    returned as a final sentence unless it is only whitespace.
    """
    punctuation_marks = {'.', '?', '!'}
    sentences = []
    start = 0
    for position, char in enumerate(long_string):
        if char in punctuation_marks:
            sentences.append(long_string[start:position + 1].strip())
            start = position + 1

    # trailing text without closing punctuation; skip it when it is blank
    remainder = long_string[start:].strip()
    if remainder:
        sentences.append(remainder)
    return sentences


# generateChart function creates the circular bias percentage chart
# it uses the quickChart library which is used to plot
# charts and graphs
def generateChart(bias_percentage):
    """Build the HTML snippet showing the bias percentage as a radial gauge.

    Args:
        bias_percentage: Fraction of the content flagged as biased; it is
            multiplied by 100 and rounded before being put on the gauge,
            whose domain is 0 to 100.

    Returns:
        An HTML string: a ``<div>`` containing an ``<img>`` whose source is
        the chart URL produced by QuickChart.
    """
    # local import: the file-level import block is outside this function
    from html import escape

    qc = QuickChart()
    qc.width = 500
    qc.height = 300
    qc.version = '2'

    gauge_value = str(round(bias_percentage * 100, 0))

    # Config can be set as a string or as a nested dict. It must stay
    # multi-line: the `//` comment inside it is a JavaScript line comment and
    # would swallow the rest of the config if everything were on one line.
    qc.config = """{
        type: 'radialGauge',
        data: {
            datasets: [{
                data: [""" + gauge_value + """],
                backgroundColor: getGradientFillHelper('horizontal', ['red', 'blue']),
            }]
        },
        options: {
            // See https://github.com/pandameister/chartjs-chart-radial-gauge#options
            domain: [0, 100],
            trackColor: '#f0f8ff',
            centerPercentage: 90,
            centerArea: {
                text: (val) => val + '%',
            },
        }
    }"""

    url = qc.get_url()

    # NOTE(review): the previous markup was empty, so the chart URL never
    # reached the page. The wrapper below is a minimal reconstruction;
    # restore any styling attributes the UI relied on.
    image = f'<img src="{escape(url, quote=True)}" alt="Bias percentage gauge"/>'
    return "<div>\n" + image + "\n</div>"