{ "cells": [ { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('DemoData.csv')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [
 "import pandas as pd\n",
 "import yaml\n",
 "import os\n",
 "import ast\n",
 "\n",
 "# Folder that receives one YAML file per DataFrame row\n",
 "CONFIG_DIR = 'configs'\n",
 "\n",
 "\n",
 "def parse_screenshots(raw_value):\n",
 "    \"\"\"Turn a 'Screenshots' cell into a Python list.\n",
 "\n",
 "    The column stores a list as its Python literal text, for example\n",
 "    ['Images/WEAT1.png', 'Images/WEAT2.png'] kept as a string. A missing\n",
 "    (NaN) cell, or text that is not a valid literal, yields an empty list.\n",
 "    \"\"\"\n",
 "    try:\n",
 "        return ast.literal_eval(raw_value)\n",
 "    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):\n",
 "        # Only the failures literal_eval documents for malformed input are\n",
 "        # handled here; KeyboardInterrupt and genuine bugs still propagate\n",
 "        return []\n",
 "\n",
 "\n",
 "# exist_ok avoids the check-then-create race of os.path.exists + makedirs\n",
 "os.makedirs(CONFIG_DIR, exist_ok=True)\n",
 "\n",
 "# Write one YAML file per row, named after the row's Metaname\n",
 "for _, row in df.iterrows():\n",
 "    # Rows without a Metaname would otherwise all be written to 'nan.yaml'\n",
 "    if pd.isna(row['Metaname']):\n",
 "        continue\n",
 "    filename = str(row['Metaname']) + '.yaml'\n",
 "    # Keep every column except the filename key and the raw Screenshots text\n",
 "    data_dict = row.drop(['Metaname', 'Screenshots']).to_dict()\n",
 "    # Store Screenshots as a real list rather than its string form\n",
 "    data_dict['Screenshots'] = parse_screenshots(row['Screenshots'])\n",
 "    # Explicit encoding keeps the output independent of the platform locale\n",
 "    with open(os.path.join(CONFIG_DIR, filename), 'w', encoding='utf-8') as yamlfile:\n",
 "        yaml.dump(data_dict, yamlfile, default_flow_style=False)"
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GroupModalityLevelMetanameSuggested EvaluationWhat it is evaluatingConsiderationsLinkURLScreenshotsApplicable ModelsDatasetsHashtags
0BiasEvalsTextModelweatWord Embedding Association Test (WEAT)Associations and word embeddings based on Impl...Although based in human associations, general ...Semantics derived automatically from language ...https://researchportal.bath.ac.uk/en/publicati...['Images/WEAT1.png', 'Images/WEAT2.png']NaNNaNNaN
1BiasEvalsTextModelwefatWord Embedding Factual As\\nsociation Test (WEFAT)Associations and word embeddings based on Impl...Although based in human associations, general ...Semantics derived automatically from language ...https://researchportal.bath.ac.uk/en/publicati...NaNNaNNaNNaN
2BiasEvalsTextDatasetstereosetStereoSetProtected class stereotypesAutomating stereotype detection makes distingu...StereoSet: Measuring stereotypical bias in pre...https://arxiv.org/abs/2004.09456NaNNaNNaNNaN
3BiasEvalsTextDatasetcrwospairsCrow-S PairsProtected class stereotypesAutomating stereotype detection makes distingu...CrowS-Pairs: A Challenge Dataset for Measuring...https://arxiv.org/abs/2010.00133NaNNaNNaNNaN
4BiasEvalsTextOutputhonestHONEST: Measuring Hurtful Sentence Completion ...Protected class stereotypes and hurtful languageAutomating stereotype detection makes distingu...HONEST: Measuring Hurtful Sentence Completion ...https://aclanthology.org/2021.naacl-main.191.pdfNaNNaNNaNNaN
5BiasEvalsImageModelieatImage Embedding Association Test (iEAT)Embedding associationsAlthough based in human associations, general ...Image Representations Learned With Unsupervise...https://dl.acm.org/doi/abs/10.1145/3442188.344...NaNNaNNaNNaN
6BiasEvalsImageDatasetimagedataleakDataset leakage and model leakageGender and label biasNaNBalanced Datasets Are Not Enough: Estimating a...https://arxiv.org/abs/1811.08489NaNNaNNaNNaN
7BiasEvalsImageOutputstablebiasCharacterizing the variation in generated imagesNaNNaNStable bias: Analyzing societal representation...https://arxiv.org/abs/2303.11408NaNNaNNaNNaN
8BiasEvalsImageOutputhomoglyphbiasEffect of different scripts on text-to-image g...It evaluates generated images for cultural ste...NaNExploiting Cultural Biases via Homoglyphs in T...https://arxiv.org/pdf/2209.08891.pdfNaNNaNNaNNaN
9BiasEvalsAudioTaxonomy (?)notmyvoiceNot My Voice! A Taxonomy of Ethical and Safety...Lists harms of audio/speech generatorsNot necessarily evaluation but a good source o...Not My Voice! A Taxonomy of Ethical and Safety...https://arxiv.org/pdf/2402.01708.pdfNaNNaNNaNNaN
10BiasEvalsVideoOutputvideodiversemisinfoDiverse Misinformation: Impacts of Human Biase...Human led evaluations of deepfakes to understa...Repr. harm, incite violenceDiverse Misinformation: Impacts of Human Biase...https://arxiv.org/abs/2210.10026NaNNaNNaNNaN
11PrivacyNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " Group Modality Level Metaname \\\n", "0 BiasEvals Text Model weat \n", "1 BiasEvals Text Model wefat \n", "2 BiasEvals Text Dataset stereoset \n", "3 BiasEvals Text Dataset crwospairs \n", "4 BiasEvals Text Output honest \n", "5 BiasEvals Image Model ieat \n", "6 BiasEvals Image Dataset imagedataleak \n", "7 BiasEvals Image Output stablebias \n", "8 BiasEvals Image Output homoglyphbias \n", "9 BiasEvals Audio Taxonomy (?) notmyvoice \n", "10 BiasEvals Video Output videodiversemisinfo \n", "11 Privacy NaN NaN NaN \n", "\n", " Suggested Evaluation \\\n", "0 Word Embedding Association Test (WEAT) \n", "1 Word Embedding Factual As\\nsociation Test (WEFAT) \n", "2 StereoSet \n", "3 Crow-S Pairs \n", "4 HONEST: Measuring Hurtful Sentence Completion ... \n", "5 Image Embedding Association Test (iEAT) \n", "6 Dataset leakage and model leakage \n", "7 Characterizing the variation in generated images \n", "8 Effect of different scripts on text-to-image g... \n", "9 Not My Voice! A Taxonomy of Ethical and Safety... \n", "10 Diverse Misinformation: Impacts of Human Biase... \n", "11 NaN \n", "\n", " What it is evaluating \\\n", "0 Associations and word embeddings based on Impl... \n", "1 Associations and word embeddings based on Impl... \n", "2 Protected class stereotypes \n", "3 Protected class stereotypes \n", "4 Protected class stereotypes and hurtful language \n", "5 Embedding associations \n", "6 Gender and label bias \n", "7 NaN \n", "8 It evaluates generated images for cultural ste... \n", "9 Lists harms of audio/speech generators \n", "10 Human led evaluations of deepfakes to understa... \n", "11 NaN \n", "\n", " Considerations \\\n", "0 Although based in human associations, general ... \n", "1 Although based in human associations, general ... \n", "2 Automating stereotype detection makes distingu... \n", "3 Automating stereotype detection makes distingu... \n", "4 Automating stereotype detection makes distingu... 
\n", "5 Although based in human associations, general ... \n", "6 NaN \n", "7 NaN \n", "8 NaN \n", "9 Not necessarily evaluation but a good source o... \n", "10 Repr. harm, incite violence \n", "11 NaN \n", "\n", " Link \\\n", "0 Semantics derived automatically from language ... \n", "1 Semantics derived automatically from language ... \n", "2 StereoSet: Measuring stereotypical bias in pre... \n", "3 CrowS-Pairs: A Challenge Dataset for Measuring... \n", "4 HONEST: Measuring Hurtful Sentence Completion ... \n", "5 Image Representations Learned With Unsupervise... \n", "6 Balanced Datasets Are Not Enough: Estimating a... \n", "7 Stable bias: Analyzing societal representation... \n", "8 Exploiting Cultural Biases via Homoglyphs in T... \n", "9 Not My Voice! A Taxonomy of Ethical and Safety... \n", "10 Diverse Misinformation: Impacts of Human Biase... \n", "11 NaN \n", "\n", " URL \\\n", "0 https://researchportal.bath.ac.uk/en/publicati... \n", "1 https://researchportal.bath.ac.uk/en/publicati... \n", "2 https://arxiv.org/abs/2004.09456 \n", "3 https://arxiv.org/abs/2010.00133 \n", "4 https://aclanthology.org/2021.naacl-main.191.pdf \n", "5 https://dl.acm.org/doi/abs/10.1145/3442188.344... 
\n", "6 https://arxiv.org/abs/1811.08489 \n", "7 https://arxiv.org/abs/2303.11408 \n", "8 https://arxiv.org/pdf/2209.08891.pdf \n", "9 https://arxiv.org/pdf/2402.01708.pdf \n", "10 https://arxiv.org/abs/2210.10026 \n", "11 NaN \n", "\n", " Screenshots Applicable Models Datasets \\\n", "0 ['Images/WEAT1.png', 'Images/WEAT2.png'] NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "5 NaN NaN NaN \n", "6 NaN NaN NaN \n", "7 NaN NaN NaN \n", "8 NaN NaN NaN \n", "9 NaN NaN NaN \n", "10 NaN NaN NaN \n", "11 NaN NaN NaN \n", "\n", " Hashtags \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "5 NaN \n", "6 NaN \n", "7 NaN \n", "8 NaN \n", "9 NaN \n", "10 NaN \n", "11 NaN " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import urllib.request\n", "from bs4 import BeautifulSoup\n", "\n", "from pypdf import PdfReader \n", "from urllib.request import urlretrieve\n", "\n", "import pdfplumber\n", "\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://researchportal.bath.ac.uk/en/publications/semantics-derived-automatically-from-language-corpora-necessarily\n", "\n", " Semantics derived automatically from language corpora contain human-like biases\n", " — the University of Bath's research portal\n", "https://researchportal.bath.ac.uk/en/publications/semantics-derived-automatically-from-language-corpora-necessarily\n", "\n", " Semantics derived automatically from language corpora contain human-like biases\n", " — the University of Bath's research portal\n", "https://arxiv.org/abs/1903.10561\n", "[1903.10561] On Measuring Social Biases in Sentence Encoders\n", "https://dl.acm.org/doi/abs/10.5555/3454287.3455472\n", "Error\n", "https://arxiv.org/abs/2004.09456\n", "[2004.09456] StereoSet: 
Measuring stereotypical bias in pretrained language models\n", "https://arxiv.org/abs/2010.00133\n", "[2010.00133] CrowS-Pairs: A Challenge Dataset for Measuring Social Biases in Masked Language Models\n", "https://aclanthology.org/2021.naacl-main.191.pdf\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "HONEST: Measuring Hurtful Sentence Completion in Language Models\n", "nan\n", "Error\n", "https://aclanthology.org/2022.findings-acl.165.pdf\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "BBQ: A Hand-Built Bias Benchmark for Question Answering \n", "https://aclanthology.org/2022.findings-naacl.42.pdf\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "On Measuring Social Biases in Prompt-Based Multi-Task Learning\n" ] } ], "source": [
 "import io\n",
 "\n",
 "MAX_URLS = 10  # how many leading entries of df['URL'] to look up\n",
 "REQUEST_TIMEOUT = 30  # seconds to wait on each HTTP request\n",
 "\n",
 "\n",
 "def get_page_title(url, timeout=REQUEST_TIMEOUT):\n",
 "    \"\"\"Return the text of the title element of the HTML page at `url`.\n",
 "\n",
 "    Args:\n",
 "        url: address of the page to fetch.\n",
 "        timeout: seconds to wait for the server before giving up.\n",
 "\n",
 "    Raises:\n",
 "        AttributeError: the parsed document has no title element.\n",
 "\n",
 "    Errors raised by urlopen for a bad URL or a network failure are not\n",
 "    caught here and propagate to the caller.\n",
 "    \"\"\"\n",
 "    # Close the HTTP response deterministically instead of leaking it\n",
 "    with urllib.request.urlopen(url, timeout=timeout) as response:\n",
 "        # Name the parser so the result does not depend on which optional\n",
 "        # parsers are installed (and bs4 does not warn about guessing)\n",
 "        soup = BeautifulSoup(response, 'html.parser')\n",
 "    return soup.title.string\n",
 "\n",
 "\n",
 "def extract_pdf_title(url, timeout=REQUEST_TIMEOUT):\n",
 "    \"\"\"Return the first non-blank line of text in the PDF at `url`.\n",
 "\n",
 "    The PDF is downloaded into memory, so no 'temp.pdf' is left behind in\n",
 "    the working directory. Pages are scanned in order and blank lines are\n",
 "    skipped; an empty string is returned when no page yields any text.\n",
 "\n",
 "    Args:\n",
 "        url: address of the PDF to fetch.\n",
 "        timeout: seconds to wait for the server before giving up.\n",
 "    \"\"\"\n",
 "    with urllib.request.urlopen(url, timeout=timeout) as response:\n",
 "        pdf_bytes = io.BytesIO(response.read())\n",
 "    with pdfplumber.open(pdf_bytes) as pdf:\n",
 "        for page in pdf.pages:\n",
 "            # Guard in case a page has no extractable text (None or '')\n",
 "            page_text = page.extract_text() or ''\n",
 "            for line in page_text.split('\\n'):\n",
 "                # A blank first line must not end the search early\n",
 "                if line.strip():\n",
 "                    return line\n",
 "    return ''\n",
 "\n",
 "\n",
 "for url in df['URL'][:MAX_URLS]:\n",
 "    print(url)\n",
 "    # Rows without a URL hold NaN (a float): report them without any request\n",
 "    if not isinstance(url, str):\n",
 "        print('Error')\n",
 "        continue\n",
 "    try:\n",
 "        print(get_page_title(url))\n",
 "    except Exception:\n",
 "        # No usable HTML title; fall back to treating the URL as a PDF\n",
 "        try:\n",
 "            print(extract_pdf_title(url))\n",
 "        except Exception:\n",
 "            print('Error')"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "gradio", 
"language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }