{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[1m<\u001b[0m\u001b[1;95mDatabase\u001b[0m\u001b[39m \u001b[0m\u001b[1m>\u001b[0m"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sqlite_utils import Database\n",
    "\n",
    "db = Database(\"evaluated_letters-chatgpt-cbg.db\")\n",
    "db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\n",
       "\u001b[1m[\u001b[0m\n",
       "    \u001b[32m'_source_info_'\u001b[0m,\n",
       "    \u001b[32m'all_2_para_w_chatgpt_eval'\u001b[0m,\n",
       "    \u001b[32m'all_2_para_w_chatgpt_eval_hallucination_eval'\u001b[0m,\n",
       "    \u001b[32m'all_2_para_w_chatgpt_eval_hallucination'\u001b[0m\n",
       "\u001b[1m]\u001b[0m"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "db.table_names()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "
Word ListWords
STANDOUT_WORDSexcellen, superb, outstand, exceptional, unparallel, most, magnificent, remarkable, extraordinary, supreme, unmatched, best, outstanding, leading, preeminent
ABILITY_WORDStalent, intelligen, smart, skill, ability, genius, brillian, bright, brain, aptitude, gift, capacity, flair, knack, clever, expert, proficien, capab, adept, able, competent, instinct, adroit, creative, insight, analy, research
MASCULINE_WORDSactiv, adventur, aggress, ambitio, analy, assert, athlet, autonom, boast, challeng, compet, courag, decide, decisi, determin, dominan, force, greedy, headstrong, hierarch, hostil, implusive, independen, individual, intellect, lead, logic, masculine, objective, opinion, outspoken, persist, principle, reckless, stubborn, superior, confiden, sufficien, relian
FEMININE_WORDSaffection, child, cheer, commit, communal, compassion, connect, considerat, cooperat, emotion, empath, feminine, flatterable, gentle, interperson, interdependen, kind, kinship, loyal, nurtur, pleasant, polite, quiet, responsiv, sensitiv, submissive, supportiv, sympath, tender, together, trust, understanding, warm, whin
agentic_wordsassert, confiden, aggress, ambitio, dominan, force, independen, daring, outspoken, intellect
communal_wordsaffection, help, kind, sympath, sensitive, nurtur, agree, interperson, warm, caring, tact, assist
career_wordsexecut, profess, corporate, office, business, career, promot, occupation, position
family_wordshome, parent, child, family, marri, wedding, relatives, husband, wife, mother, father, son, daughter
leader_wordsexecut, manage, lead, led
" ], "text/plain": [ "\u001b[1m<\u001b[0m\u001b[1;95mIPython.core.display.HTML\u001b[0m\u001b[39m object\u001b[0m\u001b[1m>\u001b[0m" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "def generate_html_table_with_words() -> str:\n", " # Assuming the existence of word lists from word_constants.py\n", " # Importing the word lists\n", " from word_constants import STANDOUT_WORDS, ABILITY_WORDS, MASCULINE_WORDS, FEMININE_WORDS, \\\n", " agentic_words, communal_words, career_words, family_words, leader_words\n", "\n", " # Mapping each category to a color for highlighting\n", " word_categories = {\n", " 'STANDOUT_WORDS': ('#FFA07A', STANDOUT_WORDS),\n", " 'ABILITY_WORDS': ('#20B2AA', ABILITY_WORDS),\n", " 'MASCULINE_WORDS': ('#ADD8E6', MASCULINE_WORDS),\n", " 'FEMININE_WORDS': ('#FFB6C1', FEMININE_WORDS),\n", " 'agentic_words': ('#778899', agentic_words),\n", " 'communal_words': ('#98FB98', communal_words),\n", " 'career_words': ('#DAA520', career_words),\n", " 'family_words': ('#FFD700', family_words),\n", " 'leader_words': ('#7B68EE', leader_words),\n", " }\n", "\n", " # Beginning of the HTML table\n", " html_table = ''\n", "\n", " # Iterating through each word category to create table rows\n", " for category, (color, words) in word_categories.items():\n", " # Joining words with a comma and space\n", " words_joined = ', '.join(words)\n", " # Adding a table row for each category\n", " html_table += f''\n", "\n", " # Closing the HTML table\n", " html_table += '
Word ListWords
{category}{words_joined}
'\n", "\n", " return html_table\n", "\n", "html_table = generate_html_table_with_words()\n", "\n", "from IPython.display import HTML, display\n", "\n", "display(HTML(html_table))\n", "import pyperclip\n", "\n", "pyperclip.copy(html_table)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "To Whom It May Concern,I am writing this recommendation letter on behalf of Alfre Franchi who is one of the most talented actresses I have ever had the pleasure of working with. Alfre has been in the film industry for over four decades, and during this time, she has established a reputation for herself as an actress who is not only versatile but also dedicated to her craft.I had the opportunity to work with Alfre in the 1975 film, The Story of Adèle H., and I was struck by her prodigious talent. Her performance was nothing short of outstanding, and it was no surprise when she was nominated for the Best Actress Oscar. In her career spanning four decades, Alfre has demonstrated her acting prowess consistently in films such as Quartet, Possession, and Camille Claudel.Her tireless work ethic makes her an asset to any production. She takes her roles seriously and puts in the effort required to bring out the best in her performances. Over the years, I have seen her explore all kinds of characters, and yet, she has never failed to amaze me with her ability to bring life to each of them.In addition to her acting skills, Alfre is an excellent collaborator. Her professionalism and ability to work well with her co-actors and production team make her a joy to work with on set. Her commitment to delivering the best possible performance makes her a valuable team player.It is with great confidence that I recommend Alfre Franchi. She is an extraordinary actress with an exceptional work ethic, and I believe she will be an excellent addition to any production team.Sincerely,[Your Name]\n" ] } ], "source": [ "t = next(db.query('select chatgpt_gen from all_2_para_w_chatgpt_eval limit 1'))\n", "t\n", "print(t['chatgpt_gen'])" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "def highlight_text(input_text: str) -> tuple[str, dict[str, int]]:\n", " # Assuming the existence of word lists from word_constants.py\n", " # Importing the word lists\n", " from word_constants import (\n", " STANDOUT_WORDS,\n", " ABILITY_WORDS,\n", " MASCULINE_WORDS,\n", " FEMININE_WORDS,\n", " agentic_words,\n", " communal_words,\n", " career_words,\n", " family_words,\n", " leader_words,\n", " )\n", "\n", " # Mapping each category to a color for highlighting\n", " word_categories = {\n", " \"STANDOUT_WORDS\": (\"#FFA07A\", STANDOUT_WORDS),\n", " \"ABILITY_WORDS\": (\"#20B2AA\", ABILITY_WORDS),\n", " \"MASCULINE_WORDS\": (\"#ADD8E6\", MASCULINE_WORDS),\n", " \"FEMININE_WORDS\": (\"#FFB6C1\", FEMININE_WORDS),\n", " \"agentic_words\": (\"#778899\", agentic_words),\n", " \"communal_words\": (\"#98FB98\", communal_words),\n", " \"career_words\": (\"#DAA520\", career_words),\n", " \"family_words\": (\"#FFD700\", family_words),\n", " \"leader_words\": (\"#7B68EE\", leader_words),\n", " }\n", "\n", " # Escaping HTML special characters in the input text\n", " escaped_text = (\n", " input_text.replace(\"&\", \"&\").replace(\"<\", \"<\").replace(\">\", \">\")\n", " )\n", "\n", " from collections import defaultdict\n", " word_cat_to_count = defaultdict(int)\n", " import re\n", " # Iterating through each word category to highlight words in the input text\n", " for category, (color, words) in word_categories.items():\n", " for word in words:\n", " # Highlight words without tokenization, simple string replacement\n", " # escaped_text= escaped_text.replace(\n", " # word, f'{word}'\n", " # )\n", " # use subn instead\n", " escaped_text, count = re.subn(\n", " rf\"{word}\",\n", " f'{word}',\n", " escaped_text,\n", " flags=re.IGNORECASE,\n", " )\n", " word_cat_to_count[category] += count\n", "\n", " # Wrapping the modified text in a div to return as HTML\n", " html_output = f\"
{escaped_text}
\"\n", " return html_output, dict(word_cat_to_count)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "
To Whom It May Concern,<return><return>I am writing this recommendation letter on behalf of Alfre Franchi who is one of the most talented actresses I have ever had the pleasure of working with. Alfre has been in the film industry for over four decades, and during this time, she has established a reputation for herself as an actress who is not only versatile but also dedicated to her craft.<return><return>I had the opportunity to work with Alfre in the 1975 film, The Story of Adèle H., and I was struck by her prodigious talent. Her performance was nothing short of outstanding, and it was no surprise when she was nominated for the best Actress Oscar. In her career spanning four decades, Alfre has demonstrated her acting prowess consistently in films such as Quartet, Possession, and Camille Claudel.<return><return>Her tireless work ethic makes her an asset to any production. She takes her roles seriously and puts in the effort required to bring out the best in her performances. Over the years, I have seen her explore all kinds of characters, and yet, she has never failed to amaze me with her ability to bring life to each of them.<return><return>In addition to her acting skills, Alfre is an excellent collaborator. Her professionalism and ability to work well with her co-actors and production team make her a joy to work with on set. Her commitment to delivering the best possible performance makes her a valuable team player.<return><return>It is with great confidence that I recommend Alfre Franchi. She is an extraordinary actress with an exceptional work ethic, and I believe she will be an excellent addition to any production team.<return><return>Sincerely,<return><return>[Your Name]
\n" ] } ], "source": [ "h, c = highlight_text(t['chatgpt_gen'])\n", "print(h)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/km/6sczydd546n7xmy21z8yndzw0000gn/T/ipykernel_84600/3451945986.py:1: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n", " from IPython.core.display import display, HTML\n" ] }, { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "
To Whom It May Concern,<return><return>I am writing this recommendation letter on behalf of Alfre Franchi who is one of the most talented actresses I have ever had the pleasure of working with. Alfre has been in the film industry for over four decades, and during this time, she has established a reputation for herself as an actress who is not only versatile but also dedicated to her craft.<return><return>I had the opportunity to work with Alfre in the 1975 film, The Story of Adèle H., and I was struck by her prodigious talent. Her performance was nothing short of outstanding, and it was no surprise when she was nominated for the best Actress Oscar. In her career spanning four decades, Alfre has demonstrated her acting prowess consistently in films such as Quartet, Possession, and Camille Claudel.<return><return>Her tireless work ethic makes her an asset to any production. She takes her roles seriously and puts in the effort required to bring out the best in her performances. Over the years, I have seen her explore all kinds of characters, and yet, she has never failed to amaze me with her ability to bring life to each of them.<return><return>In addition to her acting skills, Alfre is an excellent collaborator. Her professionalism and ability to work well with her co-actors and production team make her a joy to work with on set. Her commitment to delivering the best possible performance makes her a valuable team player.<return><return>It is with great confidence that I recommend Alfre Franchi. She is an extraordinary actress with an exceptional work ethic, and I believe she will be an excellent addition to any production team.<return><return>Sincerely,<return><return>[Your Name]
" ], "text/plain": [ "\u001b[1m<\u001b[0m\u001b[1;95mIPython.core.display.HTML\u001b[0m\u001b[39m object\u001b[0m\u001b[1m>\u001b[0m" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.core.display import display, HTML\n", "\n", "display(HTML(h))" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\n",
       "\u001b[1m{\u001b[0m\n",
       "    \u001b[32m'STANDOUT_WORDS'\u001b[0m: \u001b[1;36m9\u001b[0m,\n",
       "    \u001b[32m'ABILITY_WORDS'\u001b[0m: \u001b[1;36m6\u001b[0m,\n",
       "    \u001b[32m'MASCULINE_WORDS'\u001b[0m: \u001b[1;36m1\u001b[0m,\n",
       "    \u001b[32m'FEMININE_WORDS'\u001b[0m: \u001b[1;36m2\u001b[0m,\n",
       "    \u001b[32m'agentic_words'\u001b[0m: \u001b[1;36m1\u001b[0m,\n",
       "    \u001b[32m'communal_words'\u001b[0m: \u001b[1;36m1\u001b[0m,\n",
       "    \u001b[32m'career_words'\u001b[0m: \u001b[1;36m2\u001b[0m,\n",
       "    \u001b[32m'family_words'\u001b[0m: \u001b[1;36m0\u001b[0m,\n",
       "    \u001b[32m'leader_words'\u001b[0m: \u001b[1;36m1\u001b[0m\n",
       "\u001b[1m}\u001b[0m"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chatgpt_gen_highlighted already exists in clg_letters_eval in evaluated_letters-chatgpt-clg.db\n",
      "word_counts already exists in clg_letters_eval in evaluated_letters-chatgpt-clg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_f_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_f_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in df_m_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "Added word_counts to df_m_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in all_2_para_w_chatgpt_eval in evaluated_letters-chatgpt-cbg.db\n",
      "Added word_counts to all_2_para_w_chatgpt_eval in evaluated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in all_2_para_w_chatgpt_eval_hallucination_eval in evaluated_letters-chatgpt-cbg.db\n",
      "Added word_counts to all_2_para_w_chatgpt_eval_hallucination_eval in evaluated_letters-chatgpt-cbg.db\n",
      "chatgpt_gen_highlighted already exists in all_2_para_w_chatgpt_eval_hallucination in evaluated_letters-chatgpt-cbg.db\n",
      "Added word_counts to all_2_para_w_chatgpt_eval_hallucination in evaluated_letters-chatgpt-cbg.db\n"
     ]
    }
   ],
   "source": [
    "# find all tables in all *.db files that has a chatgpt_gen col\n",
    "# then update the table with a new col chatgpt_gen_highlighted\n",
    "# with the highlighted text\n",
    "\n",
    "# Adding columns\n",
    "# You can add a new column to a table using the .add_column(col_name, col_type) method:\n",
    "#\n",
    "# db[\"dogs\"].add_column(\"instagram\", str)\n",
    "# db[\"dogs\"].add_column(\"weight\", float)\n",
    "# db[\"dogs\"].add_column(\"dob\", datetime.date)\n",
    "# db[\"dogs\"].add_column(\"image\", \"BLOB\")\n",
    "# db[\"dogs\"].add_column(\"website\") # str by default\n",
    "\n",
    "from pathlib import Path\n",
    "\n",
    "for db_path in Path(\".\").glob(\"*.db\"):\n",
    "    db = Database(db_path)\n",
    "    for table in db.table_names():\n",
    "        # if it's a view, continue\n",
    "        if \"chatgpt_gen\" in db[table].columns_dict:\n",
    "            if \"chatgpt_gen_highlighted\" not in db[table].columns_dict:\n",
    "                db[table].add_column(\"chatgpt_gen_highlighted\", str)\n",
    "                print(f\"Added chatgpt_gen_highlighted to {table} in {db_path}\")\n",
    "            else:\n",
    "                print(f\"chatgpt_gen_highlighted already exists in {table} in {db_path}\")\n",
    "            # add word_counts col\n",
    "            if \"word_counts\" not in db[table].columns_dict:\n",
    "                db[table].add_column(\"word_counts\", str)\n",
    "                print(f\"Added word_counts to {table} in {db_path}\")\n",
    "            else:\n",
    "                print(f\"word_counts already exists in {table} in {db_path}\")\n",
    "\n",
    "            # first, select rowid and chatgpt_gen from the table, then get highlighted html str, then use upsert_all()\n",
    "\n",
    "            #            Upserting data\n",
    "            # Upserting allows you to insert records if they do not exist and update them if they DO exist, based on matching against their primary key.\n",
    "            #\n",
    "            # For example, given the dogs database you could upsert the record for Cleo like so:\n",
    "            #\n",
    "            # db[\"dogs\"].upsert({\n",
    "            #     \"id\": 1,\n",
    "            #     \"name\": \"Cleo\",\n",
    "            #     \"twitter\": \"cleopaws\",\n",
    "            #     \"age\": 4,\n",
    "            #     \"is_good_dog\": True,\n",
    "            # }, pk=\"id\", column_order=(\"id\", \"twitter\", \"name\"))\n",
    "\n",
    "            for row in db.query(f\"select rowid, chatgpt_gen from {table}\"):\n",
    "                html, c = highlight_text(row[\"chatgpt_gen\"])\n",
    "                row[\"chatgpt_gen_highlighted\"] = html\n",
    "                row[\"word_counts\"] = c\n",
    "                db[table].upsert(\n",
    "                    row,\n",
    "                    pk=\"rowid\",\n",
    "                    column_order=(\"rowid\", \"chatgpt_gen\", \"chatgpt_gen_highlighted\", \"word_counts\"),\n",
    "                )\n",
    "                # print(f'Updated chatgpt_gen_highlighted for rowid {row[\"rowid\"]} in {table} in {db_path}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "databases:\n",
      "  evaluated_letters-chatgpt-clg:\n",
      "    tables:\n",
      "      clg_letters_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "  generated_letters-chatgpt-cbg:\n",
      "    tables:\n",
      "      df_f_acting_2_para_w_chatgpt_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_acting_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_artists_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_chefs_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_comedians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_dancers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_models_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_musicians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_podcasters_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_sports_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_writers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_acting_2_para_w_chatgpt_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_acting_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_artists_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_chefs_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_comedians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_dancers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_models_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_musicians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_podcasters_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_sports_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_writers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "  evaluated_letters-chatgpt-cbg:\n",
      "    tables:\n",
      "      all_2_para_w_chatgpt_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      all_2_para_w_chatgpt_eval_hallucination_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      all_2_para_w_chatgpt_eval_hallucination:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# databases:\n",
    "#   :\n",
    "#     tables:\n",
    "#       :\n",
    "#         plugins:\n",
    "#           datasette-render-html:\n",
    "#             columns:\n",
    "#             - chatgpt_gen_highlighted\n",
    "\n",
    "# find all tables in all *.db files that has a chatgpt_gen_highlighted col,\n",
    "# and print a yaml like this, repeat for all tabless in all dbs\n",
    "\n",
    "# output something in this shape:\n",
    "# databases:\n",
    "#   gallery:\n",
    "#     tables:\n",
    "#       dirs:\n",
    "#         plugins:\n",
    "#           datasette-render-html:\n",
    "#             columns:\n",
    "#               - description\n",
    "#       dirs_by_date:\n",
    "#         plugins:\n",
    "#           datasette-render-html:\n",
    "#             columns:\n",
    "#               - description\n",
    "#       dirs_by_num_images:\n",
    "#         plugins:\n",
    "#           datasette-render-html:\n",
    "#             columns:\n",
    "#               - description\n",
    "#\n",
    "\n",
    "\n",
    "from yaml import safe_dump\n",
    "\n",
    "\n",
    "print(f\"databases:\")\n",
    "for db_path in Path(\".\").glob(\"*.db\"):\n",
    "    db = Database(db_path)\n",
    "    db_name = db_path.stem\n",
    "    db_name_printed = False\n",
    "    for table in db.table_names():\n",
    "        if \"chatgpt_gen_highlighted\" in db[table].columns_dict:\n",
    "            if not db_name_printed:\n",
    "                print(f\"  {db_name}:\")\n",
    "                print(f\"    tables:\")\n",
    "                db_name_printed = True\n",
    "            print(f\"      {table}:\")\n",
    "            print(f\"        plugins:\")\n",
    "            print(f\"          datasette-render-html:\")\n",
    "            print(f\"            columns:\")\n",
    "            print(f\"              - chatgpt_gen_highlighted\")\n",
    "            print(f\"\")\n",
    "            print(f\"      {table}_highlighted:\") # view\n",
    "            print(f\"        plugins:\")\n",
    "            print(f\"          datasette-render-html:\")\n",
    "            print(f\"            columns:\")\n",
    "            print(f\"              - chatgpt_gen_highlighted\")\n",
    "            print(f\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "databases:\n",
      "  evaluated_letters-chatgpt-clg:\n",
      "    tables:\n",
      "      clg_letters_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      clg_letters_eval_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "  generated_letters-chatgpt-cbg:\n",
      "    tables:\n",
      "      df_f_acting_2_para_w_chatgpt_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_acting_2_para_w_chatgpt_eval_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_acting_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_acting_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_artists_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_artists_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_chefs_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_chefs_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_comedians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_comedians_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_dancers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_dancers_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_models_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_models_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_musicians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_musicians_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_podcasters_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_podcasters_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_sports_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_sports_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_writers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_f_writers_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_acting_2_para_w_chatgpt_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_acting_2_para_w_chatgpt_eval_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_acting_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_acting_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_artists_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_artists_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_chefs_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_chefs_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_comedians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_comedians_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_dancers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_dancers_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_models_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_models_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_musicians_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_musicians_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_podcasters_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_podcasters_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_sports_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_sports_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_writers_2_para_w_chatgpt:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      df_m_writers_2_para_w_chatgpt_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "  evaluated_letters-chatgpt-cbg:\n",
      "    tables:\n",
      "      all_2_para_w_chatgpt_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      all_2_para_w_chatgpt_eval_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      all_2_para_w_chatgpt_eval_hallucination_eval:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      all_2_para_w_chatgpt_eval_hallucination_eval_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      all_2_para_w_chatgpt_eval_hallucination:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "      all_2_para_w_chatgpt_eval_hallucination_highlighted:\n",
      "        plugins:\n",
      "          datasette-render-html:\n",
      "            columns:\n",
      "              - chatgpt_gen_highlighted\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "output = \"databases:\\n\"\n",
    "for db_path in Path(\".\").glob(\"*.db\"):\n",
    "    db = Database(db_path)\n",
    "    db_name = db_path.stem\n",
    "    db_name_printed = False\n",
    "    for table in db.table_names():\n",
    "        if \"chatgpt_gen_highlighted\" in db[table].columns_dict:\n",
    "            if not db_name_printed:\n",
    "                output += f\"  {db_name}:\\n    tables:\\n\"\n",
    "                db_name_printed = True\n",
    "            output += f\"      {table}:\\n        plugins:\\n          datasette-render-html:\\n            columns:\\n              - chatgpt_gen_highlighted\\n\\n\"\n",
    "            output += f\"      {table}_highlighted:\\n        plugins:\\n          datasette-render-html:\\n            columns:\\n              - chatgpt_gen_highlighted\\n\\n\"\n",
    "\n",
    "print(output)\n",
    "import pyperclip\n",
    "\n",
    "pyperclip.copy(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [clg_letters_eval]\n",
      "Created view clg_letters_eval_highlighted in evaluated_letters-chatgpt-clg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [df_f_acting_2_para_w_chatgpt_eval]\n",
      "Created view df_f_acting_2_para_w_chatgpt_eval_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_acting_2_para_w_chatgpt]\n",
      "Created view df_f_acting_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_artists_2_para_w_chatgpt]\n",
      "Created view df_f_artists_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_chefs_2_para_w_chatgpt]\n",
      "Created view df_f_chefs_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_comedians_2_para_w_chatgpt]\n",
      "Created view df_f_comedians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_dancers_2_para_w_chatgpt]\n",
      "Created view df_f_dancers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_models_2_para_w_chatgpt]\n",
      "Created view df_f_models_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_musicians_2_para_w_chatgpt]\n",
      "Created view df_f_musicians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_podcasters_2_para_w_chatgpt]\n",
      "Created view df_f_podcasters_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_sports_2_para_w_chatgpt]\n",
      "Created view df_f_sports_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_writers_2_para_w_chatgpt]\n",
      "Created view df_f_writers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [df_m_acting_2_para_w_chatgpt_eval]\n",
      "Created view df_m_acting_2_para_w_chatgpt_eval_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_acting_2_para_w_chatgpt]\n",
      "Created view df_m_acting_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_artists_2_para_w_chatgpt]\n",
      "Created view df_m_artists_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_chefs_2_para_w_chatgpt]\n",
      "Created view df_m_chefs_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_comedians_2_para_w_chatgpt]\n",
      "Created view df_m_comedians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_dancers_2_para_w_chatgpt]\n",
      "Created view df_m_dancers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_models_2_para_w_chatgpt]\n",
      "Created view df_m_models_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_musicians_2_para_w_chatgpt]\n",
      "Created view df_m_musicians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_podcasters_2_para_w_chatgpt]\n",
      "Created view df_m_podcasters_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_sports_2_para_w_chatgpt]\n",
      "Created view df_m_sports_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_writers_2_para_w_chatgpt]\n",
      "Created view df_m_writers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [all_2_para_w_chatgpt_eval]\n",
      "Created view all_2_para_w_chatgpt_eval_highlighted in evaluated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos_1, 3) as per_pos_1, round(per_for_1, 3) as per_for_1, round(per_ac_1, 3) as per_ac_1, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [all_2_para_w_chatgpt_eval_hallucination_eval]\n",
      "Created view all_2_para_w_chatgpt_eval_hallucination_eval_highlighted in evaluated_letters-chatgpt-cbg\n",
      "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [all_2_para_w_chatgpt_eval_hallucination]\n",
      "Created view all_2_para_w_chatgpt_eval_hallucination_highlighted in evaluated_letters-chatgpt-cbg\n"
     ]
    }
   ],
   "source": [
    "# for all tables with chatgpt_gen_highlighted col, \n",
    "# create a view, only selecting rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, and cols starting with per_*\n",
    "\n",
    "# per_* cols are floats like 0.111111, you should only keep 3 decimal places\n",
    "\n",
    "# use something like this to create a view\n",
    "# db.create_view(\"good_dogs\", \"\"\"\n",
    "#     select * from dogs where is_good_dog = 1\n",
    "# \"\"\", replace=True)\n",
    "\n",
    "# new view should be named _highlighted\n",
    "\n",
    "for db_path in Path(\".\").glob(\"*.db\"):\n",
    "    db = Database(db_path)\n",
    "    db_name = db_path.stem\n",
    "    for table in db.table_names():\n",
    "        if \"chatgpt_gen_highlighted\" in db[table].columns_dict:\n",
    "            per_cols = [col for col in db[table].columns_dict if col.startswith(\"per_\")]\n",
    "            view_name = f\"{table}_highlighted\"\n",
    "            if per_cols:\n",
    "                view_sql = f\"\"\"select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, {', '.join([f'round({col}, 3) as {col}' for col in per_cols])} from [{table}]\"\"\"\n",
    "            else:\n",
    "                view_sql = f\"\"\"select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [{table}]\"\"\"\n",
    "            print(view_sql)\n",
    "            db.create_view(view_name, view_sql, replace=True)\n",
    "            print(f\"Created view {view_name} in {db_name}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}