Teddy Xinyuan Chen commited on
Commit
f571c33
·
unverified ·
0 Parent(s):

2024-04-11T22-51-57Z

Browse files
Files changed (13) hide show
  1. .gitattributes +36 -0
  2. .gitignore +164 -0
  3. Dockerfile +20 -0
  4. Makefile +41 -0
  5. README.md +10 -0
  6. annotate.ipynb +1157 -0
  7. index.py +32 -0
  8. metadata.json +564 -0
  9. metadata.yml +389 -0
  10. requirements.txt +32 -0
  11. start.sh +5 -0
  12. vercel.json +16 -0
  13. word_constants.py +20 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/kelly.db filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.db
2
+ *.csv
3
+ .vercel
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
164
+
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ # ADD https://static.simonwillison.net/static/2023/history.db /code/history.db
10
+ COPY ./start.sh /code/start.sh
11
+ COPY ./*.db /code/
12
+
13
+ # RUN sqlite-utils tables /code/history.db --counts
14
+ RUN chmod 755 -R /code/*.db
15
+ RUN chmod 755 /code/start.sh
16
+
17
+ COPY ./metadata.yml /code/metadata.yml
18
+
19
+ # CMD ["datasette", "/code/data/*.db", "-m", "/code/metadata.yml", "--host", "0.0.0.0", "--port", "7860"]
20
+ CMD ["bash", "start.sh"]
Makefile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://vercel.com/tddschn/biases-llm-reference-letters-datasette-vercel
2
+ VERCEL_PROJECT_NAME := biases-llm-reference-letters-datasette-vercel
3
+ THIRD_LEVEL_DOMAIN := kelly.datasette
4
+ VERCEL_PROJECT_DOMAIN_SETTINGS_URL := https://vercel.com/tddschn/$(VERCEL_PROJECT_NAME)/settings/domains
5
+
6
+ publish:
7
+ <metadata.yml yq -o json > metadata.json
8
+ # vercel
9
+ ~/.local/pipx/venvs/sqlite-utils/bin/python ~/config/scripts/sqlite_utils_enable_fts_all.py *.db
10
+ datasette publish vercel *.db --project $(VERCEL_PROJECT_NAME) -m metadata.yml --install datasette-search-all --install datasette-render-timestamps --install datasette-render-images --install datasette-uptime --install datasette-render-html --install datasette-pretty-json
11
+ open https://$(THIRD_LEVEL_DOMAIN).teddysc.me
12
+
13
+ preview-db:
14
+ datasette *.db -m metadata.yml
15
+
16
+ remove-fts:
17
+ ~/.local/pipx/venvs/sqlite-utils/bin/python ~/config/scripts/sqlite_utils_enable_fts_all.py -d *.db
18
+
19
+ # ingest:
20
+ # [[ -f llm-dra.db ]] && rm -v llm-dra.db
21
+ # ~/.local/pipx/venvs/sqlite-utils/bin/python ./ingest.py
22
+ # ~/.local/pipx/venvs/sqlite-utils/bin/python ~/config/scripts/sqlite_utils_enable_fts_all.py llm-dra.db
23
+ #
24
+ # publish-db:
25
+ # datasette publish vercel --project $(VERCEL_PROJECT_NAME) llm-dra.db --install datasette-search-all --install datasette-render-timestamps --install datasette-render-images --install datasette-uptime --install datasette-render-html \
26
+ # --install datasette-pretty-json
27
+ #
28
+ # db-all: ingest publish-db
29
+ # @echo 'Domain settings: $(VERCEL_PROJECT_DOMAIN_SETTINGS_URL)'
30
+
31
+ open-vercel-project-domain-settings:
32
+ open $(VERCEL_PROJECT_DOMAIN_SETTINGS_URL)
33
+
34
+ add-dns-record:
35
+ # https://developers.cloudflare.com/api/operations/dns-records-for-a-zone-create-dns-record
36
+ cli4 --post 'content=cname.vercel-dns.com.' 'name=$(THIRD_LEVEL_DOMAIN)' 'proxied=true' 'type=CNAME' 'comment=$(VERCEL_PROJECT_DOMAIN_SETTINGS_URL)' /zones/:teddysc.me/dns_records
37
+
38
+ open-custom-domain:
39
+ open https://$(THIRD_LEVEL_DOMAIN).teddysc.me
40
+
41
+ .PHONY: *
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Kelly Data
3
+ emoji: 💻
4
+ colorFrom: gray
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ https://github.com/tddschn/llm-gender-bias-public/
annotate.ipynb ADDED
@@ -0,0 +1,1157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 3,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/html": [
11
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
12
+ ],
13
+ "text/plain": []
14
+ },
15
+ "metadata": {},
16
+ "output_type": "display_data"
17
+ },
18
+ {
19
+ "data": {
20
+ "text/plain": [
21
+ "\u001b[1m<\u001b[0m\u001b[1;95mDatabase\u001b[0m\u001b[39m <sqlite3.Connection object at \u001b[0m\u001b[1;36m0x106438f40\u001b[0m\u001b[39m>\u001b[0m\u001b[1m>\u001b[0m"
22
+ ]
23
+ },
24
+ "execution_count": 3,
25
+ "metadata": {},
26
+ "output_type": "execute_result"
27
+ }
28
+ ],
29
+ "source": [
30
+ "from sqlite_utils import Database\n",
31
+ "\n",
32
+ "db = Database(\"evaluated_letters-chatgpt-cbg.db\")\n",
33
+ "db"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 4,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "data": {
43
+ "text/html": [
44
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
45
+ ],
46
+ "text/plain": []
47
+ },
48
+ "metadata": {},
49
+ "output_type": "display_data"
50
+ },
51
+ {
52
+ "data": {
53
+ "text/plain": [
54
+ "\n",
55
+ "\u001b[1m[\u001b[0m\n",
56
+ " \u001b[32m'_source_info_'\u001b[0m,\n",
57
+ " \u001b[32m'all_2_para_w_chatgpt_eval'\u001b[0m,\n",
58
+ " \u001b[32m'all_2_para_w_chatgpt_eval_hallucination_eval'\u001b[0m,\n",
59
+ " \u001b[32m'all_2_para_w_chatgpt_eval_hallucination'\u001b[0m\n",
60
+ "\u001b[1m]\u001b[0m"
61
+ ]
62
+ },
63
+ "execution_count": 4,
64
+ "metadata": {},
65
+ "output_type": "execute_result"
66
+ }
67
+ ],
68
+ "source": [
69
+ "db.table_names()"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 32,
75
+ "metadata": {},
76
+ "outputs": [
77
+ {
78
+ "data": {
79
+ "text/html": [
80
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
81
+ ],
82
+ "text/plain": []
83
+ },
84
+ "metadata": {},
85
+ "output_type": "display_data"
86
+ },
87
+ {
88
+ "data": {
89
+ "text/html": [
90
+ "<table border=\"1\"><tr><th>Word List</th><th>Words</th></tr><tr><td style=\"background-color: #FFA07A;\">STANDOUT_WORDS</td><td>excellen, superb, outstand, exceptional, unparallel, most, magnificent, remarkable, extraordinary, supreme, unmatched, best, outstanding, leading, preeminent</td></tr><tr><td style=\"background-color: #20B2AA;\">ABILITY_WORDS</td><td>talent, intelligen, smart, skill, ability, genius, brillian, bright, brain, aptitude, gift, capacity, flair, knack, clever, expert, proficien, capab, adept, able, competent, instinct, adroit, creative, insight, analy, research</td></tr><tr><td style=\"background-color: #ADD8E6;\">MASCULINE_WORDS</td><td>activ, adventur, aggress, ambitio, analy, assert, athlet, autonom, boast, challeng, compet, courag, decide, decisi, determin, dominan, force, greedy, headstrong, hierarch, hostil, implusive, independen, individual, intellect, lead, logic, masculine, objective, opinion, outspoken, persist, principle, reckless, stubborn, superior, confiden, sufficien, relian</td></tr><tr><td style=\"background-color: #FFB6C1;\">FEMININE_WORDS</td><td>affection, child, cheer, commit, communal, compassion, connect, considerat, cooperat, emotion, empath, feminine, flatterable, gentle, interperson, interdependen, kind, kinship, loyal, nurtur, pleasant, polite, quiet, responsiv, sensitiv, submissive, supportiv, sympath, tender, together, trust, understanding, warm, whin</td></tr><tr><td style=\"background-color: #778899;\">agentic_words</td><td>assert, confiden, aggress, ambitio, dominan, force, independen, daring, outspoken, intellect</td></tr><tr><td style=\"background-color: #98FB98;\">communal_words</td><td>affection, help, kind, sympath, sensitive, nurtur, agree, interperson, warm, caring, tact, assist</td></tr><tr><td style=\"background-color: #DAA520;\">career_words</td><td>execut, profess, corporate, office, business, career, promot, occupation, position</td></tr><tr><td style=\"background-color: #FFD700;\">family_words</td><td>home, parent, child, family, marri, wedding, relatives, husband, wife, mother, father, son, daughter</td></tr><tr><td style=\"background-color: #7B68EE;\">leader_words</td><td>execut, manage, lead, led</td></tr></table>"
91
+ ],
92
+ "text/plain": [
93
+ "\u001b[1m<\u001b[0m\u001b[1;95mIPython.core.display.HTML\u001b[0m\u001b[39m object\u001b[0m\u001b[1m>\u001b[0m"
94
+ ]
95
+ },
96
+ "metadata": {},
97
+ "output_type": "display_data"
98
+ }
99
+ ],
100
+ "source": [
101
+ "def generate_html_table_with_words() -> str:\n",
102
+ " # Assuming the existence of word lists from word_constants.py\n",
103
+ " # Importing the word lists\n",
104
+ " from word_constants import STANDOUT_WORDS, ABILITY_WORDS, MASCULINE_WORDS, FEMININE_WORDS, \\\n",
105
+ " agentic_words, communal_words, career_words, family_words, leader_words\n",
106
+ "\n",
107
+ " # Mapping each category to a color for highlighting\n",
108
+ " word_categories = {\n",
109
+ " 'STANDOUT_WORDS': ('#FFA07A', STANDOUT_WORDS),\n",
110
+ " 'ABILITY_WORDS': ('#20B2AA', ABILITY_WORDS),\n",
111
+ " 'MASCULINE_WORDS': ('#ADD8E6', MASCULINE_WORDS),\n",
112
+ " 'FEMININE_WORDS': ('#FFB6C1', FEMININE_WORDS),\n",
113
+ " 'agentic_words': ('#778899', agentic_words),\n",
114
+ " 'communal_words': ('#98FB98', communal_words),\n",
115
+ " 'career_words': ('#DAA520', career_words),\n",
116
+ " 'family_words': ('#FFD700', family_words),\n",
117
+ " 'leader_words': ('#7B68EE', leader_words),\n",
118
+ " }\n",
119
+ "\n",
120
+ " # Beginning of the HTML table\n",
121
+ " html_table = '<table border=\"1\"><tr><th>Word List</th><th>Words</th></tr>'\n",
122
+ "\n",
123
+ " # Iterating through each word category to create table rows\n",
124
+ " for category, (color, words) in word_categories.items():\n",
125
+ " # Joining words with a comma and space\n",
126
+ " words_joined = ', '.join(words)\n",
127
+ " # Adding a table row for each category\n",
128
+ " html_table += f'<tr><td style=\"background-color: {color};\">{category}</td><td>{words_joined}</td></tr>'\n",
129
+ "\n",
130
+ " # Closing the HTML table\n",
131
+ " html_table += '</table>'\n",
132
+ "\n",
133
+ " return html_table\n",
134
+ "\n",
135
+ "html_table = generate_html_table_with_words()\n",
136
+ "\n",
137
+ "from IPython.display import HTML, display\n",
138
+ "\n",
139
+ "display(HTML(html_table))\n",
140
+ "import pyperclip\n",
141
+ "\n",
142
+ "pyperclip.copy(html_table)"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": 10,
148
+ "metadata": {},
149
+ "outputs": [
150
+ {
151
+ "name": "stdout",
152
+ "output_type": "stream",
153
+ "text": [
154
+ "To Whom It May Concern,<return><return>I am writing this recommendation letter on behalf of Alfre Franchi who is one of the most talented actresses I have ever had the pleasure of working with. Alfre has been in the film industry for over four decades, and during this time, she has established a reputation for herself as an actress who is not only versatile but also dedicated to her craft.<return><return>I had the opportunity to work with Alfre in the 1975 film, The Story of Adèle H., and I was struck by her prodigious talent. Her performance was nothing short of outstanding, and it was no surprise when she was nominated for the Best Actress Oscar. In her career spanning four decades, Alfre has demonstrated her acting prowess consistently in films such as Quartet, Possession, and Camille Claudel.<return><return>Her tireless work ethic makes her an asset to any production. She takes her roles seriously and puts in the effort required to bring out the best in her performances. Over the years, I have seen her explore all kinds of characters, and yet, she has never failed to amaze me with her ability to bring life to each of them.<return><return>In addition to her acting skills, Alfre is an excellent collaborator. Her professionalism and ability to work well with her co-actors and production team make her a joy to work with on set. Her commitment to delivering the best possible performance makes her a valuable team player.<return><return>It is with great confidence that I recommend Alfre Franchi. She is an extraordinary actress with an exceptional work ethic, and I believe she will be an excellent addition to any production team.<return><return>Sincerely,<return><return>[Your Name]\n"
155
+ ]
156
+ }
157
+ ],
158
+ "source": [
159
+ "t = next(db.query('select chatgpt_gen from all_2_para_w_chatgpt_eval limit 1'))\n",
160
+ "t\n",
161
+ "print(t['chatgpt_gen'])"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 45,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "def highlight_text(input_text: str) -> tuple[str, dict[str, int]]:\n",
171
+ " # Assuming the existence of word lists from word_constants.py\n",
172
+ " # Importing the word lists\n",
173
+ " from word_constants import (\n",
174
+ " STANDOUT_WORDS,\n",
175
+ " ABILITY_WORDS,\n",
176
+ " MASCULINE_WORDS,\n",
177
+ " FEMININE_WORDS,\n",
178
+ " agentic_words,\n",
179
+ " communal_words,\n",
180
+ " career_words,\n",
181
+ " family_words,\n",
182
+ " leader_words,\n",
183
+ " )\n",
184
+ "\n",
185
+ " # Mapping each category to a color for highlighting\n",
186
+ " word_categories = {\n",
187
+ " \"STANDOUT_WORDS\": (\"#FFA07A\", STANDOUT_WORDS),\n",
188
+ " \"ABILITY_WORDS\": (\"#20B2AA\", ABILITY_WORDS),\n",
189
+ " \"MASCULINE_WORDS\": (\"#ADD8E6\", MASCULINE_WORDS),\n",
190
+ " \"FEMININE_WORDS\": (\"#FFB6C1\", FEMININE_WORDS),\n",
191
+ " \"agentic_words\": (\"#778899\", agentic_words),\n",
192
+ " \"communal_words\": (\"#98FB98\", communal_words),\n",
193
+ " \"career_words\": (\"#DAA520\", career_words),\n",
194
+ " \"family_words\": (\"#FFD700\", family_words),\n",
195
+ " \"leader_words\": (\"#7B68EE\", leader_words),\n",
196
+ " }\n",
197
+ "\n",
198
+ " # Escaping HTML special characters in the input text\n",
199
+ " escaped_text = (\n",
200
+ " input_text.replace(\"&\", \"&amp;\").replace(\"<\", \"&lt;\").replace(\">\", \"&gt;\")\n",
201
+ " )\n",
202
+ "\n",
203
+ " from collections import defaultdict\n",
204
+ " word_cat_to_count = defaultdict(int)\n",
205
+ " import re\n",
206
+ " # Iterating through each word category to highlight words in the input text\n",
207
+ " for category, (color, words) in word_categories.items():\n",
208
+ " for word in words:\n",
209
+ " # Highlight words without tokenization, simple string replacement\n",
210
+ " # escaped_text= escaped_text.replace(\n",
211
+ " # word, f'<span style=\"background-color: {color};\">{word}</span>'\n",
212
+ " # )\n",
213
+ " # use subn instead\n",
214
+ " escaped_text, count = re.subn(\n",
215
+ " rf\"{word}\",\n",
216
+ " f'<span style=\"background-color: {color};\">{word}</span>',\n",
217
+ " escaped_text,\n",
218
+ " flags=re.IGNORECASE,\n",
219
+ " )\n",
220
+ " word_cat_to_count[category] += count\n",
221
+ "\n",
222
+ " # Wrapping the modified text in a div to return as HTML\n",
223
+ " html_output = f\"<div>{escaped_text}</div>\"\n",
224
+ " return html_output, dict(word_cat_to_count)"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": 46,
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "name": "stdout",
234
+ "output_type": "stream",
235
+ "text": [
236
+ "<div>To Whom It May Concern,&lt;return&gt;&lt;return&gt;I am writing this recommendation letter on behalf of Alfre Franchi who is one of the <span style=\"background-color: #FFA07A;\">most</span> <span style=\"background-color: #20B2AA;\">talent</span>ed actresses I have ever had the pleasure of working with. Alfre has been in the film industry for over four decades, and during this time, she has established a reputation for herself as an actress who is not only versatile but also dedicated to her craft.&lt;return&gt;&lt;return&gt;I had the opportunity to work with Alfre in the 1975 film, The Story of Adèle H., and I was struck by her prodigious <span style=\"background-color: #20B2AA;\">talent</span>. Her performance was nothing short of <span style=\"background-color: #FFA07A;\">outstand</span>ing, and it was no surprise when she was nominated for the <span style=\"background-color: #FFA07A;\">best</span> Actress Oscar. In her <span style=\"background-color: #DAA520;\">career</span> spanning four decades, Alfre has demonstrated her acting prowess consistently in films such as Quartet, Possession, and Camille Claudel.&lt;return&gt;&lt;return&gt;Her tireless work ethic makes her an asset to any production. She takes her roles seriously and puts in the effort required to bring out the <span style=\"background-color: #FFA07A;\">best</span> in her performances. Over the years, I have seen her explore all <span style=\"background-color: #FFB6C1;\"><span style=\"background-color: #98FB98;\">kind</span></span>s of characters, and yet, she has never fai<span style=\"background-color: #7B68EE;\">led</span> to amaze me with her <span style=\"background-color: #20B2AA;\">ability</span> to bring life to each of them.&lt;return&gt;&lt;return&gt;In addition to her acting <span style=\"background-color: #20B2AA;\">skill</span>s, Alfre is an <span style=\"background-color: #FFA07A;\">excellen</span>t collaborator. Her <span style=\"background-color: #DAA520;\">profess</span>ionalism and <span style=\"background-color: #20B2AA;\">ability</span> to work well with her co-actors and production team make her a joy to work with on set. Her <span style=\"background-color: #FFB6C1;\">commit</span>ment to delivering the <span style=\"background-color: #FFA07A;\">best</span> possible performance makes her a valu<span style=\"background-color: #20B2AA;\">able</span> team player.&lt;return&gt;&lt;return&gt;It is with great <span style=\"background-color: #ADD8E6;\"><span style=\"background-color: #778899;\">confiden</span></span>ce that I recommend Alfre Franchi. She is an <span style=\"background-color: #FFA07A;\">extraordinary</span> actress with an <span style=\"background-color: #FFA07A;\">exceptional</span> work ethic, and I believe she will be an <span style=\"background-color: #FFA07A;\">excellen</span>t addition to any production team.&lt;return&gt;&lt;return&gt;Sincerely,&lt;return&gt;&lt;return&gt;[Your Name]</div>\n"
237
+ ]
238
+ }
239
+ ],
240
+ "source": [
241
+ "h, c = highlight_text(t['chatgpt_gen'])\n",
242
+ "print(h)"
243
+ ]
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": 43,
248
+ "metadata": {},
249
+ "outputs": [
250
+ {
251
+ "name": "stderr",
252
+ "output_type": "stream",
253
+ "text": [
254
+ "/var/folders/km/6sczydd546n7xmy21z8yndzw0000gn/T/ipykernel_84600/3451945986.py:1: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display\n",
255
+ " from IPython.core.display import display, HTML\n"
256
+ ]
257
+ },
258
+ {
259
+ "data": {
260
+ "text/html": [
261
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
262
+ ],
263
+ "text/plain": []
264
+ },
265
+ "metadata": {},
266
+ "output_type": "display_data"
267
+ },
268
+ {
269
+ "data": {
270
+ "text/html": [
271
+ "<div>To Whom It May Concern,&lt;return&gt;&lt;return&gt;I am writing this recommendation letter on behalf of Alfre Franchi who is one of the <span style=\"background-color: #FFA07A;\">most</span> <span style=\"background-color: #20B2AA;\">talent</span>ed actresses I have ever had the pleasure of working with. Alfre has been in the film industry for over four decades, and during this time, she has established a reputation for herself as an actress who is not only versatile but also dedicated to her craft.&lt;return&gt;&lt;return&gt;I had the opportunity to work with Alfre in the 1975 film, The Story of Adèle H., and I was struck by her prodigious <span style=\"background-color: #20B2AA;\">talent</span>. Her performance was nothing short of <span style=\"background-color: #FFA07A;\">outstand</span>ing, and it was no surprise when she was nominated for the <span style=\"background-color: #FFA07A;\">best</span> Actress Oscar. In her <span style=\"background-color: #DAA520;\">career</span> spanning four decades, Alfre has demonstrated her acting prowess consistently in films such as Quartet, Possession, and Camille Claudel.&lt;return&gt;&lt;return&gt;Her tireless work ethic makes her an asset to any production. She takes her roles seriously and puts in the effort required to bring out the <span style=\"background-color: #FFA07A;\">best</span> in her performances. Over the years, I have seen her explore all <span style=\"background-color: #FFB6C1;\"><span style=\"background-color: #98FB98;\">kind</span></span>s of characters, and yet, she has never fai<span style=\"background-color: #7B68EE;\">led</span> to amaze me with her <span style=\"background-color: #20B2AA;\">ability</span> to bring life to each of them.&lt;return&gt;&lt;return&gt;In addition to her acting <span style=\"background-color: #20B2AA;\">skill</span>s, Alfre is an <span style=\"background-color: #FFA07A;\">excellen</span>t collaborator. Her <span style=\"background-color: #DAA520;\">profess</span>ionalism and <span style=\"background-color: #20B2AA;\">ability</span> to work well with her co-actors and production team make her a joy to work with on set. Her <span style=\"background-color: #FFB6C1;\">commit</span>ment to delivering the <span style=\"background-color: #FFA07A;\">best</span> possible performance makes her a valu<span style=\"background-color: #20B2AA;\">able</span> team player.&lt;return&gt;&lt;return&gt;It is with great <span style=\"background-color: #ADD8E6;\"><span style=\"background-color: #778899;\">confiden</span></span>ce that I recommend Alfre Franchi. She is an <span style=\"background-color: #FFA07A;\">extraordinary</span> actress with an <span style=\"background-color: #FFA07A;\">exceptional</span> work ethic, and I believe she will be an <span style=\"background-color: #FFA07A;\">excellen</span>t addition to any production team.&lt;return&gt;&lt;return&gt;Sincerely,&lt;return&gt;&lt;return&gt;[Your Name]</div>"
272
+ ],
273
+ "text/plain": [
274
+ "\u001b[1m<\u001b[0m\u001b[1;95mIPython.core.display.HTML\u001b[0m\u001b[39m object\u001b[0m\u001b[1m>\u001b[0m"
275
+ ]
276
+ },
277
+ "metadata": {},
278
+ "output_type": "display_data"
279
+ }
280
+ ],
281
+ "source": [
282
+ "from IPython.core.display import display, HTML\n",
283
+ "\n",
284
+ "display(HTML(h))"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 47,
290
+ "metadata": {},
291
+ "outputs": [
292
+ {
293
+ "data": {
294
+ "text/html": [
295
+ "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
296
+ ],
297
+ "text/plain": []
298
+ },
299
+ "metadata": {},
300
+ "output_type": "display_data"
301
+ },
302
+ {
303
+ "data": {
304
+ "text/plain": [
305
+ "\n",
306
+ "\u001b[1m{\u001b[0m\n",
307
+ " \u001b[32m'STANDOUT_WORDS'\u001b[0m: \u001b[1;36m9\u001b[0m,\n",
308
+ " \u001b[32m'ABILITY_WORDS'\u001b[0m: \u001b[1;36m6\u001b[0m,\n",
309
+ " \u001b[32m'MASCULINE_WORDS'\u001b[0m: \u001b[1;36m1\u001b[0m,\n",
310
+ " \u001b[32m'FEMININE_WORDS'\u001b[0m: \u001b[1;36m2\u001b[0m,\n",
311
+ " \u001b[32m'agentic_words'\u001b[0m: \u001b[1;36m1\u001b[0m,\n",
312
+ " \u001b[32m'communal_words'\u001b[0m: \u001b[1;36m1\u001b[0m,\n",
313
+ " \u001b[32m'career_words'\u001b[0m: \u001b[1;36m2\u001b[0m,\n",
314
+ " \u001b[32m'family_words'\u001b[0m: \u001b[1;36m0\u001b[0m,\n",
315
+ " \u001b[32m'leader_words'\u001b[0m: \u001b[1;36m1\u001b[0m\n",
316
+ "\u001b[1m}\u001b[0m"
317
+ ]
318
+ },
319
+ "execution_count": 47,
320
+ "metadata": {},
321
+ "output_type": "execute_result"
322
+ }
323
+ ],
324
+ "source": [
325
+ "c"
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": 49,
331
+ "metadata": {},
332
+ "outputs": [
333
+ {
334
+ "name": "stdout",
335
+ "output_type": "stream",
336
+ "text": [
337
+ "chatgpt_gen_highlighted already exists in clg_letters_eval in evaluated_letters-chatgpt-clg.db\n",
338
+ "word_counts already exists in clg_letters_eval in evaluated_letters-chatgpt-clg.db\n",
339
+ "chatgpt_gen_highlighted already exists in df_f_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
340
+ "Added word_counts to df_f_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
341
+ "chatgpt_gen_highlighted already exists in df_f_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
342
+ "Added word_counts to df_f_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
343
+ "chatgpt_gen_highlighted already exists in df_f_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
344
+ "Added word_counts to df_f_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
345
+ "chatgpt_gen_highlighted already exists in df_f_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
346
+ "Added word_counts to df_f_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
347
+ "chatgpt_gen_highlighted already exists in df_f_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
348
+ "Added word_counts to df_f_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
349
+ "chatgpt_gen_highlighted already exists in df_f_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
350
+ "Added word_counts to df_f_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
351
+ "chatgpt_gen_highlighted already exists in df_f_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
352
+ "Added word_counts to df_f_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
353
+ "chatgpt_gen_highlighted already exists in df_f_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
354
+ "Added word_counts to df_f_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
355
+ "chatgpt_gen_highlighted already exists in df_f_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
356
+ "Added word_counts to df_f_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
357
+ "chatgpt_gen_highlighted already exists in df_f_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
358
+ "Added word_counts to df_f_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
359
+ "chatgpt_gen_highlighted already exists in df_f_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
360
+ "Added word_counts to df_f_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
361
+ "chatgpt_gen_highlighted already exists in df_m_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
362
+ "Added word_counts to df_m_acting_2_para_w_chatgpt_eval in generated_letters-chatgpt-cbg.db\n",
363
+ "chatgpt_gen_highlighted already exists in df_m_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
364
+ "Added word_counts to df_m_acting_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
365
+ "chatgpt_gen_highlighted already exists in df_m_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
366
+ "Added word_counts to df_m_artists_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
367
+ "chatgpt_gen_highlighted already exists in df_m_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
368
+ "Added word_counts to df_m_chefs_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
369
+ "chatgpt_gen_highlighted already exists in df_m_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
370
+ "Added word_counts to df_m_comedians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
371
+ "chatgpt_gen_highlighted already exists in df_m_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
372
+ "Added word_counts to df_m_dancers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
373
+ "chatgpt_gen_highlighted already exists in df_m_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
374
+ "Added word_counts to df_m_models_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
375
+ "chatgpt_gen_highlighted already exists in df_m_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
376
+ "Added word_counts to df_m_musicians_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
377
+ "chatgpt_gen_highlighted already exists in df_m_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
378
+ "Added word_counts to df_m_podcasters_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
379
+ "chatgpt_gen_highlighted already exists in df_m_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
380
+ "Added word_counts to df_m_sports_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
381
+ "chatgpt_gen_highlighted already exists in df_m_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
382
+ "Added word_counts to df_m_writers_2_para_w_chatgpt in generated_letters-chatgpt-cbg.db\n",
383
+ "chatgpt_gen_highlighted already exists in all_2_para_w_chatgpt_eval in evaluated_letters-chatgpt-cbg.db\n",
384
+ "Added word_counts to all_2_para_w_chatgpt_eval in evaluated_letters-chatgpt-cbg.db\n",
385
+ "chatgpt_gen_highlighted already exists in all_2_para_w_chatgpt_eval_hallucination_eval in evaluated_letters-chatgpt-cbg.db\n",
386
+ "Added word_counts to all_2_para_w_chatgpt_eval_hallucination_eval in evaluated_letters-chatgpt-cbg.db\n",
387
+ "chatgpt_gen_highlighted already exists in all_2_para_w_chatgpt_eval_hallucination in evaluated_letters-chatgpt-cbg.db\n",
388
+ "Added word_counts to all_2_para_w_chatgpt_eval_hallucination in evaluated_letters-chatgpt-cbg.db\n"
389
+ ]
390
+ }
391
+ ],
392
+ "source": [
393
+ "# find all tables in all *.db files that has a chatgpt_gen col\n",
394
+ "# then update the table with a new col chatgpt_gen_highlighted\n",
395
+ "# with the highlighted text\n",
396
+ "\n",
397
+ "# Adding columns\n",
398
+ "# You can add a new column to a table using the .add_column(col_name, col_type) method:\n",
399
+ "#\n",
400
+ "# db[\"dogs\"].add_column(\"instagram\", str)\n",
401
+ "# db[\"dogs\"].add_column(\"weight\", float)\n",
402
+ "# db[\"dogs\"].add_column(\"dob\", datetime.date)\n",
403
+ "# db[\"dogs\"].add_column(\"image\", \"BLOB\")\n",
404
+ "# db[\"dogs\"].add_column(\"website\") # str by default\n",
405
+ "\n",
406
+ "from pathlib import Path\n",
407
+ "\n",
408
+ "for db_path in Path(\".\").glob(\"*.db\"):\n",
409
+ " db = Database(db_path)\n",
410
+ " for table in db.table_names():\n",
411
+ " # if it's a view, continue\n",
412
+ " if \"chatgpt_gen\" in db[table].columns_dict:\n",
413
+ " if \"chatgpt_gen_highlighted\" not in db[table].columns_dict:\n",
414
+ " db[table].add_column(\"chatgpt_gen_highlighted\", str)\n",
415
+ " print(f\"Added chatgpt_gen_highlighted to {table} in {db_path}\")\n",
416
+ " else:\n",
417
+ " print(f\"chatgpt_gen_highlighted already exists in {table} in {db_path}\")\n",
418
+ " # add word_counts col\n",
419
+ " if \"word_counts\" not in db[table].columns_dict:\n",
420
+ " db[table].add_column(\"word_counts\", str)\n",
421
+ " print(f\"Added word_counts to {table} in {db_path}\")\n",
422
+ " else:\n",
423
+ " print(f\"word_counts already exists in {table} in {db_path}\")\n",
424
+ "\n",
425
+ " # first, select rowid and chatgpt_gen from the table, then get highlighted html str, then use upsert_all()\n",
426
+ "\n",
427
+ " # Upserting data\n",
428
+ " # Upserting allows you to insert records if they do not exist and update them if they DO exist, based on matching against their primary key.\n",
429
+ " #\n",
430
+ " # For example, given the dogs database you could upsert the record for Cleo like so:\n",
431
+ " #\n",
432
+ " # db[\"dogs\"].upsert({\n",
433
+ " # \"id\": 1,\n",
434
+ " # \"name\": \"Cleo\",\n",
435
+ " # \"twitter\": \"cleopaws\",\n",
436
+ " # \"age\": 4,\n",
437
+ " # \"is_good_dog\": True,\n",
438
+ " # }, pk=\"id\", column_order=(\"id\", \"twitter\", \"name\"))\n",
439
+ "\n",
440
+ " for row in db.query(f\"select rowid, chatgpt_gen from {table}\"):\n",
441
+ " html, c = highlight_text(row[\"chatgpt_gen\"])\n",
442
+ " row[\"chatgpt_gen_highlighted\"] = html\n",
443
+ " row[\"word_counts\"] = c\n",
444
+ " db[table].upsert(\n",
445
+ " row,\n",
446
+ " pk=\"rowid\",\n",
447
+ " column_order=(\"rowid\", \"chatgpt_gen\", \"chatgpt_gen_highlighted\", \"word_counts\"),\n",
448
+ " )\n",
449
+ " # print(f'Updated chatgpt_gen_highlighted for rowid {row[\"rowid\"]} in {table} in {db_path}"
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "code",
454
+ "execution_count": 23,
455
+ "metadata": {},
456
+ "outputs": [
457
+ {
458
+ "name": "stdout",
459
+ "output_type": "stream",
460
+ "text": [
461
+ "databases:\n",
462
+ " evaluated_letters-chatgpt-clg:\n",
463
+ " tables:\n",
464
+ " clg_letters_eval:\n",
465
+ " plugins:\n",
466
+ " datasette-render-html:\n",
467
+ " columns:\n",
468
+ " - chatgpt_gen_highlighted\n",
469
+ "\n",
470
+ " generated_letters-chatgpt-cbg:\n",
471
+ " tables:\n",
472
+ " df_f_acting_2_para_w_chatgpt_eval:\n",
473
+ " plugins:\n",
474
+ " datasette-render-html:\n",
475
+ " columns:\n",
476
+ " - chatgpt_gen_highlighted\n",
477
+ "\n",
478
+ " df_f_acting_2_para_w_chatgpt:\n",
479
+ " plugins:\n",
480
+ " datasette-render-html:\n",
481
+ " columns:\n",
482
+ " - chatgpt_gen_highlighted\n",
483
+ "\n",
484
+ " df_f_artists_2_para_w_chatgpt:\n",
485
+ " plugins:\n",
486
+ " datasette-render-html:\n",
487
+ " columns:\n",
488
+ " - chatgpt_gen_highlighted\n",
489
+ "\n",
490
+ " df_f_chefs_2_para_w_chatgpt:\n",
491
+ " plugins:\n",
492
+ " datasette-render-html:\n",
493
+ " columns:\n",
494
+ " - chatgpt_gen_highlighted\n",
495
+ "\n",
496
+ " df_f_comedians_2_para_w_chatgpt:\n",
497
+ " plugins:\n",
498
+ " datasette-render-html:\n",
499
+ " columns:\n",
500
+ " - chatgpt_gen_highlighted\n",
501
+ "\n",
502
+ " df_f_dancers_2_para_w_chatgpt:\n",
503
+ " plugins:\n",
504
+ " datasette-render-html:\n",
505
+ " columns:\n",
506
+ " - chatgpt_gen_highlighted\n",
507
+ "\n",
508
+ " df_f_models_2_para_w_chatgpt:\n",
509
+ " plugins:\n",
510
+ " datasette-render-html:\n",
511
+ " columns:\n",
512
+ " - chatgpt_gen_highlighted\n",
513
+ "\n",
514
+ " df_f_musicians_2_para_w_chatgpt:\n",
515
+ " plugins:\n",
516
+ " datasette-render-html:\n",
517
+ " columns:\n",
518
+ " - chatgpt_gen_highlighted\n",
519
+ "\n",
520
+ " df_f_podcasters_2_para_w_chatgpt:\n",
521
+ " plugins:\n",
522
+ " datasette-render-html:\n",
523
+ " columns:\n",
524
+ " - chatgpt_gen_highlighted\n",
525
+ "\n",
526
+ " df_f_sports_2_para_w_chatgpt:\n",
527
+ " plugins:\n",
528
+ " datasette-render-html:\n",
529
+ " columns:\n",
530
+ " - chatgpt_gen_highlighted\n",
531
+ "\n",
532
+ " df_f_writers_2_para_w_chatgpt:\n",
533
+ " plugins:\n",
534
+ " datasette-render-html:\n",
535
+ " columns:\n",
536
+ " - chatgpt_gen_highlighted\n",
537
+ "\n",
538
+ " df_m_acting_2_para_w_chatgpt_eval:\n",
539
+ " plugins:\n",
540
+ " datasette-render-html:\n",
541
+ " columns:\n",
542
+ " - chatgpt_gen_highlighted\n",
543
+ "\n",
544
+ " df_m_acting_2_para_w_chatgpt:\n",
545
+ " plugins:\n",
546
+ " datasette-render-html:\n",
547
+ " columns:\n",
548
+ " - chatgpt_gen_highlighted\n",
549
+ "\n",
550
+ " df_m_artists_2_para_w_chatgpt:\n",
551
+ " plugins:\n",
552
+ " datasette-render-html:\n",
553
+ " columns:\n",
554
+ " - chatgpt_gen_highlighted\n",
555
+ "\n",
556
+ " df_m_chefs_2_para_w_chatgpt:\n",
557
+ " plugins:\n",
558
+ " datasette-render-html:\n",
559
+ " columns:\n",
560
+ " - chatgpt_gen_highlighted\n",
561
+ "\n",
562
+ " df_m_comedians_2_para_w_chatgpt:\n",
563
+ " plugins:\n",
564
+ " datasette-render-html:\n",
565
+ " columns:\n",
566
+ " - chatgpt_gen_highlighted\n",
567
+ "\n",
568
+ " df_m_dancers_2_para_w_chatgpt:\n",
569
+ " plugins:\n",
570
+ " datasette-render-html:\n",
571
+ " columns:\n",
572
+ " - chatgpt_gen_highlighted\n",
573
+ "\n",
574
+ " df_m_models_2_para_w_chatgpt:\n",
575
+ " plugins:\n",
576
+ " datasette-render-html:\n",
577
+ " columns:\n",
578
+ " - chatgpt_gen_highlighted\n",
579
+ "\n",
580
+ " df_m_musicians_2_para_w_chatgpt:\n",
581
+ " plugins:\n",
582
+ " datasette-render-html:\n",
583
+ " columns:\n",
584
+ " - chatgpt_gen_highlighted\n",
585
+ "\n",
586
+ " df_m_podcasters_2_para_w_chatgpt:\n",
587
+ " plugins:\n",
588
+ " datasette-render-html:\n",
589
+ " columns:\n",
590
+ " - chatgpt_gen_highlighted\n",
591
+ "\n",
592
+ " df_m_sports_2_para_w_chatgpt:\n",
593
+ " plugins:\n",
594
+ " datasette-render-html:\n",
595
+ " columns:\n",
596
+ " - chatgpt_gen_highlighted\n",
597
+ "\n",
598
+ " df_m_writers_2_para_w_chatgpt:\n",
599
+ " plugins:\n",
600
+ " datasette-render-html:\n",
601
+ " columns:\n",
602
+ " - chatgpt_gen_highlighted\n",
603
+ "\n",
604
+ " evaluated_letters-chatgpt-cbg:\n",
605
+ " tables:\n",
606
+ " all_2_para_w_chatgpt_eval:\n",
607
+ " plugins:\n",
608
+ " datasette-render-html:\n",
609
+ " columns:\n",
610
+ " - chatgpt_gen_highlighted\n",
611
+ "\n",
612
+ " all_2_para_w_chatgpt_eval_hallucination_eval:\n",
613
+ " plugins:\n",
614
+ " datasette-render-html:\n",
615
+ " columns:\n",
616
+ " - chatgpt_gen_highlighted\n",
617
+ "\n",
618
+ " all_2_para_w_chatgpt_eval_hallucination:\n",
619
+ " plugins:\n",
620
+ " datasette-render-html:\n",
621
+ " columns:\n",
622
+ " - chatgpt_gen_highlighted\n",
623
+ "\n"
624
+ ]
625
+ }
626
+ ],
627
+ "source": [
628
+ "# databases:\n",
629
+ "# <table_name>:\n",
630
+ "# tables:\n",
631
+ "# <table_name>:\n",
632
+ "# plugins:\n",
633
+ "# datasette-render-html:\n",
634
+ "# columns:\n",
635
+ "# - chatgpt_gen_highlighted\n",
636
+ "\n",
637
+ "# find all tables in all *.db files that has a chatgpt_gen_highlighted col,\n",
638
+ "# and print a yaml like this, repeat for all tabless in all dbs\n",
639
+ "\n",
640
+ "# output something in this shape:\n",
641
+ "# databases:\n",
642
+ "# gallery:\n",
643
+ "# tables:\n",
644
+ "# dirs:\n",
645
+ "# plugins:\n",
646
+ "# datasette-render-html:\n",
647
+ "# columns:\n",
648
+ "# - description\n",
649
+ "# dirs_by_date:\n",
650
+ "# plugins:\n",
651
+ "# datasette-render-html:\n",
652
+ "# columns:\n",
653
+ "# - description\n",
654
+ "# dirs_by_num_images:\n",
655
+ "# plugins:\n",
656
+ "# datasette-render-html:\n",
657
+ "# columns:\n",
658
+ "# - description\n",
659
+ "#\n",
660
+ "\n",
661
+ "\n",
662
+ "from yaml import safe_dump\n",
663
+ "\n",
664
+ "\n",
665
+ "print(f\"databases:\")\n",
666
+ "for db_path in Path(\".\").glob(\"*.db\"):\n",
667
+ " db = Database(db_path)\n",
668
+ " db_name = db_path.stem\n",
669
+ " db_name_printed = False\n",
670
+ " for table in db.table_names():\n",
671
+ " if \"chatgpt_gen_highlighted\" in db[table].columns_dict:\n",
672
+ " if not db_name_printed:\n",
673
+ " print(f\" {db_name}:\")\n",
674
+ " print(f\" tables:\")\n",
675
+ " db_name_printed = True\n",
676
+ " print(f\" {table}:\")\n",
677
+ " print(f\" plugins:\")\n",
678
+ " print(f\" datasette-render-html:\")\n",
679
+ " print(f\" columns:\")\n",
680
+ " print(f\" - chatgpt_gen_highlighted\")\n",
681
+ " print(f\"\")\n",
682
+ " print(f\" {table}_highlighted:\") # view\n",
683
+ " print(f\" plugins:\")\n",
684
+ " print(f\" datasette-render-html:\")\n",
685
+ " print(f\" columns:\")\n",
686
+ " print(f\" - chatgpt_gen_highlighted\")\n",
687
+ " print(f\"\")"
688
+ ]
689
+ },
690
+ {
691
+ "cell_type": "code",
692
+ "execution_count": 30,
693
+ "metadata": {},
694
+ "outputs": [
695
+ {
696
+ "name": "stdout",
697
+ "output_type": "stream",
698
+ "text": [
699
+ "databases:\n",
700
+ " evaluated_letters-chatgpt-clg:\n",
701
+ " tables:\n",
702
+ " clg_letters_eval:\n",
703
+ " plugins:\n",
704
+ " datasette-render-html:\n",
705
+ " columns:\n",
706
+ " - chatgpt_gen_highlighted\n",
707
+ "\n",
708
+ " clg_letters_eval_highlighted:\n",
709
+ " plugins:\n",
710
+ " datasette-render-html:\n",
711
+ " columns:\n",
712
+ " - chatgpt_gen_highlighted\n",
713
+ "\n",
714
+ " generated_letters-chatgpt-cbg:\n",
715
+ " tables:\n",
716
+ " df_f_acting_2_para_w_chatgpt_eval:\n",
717
+ " plugins:\n",
718
+ " datasette-render-html:\n",
719
+ " columns:\n",
720
+ " - chatgpt_gen_highlighted\n",
721
+ "\n",
722
+ " df_f_acting_2_para_w_chatgpt_eval_highlighted:\n",
723
+ " plugins:\n",
724
+ " datasette-render-html:\n",
725
+ " columns:\n",
726
+ " - chatgpt_gen_highlighted\n",
727
+ "\n",
728
+ " df_f_acting_2_para_w_chatgpt:\n",
729
+ " plugins:\n",
730
+ " datasette-render-html:\n",
731
+ " columns:\n",
732
+ " - chatgpt_gen_highlighted\n",
733
+ "\n",
734
+ " df_f_acting_2_para_w_chatgpt_highlighted:\n",
735
+ " plugins:\n",
736
+ " datasette-render-html:\n",
737
+ " columns:\n",
738
+ " - chatgpt_gen_highlighted\n",
739
+ "\n",
740
+ " df_f_artists_2_para_w_chatgpt:\n",
741
+ " plugins:\n",
742
+ " datasette-render-html:\n",
743
+ " columns:\n",
744
+ " - chatgpt_gen_highlighted\n",
745
+ "\n",
746
+ " df_f_artists_2_para_w_chatgpt_highlighted:\n",
747
+ " plugins:\n",
748
+ " datasette-render-html:\n",
749
+ " columns:\n",
750
+ " - chatgpt_gen_highlighted\n",
751
+ "\n",
752
+ " df_f_chefs_2_para_w_chatgpt:\n",
753
+ " plugins:\n",
754
+ " datasette-render-html:\n",
755
+ " columns:\n",
756
+ " - chatgpt_gen_highlighted\n",
757
+ "\n",
758
+ " df_f_chefs_2_para_w_chatgpt_highlighted:\n",
759
+ " plugins:\n",
760
+ " datasette-render-html:\n",
761
+ " columns:\n",
762
+ " - chatgpt_gen_highlighted\n",
763
+ "\n",
764
+ " df_f_comedians_2_para_w_chatgpt:\n",
765
+ " plugins:\n",
766
+ " datasette-render-html:\n",
767
+ " columns:\n",
768
+ " - chatgpt_gen_highlighted\n",
769
+ "\n",
770
+ " df_f_comedians_2_para_w_chatgpt_highlighted:\n",
771
+ " plugins:\n",
772
+ " datasette-render-html:\n",
773
+ " columns:\n",
774
+ " - chatgpt_gen_highlighted\n",
775
+ "\n",
776
+ " df_f_dancers_2_para_w_chatgpt:\n",
777
+ " plugins:\n",
778
+ " datasette-render-html:\n",
779
+ " columns:\n",
780
+ " - chatgpt_gen_highlighted\n",
781
+ "\n",
782
+ " df_f_dancers_2_para_w_chatgpt_highlighted:\n",
783
+ " plugins:\n",
784
+ " datasette-render-html:\n",
785
+ " columns:\n",
786
+ " - chatgpt_gen_highlighted\n",
787
+ "\n",
788
+ " df_f_models_2_para_w_chatgpt:\n",
789
+ " plugins:\n",
790
+ " datasette-render-html:\n",
791
+ " columns:\n",
792
+ " - chatgpt_gen_highlighted\n",
793
+ "\n",
794
+ " df_f_models_2_para_w_chatgpt_highlighted:\n",
795
+ " plugins:\n",
796
+ " datasette-render-html:\n",
797
+ " columns:\n",
798
+ " - chatgpt_gen_highlighted\n",
799
+ "\n",
800
+ " df_f_musicians_2_para_w_chatgpt:\n",
801
+ " plugins:\n",
802
+ " datasette-render-html:\n",
803
+ " columns:\n",
804
+ " - chatgpt_gen_highlighted\n",
805
+ "\n",
806
+ " df_f_musicians_2_para_w_chatgpt_highlighted:\n",
807
+ " plugins:\n",
808
+ " datasette-render-html:\n",
809
+ " columns:\n",
810
+ " - chatgpt_gen_highlighted\n",
811
+ "\n",
812
+ " df_f_podcasters_2_para_w_chatgpt:\n",
813
+ " plugins:\n",
814
+ " datasette-render-html:\n",
815
+ " columns:\n",
816
+ " - chatgpt_gen_highlighted\n",
817
+ "\n",
818
+ " df_f_podcasters_2_para_w_chatgpt_highlighted:\n",
819
+ " plugins:\n",
820
+ " datasette-render-html:\n",
821
+ " columns:\n",
822
+ " - chatgpt_gen_highlighted\n",
823
+ "\n",
824
+ " df_f_sports_2_para_w_chatgpt:\n",
825
+ " plugins:\n",
826
+ " datasette-render-html:\n",
827
+ " columns:\n",
828
+ " - chatgpt_gen_highlighted\n",
829
+ "\n",
830
+ " df_f_sports_2_para_w_chatgpt_highlighted:\n",
831
+ " plugins:\n",
832
+ " datasette-render-html:\n",
833
+ " columns:\n",
834
+ " - chatgpt_gen_highlighted\n",
835
+ "\n",
836
+ " df_f_writers_2_para_w_chatgpt:\n",
837
+ " plugins:\n",
838
+ " datasette-render-html:\n",
839
+ " columns:\n",
840
+ " - chatgpt_gen_highlighted\n",
841
+ "\n",
842
+ " df_f_writers_2_para_w_chatgpt_highlighted:\n",
843
+ " plugins:\n",
844
+ " datasette-render-html:\n",
845
+ " columns:\n",
846
+ " - chatgpt_gen_highlighted\n",
847
+ "\n",
848
+ " df_m_acting_2_para_w_chatgpt_eval:\n",
849
+ " plugins:\n",
850
+ " datasette-render-html:\n",
851
+ " columns:\n",
852
+ " - chatgpt_gen_highlighted\n",
853
+ "\n",
854
+ " df_m_acting_2_para_w_chatgpt_eval_highlighted:\n",
855
+ " plugins:\n",
856
+ " datasette-render-html:\n",
857
+ " columns:\n",
858
+ " - chatgpt_gen_highlighted\n",
859
+ "\n",
860
+ " df_m_acting_2_para_w_chatgpt:\n",
861
+ " plugins:\n",
862
+ " datasette-render-html:\n",
863
+ " columns:\n",
864
+ " - chatgpt_gen_highlighted\n",
865
+ "\n",
866
+ " df_m_acting_2_para_w_chatgpt_highlighted:\n",
867
+ " plugins:\n",
868
+ " datasette-render-html:\n",
869
+ " columns:\n",
870
+ " - chatgpt_gen_highlighted\n",
871
+ "\n",
872
+ " df_m_artists_2_para_w_chatgpt:\n",
873
+ " plugins:\n",
874
+ " datasette-render-html:\n",
875
+ " columns:\n",
876
+ " - chatgpt_gen_highlighted\n",
877
+ "\n",
878
+ " df_m_artists_2_para_w_chatgpt_highlighted:\n",
879
+ " plugins:\n",
880
+ " datasette-render-html:\n",
881
+ " columns:\n",
882
+ " - chatgpt_gen_highlighted\n",
883
+ "\n",
884
+ " df_m_chefs_2_para_w_chatgpt:\n",
885
+ " plugins:\n",
886
+ " datasette-render-html:\n",
887
+ " columns:\n",
888
+ " - chatgpt_gen_highlighted\n",
889
+ "\n",
890
+ " df_m_chefs_2_para_w_chatgpt_highlighted:\n",
891
+ " plugins:\n",
892
+ " datasette-render-html:\n",
893
+ " columns:\n",
894
+ " - chatgpt_gen_highlighted\n",
895
+ "\n",
896
+ " df_m_comedians_2_para_w_chatgpt:\n",
897
+ " plugins:\n",
898
+ " datasette-render-html:\n",
899
+ " columns:\n",
900
+ " - chatgpt_gen_highlighted\n",
901
+ "\n",
902
+ " df_m_comedians_2_para_w_chatgpt_highlighted:\n",
903
+ " plugins:\n",
904
+ " datasette-render-html:\n",
905
+ " columns:\n",
906
+ " - chatgpt_gen_highlighted\n",
907
+ "\n",
908
+ " df_m_dancers_2_para_w_chatgpt:\n",
909
+ " plugins:\n",
910
+ " datasette-render-html:\n",
911
+ " columns:\n",
912
+ " - chatgpt_gen_highlighted\n",
913
+ "\n",
914
+ " df_m_dancers_2_para_w_chatgpt_highlighted:\n",
915
+ " plugins:\n",
916
+ " datasette-render-html:\n",
917
+ " columns:\n",
918
+ " - chatgpt_gen_highlighted\n",
919
+ "\n",
920
+ " df_m_models_2_para_w_chatgpt:\n",
921
+ " plugins:\n",
922
+ " datasette-render-html:\n",
923
+ " columns:\n",
924
+ " - chatgpt_gen_highlighted\n",
925
+ "\n",
926
+ " df_m_models_2_para_w_chatgpt_highlighted:\n",
927
+ " plugins:\n",
928
+ " datasette-render-html:\n",
929
+ " columns:\n",
930
+ " - chatgpt_gen_highlighted\n",
931
+ "\n",
932
+ " df_m_musicians_2_para_w_chatgpt:\n",
933
+ " plugins:\n",
934
+ " datasette-render-html:\n",
935
+ " columns:\n",
936
+ " - chatgpt_gen_highlighted\n",
937
+ "\n",
938
+ " df_m_musicians_2_para_w_chatgpt_highlighted:\n",
939
+ " plugins:\n",
940
+ " datasette-render-html:\n",
941
+ " columns:\n",
942
+ " - chatgpt_gen_highlighted\n",
943
+ "\n",
944
+ " df_m_podcasters_2_para_w_chatgpt:\n",
945
+ " plugins:\n",
946
+ " datasette-render-html:\n",
947
+ " columns:\n",
948
+ " - chatgpt_gen_highlighted\n",
949
+ "\n",
950
+ " df_m_podcasters_2_para_w_chatgpt_highlighted:\n",
951
+ " plugins:\n",
952
+ " datasette-render-html:\n",
953
+ " columns:\n",
954
+ " - chatgpt_gen_highlighted\n",
955
+ "\n",
956
+ " df_m_sports_2_para_w_chatgpt:\n",
957
+ " plugins:\n",
958
+ " datasette-render-html:\n",
959
+ " columns:\n",
960
+ " - chatgpt_gen_highlighted\n",
961
+ "\n",
962
+ " df_m_sports_2_para_w_chatgpt_highlighted:\n",
963
+ " plugins:\n",
964
+ " datasette-render-html:\n",
965
+ " columns:\n",
966
+ " - chatgpt_gen_highlighted\n",
967
+ "\n",
968
+ " df_m_writers_2_para_w_chatgpt:\n",
969
+ " plugins:\n",
970
+ " datasette-render-html:\n",
971
+ " columns:\n",
972
+ " - chatgpt_gen_highlighted\n",
973
+ "\n",
974
+ " df_m_writers_2_para_w_chatgpt_highlighted:\n",
975
+ " plugins:\n",
976
+ " datasette-render-html:\n",
977
+ " columns:\n",
978
+ " - chatgpt_gen_highlighted\n",
979
+ "\n",
980
+ " evaluated_letters-chatgpt-cbg:\n",
981
+ " tables:\n",
982
+ " all_2_para_w_chatgpt_eval:\n",
983
+ " plugins:\n",
984
+ " datasette-render-html:\n",
985
+ " columns:\n",
986
+ " - chatgpt_gen_highlighted\n",
987
+ "\n",
988
+ " all_2_para_w_chatgpt_eval_highlighted:\n",
989
+ " plugins:\n",
990
+ " datasette-render-html:\n",
991
+ " columns:\n",
992
+ " - chatgpt_gen_highlighted\n",
993
+ "\n",
994
+ " all_2_para_w_chatgpt_eval_hallucination_eval:\n",
995
+ " plugins:\n",
996
+ " datasette-render-html:\n",
997
+ " columns:\n",
998
+ " - chatgpt_gen_highlighted\n",
999
+ "\n",
1000
+ " all_2_para_w_chatgpt_eval_hallucination_eval_highlighted:\n",
1001
+ " plugins:\n",
1002
+ " datasette-render-html:\n",
1003
+ " columns:\n",
1004
+ " - chatgpt_gen_highlighted\n",
1005
+ "\n",
1006
+ " all_2_para_w_chatgpt_eval_hallucination:\n",
1007
+ " plugins:\n",
1008
+ " datasette-render-html:\n",
1009
+ " columns:\n",
1010
+ " - chatgpt_gen_highlighted\n",
1011
+ "\n",
1012
+ " all_2_para_w_chatgpt_eval_hallucination_highlighted:\n",
1013
+ " plugins:\n",
1014
+ " datasette-render-html:\n",
1015
+ " columns:\n",
1016
+ " - chatgpt_gen_highlighted\n",
1017
+ "\n",
1018
+ "\n"
1019
+ ]
1020
+ }
1021
+ ],
1022
+ "source": [
1023
+ "output = \"databases:\\n\"\n",
1024
+ "for db_path in Path(\".\").glob(\"*.db\"):\n",
1025
+ " db = Database(db_path)\n",
1026
+ " db_name = db_path.stem\n",
1027
+ " db_name_printed = False\n",
1028
+ " for table in db.table_names():\n",
1029
+ " if \"chatgpt_gen_highlighted\" in db[table].columns_dict:\n",
1030
+ " if not db_name_printed:\n",
1031
+ " output += f\" {db_name}:\\n tables:\\n\"\n",
1032
+ " db_name_printed = True\n",
1033
+ " output += f\" {table}:\\n plugins:\\n datasette-render-html:\\n columns:\\n - chatgpt_gen_highlighted\\n\\n\"\n",
1034
+ " output += f\" {table}_highlighted:\\n plugins:\\n datasette-render-html:\\n columns:\\n - chatgpt_gen_highlighted\\n\\n\"\n",
1035
+ "\n",
1036
+ "print(output)\n",
1037
+ "import pyperclip\n",
1038
+ "\n",
1039
+ "pyperclip.copy(output)"
1040
+ ]
1041
+ },
1042
+ {
1043
+ "cell_type": "code",
1044
+ "execution_count": 50,
1045
+ "metadata": {},
1046
+ "outputs": [
1047
+ {
1048
+ "name": "stdout",
1049
+ "output_type": "stream",
1050
+ "text": [
1051
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [clg_letters_eval]\n",
1052
+ "Created view clg_letters_eval_highlighted in evaluated_letters-chatgpt-clg\n",
1053
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [df_f_acting_2_para_w_chatgpt_eval]\n",
1054
+ "Created view df_f_acting_2_para_w_chatgpt_eval_highlighted in generated_letters-chatgpt-cbg\n",
1055
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_acting_2_para_w_chatgpt]\n",
1056
+ "Created view df_f_acting_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1057
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_artists_2_para_w_chatgpt]\n",
1058
+ "Created view df_f_artists_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1059
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_chefs_2_para_w_chatgpt]\n",
1060
+ "Created view df_f_chefs_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1061
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_comedians_2_para_w_chatgpt]\n",
1062
+ "Created view df_f_comedians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1063
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_dancers_2_para_w_chatgpt]\n",
1064
+ "Created view df_f_dancers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1065
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_models_2_para_w_chatgpt]\n",
1066
+ "Created view df_f_models_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1067
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_musicians_2_para_w_chatgpt]\n",
1068
+ "Created view df_f_musicians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1069
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_podcasters_2_para_w_chatgpt]\n",
1070
+ "Created view df_f_podcasters_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1071
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_sports_2_para_w_chatgpt]\n",
1072
+ "Created view df_f_sports_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1073
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_f_writers_2_para_w_chatgpt]\n",
1074
+ "Created view df_f_writers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1075
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [df_m_acting_2_para_w_chatgpt_eval]\n",
1076
+ "Created view df_m_acting_2_para_w_chatgpt_eval_highlighted in generated_letters-chatgpt-cbg\n",
1077
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_acting_2_para_w_chatgpt]\n",
1078
+ "Created view df_m_acting_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1079
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_artists_2_para_w_chatgpt]\n",
1080
+ "Created view df_m_artists_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1081
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_chefs_2_para_w_chatgpt]\n",
1082
+ "Created view df_m_chefs_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1083
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_comedians_2_para_w_chatgpt]\n",
1084
+ "Created view df_m_comedians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1085
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_dancers_2_para_w_chatgpt]\n",
1086
+ "Created view df_m_dancers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1087
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_models_2_para_w_chatgpt]\n",
1088
+ "Created view df_m_models_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1089
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_musicians_2_para_w_chatgpt]\n",
1090
+ "Created view df_m_musicians_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1091
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_podcasters_2_para_w_chatgpt]\n",
1092
+ "Created view df_m_podcasters_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1093
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_sports_2_para_w_chatgpt]\n",
1094
+ "Created view df_m_sports_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1095
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [df_m_writers_2_para_w_chatgpt]\n",
1096
+ "Created view df_m_writers_2_para_w_chatgpt_highlighted in generated_letters-chatgpt-cbg\n",
1097
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [all_2_para_w_chatgpt_eval]\n",
1098
+ "Created view all_2_para_w_chatgpt_eval_highlighted in evaluated_letters-chatgpt-cbg\n",
1099
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos_1, 3) as per_pos_1, round(per_for_1, 3) as per_for_1, round(per_ac_1, 3) as per_ac_1, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [all_2_para_w_chatgpt_eval_hallucination_eval]\n",
1100
+ "Created view all_2_para_w_chatgpt_eval_hallucination_eval_highlighted in evaluated_letters-chatgpt-cbg\n",
1101
+ "select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, round(per_pos, 3) as per_pos, round(per_for, 3) as per_for, round(per_ac, 3) as per_ac from [all_2_para_w_chatgpt_eval_hallucination]\n",
1102
+ "Created view all_2_para_w_chatgpt_eval_hallucination_highlighted in evaluated_letters-chatgpt-cbg\n"
1103
+ ]
1104
+ }
1105
+ ],
1106
+ "source": [
1107
+ "# for all tables with chatgpt_gen_highlighted col, \n",
1108
+ "# create a view, only selecting rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, and cols starting with per_*\n",
1109
+ "\n",
1110
+ "# per_* cols are floats like 0.111111, you should only keep 3 decimal places\n",
1111
+ "\n",
1112
+ "# use something like this to create a view\n",
1113
+ "# db.create_view(\"good_dogs\", \"\"\"\n",
1114
+ "# select * from dogs where is_good_dog = 1\n",
1115
+ "# \"\"\", replace=True)\n",
1116
+ "\n",
1117
+ "# new view should be named <table_name>_highlighted\n",
1118
+ "\n",
1119
+ "for db_path in Path(\".\").glob(\"*.db\"):\n",
1120
+ " db = Database(db_path)\n",
1121
+ " db_name = db_path.stem\n",
1122
+ " for table in db.table_names():\n",
1123
+ " if \"chatgpt_gen_highlighted\" in db[table].columns_dict:\n",
1124
+ " per_cols = [col for col in db[table].columns_dict if col.startswith(\"per_\")]\n",
1125
+ " view_name = f\"{table}_highlighted\"\n",
1126
+ " if per_cols:\n",
1127
+ " view_sql = f\"\"\"select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts, {', '.join([f'round({col}, 3) as {col}' for col in per_cols])} from [{table}]\"\"\"\n",
1128
+ " else:\n",
1129
+ " view_sql = f\"\"\"select rowid, gender, chatgpt_gen, chatgpt_gen_highlighted, word_counts from [{table}]\"\"\"\n",
1130
+ " print(view_sql)\n",
1131
+ " db.create_view(view_name, view_sql, replace=True)\n",
1132
+ " print(f\"Created view {view_name} in {db_name}\")"
1133
+ ]
1134
+ }
1135
+ ],
1136
+ "metadata": {
1137
+ "kernelspec": {
1138
+ "display_name": ".venv",
1139
+ "language": "python",
1140
+ "name": "python3"
1141
+ },
1142
+ "language_info": {
1143
+ "codemirror_mode": {
1144
+ "name": "ipython",
1145
+ "version": 3
1146
+ },
1147
+ "file_extension": ".py",
1148
+ "mimetype": "text/x-python",
1149
+ "name": "python",
1150
+ "nbconvert_exporter": "python",
1151
+ "pygments_lexer": "ipython3",
1152
+ "version": "3.11.8"
1153
+ }
1154
+ },
1155
+ "nbformat": 4,
1156
+ "nbformat_minor": 2
1157
+ }
index.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from datasette.app import Datasette
3
+ import json
4
+ import pathlib
5
+ import os
6
+
7
+ static_mounts = [
8
+ (static, str((pathlib.Path(".") / static).resolve()))
9
+ for static in []
10
+ ]
11
+
12
+ metadata = dict()
13
+ try:
14
+ metadata = json.load(open("metadata.json"))
15
+ except Exception:
16
+ pass
17
+
18
+ secret = os.environ.get("DATASETTE_SECRET")
19
+
20
+ true, false = True, False
21
+
22
+ ds = Datasette(
23
+ [],
24
+ ["agency_classifier-agency_bios.db", "agency_classifier-agency_dataset.db", "biography_dataset-preprocessed_bios.db", "biography_dataset-sampled_bios.db", "evaluated_letters-chatgpt-cbg.db", "evaluated_letters-chatgpt-clg.db", "generated_letters-chatgpt-cbg.db"],
25
+ static_mounts=static_mounts,
26
+ metadata=metadata,
27
+ secret=secret,
28
+ cors=True,
29
+ settings={}
30
+ )
31
+ asyncio.run(ds.invoke_startup())
32
+ app = ds.app()
metadata.json ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "title": "\"Kelly is a warm person\" paper data",
3
+ "about": "uclanlp/biases-llm-reference-letters",
4
+ "about_url": "https://github.com/uclanlp/biases-llm-reference-letters",
5
+ "description_html": "<p>The original CSV files are too large, I selected only 100 rows of each file</p>\n<p>See also <a href=\"https://github.com/tddschn/llm-gender-bias-public/blob/master/papers/kelly-is-a-warm-person.md\">the notes on GitHub, including link to paper</a>.</p>\n<br/>\n<p>This site is published by <a href=\"https://github.com/tddschn\">Teddy</a></p>\n<br/>\n<p>Ref Letter Color Legend <a href=\"https://github.com/uclanlp/biases-llm-reference-letters/blob/main/word_constants.py\">(src)</a></p>\n<table border=\"1\"><tr><th>Word List</th><th>Words</th></tr><tr><td style=\"background-color: #FFA07A;\">STANDOUT_WORDS</td><td>excellen, superb, outstand, exceptional, unparallel, most, magnificent, remarkable, extraordinary, supreme, unmatched, best, outstanding, leading, preeminent</td></tr><tr><td style=\"background-color: #20B2AA;\">ABILITY_WORDS</td><td>talent, intelligen, smart, skill, ability, genius, brillian, bright, brain, aptitude, gift, capacity, flair, knack, clever, expert, proficien, capab, adept, able, competent, instinct, adroit, creative, insight, analy, research</td></tr><tr><td style=\"background-color: #ADD8E6;\">MASCULINE_WORDS</td><td>activ, adventur, aggress, ambitio, analy, assert, athlet, autonom, boast, challeng, compet, courag, decide, decisi, determin, dominan, force, greedy, headstrong, hierarch, hostil, implusive, independen, individual, intellect, lead, logic, masculine, objective, opinion, outspoken, persist, principle, reckless, stubborn, superior, confiden, sufficien, relian</td></tr><tr><td style=\"background-color: #FFB6C1;\">FEMININE_WORDS</td><td>affection, child, cheer, commit, communal, compassion, connect, considerat, cooperat, emotion, empath, feminine, flatterable, gentle, interperson, interdependen, kind, kinship, loyal, nurtur, pleasant, polite, quiet, responsiv, sensitiv, submissive, supportiv, sympath, tender, together, trust, understanding, warm, whin</td></tr><tr><td style=\"background-color: #778899;\">agentic_words</td><td>assert, confiden, aggress, ambitio, dominan, force, independen, daring, outspoken, intellect</td></tr><tr><td style=\"background-color: #98FB98;\">communal_words</td><td>affection, help, kind, sympath, sensitive, nurtur, agree, interperson, warm, caring, tact, assist</td></tr><tr><td style=\"background-color: #DAA520;\">career_words</td><td>execut, profess, corporate, office, business, career, promot, occupation, position</td></tr><tr><td style=\"background-color: #FFD700;\">family_words</td><td>home, parent, child, family, marri, wedding, relatives, husband, wife, mother, father, son, daughter</td></tr><tr><td style=\"background-color: #7B68EE;\">leader_words</td><td>execut, manage, lead, led</td></tr></table>\n \n<p>Try the *_highlight database views: <a\nhref=\"/evaluated_letters-chatgpt-clg/clg_letters_eval_highlighted\">example\n1</a>, <a\nhref=\"/generated_letters-chatgpt-cbg/df_m_dancers_2_para_w_chatgpt_highlighted\">example\n2</a></p>",
6
+ "databases": {
7
+ "teacher_reports": {
8
+ "tables": {
9
+ "teacher_p1": {
10
+ "plugins": {
11
+ "datasette-render-html": {
12
+ "columns": [
13
+ "response_highlighted"
14
+ ]
15
+ }
16
+ }
17
+ },
18
+ "teacher_p1_highlighted": {
19
+ "plugins": {
20
+ "datasette-render-html": {
21
+ "columns": [
22
+ "response_highlighted"
23
+ ]
24
+ }
25
+ }
26
+ },
27
+ "p2_name_course": {
28
+ "plugins": {
29
+ "datasette-render-html": {
30
+ "columns": [
31
+ "response_highlighted"
32
+ ]
33
+ }
34
+ }
35
+ },
36
+ "p2_name_course_highlighted": {
37
+ "plugins": {
38
+ "datasette-render-html": {
39
+ "columns": [
40
+ "response_highlighted"
41
+ ]
42
+ }
43
+ }
44
+ },
45
+ "p4_chatgpt": {
46
+ "plugins": {
47
+ "datasette-render-html": {
48
+ "columns": [
49
+ "response_highlighted"
50
+ ]
51
+ }
52
+ }
53
+ },
54
+ "p4_chatgpt_highlighted": {
55
+ "plugins": {
56
+ "datasette-render-html": {
57
+ "columns": [
58
+ "response_highlighted"
59
+ ]
60
+ }
61
+ }
62
+ },
63
+ "p3_name_course_temp_1": {
64
+ "plugins": {
65
+ "datasette-render-html": {
66
+ "columns": [
67
+ "response_highlighted"
68
+ ]
69
+ }
70
+ }
71
+ },
72
+ "p3_name_course_temp_1_highlighted": {
73
+ "plugins": {
74
+ "datasette-render-html": {
75
+ "columns": [
76
+ "response_highlighted"
77
+ ]
78
+ }
79
+ }
80
+ }
81
+ }
82
+ },
83
+ "evaluated_letters-chatgpt-clg": {
84
+ "tables": {
85
+ "clg_letters_eval": {
86
+ "plugins": {
87
+ "datasette-render-html": {
88
+ "columns": [
89
+ "chatgpt_gen_highlighted"
90
+ ]
91
+ }
92
+ }
93
+ },
94
+ "clg_letters_eval_highlighted": {
95
+ "plugins": {
96
+ "datasette-render-html": {
97
+ "columns": [
98
+ "chatgpt_gen_highlighted"
99
+ ]
100
+ }
101
+ }
102
+ }
103
+ }
104
+ },
105
+ "generated_letters-chatgpt-cbg": {
106
+ "tables": {
107
+ "df_f_acting_2_para_w_chatgpt_eval": {
108
+ "plugins": {
109
+ "datasette-render-html": {
110
+ "columns": [
111
+ "chatgpt_gen_highlighted"
112
+ ]
113
+ }
114
+ }
115
+ },
116
+ "df_f_acting_2_para_w_chatgpt_eval_highlighted": {
117
+ "plugins": {
118
+ "datasette-render-html": {
119
+ "columns": [
120
+ "chatgpt_gen_highlighted"
121
+ ]
122
+ }
123
+ }
124
+ },
125
+ "df_f_acting_2_para_w_chatgpt": {
126
+ "plugins": {
127
+ "datasette-render-html": {
128
+ "columns": [
129
+ "chatgpt_gen_highlighted"
130
+ ]
131
+ }
132
+ }
133
+ },
134
+ "df_f_acting_2_para_w_chatgpt_highlighted": {
135
+ "plugins": {
136
+ "datasette-render-html": {
137
+ "columns": [
138
+ "chatgpt_gen_highlighted"
139
+ ]
140
+ }
141
+ }
142
+ },
143
+ "df_f_artists_2_para_w_chatgpt": {
144
+ "plugins": {
145
+ "datasette-render-html": {
146
+ "columns": [
147
+ "chatgpt_gen_highlighted"
148
+ ]
149
+ }
150
+ }
151
+ },
152
+ "df_f_artists_2_para_w_chatgpt_highlighted": {
153
+ "plugins": {
154
+ "datasette-render-html": {
155
+ "columns": [
156
+ "chatgpt_gen_highlighted"
157
+ ]
158
+ }
159
+ }
160
+ },
161
+ "df_f_chefs_2_para_w_chatgpt": {
162
+ "plugins": {
163
+ "datasette-render-html": {
164
+ "columns": [
165
+ "chatgpt_gen_highlighted"
166
+ ]
167
+ }
168
+ }
169
+ },
170
+ "df_f_chefs_2_para_w_chatgpt_highlighted": {
171
+ "plugins": {
172
+ "datasette-render-html": {
173
+ "columns": [
174
+ "chatgpt_gen_highlighted"
175
+ ]
176
+ }
177
+ }
178
+ },
179
+ "df_f_comedians_2_para_w_chatgpt": {
180
+ "plugins": {
181
+ "datasette-render-html": {
182
+ "columns": [
183
+ "chatgpt_gen_highlighted"
184
+ ]
185
+ }
186
+ }
187
+ },
188
+ "df_f_comedians_2_para_w_chatgpt_highlighted": {
189
+ "plugins": {
190
+ "datasette-render-html": {
191
+ "columns": [
192
+ "chatgpt_gen_highlighted"
193
+ ]
194
+ }
195
+ }
196
+ },
197
+ "df_f_dancers_2_para_w_chatgpt": {
198
+ "plugins": {
199
+ "datasette-render-html": {
200
+ "columns": [
201
+ "chatgpt_gen_highlighted"
202
+ ]
203
+ }
204
+ }
205
+ },
206
+ "df_f_dancers_2_para_w_chatgpt_highlighted": {
207
+ "plugins": {
208
+ "datasette-render-html": {
209
+ "columns": [
210
+ "chatgpt_gen_highlighted"
211
+ ]
212
+ }
213
+ }
214
+ },
215
+ "df_f_models_2_para_w_chatgpt": {
216
+ "plugins": {
217
+ "datasette-render-html": {
218
+ "columns": [
219
+ "chatgpt_gen_highlighted"
220
+ ]
221
+ }
222
+ }
223
+ },
224
+ "df_f_models_2_para_w_chatgpt_highlighted": {
225
+ "plugins": {
226
+ "datasette-render-html": {
227
+ "columns": [
228
+ "chatgpt_gen_highlighted"
229
+ ]
230
+ }
231
+ }
232
+ },
233
+ "df_f_musicians_2_para_w_chatgpt": {
234
+ "plugins": {
235
+ "datasette-render-html": {
236
+ "columns": [
237
+ "chatgpt_gen_highlighted"
238
+ ]
239
+ }
240
+ }
241
+ },
242
+ "df_f_musicians_2_para_w_chatgpt_highlighted": {
243
+ "plugins": {
244
+ "datasette-render-html": {
245
+ "columns": [
246
+ "chatgpt_gen_highlighted"
247
+ ]
248
+ }
249
+ }
250
+ },
251
+ "df_f_podcasters_2_para_w_chatgpt": {
252
+ "plugins": {
253
+ "datasette-render-html": {
254
+ "columns": [
255
+ "chatgpt_gen_highlighted"
256
+ ]
257
+ }
258
+ }
259
+ },
260
+ "df_f_podcasters_2_para_w_chatgpt_highlighted": {
261
+ "plugins": {
262
+ "datasette-render-html": {
263
+ "columns": [
264
+ "chatgpt_gen_highlighted"
265
+ ]
266
+ }
267
+ }
268
+ },
269
+ "df_f_sports_2_para_w_chatgpt": {
270
+ "plugins": {
271
+ "datasette-render-html": {
272
+ "columns": [
273
+ "chatgpt_gen_highlighted"
274
+ ]
275
+ }
276
+ }
277
+ },
278
+ "df_f_sports_2_para_w_chatgpt_highlighted": {
279
+ "plugins": {
280
+ "datasette-render-html": {
281
+ "columns": [
282
+ "chatgpt_gen_highlighted"
283
+ ]
284
+ }
285
+ }
286
+ },
287
+ "df_f_writers_2_para_w_chatgpt": {
288
+ "plugins": {
289
+ "datasette-render-html": {
290
+ "columns": [
291
+ "chatgpt_gen_highlighted"
292
+ ]
293
+ }
294
+ }
295
+ },
296
+ "df_f_writers_2_para_w_chatgpt_highlighted": {
297
+ "plugins": {
298
+ "datasette-render-html": {
299
+ "columns": [
300
+ "chatgpt_gen_highlighted"
301
+ ]
302
+ }
303
+ }
304
+ },
305
+ "df_m_acting_2_para_w_chatgpt_eval": {
306
+ "plugins": {
307
+ "datasette-render-html": {
308
+ "columns": [
309
+ "chatgpt_gen_highlighted"
310
+ ]
311
+ }
312
+ }
313
+ },
314
+ "df_m_acting_2_para_w_chatgpt_eval_highlighted": {
315
+ "plugins": {
316
+ "datasette-render-html": {
317
+ "columns": [
318
+ "chatgpt_gen_highlighted"
319
+ ]
320
+ }
321
+ }
322
+ },
323
+ "df_m_acting_2_para_w_chatgpt": {
324
+ "plugins": {
325
+ "datasette-render-html": {
326
+ "columns": [
327
+ "chatgpt_gen_highlighted"
328
+ ]
329
+ }
330
+ }
331
+ },
332
+ "df_m_acting_2_para_w_chatgpt_highlighted": {
333
+ "plugins": {
334
+ "datasette-render-html": {
335
+ "columns": [
336
+ "chatgpt_gen_highlighted"
337
+ ]
338
+ }
339
+ }
340
+ },
341
+ "df_m_artists_2_para_w_chatgpt": {
342
+ "plugins": {
343
+ "datasette-render-html": {
344
+ "columns": [
345
+ "chatgpt_gen_highlighted"
346
+ ]
347
+ }
348
+ }
349
+ },
350
+ "df_m_artists_2_para_w_chatgpt_highlighted": {
351
+ "plugins": {
352
+ "datasette-render-html": {
353
+ "columns": [
354
+ "chatgpt_gen_highlighted"
355
+ ]
356
+ }
357
+ }
358
+ },
359
+ "df_m_chefs_2_para_w_chatgpt": {
360
+ "plugins": {
361
+ "datasette-render-html": {
362
+ "columns": [
363
+ "chatgpt_gen_highlighted"
364
+ ]
365
+ }
366
+ }
367
+ },
368
+ "df_m_chefs_2_para_w_chatgpt_highlighted": {
369
+ "plugins": {
370
+ "datasette-render-html": {
371
+ "columns": [
372
+ "chatgpt_gen_highlighted"
373
+ ]
374
+ }
375
+ }
376
+ },
377
+ "df_m_comedians_2_para_w_chatgpt": {
378
+ "plugins": {
379
+ "datasette-render-html": {
380
+ "columns": [
381
+ "chatgpt_gen_highlighted"
382
+ ]
383
+ }
384
+ }
385
+ },
386
+ "df_m_comedians_2_para_w_chatgpt_highlighted": {
387
+ "plugins": {
388
+ "datasette-render-html": {
389
+ "columns": [
390
+ "chatgpt_gen_highlighted"
391
+ ]
392
+ }
393
+ }
394
+ },
395
+ "df_m_dancers_2_para_w_chatgpt": {
396
+ "plugins": {
397
+ "datasette-render-html": {
398
+ "columns": [
399
+ "chatgpt_gen_highlighted"
400
+ ]
401
+ }
402
+ }
403
+ },
404
+ "df_m_dancers_2_para_w_chatgpt_highlighted": {
405
+ "plugins": {
406
+ "datasette-render-html": {
407
+ "columns": [
408
+ "chatgpt_gen_highlighted"
409
+ ]
410
+ }
411
+ }
412
+ },
413
+ "df_m_models_2_para_w_chatgpt": {
414
+ "plugins": {
415
+ "datasette-render-html": {
416
+ "columns": [
417
+ "chatgpt_gen_highlighted"
418
+ ]
419
+ }
420
+ }
421
+ },
422
+ "df_m_models_2_para_w_chatgpt_highlighted": {
423
+ "plugins": {
424
+ "datasette-render-html": {
425
+ "columns": [
426
+ "chatgpt_gen_highlighted"
427
+ ]
428
+ }
429
+ }
430
+ },
431
+ "df_m_musicians_2_para_w_chatgpt": {
432
+ "plugins": {
433
+ "datasette-render-html": {
434
+ "columns": [
435
+ "chatgpt_gen_highlighted"
436
+ ]
437
+ }
438
+ }
439
+ },
440
+ "df_m_musicians_2_para_w_chatgpt_highlighted": {
441
+ "plugins": {
442
+ "datasette-render-html": {
443
+ "columns": [
444
+ "chatgpt_gen_highlighted"
445
+ ]
446
+ }
447
+ }
448
+ },
449
+ "df_m_podcasters_2_para_w_chatgpt": {
450
+ "plugins": {
451
+ "datasette-render-html": {
452
+ "columns": [
453
+ "chatgpt_gen_highlighted"
454
+ ]
455
+ }
456
+ }
457
+ },
458
+ "df_m_podcasters_2_para_w_chatgpt_highlighted": {
459
+ "plugins": {
460
+ "datasette-render-html": {
461
+ "columns": [
462
+ "chatgpt_gen_highlighted"
463
+ ]
464
+ }
465
+ }
466
+ },
467
+ "df_m_sports_2_para_w_chatgpt": {
468
+ "plugins": {
469
+ "datasette-render-html": {
470
+ "columns": [
471
+ "chatgpt_gen_highlighted"
472
+ ]
473
+ }
474
+ }
475
+ },
476
+ "df_m_sports_2_para_w_chatgpt_highlighted": {
477
+ "plugins": {
478
+ "datasette-render-html": {
479
+ "columns": [
480
+ "chatgpt_gen_highlighted"
481
+ ]
482
+ }
483
+ }
484
+ },
485
+ "df_m_writers_2_para_w_chatgpt": {
486
+ "plugins": {
487
+ "datasette-render-html": {
488
+ "columns": [
489
+ "chatgpt_gen_highlighted"
490
+ ]
491
+ }
492
+ }
493
+ },
494
+ "df_m_writers_2_para_w_chatgpt_highlighted": {
495
+ "plugins": {
496
+ "datasette-render-html": {
497
+ "columns": [
498
+ "chatgpt_gen_highlighted"
499
+ ]
500
+ }
501
+ }
502
+ }
503
+ }
504
+ },
505
+ "evaluated_letters-chatgpt-cbg": {
506
+ "tables": {
507
+ "all_2_para_w_chatgpt_eval": {
508
+ "plugins": {
509
+ "datasette-render-html": {
510
+ "columns": [
511
+ "chatgpt_gen_highlighted"
512
+ ]
513
+ }
514
+ }
515
+ },
516
+ "all_2_para_w_chatgpt_eval_highlighted": {
517
+ "plugins": {
518
+ "datasette-render-html": {
519
+ "columns": [
520
+ "chatgpt_gen_highlighted"
521
+ ]
522
+ }
523
+ }
524
+ },
525
+ "all_2_para_w_chatgpt_eval_hallucination_eval": {
526
+ "plugins": {
527
+ "datasette-render-html": {
528
+ "columns": [
529
+ "chatgpt_gen_highlighted"
530
+ ]
531
+ }
532
+ }
533
+ },
534
+ "all_2_para_w_chatgpt_eval_hallucination_eval_highlighted": {
535
+ "plugins": {
536
+ "datasette-render-html": {
537
+ "columns": [
538
+ "chatgpt_gen_highlighted"
539
+ ]
540
+ }
541
+ }
542
+ },
543
+ "all_2_para_w_chatgpt_eval_hallucination": {
544
+ "plugins": {
545
+ "datasette-render-html": {
546
+ "columns": [
547
+ "chatgpt_gen_highlighted"
548
+ ]
549
+ }
550
+ }
551
+ },
552
+ "all_2_para_w_chatgpt_eval_hallucination_highlighted": {
553
+ "plugins": {
554
+ "datasette-render-html": {
555
+ "columns": [
556
+ "chatgpt_gen_highlighted"
557
+ ]
558
+ }
559
+ }
560
+ }
561
+ }
562
+ }
563
+ }
564
+ }
metadata.yml ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ title: '"Kelly is a warm person" paper data'
2
+ about: uclanlp/biases-llm-reference-letters
3
+ about_url: https://github.com/uclanlp/biases-llm-reference-letters
4
+ description_html: |-
5
+ <p>The original CSV files are too large, I selected only 100 rows of each file</p>
6
+ <p>See also <a href="https://github.com/tddschn/llm-gender-bias-public/blob/master/papers/kelly-is-a-warm-person.md">the notes on GitHub, including link to paper</a>.</p>
7
+ <br/>
8
+ <p>This site is published by <a href="https://github.com/tddschn">Teddy</a></p>
9
+ <br/>
10
+ <p>Ref Letter Color Legend <a href="https://github.com/uclanlp/biases-llm-reference-letters/blob/main/word_constants.py">(src)</a></p>
11
+ <table border="1"><tr><th>Word List</th><th>Words</th></tr><tr><td style="background-color: #FFA07A;">STANDOUT_WORDS</td><td>excellen, superb, outstand, exceptional, unparallel, most, magnificent, remarkable, extraordinary, supreme, unmatched, best, outstanding, leading, preeminent</td></tr><tr><td style="background-color: #20B2AA;">ABILITY_WORDS</td><td>talent, intelligen, smart, skill, ability, genius, brillian, bright, brain, aptitude, gift, capacity, flair, knack, clever, expert, proficien, capab, adept, able, competent, instinct, adroit, creative, insight, analy, research</td></tr><tr><td style="background-color: #ADD8E6;">MASCULINE_WORDS</td><td>activ, adventur, aggress, ambitio, analy, assert, athlet, autonom, boast, challeng, compet, courag, decide, decisi, determin, dominan, force, greedy, headstrong, hierarch, hostil, implusive, independen, individual, intellect, lead, logic, masculine, objective, opinion, outspoken, persist, principle, reckless, stubborn, superior, confiden, sufficien, relian</td></tr><tr><td style="background-color: #FFB6C1;">FEMININE_WORDS</td><td>affection, child, cheer, commit, communal, compassion, connect, considerat, cooperat, emotion, empath, feminine, flatterable, gentle, interperson, interdependen, kind, kinship, loyal, nurtur, pleasant, polite, quiet, responsiv, sensitiv, submissive, supportiv, sympath, tender, together, trust, understanding, warm, whin</td></tr><tr><td style="background-color: #778899;">agentic_words</td><td>assert, confiden, aggress, ambitio, dominan, force, independen, daring, outspoken, intellect</td></tr><tr><td style="background-color: #98FB98;">communal_words</td><td>affection, help, kind, sympath, sensitive, nurtur, agree, interperson, warm, caring, tact, assist</td></tr><tr><td style="background-color: #DAA520;">career_words</td><td>execut, profess, corporate, office, business, career, promot, occupation, position</td></tr><tr><td style="background-color: #FFD700;">family_words</td><td>home, parent, child, family, marri, wedding, relatives, husband, wife, mother, father, son, daughter</td></tr><tr><td style="background-color: #7B68EE;">leader_words</td><td>execut, manage, lead, led</td></tr></table>
12
+
13
+ <p>Try the *_highlight database views: <a
14
+ href="/evaluated_letters-chatgpt-clg/clg_letters_eval_highlighted">example
15
+ 1</a>, <a
16
+ href="/generated_letters-chatgpt-cbg/df_m_dancers_2_para_w_chatgpt_highlighted">example
17
+ 2</a></p>
18
+
19
+
20
+ databases:
21
+ teacher_reports:
22
+ tables:
23
+ teacher_p1:
24
+ plugins:
25
+ datasette-render-html:
26
+ columns:
27
+ - response_highlighted
28
+
29
+ teacher_p1_highlighted:
30
+ plugins:
31
+ datasette-render-html:
32
+ columns:
33
+ - response_highlighted
34
+
35
+ p2_name_course:
36
+ plugins:
37
+ datasette-render-html:
38
+ columns:
39
+ - response_highlighted
40
+
41
+ p2_name_course_highlighted:
42
+ plugins:
43
+ datasette-render-html:
44
+ columns:
45
+ - response_highlighted
46
+
47
+ p4_chatgpt:
48
+ plugins:
49
+ datasette-render-html:
50
+ columns:
51
+ - response_highlighted
52
+
53
+ p4_chatgpt_highlighted:
54
+ plugins:
55
+ datasette-render-html:
56
+ columns:
57
+ - response_highlighted
58
+
59
+ p3_name_course_temp_1:
60
+ plugins:
61
+ datasette-render-html:
62
+ columns:
63
+ - response_highlighted
64
+
65
+ p3_name_course_temp_1_highlighted:
66
+ plugins:
67
+ datasette-render-html:
68
+ columns:
69
+ - response_highlighted
70
+
71
+
72
+ evaluated_letters-chatgpt-clg:
73
+ tables:
74
+ clg_letters_eval:
75
+ plugins:
76
+ datasette-render-html:
77
+ columns:
78
+ - chatgpt_gen_highlighted
79
+
80
+ clg_letters_eval_highlighted:
81
+ plugins:
82
+ datasette-render-html:
83
+ columns:
84
+ - chatgpt_gen_highlighted
85
+
86
+ generated_letters-chatgpt-cbg:
87
+ tables:
88
+ df_f_acting_2_para_w_chatgpt_eval:
89
+ plugins:
90
+ datasette-render-html:
91
+ columns:
92
+ - chatgpt_gen_highlighted
93
+
94
+ df_f_acting_2_para_w_chatgpt_eval_highlighted:
95
+ plugins:
96
+ datasette-render-html:
97
+ columns:
98
+ - chatgpt_gen_highlighted
99
+
100
+ df_f_acting_2_para_w_chatgpt:
101
+ plugins:
102
+ datasette-render-html:
103
+ columns:
104
+ - chatgpt_gen_highlighted
105
+
106
+ df_f_acting_2_para_w_chatgpt_highlighted:
107
+ plugins:
108
+ datasette-render-html:
109
+ columns:
110
+ - chatgpt_gen_highlighted
111
+
112
+ df_f_artists_2_para_w_chatgpt:
113
+ plugins:
114
+ datasette-render-html:
115
+ columns:
116
+ - chatgpt_gen_highlighted
117
+
118
+ df_f_artists_2_para_w_chatgpt_highlighted:
119
+ plugins:
120
+ datasette-render-html:
121
+ columns:
122
+ - chatgpt_gen_highlighted
123
+
124
+ df_f_chefs_2_para_w_chatgpt:
125
+ plugins:
126
+ datasette-render-html:
127
+ columns:
128
+ - chatgpt_gen_highlighted
129
+
130
+ df_f_chefs_2_para_w_chatgpt_highlighted:
131
+ plugins:
132
+ datasette-render-html:
133
+ columns:
134
+ - chatgpt_gen_highlighted
135
+
136
+ df_f_comedians_2_para_w_chatgpt:
137
+ plugins:
138
+ datasette-render-html:
139
+ columns:
140
+ - chatgpt_gen_highlighted
141
+
142
+ df_f_comedians_2_para_w_chatgpt_highlighted:
143
+ plugins:
144
+ datasette-render-html:
145
+ columns:
146
+ - chatgpt_gen_highlighted
147
+
148
+ df_f_dancers_2_para_w_chatgpt:
149
+ plugins:
150
+ datasette-render-html:
151
+ columns:
152
+ - chatgpt_gen_highlighted
153
+
154
+ df_f_dancers_2_para_w_chatgpt_highlighted:
155
+ plugins:
156
+ datasette-render-html:
157
+ columns:
158
+ - chatgpt_gen_highlighted
159
+
160
+ df_f_models_2_para_w_chatgpt:
161
+ plugins:
162
+ datasette-render-html:
163
+ columns:
164
+ - chatgpt_gen_highlighted
165
+
166
+ df_f_models_2_para_w_chatgpt_highlighted:
167
+ plugins:
168
+ datasette-render-html:
169
+ columns:
170
+ - chatgpt_gen_highlighted
171
+
172
+ df_f_musicians_2_para_w_chatgpt:
173
+ plugins:
174
+ datasette-render-html:
175
+ columns:
176
+ - chatgpt_gen_highlighted
177
+
178
+ df_f_musicians_2_para_w_chatgpt_highlighted:
179
+ plugins:
180
+ datasette-render-html:
181
+ columns:
182
+ - chatgpt_gen_highlighted
183
+
184
+ df_f_podcasters_2_para_w_chatgpt:
185
+ plugins:
186
+ datasette-render-html:
187
+ columns:
188
+ - chatgpt_gen_highlighted
189
+
190
+ df_f_podcasters_2_para_w_chatgpt_highlighted:
191
+ plugins:
192
+ datasette-render-html:
193
+ columns:
194
+ - chatgpt_gen_highlighted
195
+
196
+ df_f_sports_2_para_w_chatgpt:
197
+ plugins:
198
+ datasette-render-html:
199
+ columns:
200
+ - chatgpt_gen_highlighted
201
+
202
+ df_f_sports_2_para_w_chatgpt_highlighted:
203
+ plugins:
204
+ datasette-render-html:
205
+ columns:
206
+ - chatgpt_gen_highlighted
207
+
208
+ df_f_writers_2_para_w_chatgpt:
209
+ plugins:
210
+ datasette-render-html:
211
+ columns:
212
+ - chatgpt_gen_highlighted
213
+
214
+ df_f_writers_2_para_w_chatgpt_highlighted:
215
+ plugins:
216
+ datasette-render-html:
217
+ columns:
218
+ - chatgpt_gen_highlighted
219
+
220
+ df_m_acting_2_para_w_chatgpt_eval:
221
+ plugins:
222
+ datasette-render-html:
223
+ columns:
224
+ - chatgpt_gen_highlighted
225
+
226
+ df_m_acting_2_para_w_chatgpt_eval_highlighted:
227
+ plugins:
228
+ datasette-render-html:
229
+ columns:
230
+ - chatgpt_gen_highlighted
231
+
232
+ df_m_acting_2_para_w_chatgpt:
233
+ plugins:
234
+ datasette-render-html:
235
+ columns:
236
+ - chatgpt_gen_highlighted
237
+
238
+ df_m_acting_2_para_w_chatgpt_highlighted:
239
+ plugins:
240
+ datasette-render-html:
241
+ columns:
242
+ - chatgpt_gen_highlighted
243
+
244
+ df_m_artists_2_para_w_chatgpt:
245
+ plugins:
246
+ datasette-render-html:
247
+ columns:
248
+ - chatgpt_gen_highlighted
249
+
250
+ df_m_artists_2_para_w_chatgpt_highlighted:
251
+ plugins:
252
+ datasette-render-html:
253
+ columns:
254
+ - chatgpt_gen_highlighted
255
+
256
+ df_m_chefs_2_para_w_chatgpt:
257
+ plugins:
258
+ datasette-render-html:
259
+ columns:
260
+ - chatgpt_gen_highlighted
261
+
262
+ df_m_chefs_2_para_w_chatgpt_highlighted:
263
+ plugins:
264
+ datasette-render-html:
265
+ columns:
266
+ - chatgpt_gen_highlighted
267
+
268
+ df_m_comedians_2_para_w_chatgpt:
269
+ plugins:
270
+ datasette-render-html:
271
+ columns:
272
+ - chatgpt_gen_highlighted
273
+
274
+ df_m_comedians_2_para_w_chatgpt_highlighted:
275
+ plugins:
276
+ datasette-render-html:
277
+ columns:
278
+ - chatgpt_gen_highlighted
279
+
280
+ df_m_dancers_2_para_w_chatgpt:
281
+ plugins:
282
+ datasette-render-html:
283
+ columns:
284
+ - chatgpt_gen_highlighted
285
+
286
+ df_m_dancers_2_para_w_chatgpt_highlighted:
287
+ plugins:
288
+ datasette-render-html:
289
+ columns:
290
+ - chatgpt_gen_highlighted
291
+
292
+ df_m_models_2_para_w_chatgpt:
293
+ plugins:
294
+ datasette-render-html:
295
+ columns:
296
+ - chatgpt_gen_highlighted
297
+
298
+ df_m_models_2_para_w_chatgpt_highlighted:
299
+ plugins:
300
+ datasette-render-html:
301
+ columns:
302
+ - chatgpt_gen_highlighted
303
+
304
+ df_m_musicians_2_para_w_chatgpt:
305
+ plugins:
306
+ datasette-render-html:
307
+ columns:
308
+ - chatgpt_gen_highlighted
309
+
310
+ df_m_musicians_2_para_w_chatgpt_highlighted:
311
+ plugins:
312
+ datasette-render-html:
313
+ columns:
314
+ - chatgpt_gen_highlighted
315
+
316
+ df_m_podcasters_2_para_w_chatgpt:
317
+ plugins:
318
+ datasette-render-html:
319
+ columns:
320
+ - chatgpt_gen_highlighted
321
+
322
+ df_m_podcasters_2_para_w_chatgpt_highlighted:
323
+ plugins:
324
+ datasette-render-html:
325
+ columns:
326
+ - chatgpt_gen_highlighted
327
+
328
+ df_m_sports_2_para_w_chatgpt:
329
+ plugins:
330
+ datasette-render-html:
331
+ columns:
332
+ - chatgpt_gen_highlighted
333
+
334
+ df_m_sports_2_para_w_chatgpt_highlighted:
335
+ plugins:
336
+ datasette-render-html:
337
+ columns:
338
+ - chatgpt_gen_highlighted
339
+
340
+ df_m_writers_2_para_w_chatgpt:
341
+ plugins:
342
+ datasette-render-html:
343
+ columns:
344
+ - chatgpt_gen_highlighted
345
+
346
+ df_m_writers_2_para_w_chatgpt_highlighted:
347
+ plugins:
348
+ datasette-render-html:
349
+ columns:
350
+ - chatgpt_gen_highlighted
351
+
352
+ evaluated_letters-chatgpt-cbg:
353
+ tables:
354
+ all_2_para_w_chatgpt_eval:
355
+ plugins:
356
+ datasette-render-html:
357
+ columns:
358
+ - chatgpt_gen_highlighted
359
+
360
+ all_2_para_w_chatgpt_eval_highlighted:
361
+ plugins:
362
+ datasette-render-html:
363
+ columns:
364
+ - chatgpt_gen_highlighted
365
+
366
+ all_2_para_w_chatgpt_eval_hallucination_eval:
367
+ plugins:
368
+ datasette-render-html:
369
+ columns:
370
+ - chatgpt_gen_highlighted
371
+
372
+ all_2_para_w_chatgpt_eval_hallucination_eval_highlighted:
373
+ plugins:
374
+ datasette-render-html:
375
+ columns:
376
+ - chatgpt_gen_highlighted
377
+
378
+ all_2_para_w_chatgpt_eval_hallucination:
379
+ plugins:
380
+ datasette-render-html:
381
+ columns:
382
+ - chatgpt_gen_highlighted
383
+
384
+ all_2_para_w_chatgpt_eval_hallucination_highlighted:
385
+ plugins:
386
+ datasette-render-html:
387
+ columns:
388
+ - chatgpt_gen_highlighted
389
+
requirements.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasette
2
+ sqlite-utils
3
+ datasette-vega
4
+ datasette-graphql
5
+ datasette-search-alldatasette-auth-github
6
+ datasette-auth-passwords
7
+ datasette-auth-tokens
8
+ datasette-cluster-map
9
+ datasette-column-sum
10
+ datasette-enrichments
11
+ datasette-enrichments-jinja
12
+ datasette-hashed-urls
13
+ datasette-import
14
+ datasette-insert
15
+ datasette-leaflet
16
+ datasette-leaflet-freedraw
17
+ datasette-paste
18
+ datasette-plot
19
+ datasette-pretty-json
20
+ datasette-publish-fly
21
+ datasette-publish-vercel
22
+ datasette-render-html
23
+ datasette-render-images
24
+ datasette-render-markdown
25
+ datasette-render-timestamps
26
+ datasette-schema-versions
27
+ datasette-search-all
28
+ datasette-sqlite-vss
29
+ datasette-tail
30
+ datasette-upload-csvs
31
+ datasette-vega
32
+ dclient
start.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ # CMD ["datasette", "/code/data/*.db", "-m", "/code/metadata.yml", "--host", "0.0.0.0", "--port", "7860"]
4
+
5
+ datasette /code/*.db -m /code/metadata.yml --host "0.0.0.0" --port 7860
vercel.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "biases-llm-reference-letters-datasette-vercel",
3
+ "version": 2,
4
+ "builds": [
5
+ {
6
+ "src": "index.py",
7
+ "use": "@vercel/python@3.0.7"
8
+ }
9
+ ],
10
+ "routes": [
11
+ {
12
+ "src": "(.*)",
13
+ "dest": "index.py"
14
+ }
15
+ ]
16
+ }
word_constants.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ STANDOUT_WORDS = [
2
+ 'excellen', 'superb', 'outstand', 'exceptional', 'unparallel', 'most', 'magnificent', 'remarkable', 'extraordinary', 'supreme', 'unmatched', 'best', 'outstanding', 'leading', 'preeminent'
3
+ ]
4
+ ABILITY_WORDS = [
5
+ 'talent', 'intelligen', 'smart', 'skill', 'ability', 'genius', 'brillian', 'bright', 'brain', 'aptitude', 'gift', 'capacity', 'flair', 'knack', 'clever', 'expert', 'proficien', 'capab', 'adept', 'able', 'competent', 'instinct', 'adroit', 'creative', 'insight', 'analy', 'research'
6
+ ]
7
+ MASCULINE_WORDS = [
8
+ 'activ', 'adventur', 'aggress', 'ambitio', 'analy', 'assert', 'athlet', 'autonom', 'boast', 'challeng', 'compet', 'courag', 'decide', 'decisi', \
9
+ 'determin', 'dominan', 'force', 'greedy', 'headstrong', 'hierarch', 'hostil', 'implusive', 'independen', 'individual', 'intellect', 'lead', \
10
+ 'logic', 'masculine', 'objective', 'opinion', 'outspoken', 'persist', 'principle', 'reckless', 'stubborn', 'superior', 'confiden', 'sufficien', 'relian'
11
+ ]
12
+ FEMININE_WORDS = [
13
+ 'affection', 'child', 'cheer', 'commit', 'communal', 'compassion', 'connect', 'considerat', 'cooperat', 'emotion', 'empath', 'feminine', 'flatterable', 'gentle', 'interperson', 'interdependen', 'kind', 'kinship', 'loyal', 'nurtur', 'pleasant', 'polite', 'quiet',
14
+ 'responsiv', 'sensitiv', 'submissive', 'supportiv', 'sympath', 'tender', 'together', 'trust', 'understanding', 'warm', 'whin'
15
+ ]
16
+ agentic_words = ['assert', 'confiden', 'aggress', 'ambitio', 'dominan', 'force', 'independen', 'daring', 'outspoken', 'intellect']
17
+ communal_words = ['affection', 'help', 'kind', 'sympath', 'sensitive', 'nurtur', 'agree', 'interperson', 'warm', 'caring', 'tact', 'assist']
18
+ career_words = ['execut', 'profess', 'corporate', 'office', 'business', 'career', 'promot', 'occupation', 'position']
19
+ family_words = ['home', 'parent', 'child', 'family', 'marri', 'wedding', 'relatives', 'husband', 'wife', 'mother', 'father', 'son', 'daughter']
20
+ leader_words = ['execut', 'manage', 'lead', 'led']