carolanderson committed on
Commit
d40a9ae
•
1 Parent(s): 2f010ff

replace template with app

Browse files
.ipynb_checkpoints/Dockerfile-checkpoint ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+ RUN python3 -m pip install --no-cache-dir --upgrade pip
7
+ RUN python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["panel", "serve", "/code/app.py", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "*"]
12
+
13
+ RUN mkdir /.cache
14
+ RUN chmod 777 /.cache
15
+ RUN mkdir .chroma
16
+ RUN chmod 777 .chroma
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: U.S. Government Use of AI
3
+ emoji: 📈
4
+ colorFrom: gray
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ duplicated_from: Panel-Org/panel-template
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
.ipynb_checkpoints/app-checkpoint.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from haystack import Document
2
+ from haystack.document_stores import InMemoryDocumentStore, ElasticsearchDocumentStore, FAISSDocumentStore
3
+ from haystack.nodes import BM25Retriever
4
+ from haystack.pipelines import DocumentSearchPipeline
5
+ import pandas as pd
6
+ import panel as pn
7
+ import param
8
+
9
+ pn.extension('tabulator')
10
+ pn.extension(sizing_mode="scale_both")
11
+
12
+ import hvplot.pandas
13
+
14
# load data
# The inventory file is located in this order:
#   1. the path given in the AI_INVENTORY_TSV environment variable, if set;
#   2. a copy of the file in the current working directory (e.g. the
#      container's WORKDIR when the repository is copied into the image);
#   3. the original development path, kept as a last resort so existing
#      local setups keep working.
import os

_DATA_FILENAME = "Agency Inventory AI Usage - Sheet1.tsv"
_DEV_INFILE = "/Users/carolanderson/Dropbox/repos/miscellany/webapps/Agency Inventory AI Usage - Sheet1.tsv"

infile = os.environ.get("AI_INVENTORY_TSV") or (
    _DATA_FILENAME if os.path.exists(_DATA_FILENAME) else _DEV_INFILE
)
# lineterminator='\n' is kept from the original; the header clean-up further
# down relies on the trailing '\r' it leaves on the last column name.
df = pd.read_csv(infile, sep="\t", lineterminator='\n')
17
+
18
+ # rearrange column order
19
+ col_list = ['Agency', 'Name of Inventory Item',
20
+ 'Primary Type of AI',
21
+ 'Purpose of AI', 'Length of Usage',
22
+ 'Does it directly impact the public?',
23
+ 'Vendor System',
24
+ 'Description of Inventory Item',
25
+ 'Other Notes\r']
26
+ df = df[col_list]
27
+
28
+ # remove trailing \r from 'Other Notes' header
29
+ df = df.rename(columns = {'Other Notes\r' : 'Other Notes'})
30
+
31
+ # remove trailing spaces from agency names (caused duplicate instance of "DOC")
32
+ df['Agency'] = df['Agency'].apply(lambda x : x.rstrip())
33
+
34
+ # columns not useful for filtering
35
+ no_filter_cols = ['Name of Inventory Item', 'Description of Inventory Item', "Other Notes"]
36
+
37
+ # columns to be used for filtering
38
+ filter_cols = [c for c in df.columns.unique() if c not in no_filter_cols]
39
+
40
+ # column selector for main plot
41
+ plot_column_selector = pn.widgets.Select(options=filter_cols, name="Plot category: ")
42
+
43
+ # agency selector for main plot
44
+ plot_agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
45
+ value=["Select all"],
46
+ name="Optional - filter by agency")
47
+
48
+ # selectors below are all for interactive dataframe
49
+ agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
50
+ value=["Select all"],
51
+ name="Agency")
52
+ type_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Primary Type of AI'].unique()),
53
+ value=["Select all"],
54
+ name='Primary Type of AI')
55
+ purpose_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Purpose of AI"].unique()),
56
+ value=["Select all"],
57
+ name="Purpose of AI")
58
+ length_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Length of Usage'].unique()),
59
+ value=["Select all"],
60
+ name="Length of Usage")
61
+ impact_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Does it directly impact the public?'].unique()),
62
+ value=["Select all"],
63
+ name='Does it directly impact the public?')
64
+ vendor_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Vendor System'].unique()),
65
+ value=["Select all"],
66
+ name='Vendor System')
67
+
68
+ row_filters = [agency_selector, type_selector, purpose_selector, length_selector, impact_selector,
69
+ vendor_selector]
70
+
71
+
72
def custom_plot(table, column_selector, agency_selector):
    """
    Draw a horizontal bar chart of value counts for one column.

    ``column_selector`` names the column to count. ``agency_selector`` is the
    list of chosen agencies; unless it contains "Select all", only rows whose
    'Agency' is in that list are counted.
    """
    restrict_agencies = "Select all" not in agency_selector
    if restrict_agencies:
        keep = table['Agency'].isin(agency_selector)
        table = table[keep]
    counts = table[column_selector].value_counts()
    # ascending sort of the counts before plotting
    counts = counts.sort_values(ascending=True)
    return counts.hvplot.barh(width=600, height=400, color="#336BCC")
77
+
78
+
79
def custom_table_filter(table,
                        agency_selector,
                        type_selector,
                        purpose_selector,
                        length_selector,
                        impact_selector,
                        vendor_selector):
    """
    Narrow ``table`` to the rows permitted by every selector.

    Each selector is the list of values currently chosen in one menu and is
    matched against its own column. A selector that contains "Select all"
    places no restriction on its column. The explicit one-parameter-per-menu
    signature is kept so each widget can be bound by keyword.
    """
    # (column name, chosen values) pairs, applied in this order
    selections_by_column = (
        ("Agency", agency_selector),
        ('Primary Type of AI', type_selector),
        ("Purpose of AI", purpose_selector),
        ('Length of Usage', length_selector),
        ('Does it directly impact the public?', impact_selector),
        ('Vendor System', vendor_selector),
    )
    for column, chosen in selections_by_column:
        if "Select all" in chosen:
            continue
        table = table[table[column].isin(chosen)]
    return table
103
+
104
+
105
+ custom_table = pn.widgets.Tabulator(df, pagination="local", page_size=350, layout="fit_data",
106
+ width=800, height=550)
107
+
108
+ custom_table.add_filter(pn.bind(custom_table_filter,
109
+ agency_selector=agency_selector,
110
+ type_selector=type_selector,
111
+ purpose_selector=purpose_selector,
112
+ length_selector=length_selector,
113
+ impact_selector=impact_selector,
114
+ vendor_selector=vendor_selector))
115
+
116
+
117
+ interactive_plot = pn.bind(custom_plot, table=df, column_selector=plot_column_selector,
118
+ agency_selector=plot_agency_selector)
119
+
120
+ overview_stacked = pn.Column(
121
+ pn.pane.Markdown("""
122
+ Plot shows the total count of entries, aggregated by various categories.
123
+ Change the category with the dropdown menu.
124
+ The total number of records in the database is 337, but some fields have missing values.
125
+ In particular, 'Vendor System' and 'Primary Type of AI' were not always filled out."""),
126
+ pn.Column(pn.Row(plot_column_selector,
127
+ plot_agency_selector),
128
+ pn.Row(interactive_plot, width=500))
129
+
130
+ )
131
+
132
+ overview_card = pn.Card(overview_stacked, header="# Overview of the data")
133
+
134
+ filename, button = custom_table.download_menu(
135
+ text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
136
+ button_kwargs={'name': 'Download table'}
137
+ )
138
+
139
+ download_card = pn.Card(pn.pane.Markdown("""
140
+ Download current table in .csv or .json format.
141
+ File format will be automatically selected based on the file extension.
142
+ """),
143
+ filename, button, header="### Download")
144
+
145
+ table_card = pn.Card(
146
+ pn.Row(
147
+ pn.Column(
148
+ pn.pane.Markdown("""
149
+ ### Filter with the menus below
150
+ """),pn.WidgetBox(*row_filters),
151
+ styles=dict(background='#DDE6FF')
152
+ ), pn.Column(pn.pane.Markdown("""
153
+ ### Scroll horizontally and vertically to see all data
154
+ """), custom_table)),
155
+ download_card,
156
+ header="# Explore the data"
157
+ )
158
+
159
+ # stacked bar plot of impact by agency (static plot)
160
+ impact_counts = df.groupby('Agency')['Does it directly impact the public?'].value_counts()
161
+ impact_counts = impact_counts.sort_index(level="Agency", ascending=False)
162
+ impact_count_df = pd.DataFrame(impact_counts).rename(columns={'Does it directly impact the public?' : "Count"})
163
+ impact_plot = impact_count_df.hvplot.barh(stacked=True, width=500, height=400, color=[ "#019C6D", "#336BCC", "#F41903",], legend="bottom_right")
164
+
165
+ impact_card = pn.Card(
166
+ pn.Column(
167
+ pn.pane.Markdown("""
168
+ Number of systems with no, indirect, or direct impact on the public.
169
+ These judgements were made by Anna Blue and are unique to her report."""),
170
+ impact_plot), header="# Impact on the public, by agency")
171
+
172
+ # keyword search
173
class TableIndices(param.Parameterized):
    """Container for the table locations produced by a keyword search."""

    # row index of each search hit
    row_indices = param.List()
    # column header of each search hit
    col_indices = param.List()

    def __call__(self):
        """Return the two lists as a ``(row_indices, col_indices)`` tuple."""
        rows, cols = self.row_indices, self.col_indices
        return rows, cols
179
+
180
+
181
def run_search(text, pipeline):
    """
    Run ``text`` through the search pipeline and collect where the hits are.

    Returns None for an empty query. Otherwise asks the retriever for its
    top 10 documents, keeps those scoring above 0.5, and returns a
    TableIndices holding each kept document's row index and column header.
    """
    if text == "":
        return None
    response = pipeline.run(query=text, params={"Retriever": {"top_k": 10}})
    hit_rows = []
    hit_cols = []
    for doc in response['documents']:
        if doc.score > 0.5:
            hit_rows.append(doc.meta['index'])
            hit_cols.append(doc.meta['column_header'])
    return TableIndices(row_indices=hit_rows, col_indices=hit_cols)
190
+
191
+
192
def produce_table(df, table_indices):
    """
    Build the table widget that shows the rows matched by a search.

    Parameters
    ----------
    df : the full dataframe the search was run against.
    table_indices : object exposing ``row_indices`` (as produced by
        ``run_search``), or None when there was no query.

    Returns
    -------
    A ``pn.widgets.Tabulator`` over the matched rows, or None when
    ``table_indices`` is falsy.
    """
    if not table_indices:
        return None

    # Positional lookup: row_indices are taken from df.index when the search
    # documents are built, so this assumes df keeps its default integer index.
    # A row matched in several columns is listed more than once in
    # row_indices; drop_duplicates removes the repeated rows.
    result_df = df.iloc[table_indices.row_indices, :].drop_duplicates()

    # NOTE: highlighting of the matching cells is intentionally not applied.
    # Styling the Tabulator worked, but the colours did not refresh between
    # searches unless an empty search was run in between (possibly related to
    # https://github.com/holoviz/panel/issues/3363). The per-cell colour frame
    # that only fed that disabled styling is therefore no longer built.
    return pn.widgets.Tabulator(result_df, pagination="local", page_size=350,
                                layout="fit_data", width=800, height=300)
213
+
214
+
215
def make_search_pane(result_tab):
    """
    Wrap a search-results table with a scroll hint and a download card.

    Returns None when ``result_tab`` is falsy; otherwise a column holding the
    hint text, the table itself, and a card with the table's download menu.
    """
    if not result_tab:
        return None

    download_blurb = """
Download search results in .csv or .json format.
File format will be automatically selected based on the file extension."""
    scroll_hint = """
### Scroll horizontally and vertically (if needed) to see everything.
"""

    # filename box and button provided by the table's own download menu
    name_box, download_button = result_tab.download_menu(
        text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
        button_kwargs={'name': 'Download search results'},
    )
    download_card = pn.Card(pn.pane.Markdown(download_blurb),
                            name_box, download_button, header="### Download")
    return pn.Column(pn.pane.Markdown(scroll_hint), result_tab, download_card)
229
+
230
+ # which columns to search
231
+ col_list = ['Name of Inventory Item',
232
+ 'Primary Type of AI',
233
+ 'Purpose of AI',
234
+ 'Description of Inventory Item',
235
+ 'Other Notes']
236
+
237
+ # create document store, where each string from any of the relevant columns is a doc
238
+ # save the row index as metadata
239
+ docs = []
240
+ indices = list(df.index.values)
241
+ for col in col_list:
242
+ values = df[col].tolist()
243
+ assert len(indices) == len(values)
244
+ for i, val in zip(indices, values):
245
+ dictionary = {'content' : val,
246
+ 'meta' : {"index": i, "column_header" : col}
247
+ }
248
+ docs.append(Document.from_dict(dictionary))
249
+
250
+
251
+ document_store = InMemoryDocumentStore(use_bm25=True)
252
+ document_store.write_documents(docs)
253
+ retriever = BM25Retriever(document_store=document_store)
254
+ pipeline = DocumentSearchPipeline(retriever)
255
+ text_input = pn.widgets.TextInput(name='Search', placeholder='Enter text here...')
256
+
257
+ result_indices = pn.bind(run_search, text=text_input, pipeline=pipeline)
258
+ result_table = pn.bind(produce_table, df=df, table_indices=result_indices)
259
+ result_pane = pn.bind(make_search_pane, result_tab=result_table)
260
+
261
+ search_card = pn.Card(
262
+ pn.Column(
263
+ pn.Row(
264
+ text_input,
265
+ pn.pane.Markdown("""
266
+ This will search text in the following columns:
267
+ * Name of Inventory Item
268
+ * Primary Type of AI
269
+ * Purpose of AI
270
+ * Description of Inventory Item
271
+ * Other Notes
272
+
273
+ This is a keyword search based on the BM25 algorithm as implemented in the Haystack python library.
274
+ """)),
275
+ pn.Row(result_pane),
276
+ ),
277
+ header="# Search the text"
278
+ )
279
+
280
+ main_text = """
281
+ The data visualized here come from a report by Anna Blue, a Social Impact Fellow
282
+ at the Responsible AI Institute. The report was released in May 2023. Some agencies have
283
+ released updated inventories since then, which are not reflected here.
284
+
285
+ Anna's report consolidated data released by individual government agencies in compliance with
286
+ Executive Order 13960, which requires federal agencies to produce an annual inventory of their AI usage.
287
+ See her [blog post](https://www.responsible.ai/post/federal-government-ai-use-cases) for additional details,
288
+ including links to the original data sources.
289
+ """
290
+
291
+
292
+
293
+ template = pn.template.FastListTemplate(
294
+ title='U.S. Government Use of AI',
295
+ main=[pn.pane.Markdown(main_text),
296
+ pn.Row(overview_card,impact_card),
297
+ pn.Row(table_card),
298
+ pn.Row(search_card)],
299
+ accent_base_color="#FFDAC2",
300
+ header_background="#0037A2")
301
+
302
+ template.servable()
303
+
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ hvplot
2
+ farm-haystack[inference]
3
+ pandas
4
+ panel
5
+ param
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Panel Template
3
  emoji: 📈
4
  colorFrom: gray
5
  colorTo: green
 
1
  ---
2
+ title: U.S. Government Use of AI
3
  emoji: 📈
4
  colorFrom: gray
5
  colorTo: green
app.py CHANGED
@@ -1,147 +1,303 @@
1
- import io
2
- import random
3
- from typing import List, Tuple
4
-
5
- import aiohttp
6
  import panel as pn
7
- from PIL import Image
8
- from transformers import CLIPModel, CLIPProcessor
9
-
10
- pn.extension(design="bootstrap", sizing_mode="stretch_width")
11
-
12
- ICON_URLS = {
13
- "brand-github": "https://github.com/holoviz/panel",
14
- "brand-twitter": "https://twitter.com/Panel_Org",
15
- "brand-linkedin": "https://www.linkedin.com/company/panel-org",
16
- "message-circle": "https://discourse.holoviz.org/",
17
- "brand-discord": "https://discord.gg/AXRHnJU6sP",
18
- }
19
-
20
-
21
- async def random_url(_):
22
- pet = random.choice(["cat", "dog"])
23
- api_url = f"https://api.the{pet}api.com/v1/images/search"
24
- async with aiohttp.ClientSession() as session:
25
- async with session.get(api_url) as resp:
26
- return (await resp.json())[0]["url"]
27
-
28
-
29
- @pn.cache
30
- def load_processor_model(
31
- processor_name: str, model_name: str
32
- ) -> Tuple[CLIPProcessor, CLIPModel]:
33
- processor = CLIPProcessor.from_pretrained(processor_name)
34
- model = CLIPModel.from_pretrained(model_name)
35
- return processor, model
36
-
37
-
38
- async def open_image_url(image_url: str) -> Image:
39
- async with aiohttp.ClientSession() as session:
40
- async with session.get(image_url) as resp:
41
- return Image.open(io.BytesIO(await resp.read()))
42
-
43
-
44
- def get_similarity_scores(class_items: List[str], image: Image) -> List[float]:
45
- processor, model = load_processor_model(
46
- "openai/clip-vit-base-patch32", "openai/clip-vit-base-patch32"
47
- )
48
- inputs = processor(
49
- text=class_items,
50
- images=[image],
51
- return_tensors="pt", # pytorch tensors
52
- )
53
- outputs = model(**inputs)
54
- logits_per_image = outputs.logits_per_image
55
- class_likelihoods = logits_per_image.softmax(dim=1).detach().numpy()
56
- return class_likelihoods[0]
57
-
58
-
59
- async def process_inputs(class_names: List[str], image_url: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  """
61
- High level function that takes in the user inputs and returns the
62
- classification results as panel objects.
63
  """
64
- try:
65
- main.disabled = True
66
- if not image_url:
67
- yield "##### ⚠️ Provide an image URL"
68
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- yield "##### βš™ Fetching image and running model..."
71
- try:
72
- pil_img = await open_image_url(image_url)
73
- img = pn.pane.Image(pil_img, height=400, align="center")
74
- except Exception as e:
75
- yield f"##### πŸ˜” Something went wrong, please try a different URL!"
76
- return
 
 
 
 
 
 
 
 
 
77
 
78
- class_items = class_names.split(",")
79
- class_likelihoods = get_similarity_scores(class_items, pil_img)
80
 
81
- # build the results column
82
- results = pn.Column("##### πŸŽ‰ Here are the results!", img)
 
 
83
 
84
- for class_item, class_likelihood in zip(class_items, class_likelihoods):
85
- row_label = pn.widgets.StaticText(
86
- name=class_item.strip(), value=f"{class_likelihood:.2%}", align="center"
87
- )
88
- row_bar = pn.indicators.Progress(
89
- value=int(class_likelihood * 100),
90
- sizing_mode="stretch_width",
91
- bar_color="secondary",
92
- margin=(0, 10),
93
- design=pn.theme.Material,
94
- )
95
- results.append(pn.Column(row_label, row_bar))
96
- yield results
97
- finally:
98
- main.disabled = False
99
-
100
-
101
- # create widgets
102
- randomize_url = pn.widgets.Button(name="Randomize URL", align="end")
103
-
104
- image_url = pn.widgets.TextInput(
105
- name="Image URL to classify",
106
- value=pn.bind(random_url, randomize_url),
107
- )
108
- class_names = pn.widgets.TextInput(
109
- name="Comma separated class names",
110
- placeholder="Enter possible class names, e.g. cat, dog",
111
- value="cat, dog, parrot",
112
- )
113
 
114
- input_widgets = pn.Column(
115
- "##### 😊 Click randomize or paste a URL to start classifying!",
116
- pn.Row(image_url, randomize_url),
117
- class_names,
118
- )
119
 
120
- # add interactivity
121
- interactive_result = pn.panel(
122
- pn.bind(process_inputs, image_url=image_url, class_names=class_names),
123
- height=600,
124
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- # add footer
127
- footer_row = pn.Row(pn.Spacer(), align="center")
128
- for icon, url in ICON_URLS.items():
129
- href_button = pn.widgets.Button(icon=icon, width=35, height=35)
130
- href_button.js_on_click(code=f"window.open('{url}')")
131
- footer_row.append(href_button)
132
- footer_row.append(pn.Spacer())
133
-
134
- # create dashboard
135
- main = pn.WidgetBox(
136
- input_widgets,
137
- interactive_result,
138
- footer_row,
 
 
 
 
139
  )
140
 
141
- title = "Panel Demo - Image Classification"
142
- pn.template.BootstrapTemplate(
143
- title=title,
144
- main=main,
145
- main_max_width="min(50%, 698px)",
146
- header_background="#F08080",
147
- ).servable(title=title)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from haystack import Document
2
+ from haystack.document_stores import InMemoryDocumentStore, ElasticsearchDocumentStore, FAISSDocumentStore
3
+ from haystack.nodes import BM25Retriever
4
+ from haystack.pipelines import DocumentSearchPipeline
5
+ import pandas as pd
6
  import panel as pn
7
+ import param
8
+
9
+ pn.extension('tabulator')
10
+ pn.extension(sizing_mode="scale_both")
11
+
12
+ import hvplot.pandas
13
+
14
# load data
# The inventory file is located in this order:
#   1. the path given in the AI_INVENTORY_TSV environment variable, if set;
#   2. a copy of the file in the current working directory (e.g. the
#      container's WORKDIR when the repository is copied into the image);
#   3. the original development path, kept as a last resort so existing
#      local setups keep working.
import os

_DATA_FILENAME = "Agency Inventory AI Usage - Sheet1.tsv"
_DEV_INFILE = "/Users/carolanderson/Dropbox/repos/miscellany/webapps/Agency Inventory AI Usage - Sheet1.tsv"

infile = os.environ.get("AI_INVENTORY_TSV") or (
    _DATA_FILENAME if os.path.exists(_DATA_FILENAME) else _DEV_INFILE
)
# lineterminator='\n' is kept from the original; the header clean-up further
# down relies on the trailing '\r' it leaves on the last column name.
df = pd.read_csv(infile, sep="\t", lineterminator='\n')
17
+
18
+ # rearrange column order
19
+ col_list = ['Agency', 'Name of Inventory Item',
20
+ 'Primary Type of AI',
21
+ 'Purpose of AI', 'Length of Usage',
22
+ 'Does it directly impact the public?',
23
+ 'Vendor System',
24
+ 'Description of Inventory Item',
25
+ 'Other Notes\r']
26
+ df = df[col_list]
27
+
28
+ # remove trailing \r from 'Other Notes' header
29
+ df = df.rename(columns = {'Other Notes\r' : 'Other Notes'})
30
+
31
+ # remove trailing spaces from agency names (caused duplicate instance of "DOC")
32
+ df['Agency'] = df['Agency'].apply(lambda x : x.rstrip())
33
+
34
+ # columns not useful for filtering
35
+ no_filter_cols = ['Name of Inventory Item', 'Description of Inventory Item', "Other Notes"]
36
+
37
+ # columns to be used for filtering
38
+ filter_cols = [c for c in df.columns.unique() if c not in no_filter_cols]
39
+
40
+ # column selector for main plot
41
+ plot_column_selector = pn.widgets.Select(options=filter_cols, name="Plot category: ")
42
+
43
+ # agency selector for main plot
44
+ plot_agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
45
+ value=["Select all"],
46
+ name="Optional - filter by agency")
47
+
48
+ # selectors below are all for interactive dataframe
49
+ agency_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Agency"].unique()),
50
+ value=["Select all"],
51
+ name="Agency")
52
+ type_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Primary Type of AI'].unique()),
53
+ value=["Select all"],
54
+ name='Primary Type of AI')
55
+ purpose_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df["Purpose of AI"].unique()),
56
+ value=["Select all"],
57
+ name="Purpose of AI")
58
+ length_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Length of Usage'].unique()),
59
+ value=["Select all"],
60
+ name="Length of Usage")
61
+ impact_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Does it directly impact the public?'].unique()),
62
+ value=["Select all"],
63
+ name='Does it directly impact the public?')
64
+ vendor_selector = pn.widgets.MultiSelect(options=["Select all"] + list(df['Vendor System'].unique()),
65
+ value=["Select all"],
66
+ name='Vendor System')
67
+
68
+ row_filters = [agency_selector, type_selector, purpose_selector, length_selector, impact_selector,
69
+ vendor_selector]
70
+
71
+
72
def custom_plot(table, column_selector, agency_selector):
    """
    Draw a horizontal bar chart of value counts for one column.

    ``column_selector`` names the column to count. ``agency_selector`` is the
    list of chosen agencies; unless it contains "Select all", only rows whose
    'Agency' is in that list are counted.
    """
    restrict_agencies = "Select all" not in agency_selector
    if restrict_agencies:
        keep = table['Agency'].isin(agency_selector)
        table = table[keep]
    counts = table[column_selector].value_counts()
    # ascending sort of the counts before plotting
    counts = counts.sort_values(ascending=True)
    return counts.hvplot.barh(width=600, height=400, color="#336BCC")
77
+
78
+
79
def custom_table_filter(table,
                        agency_selector,
                        type_selector,
                        purpose_selector,
                        length_selector,
                        impact_selector,
                        vendor_selector):
    """
    Narrow ``table`` to the rows permitted by every selector.

    Each selector is the list of values currently chosen in one menu and is
    matched against its own column. A selector that contains "Select all"
    places no restriction on its column. The explicit one-parameter-per-menu
    signature is kept so each widget can be bound by keyword.
    """
    # (column name, chosen values) pairs, applied in this order
    selections_by_column = (
        ("Agency", agency_selector),
        ('Primary Type of AI', type_selector),
        ("Purpose of AI", purpose_selector),
        ('Length of Usage', length_selector),
        ('Does it directly impact the public?', impact_selector),
        ('Vendor System', vendor_selector),
    )
    for column, chosen in selections_by_column:
        if "Select all" in chosen:
            continue
        table = table[table[column].isin(chosen)]
    return table
103
+
104
+
105
+ custom_table = pn.widgets.Tabulator(df, pagination="local", page_size=350, layout="fit_data",
106
+ width=800, height=550)
107
+
108
+ custom_table.add_filter(pn.bind(custom_table_filter,
109
+ agency_selector=agency_selector,
110
+ type_selector=type_selector,
111
+ purpose_selector=purpose_selector,
112
+ length_selector=length_selector,
113
+ impact_selector=impact_selector,
114
+ vendor_selector=vendor_selector))
115
+
116
+
117
+ interactive_plot = pn.bind(custom_plot, table=df, column_selector=plot_column_selector,
118
+ agency_selector=plot_agency_selector)
119
+
120
+ overview_stacked = pn.Column(
121
+ pn.pane.Markdown("""
122
+ Plot shows the total count of entries, aggregated by various categories.
123
+ Change the category with the dropdown menu.
124
+ The total number of records in the database is 337, but some fields have missing values.
125
+ In particular, 'Vendor System' and 'Primary Type of AI' were not always filled out."""),
126
+ pn.Column(pn.Row(plot_column_selector,
127
+ plot_agency_selector),
128
+ pn.Row(interactive_plot, width=500))
129
+
130
+ )
131
+
132
+ overview_card = pn.Card(overview_stacked, header="# Overview of the data")
133
+
134
+ filename, button = custom_table.download_menu(
135
+ text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
136
+ button_kwargs={'name': 'Download table'}
137
+ )
138
+
139
+ download_card = pn.Card(pn.pane.Markdown("""
140
+ Download current table in .csv or .json format.
141
+ File format will be automatically selected based on the file extension.
142
+ """),
143
+ filename, button, header="### Download")
144
+
145
+ table_card = pn.Card(
146
+ pn.Row(
147
+ pn.Column(
148
+ pn.pane.Markdown("""
149
+ ### Filter with the menus below
150
+ """),pn.WidgetBox(*row_filters),
151
+ styles=dict(background='#DDE6FF')
152
+ ), pn.Column(pn.pane.Markdown("""
153
+ ### Scroll horizontally and vertically to see all data
154
+ """), custom_table)),
155
+ download_card,
156
+ header="# Explore the data"
157
+ )
158
+
159
+ # stacked bar plot of impact by agency (static plot)
160
+ impact_counts = df.groupby('Agency')['Does it directly impact the public?'].value_counts()
161
+ impact_counts = impact_counts.sort_index(level="Agency", ascending=False)
162
+ impact_count_df = pd.DataFrame(impact_counts).rename(columns={'Does it directly impact the public?' : "Count"})
163
+ impact_plot = impact_count_df.hvplot.barh(stacked=True, width=500, height=400, color=[ "#019C6D", "#336BCC", "#F41903",], legend="bottom_right")
164
+
165
+ impact_card = pn.Card(
166
+ pn.Column(
167
+ pn.pane.Markdown("""
168
+ Number of systems with no, indirect, or direct impact on the public.
169
+ These judgements were made by Anna Blue and are unique to her report."""),
170
+ impact_plot), header="# Impact on the public, by agency")
171
+
172
+ # keyword search
173
class TableIndices(param.Parameterized):
    """Container for the table locations produced by a keyword search."""

    # row index of each search hit
    row_indices = param.List()
    # column header of each search hit
    col_indices = param.List()

    def __call__(self):
        """Return the two lists as a ``(row_indices, col_indices)`` tuple."""
        rows, cols = self.row_indices, self.col_indices
        return rows, cols
179
 
180
+
181
def run_search(text, pipeline):
    """
    Run ``text`` through the search pipeline and collect where the hits are.

    Returns None for an empty query. Otherwise asks the retriever for its
    top 10 documents, keeps those scoring above 0.5, and returns a
    TableIndices holding each kept document's row index and column header.
    """
    if text == "":
        return None
    response = pipeline.run(query=text, params={"Retriever": {"top_k": 10}})
    hit_rows = []
    hit_cols = []
    for doc in response['documents']:
        if doc.score > 0.5:
            hit_rows.append(doc.meta['index'])
            hit_cols.append(doc.meta['column_header'])
    return TableIndices(row_indices=hit_rows, col_indices=hit_cols)
190
+
191
+
192
def produce_table(df, table_indices):
    """
    Build the table widget that shows the rows matched by a search.

    Parameters
    ----------
    df : the full dataframe the search was run against.
    table_indices : object exposing ``row_indices`` (as produced by
        ``run_search``), or None when there was no query.

    Returns
    -------
    A ``pn.widgets.Tabulator`` over the matched rows, or None when
    ``table_indices`` is falsy.
    """
    if not table_indices:
        return None

    # Positional lookup: row_indices are taken from df.index when the search
    # documents are built, so this assumes df keeps its default integer index.
    # A row matched in several columns is listed more than once in
    # row_indices; drop_duplicates removes the repeated rows.
    result_df = df.iloc[table_indices.row_indices, :].drop_duplicates()

    # NOTE: highlighting of the matching cells is intentionally not applied.
    # Styling the Tabulator worked, but the colours did not refresh between
    # searches unless an empty search was run in between (possibly related to
    # https://github.com/holoviz/panel/issues/3363). The per-cell colour frame
    # that only fed that disabled styling is therefore no longer built.
    return pn.widgets.Tabulator(result_df, pagination="local", page_size=350,
                                layout="fit_data", width=800, height=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
 
 
 
 
 
214
 
215
def make_search_pane(result_tab):
    """
    Wrap a search-results table with a scroll hint and a download card.

    Returns None when ``result_tab`` is falsy; otherwise a column holding the
    hint text, the table itself, and a card with the table's download menu.
    """
    if not result_tab:
        return None

    download_blurb = """
Download search results in .csv or .json format.
File format will be automatically selected based on the file extension."""
    scroll_hint = """
### Scroll horizontally and vertically (if needed) to see everything.
"""

    # filename box and button provided by the table's own download menu
    name_box, download_button = result_tab.download_menu(
        text_kwargs={'name': 'Enter filename ending in .csv or .json', 'value': 'default.csv'},
        button_kwargs={'name': 'Download search results'},
    )
    download_card = pn.Card(pn.pane.Markdown(download_blurb),
                            name_box, download_button, header="### Download")
    return pn.Column(pn.pane.Markdown(scroll_hint), result_tab, download_card)
229
+
230
+ # which columns to search
231
+ col_list = ['Name of Inventory Item',
232
+ 'Primary Type of AI',
233
+ 'Purpose of AI',
234
+ 'Description of Inventory Item',
235
+ 'Other Notes']
236
+
237
+ # create document store, where each string from any of the relevant columns is a doc
238
+ # save the row index as metadata
239
+ docs = []
240
+ indices = list(df.index.values)
241
+ for col in col_list:
242
+ values = df[col].tolist()
243
+ assert len(indices) == len(values)
244
+ for i, val in zip(indices, values):
245
+ dictionary = {'content' : val,
246
+ 'meta' : {"index": i, "column_header" : col}
247
+ }
248
+ docs.append(Document.from_dict(dictionary))
249
+
250
+
251
+ document_store = InMemoryDocumentStore(use_bm25=True)
252
+ document_store.write_documents(docs)
253
+ retriever = BM25Retriever(document_store=document_store)
254
+ pipeline = DocumentSearchPipeline(retriever)
255
+ text_input = pn.widgets.TextInput(name='Search', placeholder='Enter text here...')
256
+
257
+ result_indices = pn.bind(run_search, text=text_input, pipeline=pipeline)
258
+ result_table = pn.bind(produce_table, df=df, table_indices=result_indices)
259
+ result_pane = pn.bind(make_search_pane, result_tab=result_table)
260
 
261
+ search_card = pn.Card(
262
+ pn.Column(
263
+ pn.Row(
264
+ text_input,
265
+ pn.pane.Markdown("""
266
+ This will search text in the following columns:
267
+ * Name of Inventory Item
268
+ * Primary Type of AI
269
+ * Purpose of AI
270
+ * Description of Inventory Item
271
+ * Other Notes
272
+
273
+ This is a keyword search based on the BM25 algorithm as implemented in the Haystack python library.
274
+ """)),
275
+ pn.Row(result_pane),
276
+ ),
277
+ header="# Search the text"
278
  )
279
 
280
+ main_text = """
281
+ The data visualized here come from a report by Anna Blue, a Social Impact Fellow
282
+ at the Responsible AI Institute. The report was released in May 2023. Some agencies have
283
+ released updated inventories since then, which are not reflected here.
284
+
285
+ Anna's report consolidated data released by individual government agencies in compliance with
286
+ Executive Order 13960, which requires federal agencies to produce an annual inventory of their AI usage.
287
+ See her [blog post](https://www.responsible.ai/post/federal-government-ai-use-cases) for additional details,
288
+ including links to the original data sources.
289
+ """
290
+
291
+
292
+
293
+ template = pn.template.FastListTemplate(
294
+ title='U.S. Government Use of AI',
295
+ main=[pn.pane.Markdown(main_text),
296
+ pn.Row(overview_card,impact_card),
297
+ pn.Row(table_card),
298
+ pn.Row(search_card)],
299
+ accent_base_color="#FFDAC2",
300
+ header_background="#0037A2")
301
+
302
+ template.servable()
303
+
requirements.txt CHANGED
@@ -1,6 +1,5 @@
 
 
 
1
  panel
2
- jupyter
3
- transformers
4
- numpy
5
- torch
6
- aiohttp
 
1
+ hvplot
2
+ farm-haystack[inference]
3
+ pandas
4
  panel
5
+ param