Spaces:
Runtime error
Runtime error
File size: 3,323 Bytes
dbbde93 d641f66 dbbde93 d641f66 dbbde93 16d6e6c dbbde93 d641f66 ab6dff0 dbbde93 d609dea dbbde93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import math
import operator as op
import itertools as it
import functools as ft
import collections as cl
from pathlib import Path
import pandas as pd
import gradio as gr
from datasets import load_dataset
# Lower and upper bounds of a highest-density interval.
HDI = cl.namedtuple('HDI', 'lower, upper')
# A model parameter: its name in the data, the kind of element it
# describes, and a short human-readable meaning.
Parameter = cl.namedtuple('Parameter', 'name, ptype, gist')

#
# See https://cran.r-project.org/package=HDInterval
#
def hdi(values, ci=0.95):
    """Return the narrowest interval covering a `ci` share of `values`.

    Non-finite entries (NaN, +/-inf) are discarded. The remaining values
    are sorted, and every candidate interval that drops `exclude` points
    from the tails is compared; the one with the smallest width wins
    (the first such interval on ties).

    Args:
        values: iterable of numbers.
        ci: mass to cover, within [0, 1]. A value of 1 yields the full
            (min, max) range.

    Returns:
        An `HDI(lower, upper)` named tuple.

    Raises:
        ValueError: if no finite values remain, or `ci` is outside [0, 1].
    """
    values = sorted(filter(math.isfinite, values))
    if not values:
        raise ValueError('Empty data set')
    # Also rejects NaN, for which every comparison is false.
    if not 0 <= ci <= 1:
        raise ValueError(f'ci must be within [0, 1], got {ci!r}')

    n = len(values)
    # At least one candidate interval must exist: without the clamp,
    # ci == 1 (or n * ci rounding up to n) leaves nothing to compare.
    exclude = max(1, n - math.floor(n * ci))

    lows = values[:exclude]
    highs = values[n - exclude:]
    (lower, upper) = min(zip(lows, highs), key=lambda pair: pair[1] - pair[0])

    return HDI(lower, upper)
#
#
#
def load(repo):
    """Load the 'train' split of dataset `repo`, grouped by parameter.

    The split is converted to a pandas DataFrame, reduced to the
    'parameter', 'element' and 'value' columns, and grouped on
    'parameter' with the groups kept in order of first appearance.

    Args:
        repo: dataset identifier passed to `load_dataset`.

    Returns:
        A pandas GroupBy object keyed by parameter.
    """
    key = 'parameter'
    wanted = [key, 'element', 'value']

    frame = load_dataset(repo).get('train').to_pandas()

    return frame.filter(items=wanted).groupby(key, sort=False)
def parameters(groups):
    """Yield the known `Parameter` for each recognised group key.

    Args:
        groups: iterable of `(key, data)` pairs; only the key is used.

    Yields:
        The `Parameter` whose name equals the key, in the order the
        groups are iterated. Keys without a known parameter are skipped.
    """
    known = (
        Parameter('alpha', 'prompt', 'discrimination'),
        Parameter('beta', 'prompt', 'difficulty'),
        Parameter('theta', 'model', 'ability'),
    )
    by_name = {p.name: p for p in known}

    yield from (by_name[key] for (key, _) in groups if key in by_name)
@ft.singledispatch
def get(param, group):
    """Return the rows of `group` that belong to `param`.

    Dispatches on the type of `param`: a `str` is used directly as the
    group key, a `Parameter` is looked up by its `name`. Any other type
    raises `TypeError` carrying the offending type.
    """
    raise TypeError(type(param))


@get.register(str)
def _(param, group):
    # Plain name: fetch the matching group.
    return group.get_group(param)


@get.register(Parameter)
def _(param, group):
    # Parameter: defer to the string implementation via its name.
    return get(param.name, group)
def summarize(param, df, ci=0.95):
    """Summarise the 'value' samples of each element in `df`.

    Args:
        param: object with `ptype` and `gist` attributes, used as the
            column names for the element label and the median value.
        df: DataFrame with 'element' and 'value' columns.
        ci: interval mass forwarded to `hdi`.

    Returns:
        A DataFrame with one row per element (in order of first
        appearance) holding the element, the median value, the interval
        width under 'uncertainty', and the interval bounds.
    """
    def _describe(element, frame):
        samples = frame['value']
        bounds = hdi(samples, ci)
        return {
            param.ptype: element,
            param.gist: samples.median(),
            'uncertainty': bounds.upper - bounds.lower,
            **bounds._asdict(),
        }

    per_element = df.groupby('element', sort=False)

    return pd.DataFrame.from_records(
        _describe(element, frame) for (element, frame) in per_element
    )
def rank(param, df, ascending, name='rank'):
    """Order `df` by the parameter value and prepend a 1-based rank.

    Rows are sorted on the `param.gist` column in the direction given by
    `ascending`; ties are broken on 'uncertainty' in the opposite
    direction. The 'uncertainty' column is dropped from the result.

    Args:
        param: object whose `gist` attribute names the value column.
        df: DataFrame containing that column and 'uncertainty'.
        ascending: sort direction for the value column.
        name: name of the rank column placed first in the result.

    Returns:
        A new DataFrame with the rank column followed by the remaining
        columns.
    """
    tie_breaker = 'uncertainty'

    ordered = df.sort_values(
        by=[param.gist, tie_breaker],
        ascending=[ascending, not ascending],
    )
    ranked = ordered.drop(columns=tie_breaker).reset_index(drop=True)
    # Shift the fresh 0-based index so ranks start at one.
    ranked.index = ranked.index + 1

    return ranked.reset_index(names=name)
def md_reader(name, prefix='_'):
    """Return the text of the markdown file associated with `name`.

    The file path is `prefix` followed by the upper-cased `name`, with
    its suffix set to '.md' (e.g. 'readme' -> '_README.md').

    Args:
        name: base name of the document.
        prefix: string prepended to the upper-cased name.

    Returns:
        The file contents as a string.

    Raises:
        OSError: if the file cannot be read.
    """
    path = Path(f'{prefix}{name.upper()}').with_suffix('.md')
    # Decode explicitly as UTF-8 so the result does not depend on the
    # host's locale encoding.
    return path.read_text(encoding='utf-8')
#
#
#
# Build the page: a title, the readme text, then one ranked table per
# parameter found in the data.
with gr.Blocks() as demo:
    data = load('jerome-white/alpaca-irt-stan')

    gr.Markdown('# Alpaca Item Response')
    with gr.Row():
        with gr.Column():
            gr.Markdown(md_reader('readme'))
        with gr.Column():
            pass

    # Interval bound columns get an explicit "HDI" label.
    hdi_labels = {x: f'HDI {x}' for x in HDI._fields}

    for param in parameters(data):
        with gr.Row():
            view = rank(param, summarize(param, get(param, data)), False)
            # Every other column is shown in title case. A distinct
            # loop name keeps the current parameter from being clobbered.
            columns = {
                col: hdi_labels.get(col, col.title()) for col in view.columns
            }
            view = (view
                    .rename(columns=columns)
                    .style.format(precision=4))
            gr.Dataframe(view, wrap=True)

demo.launch()
|