dustalov committed on
Commit
c4d6746
·
verified ·
1 Parent(s): 18b2319

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -15
app.py CHANGED
@@ -24,11 +24,13 @@ import pandas as pd
24
 
25
 
26
  # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
27
- def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], tolerance: float = 10e-6, limit: int = 20) \
28
- -> npt.ArrayLike:
29
  assert wins.shape == ties.shape, 'wins and ties shapes are different'
30
 
31
- pi, v = np.random.rand(wins.shape[0]), np.random.rand()
 
 
32
 
33
  converged, iterations = False, 0
34
 
@@ -44,6 +46,7 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], toleranc
44
  )
45
 
46
  v = v_numerator / v_denominator
 
47
 
48
  pi_old = pi.copy()
49
 
@@ -60,13 +63,14 @@ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64], toleranc
60
  )
61
 
62
  pi = pi_numerator / pi_denominator
 
63
 
64
  converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)
65
 
66
  return pi
67
 
68
 
69
- def handler(file: typing.IO[bytes]) -> pd.DataFrame:
70
  try:
71
  df = pd.read_csv(file.name, dtype=str)
72
  except ValueError as e:
@@ -87,21 +91,22 @@ def handler(file: typing.IO[bytes]) -> pd.DataFrame:
87
  df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
88
  index='left', columns='right', values='winner',
89
  aggfunc='count', fill_value=0)
90
- df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0)
91
 
92
  df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
93
  index='left', columns='right', values='winner', aggfunc='count',
94
  fill_value=0)
95
- df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0)
96
 
97
  wins = df_wins.to_numpy(dtype=np.int64)
98
  ties = df_ties.to_numpy(dtype=np.int64)
99
  ties += ties.T
100
 
101
- scores = aggregate(wins, ties)
102
 
103
  df_result = pd.DataFrame(data={'score': scores}, index=index)
104
- df_result['rank'] = df_result['score'].rank(ascending=False).astype(int)
 
105
  df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
106
  df_result.reset_index(inplace=True)
107
 
@@ -110,25 +115,33 @@ def handler(file: typing.IO[bytes]) -> pd.DataFrame:
110
 
111
  iface = gr.Interface(
112
  fn=handler,
113
- inputs=gr.File(
114
- value='example.csv',
115
- file_types=['.tsv', '.csv']
116
- ),
 
 
 
 
 
 
 
117
  outputs=gr.Dataframe(
118
- headers=['item', 'score', 'rank']
 
119
  ),
120
  title='Turn Your Side-by-Side Comparisons into Ranking!',
121
  description='''
122
  This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
123
 
124
- As an input, it expects a comma-separated (CSV) file containing the following columns:
125
 
126
  - `left`: the first compared item
127
  - `right`: the second compared item
128
  - `winner`: the label indicating the winning item
129
 
130
  Possible values for `winner` are `left`, `right`, or `tie`.
131
- The provided example might be a good starting point of the format.
132
 
133
  As the output, this tool provides a table with items, their estimated scores, and ranks.
134
  ''',
 
24
 
25
 
26
  # https://gist.github.com/dustalov/41678b70c40ba5a55430fa5e77b121d9#file-newman-py
27
+ def aggregate(wins: npt.NDArray[np.int64], ties: npt.NDArray[np.int64],
28
+ seed: int = 0, tolerance: float = 10e-6, limit: int = 20) -> npt.ArrayLike:
29
  assert wins.shape == ties.shape, 'wins and ties shapes are different'
30
 
31
+ rng = np.random.default_rng(seed)
32
+
33
+ pi, v = rng.random(wins.shape[0]), rng.random()
34
 
35
  converged, iterations = False, 0
36
 
 
46
  )
47
 
48
  v = v_numerator / v_denominator
49
+ v = np.nan_to_num(v, copy=False)
50
 
51
  pi_old = pi.copy()
52
 
 
63
  )
64
 
65
  pi = pi_numerator / pi_denominator
66
+ pi = np.nan_to_num(pi, copy=False)
67
 
68
  converged = bool(np.all(np.abs(pi / (pi + 1) - pi_old / (pi_old + 1)) < tolerance)) or (iterations >= limit)
69
 
70
  return pi
71
 
72
 
73
+ def handler(file: typing.IO[bytes], seed: int) -> pd.DataFrame:
74
  try:
75
  df = pd.read_csv(file.name, dtype=str)
76
  except ValueError as e:
 
91
  df_wins = pd.pivot_table(df[df['winner'].isin(['left', 'right'])],
92
  index='left', columns='right', values='winner',
93
  aggfunc='count', fill_value=0)
94
+ df_wins = df_wins.reindex(labels=index, columns=index, fill_value=0, copy=False)
95
 
96
  df_ties = pd.pivot_table(df[df['winner'] == 'tie'],
97
  index='left', columns='right', values='winner', aggfunc='count',
98
  fill_value=0)
99
+ df_ties = df_ties.reindex(labels=index, columns=index, fill_value=0, copy=False)
100
 
101
  wins = df_wins.to_numpy(dtype=np.int64)
102
  ties = df_ties.to_numpy(dtype=np.int64)
103
  ties += ties.T
104
 
105
+ scores = aggregate(wins, ties, seed=seed)
106
 
107
  df_result = pd.DataFrame(data={'score': scores}, index=index)
108
+ df_result['rank'] = df_result['score'].rank(na_option='bottom', ascending=False).astype(int)
109
+ df_result.fillna(np.NINF, inplace=True)
110
  df_result.sort_values(by=['rank', 'score'], ascending=[True, False], inplace=True)
111
  df_result.reset_index(inplace=True)
112
 
 
115
 
116
  iface = gr.Interface(
117
  fn=handler,
118
+ inputs=[
119
+ gr.File(
120
+ value='example.csv',
121
+ file_types=['.tsv', '.csv'],
122
+ label='Comparisons'
123
+ ),
124
+ gr.Number(
125
+ label='Seed',
126
+ precision=0
127
+ )
128
+ ],
129
  outputs=gr.Dataframe(
130
+ headers=['item', 'score', 'rank'],
131
+ label='Ranking'
132
  ),
133
  title='Turn Your Side-by-Side Comparisons into Ranking!',
134
  description='''
135
  This easy-to-use tool transforms pairwise comparisons (aka side-by-side) to a meaningful ranking of items.
136
 
137
+ As an input, it expects a comma-separated (CSV) file with a header containing the following columns:
138
 
139
  - `left`: the first compared item
140
  - `right`: the second compared item
141
  - `winner`: the label indicating the winning item
142
 
143
  Possible values for `winner` are `left`, `right`, or `tie`.
144
+ The provided example might be a good starting point.
145
 
146
  As the output, this tool provides a table with items, their estimated scores, and ranks.
147
  ''',