Qifan Zhang committed on
Commit
0e97d35
•
1 Parent(s): 8cd5cbf

update p2_flexibility, ui

Browse files
.gitignore CHANGED
@@ -1,3 +1,5 @@
1
- data
2
  .idea
3
- *.csv
 
 
 
 
 
1
  .idea
2
+ data/example
3
+ data/tmp
4
+
5
+ output.csv
app.py CHANGED
@@ -50,18 +50,13 @@ def process(task_name: str,
50
  return {'Error': e}, None, None
51
 
52
 
 
53
  task_name_dropdown = gr.components.Dropdown(
54
  label='Task Name',
55
  value='Originality',
56
  choices=['Originality', 'Flexibility']
57
  )
58
 
59
- model_name_input = gr.components.Textbox(
60
- value='paraphrase-multilingual-MiniLM-L12-v2',
61
- lines=1,
62
- type='text'
63
- )
64
-
65
  model_name_dropdown = gr.components.Dropdown(
66
  label='Model Name',
67
  value=list_models[0],
@@ -69,11 +64,16 @@ model_name_dropdown = gr.components.Dropdown(
69
  )
70
 
71
  text_input = gr.components.Textbox(
72
- value='id,prompt,response\n',
73
  lines=10,
74
  type='text'
75
  )
76
 
 
 
 
 
 
77
  text_output = gr.components.Textbox(
78
  label='Output',
79
  type='text'
@@ -83,16 +83,14 @@ dataframe_output = gr.components.Dataframe(
83
  label='DataFrame'
84
  )
85
 
86
- description = open('description.txt', 'r').read()
87
-
88
  file_output = gr.components.File(label='Output File',
89
  file_count='single',
90
  file_types=['', '.', '.csv', '.xls', '.xlsx'])
91
 
92
  app = gr.Interface(
93
  fn=process,
94
- inputs=[task_name_dropdown, model_name_dropdown, text_input, 'file'],
95
  outputs=[text_output, dataframe_output, file_output],
96
- description=description
97
  )
98
  app.launch()
 
50
  return {'Error': e}, None, None
51
 
52
 
53
+ # input
54
  task_name_dropdown = gr.components.Dropdown(
55
  label='Task Name',
56
  value='Originality',
57
  choices=['Originality', 'Flexibility']
58
  )
59
 
 
 
 
 
 
 
60
  model_name_dropdown = gr.components.Dropdown(
61
  label='Model Name',
62
  value=list_models[0],
 
64
  )
65
 
66
  text_input = gr.components.Textbox(
67
+ value=open('data/example.csv', 'r').read(),
68
  lines=10,
69
  type='text'
70
  )
71
 
72
+ # output
73
+ file_input = gr.components.File(label='Input File',
74
+ file_count='single',
75
+ file_types=['', '.', '.csv', '.xls', '.xlsx'])
76
+
77
  text_output = gr.components.Textbox(
78
  label='Output',
79
  type='text'
 
83
  label='DataFrame'
84
  )
85
 
 
 
86
  file_output = gr.components.File(label='Output File',
87
  file_count='single',
88
  file_types=['', '.', '.csv', '.xls', '.xlsx'])
89
 
90
  app = gr.Interface(
91
  fn=process,
92
+ inputs=[task_name_dropdown, model_name_dropdown, text_input, file_input],
93
  outputs=[text_output, dataframe_output, file_output],
94
+ description=open('data/description.txt', 'r').read()
95
  )
96
  app.launch()
description.txt → data/description.txt RENAMED
File without changes
data/example.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ id,prompt,response
2
+ 1,床单,过滤器
3
+ 1,床单,做成渔网捞鱼
4
+ 1,床单,做成枕头
5
+ 1,牙刷,捅人
6
+ 1,牙刷,用作鞋拔
7
+ 1,牙刷,当飞镖扔
8
+ 2,床单,做被子
9
+ 2,床单,保暖
10
+ 2,床单,绑在树上做成吊床
utils/models.py CHANGED
@@ -1,6 +1,6 @@
1
- import numpy as np
2
- import torch
3
  from functools import lru_cache
 
 
4
  from sentence_transformers import SentenceTransformer
5
 
6
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -20,6 +20,6 @@ class SBert:
20
  self.model = SentenceTransformer(path, device=DEVICE)
21
 
22
  @lru_cache(maxsize=10000)
23
- def __call__(self, x) -> np.ndarray:
24
- y = self.model.encode(x)
25
  return y
 
 
 
1
  from functools import lru_cache
2
+
3
+ import torch
4
  from sentence_transformers import SentenceTransformer
5
 
6
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
20
  self.model = SentenceTransformer(path, device=DEVICE)
21
 
22
  @lru_cache(maxsize=10000)
23
+ def __call__(self, x) -> torch.Tensor:
24
+ y = self.model.encode(x, convert_to_tensor=True)
25
  return y
utils/pipeline.py CHANGED
@@ -9,22 +9,37 @@ def p0_originality(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
9
  assert 'response' in df.columns
10
  model = SBert(model_name)
11
 
12
- def get_cos_sim(model, prompt: str, response: str) -> float:
13
  prompt_vec = model(prompt)
14
  response_vec = model(response)
15
  score = cos_sim(prompt_vec, response_vec).item()
16
  return score
17
 
18
- df['originality'] = df.apply(lambda x: 1 - get_cos_sim(model, x['prompt'], x['response']), axis=1)
19
  return df
20
 
21
 
22
  def p1_flexibility(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
23
- df = p0_originality(df, model_name)
 
24
  assert 'id' in df.columns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  df_out = df.groupby(by=['id', 'prompt']) \
26
- .agg({'id': 'first', 'prompt': 'first', 'originality': 'mean'}) \
27
- .rename(columns={'originality': 'flexibility'}) \
28
  .reset_index(drop=True)
29
  return df_out
30
 
 
9
  assert 'response' in df.columns
10
  model = SBert(model_name)
11
 
12
+ def get_cos_sim(prompt: str, response: str) -> float:
13
  prompt_vec = model(prompt)
14
  response_vec = model(response)
15
  score = cos_sim(prompt_vec, response_vec).item()
16
  return score
17
 
18
+ df['originality'] = df.apply(lambda x: 1 - get_cos_sim(x['prompt'], x['response']), axis=1)
19
  return df
20
 
21
 
22
  def p1_flexibility(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
23
+ assert 'prompt' in df.columns
24
+ assert 'response' in df.columns
25
  assert 'id' in df.columns
26
+ model = SBert(model_name)
27
+
28
+ def get_cos_sim(responses: list[str]) -> float:
29
+ responses_vec = [model(_) for _ in responses]
30
+ count = 0
31
+ score = 0
32
+ for i in range(len(responses_vec)):
33
+ for j in range(1, len(responses_vec)):
34
+ if i == j:
35
+ continue
36
+ score += cos_sim(responses_vec[i], responses_vec[j]).item()
37
+ count += 1
38
+ return score / count
39
+
40
  df_out = df.groupby(by=['id', 'prompt']) \
41
+ .agg({'id': 'first', 'prompt': 'first', 'response': get_cos_sim}) \
42
+ .rename(columns={'response': 'flexibility'}) \
43
  .reset_index(drop=True)
44
  return df_out
45