kevineen committed on
Commit
654e2a8
1 Parent(s): a78ff01
Files changed (5) hide show
  1. run.py +53 -26
  2. script/custom.js +2 -3
  3. style.css +0 -0
  4. target_dataset.txt +70 -0
  5. type/dataset_type.py +59 -3
run.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from huggingface_hub import list_models
3
  from datasets import load_dataset
4
- from type.dataset_type import AutoGeneratedJapaneseQAData
5
 
6
  js = """
7
  function blockEnter(event) {
@@ -16,11 +16,17 @@ function blockEnter(event) {
16
  }
17
  """
18
 
 
19
 
20
- def hello(profile: gr.OAuthProfile | None) -> str:
 
 
 
 
 
21
  if profile is None:
22
  return "プライベートデータセット取得のためにログインしてください。"
23
- return f"{profile.username}さん、よろしくお願いいたします。"
24
 
25
 
26
  def list_private_models(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
@@ -59,39 +65,60 @@ def display_dataset(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken
59
  else:
60
  return gr.update(visible=True, value="データセットのロードに失敗しました。"), None, None
61
 
 
 
 
 
 
 
 
 
 
62
 
63
- with gr.Blocks() as demo:
64
  gr.Markdown("# 自動生成データセット アノテーション for Tanuki 2Phase")
65
 
66
- with gr.Row(equal_height=True):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- login_btn = gr.LoginButton(visible=True, scale=1)
69
- logout_btn = gr.LogoutButton(visible=False, scale=1)
 
70
 
71
- m1 = gr.Markdown()
72
- # m2 = gr.Markdown(scale=1)
73
- data_load_btn = gr.Button("データセットを読み込む")
74
 
75
- demo.load(hello, inputs=None, outputs=m1)
76
- # demo.load(list_private_models, inputs=None, outputs=m2)
77
- demo.load(toggle_buttons, inputs=None, outputs=[login_btn, logout_btn])
 
 
78
 
79
- with gr.Row(equal_height=True):
80
- good_btn = gr.Button("良い")
81
- bad_btn = gr.Button("悪い")
82
 
83
- dataset_display = gr.Markdown(visible=False)
84
- question_text = gr.Textbox(label="質問: ", interactive=False)
85
- answer_text = gr.Textbox(label="回答: 改行はShift+Enterです。", elem_id="answer", interactive=True)
86
 
87
- data_load_btn.click(
88
- display_dataset,
89
- inputs=None,
90
- outputs=[dataset_display, question_text, answer_text],
91
- )
92
 
93
- def on_submit(answer_text):
94
- return f" {answer_text}"
95
 
96
 
97
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from huggingface_hub import list_models
3
  from datasets import load_dataset
4
+ from type.dataset_type import TanukiPhase2AnnotationDataset
5
 
6
  js = """
7
  function blockEnter(event) {
 
16
  }
17
  """
18
 
19
# Module-level instance of the annotation record type imported from
# type.dataset_type; it is created with no arguments.
target_dataset = TanukiPhase2AnnotationDataset()
20
 
21
def load_css():
    """Return the contents of ``style.css`` as a string.

    The path is resolved relative to the current working directory. The
    file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: if ``style.css`` cannot be opened.
    """
    with open("style.css", "r", encoding="utf-8") as css_file:
        return css_file.read()
25
+
26
def set_name(profile: gr.OAuthProfile | None) -> str:
    """Build the message shown for the current visitor.

    Returns a login prompt when no OAuth profile is supplied; otherwise
    returns a greeting that embeds ``profile.username``.
    """
    if profile is not None:
        return f'{profile.username}さん、よろしくお願いいたします。'
    return "プライベートデータセット取得のためにログインしてください。"
30
 
31
 
32
  def list_private_models(profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None) -> str:
 
65
  else:
66
  return gr.update(visible=True, value="データセットのロードに失敗しました。"), None, None
67
 
68
def switch_theme(theme):
    """Return a newly constructed gradio theme for the given theme name.

    ``"Dark"`` yields ``gr.themes.Default()``; every other value yields
    ``gr.themes.Monochrome()``.
    """
    theme_factory = gr.themes.Default if theme == "Dark" else gr.themes.Monochrome
    return theme_factory()
73
+
74
# Name of the currently selected theme ("Light" or "Dark").
# NOTE(review): this State is created outside any Blocks context and is
# read/written through ``.value`` directly, so the selection looks
# process-wide rather than per-session -- confirm that this is intended.
theme_ = gr.State("Light")

# ``theme=`` takes a gradio Theme object (or a theme name), not a State
# component, so the initial selection is resolved to a Theme up front.
with gr.Blocks(theme=switch_theme(theme_.value), css=load_css()) as demo:

    gr.Markdown("# 自動生成データセット アノテーション for Tanuki 2Phase")

    # ---- Tab 1: annotation workflow -------------------------------------
    with gr.Tab("アノテーション"):

        def update_theme():
            """Flip the stored theme name and return the matching theme."""
            new_theme = "Dark" if theme_.value == "Light" else "Light"
            theme_.value = new_theme
            return switch_theme(new_theme)

        # NOTE(review): widget nesting below was reconstructed; confirm
        # which widgets are meant to sit inside this Row.
        with gr.Row(equal_height=True):

            login_btn = gr.LoginButton(visible=True, scale=1)
            logout_btn = gr.LogoutButton(visible=False, scale=1)

            profile = gr.Markdown()
            data_load_btn = gr.Button("データセットを読み込む")
            theme_button = gr.Button("Switch Theme")
            # NOTE(review): update_theme returns a theme object but no
            # outputs are bound here, so the return value reaches no
            # component -- confirm how the switch is meant to take effect.
            theme_button.click(fn=update_theme, outputs=None)

        # Populate the greeting and the login/logout buttons on page load.
        demo.load(set_name, inputs=None, outputs=profile)
        demo.load(toggle_buttons, inputs=None, outputs=[login_btn, logout_btn])

        dataset_display = gr.Markdown(visible=False)
        question_text = gr.Textbox(label="質問: ", interactive=False)

        with gr.Row(equal_height=True):
            good_btn = gr.Button("良い")
            bad_btn = gr.Button("悪い")

        answer_text = gr.Textbox(label="回答: 改行はShift+Enterです。", elem_id="answer", interactive=True)

        data_load_btn.click(
            display_dataset,
            inputs=None,
            outputs=[dataset_display, question_text, answer_text],
        )

        # Not attached to any event in the visible code.
        def on_submit(answer_text):
            """Return the given answer text prefixed with a single space."""
            return f" {answer_text}"

    # ---- Tab 2: already-annotated datasets ------------------------------
    with gr.Tab("アノテ済みデータセット"):

        # The caption is passed as ``label`` (the first positional argument
        # of Textbox is the initial value, which would hide the placeholder).
        gr.Textbox(label="データセットID", lines=1, placeholder="データセットIDを入力してください。")
 
122
 
123
 
124
  if __name__ == "__main__":
script/custom.js CHANGED
@@ -1,4 +1,3 @@
1
  function prevent (event) {
2
- alert('Hello, World!');
3
-
4
- };
 
1
// Handler stub: accepts an event and currently does nothing (the debug
// alert below is commented out).
function prevent (event) {
  // alert('Hello, World!');
};
 
style.css ADDED
File without changes
target_dataset.txt ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## hatakeyama-llm-team/WikiBookJa
2
+ fileSize 29.3MB
3
+ - index int64
4
+ - text string
5
+
6
+ ## hatakeyama-llm-team/AutoGeneratedJapaneseQA
7
+ fileSize 33.1MB
8
+ - question string
9
+ - database string
10
+ - answer string
11
+ - score float64
12
+
13
+ ## hatakeyama-llm-team/AutoGeneratedJapaneseQA-other
14
+ fileSize 12.9MB
15
+ - question string
16
+ - database string
17
+ - answer string
18
+ - score float64
19
+
20
+ ## kanhatakeyama/AutoWikiQA
21
+ fileSize 232MB
22
+ - question string
23
+ - inst_question string
24
+ - inst_answer_0 string
25
+ - text string
26
+ - database string
27
+ - answer string
28
+ - answer_0 string
29
+ - answer_1 string
30
+
31
+ ## kanhatakeyama/ChatbotArenaJaMixtral8x22b
32
+ fileSize 13.9MB
33
+ - question string
34
+ - inst_question string
35
+ - inst_answer_0 string
36
+ - text string
37
+ - database string
38
+ - answer string
39
+
40
+ ## kanhatakeyama/OrcaJaMixtral8x22b
41
+ fileSize 1.24GB
42
+ - question string
43
+ - inst_question string
44
+ - inst_answer_0 string
45
+ - text string
46
+ - database string
47
+ - answer string
48
+
49
+ ## kanhatakeyama/AutoMultiTurnByMixtral8x22b
50
+ fileSize 270MB
51
+ - q1 string
52
+ - a1 string
53
+ - q2 string
54
+ - a2 string
55
+ - database string
56
+
57
+ ## kanhatakeyama/LogicalDatasetsByMixtral8x22b
58
+ fileSize 116MB
59
+ - question string
60
+ - answer string
61
+
62
+ ## output_dataset
63
+ - id int64
64
+ - dataset string
65
+ - dataset_id int64
66
+ - who string
67
+ - good bool
68
+ - bad bool
69
+ - is_proofreading string
70
+ - t_proofreading string
type/dataset_type.py CHANGED
@@ -1,10 +1,66 @@
1
  # データセットのファイル構造に合わせて型定義
2
- class WikiBookJaData:
 
3
  index: int
4
  text: str
5
 
6
- class AutoGeneratedJapaneseQAData:
 
 
 
 
 
 
 
 
 
 
 
 
7
  question: str
 
 
 
 
8
  database: str
9
  answer: str
10
- score: float
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # データセットのファイル構造に合わせて型定義
2
+
3
class WikiBookJa:
    """Field declarations for rows of hatakeyama-llm-team/WikiBookJa.

    Declares annotations only; no attributes are assigned on the class.
    """
    index: int
    text: str
6
 
7
class AutoGeneratedJapaneseQA:
    """Field declarations for rows of hatakeyama-llm-team/AutoGeneratedJapaneseQA.

    Declares annotations only; no attributes are assigned on the class.
    """
    question: str
    database: str
    answer: str
    score: float
12
+
13
class AutoGeneratedJapaneseQA_other:
    """Field declarations for rows of hatakeyama-llm-team/AutoGeneratedJapaneseQA-other.

    Declares annotations only; no attributes are assigned on the class.
    """
    question: str
    database: str
    answer: str
    score: float
18
+
19
class AutoWikiQA:
    """Field declarations for rows of kanhatakeyama/AutoWikiQA.

    Declares annotations only; no attributes are assigned on the class.
    Each field is declared exactly once, in the column order listed for
    this dataset in target_dataset.txt.
    """
    question: str
    inst_question: str
    inst_answer_0: str
    text: str
    database: str
    answer: str
    answer_0: str
    answer_1: str
29
+
30
class ChatbotArenaJaMixtral8x22b:
    """Field declarations for rows of kanhatakeyama/ChatbotArenaJaMixtral8x22b.

    Declares annotations only; no attributes are assigned on the class.
    """
    question: str
    inst_question: str
    inst_answer_0: str
    text: str
    database: str
    answer: str
37
+
38
class OrcaJaMixtral8x22b:
    """Field declarations for rows of kanhatakeyama/OrcaJaMixtral8x22b.

    Declares annotations only; no attributes are assigned on the class.
    """
    question: str
    inst_question: str
    inst_answer_0: str
    text: str
    database: str
    answer: str
45
+
46
class AutoMultiTurnByMixtral8x22b:
    """Field declarations for rows of kanhatakeyama/AutoMultiTurnByMixtral8x22b.

    Declares annotations only; no attributes are assigned on the class.
    """
    # presumably q1/a1 and q2/a2 are the first and second question/answer
    # turns -- verify against the dataset.
    q1: str
    a1: str
    q2: str
    a2: str
    database: str
52
+
53
class LogicalDatasetsByMixtral8x22b:
    """Field declarations for rows of kanhatakeyama/LogicalDatasetsByMixtral8x22b.

    Declares annotations only; no attributes are assigned on the class.
    """
    question: str
    answer: str
56
+
57
+
58
class TanukiPhase2AnnotationDataset:
    """Field declarations for one record of the annotation output dataset.

    Declares annotations only; no attributes are assigned on the class, so
    it can be instantiated without arguments. Field names follow the
    ``output_dataset`` schema listed in target_dataset.txt.
    """
    id: int
    dataset: str
    # Named ``dataset_id`` to match the output_dataset schema in
    # target_dataset.txt (it was previously declared as ``database_id``).
    dataset_id: int
    who: str
    good: bool
    bad: bool
    # NOTE(review): target_dataset.txt lists this column as a string while
    # it is declared bool here -- confirm which one is intended.
    is_proofreading: bool
    t_proofreading: str