File size: 8,584 Bytes
f229c82
 
 
 
847658d
 
 
 
8e298e3
 
 
 
 
 
 
847658d
8e298e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
847658d
 
8e298e3
847658d
 
8e298e3
847658d
8e298e3
847658d
 
 
8e298e3
847658d
 
 
8e298e3
847658d
 
8e298e3
 
 
 
847658d
8e298e3
 
847658d
 
8e298e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
847658d
 
8e298e3
 
 
 
 
 
 
 
 
 
 
 
f229c82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e298e3
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import streamlit as st

# One literal feeds both the browser-tab title and the on-page heading,
# which is always rendered in English.
PAGE_TITLE = "HAERAE Open Research Questions"

# Labels offered by the language picker.
LANGUAGE_CHOICES = ["English", "ํ•œ๊ตญ์–ด"]

# Page-level configuration: tab title and wide layout.
st.set_page_config(page_title=PAGE_TITLE, layout="wide")

# Heading (not localized)
st.title(PAGE_TITLE)

# Language picker placed directly under the heading.
lang = st.radio("Language / ์–ธ์–ด", LANGUAGE_CHOICES)

# Localized page copy.
#
# `content` maps a language label to a dict of text snippets; both language
# dicts expose the same set of keys. Bodies are Markdown.
#
# The contact address is defined once and rendered as an explicit `mailto:`
# link. A Markdown link whose target is a bare e-mail address is treated as a
# relative URL and does not open a mail client.
#
# NOTE(review): the non-English literals below look mis-decoded in this view
# (UTF-8 text shown through a legacy code page) - confirm against the
# original file before shipping.
CONTACT_EMAIL = "spthsrbwls123@yonsei.ac.kr"
CONTACT_LINK = f"[{CONTACT_EMAIL}](mailto:{CONTACT_EMAIL})"

content = {
    "English": {
        "intro": """
        HAERAE is a non-profit research lab focused on the interpretability and evaluation of Korean language models. 
        Our mission is to advance the field with insightful benchmarks and tools.

        We've been doing most of our projects internally, but for those that have been unsolvable, 
        we are planning to open them to get help from the open-source community.
        """,
        "challenge_title": "HAERAE-Math Challenge",
        "challenge_desc": """
        Today we are introducing our first challenge: HAERAE-Math. We've created high-quality instructions on math 
        but don't have an idea on how to generate high-quality answers for them. We are looking for solutions that 
        use open-source models with openly available licenses.

        We have created a total of 20,000 instructions already and are generating more. We've opened up a preview 
        of 50 of them in this link: [HAERAE-Math Samples](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)

        For those who generate answers for the 50 and share the methodology/results with us, we'll share the 
        remaining instructions and credit for the resulting dataset.
        """,
        "example_title": "Example Question",
        "how_to_title": "How to Participate",
        # TODO(review): step 4 still carries a bracketed placeholder rather
        # than a concrete submission channel.
        "how_to": """
        1. Access the 50 sample questions from the provided Hugging Face dataset link.
        2. Generate high-quality answers for these questions using open-source models.
        3. Document your methodology and results.
        4. Share your findings with us through [contact information or submission form].
        5. If your approach is promising, we'll provide access to the full dataset of 20,000 instructions.
        """,
        "why_title": "Why Participate?",
        "why": """
        - Contribute to advancing Korean language model research
        - Gain access to a large, high-quality dataset of math instructions
        - Collaborate with HAERAE researchers
        - Potential for co-authorship on related publications
        """,
        "contact_title": "Contact Us",
        "contact": f"""
        For more information or to submit your results, please contact us at:
        {CONTACT_LINK}
        """,
        "sidebar_title": "About HAERAE",
        "sidebar_content": """
        HAERAE is a non-profit research lab dedicated to advancing the field of 
        Korean language model interpretability and evaluation. Our work focuses on 
        creating insightful benchmarks and tools to push the boundaries of NLP research.
        """
    },
    "ํ•œ๊ตญ์–ด": {
        "intro": """
        HAERAE๋Š” ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ์˜ ํ•ด์„๊ณผ ํ‰๊ฐ€์˜ ์—ฐ๊ตฌ๋ฅผ ์œ„ํ•ด ์„ค๋ฆฝ๋œ ๋น„์˜๋ฆฌ ์—ฐ๊ตฌํŒ€์ž…๋‹ˆ๋‹ค.
        ์ €ํฌ๋Š” ๋‹ค์–‘ํ•œ ๋ฒค์น˜๋งˆํฌ์™€ ์—ฐ๊ตฌ๋ฅผ ํ†ตํ•ด ํ•œ๊ตญ์–ด ์ž์—ฐ์–ด ์ฒ˜๋ฆฌ ์—ฐ๊ตฌ๋ฅผ ๋ฐœ์ „์‹œํ‚ค๊ธฐ ์œ„ํ•ด ๋…ธ๋ ฅํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. 

        ๊ธฐ์กด์—๋Š” ๋Œ€๋ถ€๋ถ„์˜ ํ”„๋กœ์ ํŠธ๋ฅผ ๋‚ด๋ถ€์ ์œผ๋กœ ์ˆ˜ํ–‰ํ•ด ์™”์ง€๋งŒ, ๋‚ด๋ถ€์ ์œผ๋กœ ํ•ด๊ฒฐํ•˜๊ธฐ ์–ด๋ ค์šด ๋ฌธ์ œ๋“ค์— ๋Œ€ํ•ด์„œ๋Š”
        ์˜คํ”ˆ ์†Œ์Šค ์ปค๋ฎค๋‹ˆํ‹ฐ์˜ ๋„์›€์„ ๋ฐ›๊ณ ์ž Open-Research-Question ํ”„๋กœ๊ทธ๋žจ์„ ์šด์˜ํ•˜๊ฒŒ ๋˜์—ˆ์Šต๋‹ˆ๋‹ค. 
        """,
        "challenge_title": "HAERAE-Math Challenge",
        "challenge_desc": """
        ์ €ํฌ ํŒ€์€ [QARV-Instruct](https://huggingface.co/datasets/HAERAE-HUB/qarv-instruct-ko) ๋ถ€ํ„ฐ ์‹œ์ž‘ํ•ด์„œ ๊ณ ํ’ˆ์งˆ์˜ ํ•œ๊ตญ์–ด ์ง€์‹œ๋ฌธ ๋ฐ์ดํ„ฐ๋ฅผ 
        ๋งŒ๋“ค๊ธฐ ์œ„ํ•ด ๋…ธ๋ ฅ ์ค‘์— ์žˆ์Šต๋‹ˆ๋‹ค. ์ด ๊ณผ์ •์—์„œ ๋งค์šฐ ๋†’์€ ์ˆ˜์ค€์˜ ์ˆ˜ํ•™ ์ง€์‹œ๋ฌธ์„ ์ œ์ž‘ํ•˜์˜€์œผ๋‚˜, ํ•ด๋‹น ์ง€์‹œ๋ฌธ์— ๋Œ€ํ•ด ์ ์ ˆํ•œ ๋‹ต๋ณ€์„ ๋งŒ๋“ค์ง€ ๋ชปํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. 
        ์ €ํฌ๋Š” ์ด๋ฒˆ ์ฑŒ๋ฆฐ์ง€๋ฅผ ํ†ตํ•ด ์˜คํ”ˆ์†Œ์Šค LLM์„ ์‚ฌ์šฉํ•˜์—ฌ ํ•ด๋‹น ๋ฌธ์ œ๋“ค์— ๋Œ€ํ•œ ๋‹ต์„ ์ œ์ž‘ํ•  ์ˆ˜ ์žˆ๋Š” ์†”๋ฃจ์…˜์„ ์ฐพ๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. 

        ํ˜„์žฌ๋Š” ์ด 20,000๊ฐœ์˜ ์ง€์‹œ๋ฌธ์„ ์ด๋ฏธ ๋งŒ๋“ค์—ˆ์œผ๋ฉฐ ์ถ”๊ฐ€์ ์œผ๋กœ ์ƒ์„ฑํ•˜๋Š” ๊ณผ์ • ์ค‘์— ์žˆ์Šต๋‹ˆ๋‹ค. 
        ์ƒ์„ฑ๋œ ์ง€์‹œ๋ฌธ ์ค‘ ๋žœ๋ค์œผ๋กœ ์ƒ˜ํ”Œ๋ง๋œ 50๊ฐœ์˜ ์งˆ๋ฌธ์„ ๋‹ค์Œ ๋งํฌ์—์„œ ๋ณด์‹ค ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
        [HAERAE-Math ์ƒ˜ํ”Œ](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)

        50๊ฐœ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜๊ณ  ๋ฐฉ๋ฒ•๋ก /๊ฒฐ๊ณผ๋ฅผ ์ €ํฌ์—๊ฒŒ ๊ณต์œ ํ•ด์ฃผ์‹œ๋Š” ๋ถ„๋“ค๊ผ ์ €ํฌ๊ฐ€ ์ƒ์„ฑํ•œ ์ „์ฒด ์ง€์‹œ๋ฌธ๊ณผ ์ตœ์ข… ๋ฐ์ดํ„ฐ์…‹์— ๋Œ€ํ•œ ๊ธฐ์—ฌ๋„๋ฅผ ์ธ์ •ํ•ด๋“œ๋ฆด ์˜ˆ์ •์ž…๋‹ˆ๋‹ค. 
        (๊ฒฐ๊ณผ์— ๋”ฐ๋ผ ๋…ผ๋ฌธํ™”๋„ ๊ณ ๋ฏผ ์ค‘์— ์žˆ์Šต๋‹ˆ๋‹ค.)
        """,
        "example_title": "์˜ˆ์‹œ ์งˆ๋ฌธ",
        "how_to_title": "์ฐธ์—ฌ ๋ฐฉ๋ฒ•",
        # TODO(review): step 4 still carries a bracketed placeholder rather
        # than a concrete submission channel.
        "how_to": """
        1. ์ œ๊ณต๋œ Hugging Face ๋ฐ์ดํ„ฐ์…‹ ๋งํฌ์—์„œ 50๊ฐœ์˜ ์ƒ˜ํ”Œ ์งˆ๋ฌธ์„ ํ™•์ธํ•ฉ๋‹ˆ๋‹ค. 
        2. ์˜คํ”ˆ ์†Œ์Šค ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ ์ด ์งˆ๋ฌธ๋“ค์— ๋Œ€ํ•œ ๊ณ ํ’ˆ์งˆ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
        3. ๋ฐฉ๋ฒ•๋ก ๊ณผ ๊ฒฐ๊ณผ๋ฅผ ๋ฌธ์„œํ™”ํ•ฉ๋‹ˆ๋‹ค.
        4. [์—ฐ๋ฝ์ฒ˜ ์ •๋ณด ๋˜๋Š” ์ œ์ถœ ์–‘์‹]์„ ํ†ตํ•ด ๊ท€ํ•˜์˜ ๊ฒฐ๊ณผ๋ฅผ ์ €ํฌ์™€ ๊ณต์œ ํ•ฉ๋‹ˆ๋‹ค.
        5. ๊ท€ํ•˜์˜ ์ ‘๊ทผ ๋ฐฉ์‹์ด ์œ ์˜๋ฏธ ํ•˜๋‹ค๊ณ  ํŒ๋‹จ ๋œ๋‹ค๋ฉด, ๋‚˜๋จธ์ง€ ์ง€์‹œ๋ฌธ ๋ฐ์ดํ„ฐ์…‹์— ๋Œ€ํ•œ ์ ‘๊ทผ ๊ถŒํ•œ์„ ์ œ๊ณตํ•ด ๋“œ๋ฆฝ๋‹ˆ๋‹ค.
        """,
        "why_title": "์™œ ์ฐธ์—ฌํ•ด์•ผ ํ•˜๋‚˜์š”?",
        "why": """
        - ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ ์—ฐ๊ตฌ ๋ฐœ์ „์— ๊ธฐ์—ฌ
        - ๋Œ€๊ทœ๋ชจ์˜ ๊ณ ํ’ˆ์งˆ ์ˆ˜ํ•™ ์ง€์‹œ๋ฌธ ๋ฐ์ดํ„ฐ์…‹์— ์ ‘๊ทผ
        - HAERAE ์—ฐ๊ตฌ์›๋“ค๊ณผ ํ˜‘๋ ฅ
        - ๊ด€๋ จ ์ถœํŒ๋ฌผ์˜ ๊ณต๋™ ์ €์ž๊ฐ€ ๋  ๊ฐ€๋Šฅ์„ฑ
        """,
        "contact_title": "์—ฐ๋ฝ์ฒ˜",
        "contact": f"""
        ๋” ๋งŽ์€ ์ •๋ณด๋ฅผ ์›ํ•˜์‹œ๊ฑฐ๋‚˜ ๊ฒฐ๊ณผ๋ฅผ ์ œ์ถœํ•˜๋ ค๋ฉด ๋‹ค์Œ ์—ฐ๋ฝ์ฒ˜๋กœ ๋ฌธ์˜ํ•ด ์ฃผ์„ธ์š”:
        {CONTACT_LINK}
        """,
        "sidebar_title": "HAERAE ์†Œ๊ฐœ",
        "sidebar_content": """
        HAERAE๋Š” ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ์˜ ํ•ด์„๊ณผ ํ‰๊ฐ€์˜ ์—ฐ๊ตฌ๋ฅผ ์œ„ํ•ด ์„ค๋ฆฝ๋œ ๋น„์˜๋ฆฌ ์—ฐ๊ตฌํŒ€์ž…๋‹ˆ๋‹ค.
        ์ €ํฌ๋Š” ๋‹ค์–‘ํ•œ ๋ฒค์น˜๋งˆํฌ์™€ ์—ฐ๊ตฌ๋ฅผ ํ†ตํ•ด ํ•œ๊ตญ์–ด ์ž์—ฐ์–ด ์ฒ˜๋ฆฌ ์—ฐ๊ตฌ๋ฅผ ๋ฐœ์ „์‹œํ‚ค๊ธฐ ์œ„ํ•ด ๋…ธ๋ ฅํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. 
        """
    }
}

# Main content: introduction, then the challenge announcement, all taken
# from the text bundle of the selected language.
localized = content[lang]

st.write(localized["intro"])
st.header(localized["challenge_title"])
st.write(localized["challenge_desc"])

# Heading for the sample problem that follows.
st.subheader(localized["example_title"])

# Sample problem displayed under the example heading. The text is a single
# literal that is not looked up by `lang`, so the same text is shown for
# either language choice.
# NOTE(review): the literal looks mis-decoded in this view (UTF-8 text shown
# through a legacy code page) - confirm against the original file.
example_question = """
ํ•œ๊ตญ์˜ ๋ณด์•ˆ ์ „๋ฌธ๊ฐ€๊ฐ€ ๊ณ ๋„ํ™”๋œ ๋ฐ์ดํ„ฐ ๋ณดํ˜ธ ์‹œ์Šคํ…œ์„ ๊ฐœ๋ฐœํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. ์ด ์‹œ์Šคํ…œ์€ 3์ฐจ์› ๊ธฐํ•˜ํ•™์  ์ž ๊ธˆ ๋ฉ”์ปค๋‹ˆ์ฆ˜์„ ์‚ฌ์šฉํ•˜๋Š”๋ฐ, ์ž ๊ธˆ ์žฅ์น˜๋Š” ์›๋ฟ” ๋ชจ์–‘์œผ๋กœ ๋˜์–ด ์žˆ๊ณ , ๋ฐ‘๋ฉด์˜ ๋ฐ˜์ง€๋ฆ„์€ 6cm, ๋†’์ด๋Š” 8cm์ž…๋‹ˆ๋‹ค. ์ด ์›๋ฟ” ๋ชจ์–‘์˜ ์ž ๊ธˆ ์žฅ์น˜์—๋Š” ์›ํ†ต ๋ชจ์–‘์˜ ์—ด์‡ ๊ฐ€ ๋”ฑ ๋งž๊ฒŒ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.

๋ณด์•ˆ ์ „๋ฌธ๊ฐ€๋Š” ๋” ๋†’์€ ์ˆ˜์ค€์˜ ๋ณด์•ˆ์„ ์œ„ํ•ด ์›ํ†ต ๋ชจ์–‘์˜ ์—ด์‡  ์•ˆ์— ๊ตฌ ๋ชจ์–‘์˜ ์ž ๊ธˆ ์žฅ์น˜๋ฅผ ์ถ”๊ฐ€ํ•˜๋ ค๊ณ  ํ•ฉ๋‹ˆ๋‹ค. ์ด ๊ตฌ๋Š” ์›ํ†ต ์•ˆ์— ๋”ฑ ๋“ค์–ด๊ฐ€๋„๋ก ์„ค๊ณ„๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.

๋‹ค์Œ์˜ ์งˆ๋ฌธ๋“ค์„ ํ•ด๊ฒฐํ•˜์‹œ๊ธฐ ๋ฐ”๋ž๋‹ˆ๋‹ค:

1. ์›๋ฟ” ์•ˆ์— ๋”ฑ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋œ ์›ํ†ต์˜ ๋ฐ˜์ง€๋ฆ„์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?
2. ์›ํ†ต ์•ˆ์— ๋”ฑ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋œ ๊ตฌ์˜ ๋ถ€ํ”ผ๋Š” ์–ผ๋งˆ์ธ๊ฐ€์š”?
3. ์›๋ฟ”, ์›ํ†ต, ๊ตฌ๊ฐ€ ๋ชจ๋‘ ๊ฐ™์€ ์ค‘์‹ฌ์ถ•์„ ๊ณต์œ ํ•˜๊ณ  ์žˆ์œผ๋ฉฐ ์›๋ฟ”์˜ ๊ผญ๋Œ€๊ธฐ์ ๊ณผ ์›ํ†ต, ๊ตฌ์˜ ์ค‘์‹ฌ์ ์ด ๋™์ผํ•˜๋‹ค๊ณ  ๊ฐ€์ •ํ•˜๋ฉด, ์›๋ฟ”์—์„œ ์›ํ†ต์ด ์ฐจ์ง€ํ•˜๋Š” ๋น„์œจ์„ ๊ตฌํ•˜์‹œ์˜ค.
4. ์ด์ œ ์›๋ฟ”์˜ ๋†’์ด๋ฅผ 2๋ฐฐ๋กœ ๋Š˜๋ฆฌ์ž. ์›๋ฟ”์˜ ๋†’์ด๊ฐ€ 16cm๊ฐ€ ๋˜์—ˆ์„ ๋•Œ, ์›ํ†ต๊ณผ ๊ตฌ์˜ ํฌ๊ธฐ์™€ ๋ถ€ํ”ผ๋Š” ์–ด๋–ป๊ฒŒ ๋ณ€ํ•˜๋‚˜์š”?
5. ์›๋ฟ”์˜ ๋†’์ด์™€ ๋ฐ‘๋ฉด์˜ ๋ฐ˜์ง€๋ฆ„์„ ๊ฐ๊ฐ h์™€ r์ด๋ผ๊ณ  ํ•  ๋•Œ, ์›ํ†ต๊ณผ ๊ตฌ์˜ ์ตœ๋Œ€ ๋ถ€ํ”ผ๋ฅผ r๊ณผ h๋กœ ํ‘œํ˜„ํ•˜์‹œ์˜ค.

์›๋ฟ”, ์›ํ†ต, ๊ตฌ์˜ ๋ถ€ํ”ผ ๊ณต์‹์„ ์‚ฌ์šฉํ•˜์—ฌ ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜์‹œ๊ธฐ ๋ฐ”๋ž๋‹ˆ๋‹ค:

์›๋ฟ”์˜ ๋ถ€ํ”ผ: V = 1/3ฯ€rยฒh
์›ํ†ต์˜ ๋ถ€ํ”ผ: V = ฯ€rยฒh
๊ตฌ์˜ ๋ถ€ํ”ผ: V = 4/3ฯ€rยณ
"""

# Shown through st.code (tagged as markdown) rather than st.write, so the
# problem text is displayed as a code block instead of rendered Markdown.
st.code(example_question, language="markdown")

# The remaining body sections all have the same shape: a header followed by
# its body text, both read from the selected language's text bundle.
section_texts = content[lang]
for heading_key, body_key in (
    ("how_to_title", "how_to"),
    ("why_title", "why"),
    ("contact_title", "contact"),
):
    st.header(section_texts[heading_key])
    st.write(section_texts[body_key])

# Sidebar: title plus an info box with the "about" blurb.
sidebar = st.sidebar
sidebar.title(section_texts["sidebar_title"])
sidebar.info(section_texts["sidebar_content"])