Petr Tsvetkov committed on
Commit
1b24b13
·
0 Parent(s):

Add .gitignore file and main application script

Browse files

This commit introduces a .gitignore file that ignores various file types and directories related to Python, PyCharm, and venv. Additionally, the main application script, app.py, is added, which uses gradio for interface, loads data using the 'datasets' package, and includes functionality for user feedback and navigation.

Files changed (2) hide show
  1. .gitignore +282 -0
  2. app.py +128 -0
.gitignore ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/pycharm+all,venv,python
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=pycharm+all,venv,python
3
+
4
+ ### PyCharm+all ###
5
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
6
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
7
+
8
+ # User-specific stuff
9
+ .idea/**/workspace.xml
10
+ .idea/**/tasks.xml
11
+ .idea/**/usage.statistics.xml
12
+ .idea/**/dictionaries
13
+ .idea/**/shelf
14
+
15
+ # AWS User-specific
16
+ .idea/**/aws.xml
17
+
18
+ # Generated files
19
+ .idea/**/contentModel.xml
20
+
21
+ # Sensitive or high-churn files
22
+ .idea/**/dataSources/
23
+ .idea/**/dataSources.ids
24
+ .idea/**/dataSources.local.xml
25
+ .idea/**/sqlDataSources.xml
26
+ .idea/**/dynamic.xml
27
+ .idea/**/uiDesigner.xml
28
+ .idea/**/dbnavigator.xml
29
+
30
+ # Gradle
31
+ .idea/**/gradle.xml
32
+ .idea/**/libraries
33
+
34
+ # Gradle and Maven with auto-import
35
+ # When using Gradle or Maven with auto-import, you should exclude module files,
36
+ # since they will be recreated, and may cause churn. Uncomment if using
37
+ # auto-import.
38
+ # .idea/artifacts
39
+ # .idea/compiler.xml
40
+ # .idea/jarRepositories.xml
41
+ # .idea/modules.xml
42
+ # .idea/*.iml
43
+ # .idea/modules
44
+ # *.iml
45
+ # *.ipr
46
+
47
+ # CMake
48
+ cmake-build-*/
49
+
50
+ # Mongo Explorer plugin
51
+ .idea/**/mongoSettings.xml
52
+
53
+ # File-based project format
54
+ *.iws
55
+
56
+ # IntelliJ
57
+ out/
58
+
59
+ # mpeltonen/sbt-idea plugin
60
+ .idea_modules/
61
+
62
+ # JIRA plugin
63
+ atlassian-ide-plugin.xml
64
+
65
+ # Cursive Clojure plugin
66
+ .idea/replstate.xml
67
+
68
+ # SonarLint plugin
69
+ .idea/sonarlint/
70
+
71
+ # Crashlytics plugin (for Android Studio and IntelliJ)
72
+ com_crashlytics_export_strings.xml
73
+ crashlytics.properties
74
+ crashlytics-build.properties
75
+ fabric.properties
76
+
77
+ # Editor-based Rest Client
78
+ .idea/httpRequests
79
+
80
+ # Android studio 3.1+ serialized cache file
81
+ .idea/caches/build_file_checksums.ser
82
+
83
+ ### PyCharm+all Patch ###
84
+ # Ignore everything but code style settings and run configurations
85
+ # that are supposed to be shared within teams.
86
+
87
+ .idea/*
88
+
89
+ !.idea/codeStyles
90
+ !.idea/runConfigurations
91
+
92
+ ### Python ###
93
+ # Byte-compiled / optimized / DLL files
94
+ __pycache__/
95
+ *.py[cod]
96
+ *$py.class
97
+
98
+ # C extensions
99
+ *.so
100
+
101
+ # Distribution / packaging
102
+ .Python
103
+ build/
104
+ develop-eggs/
105
+ dist/
106
+ downloads/
107
+ eggs/
108
+ .eggs/
109
+ lib/
110
+ lib64/
111
+ parts/
112
+ sdist/
113
+ var/
114
+ wheels/
115
+ share/python-wheels/
116
+ *.egg-info/
117
+ .installed.cfg
118
+ *.egg
119
+ MANIFEST
120
+
121
+ # PyInstaller
122
+ # Usually these files are written by a python script from a template
123
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
124
+ *.manifest
125
+ *.spec
126
+
127
+ # Installer logs
128
+ pip-log.txt
129
+ pip-delete-this-directory.txt
130
+
131
+ # Unit test / coverage reports
132
+ htmlcov/
133
+ .tox/
134
+ .nox/
135
+ .coverage
136
+ .coverage.*
137
+ .cache
138
+ nosetests.xml
139
+ coverage.xml
140
+ *.cover
141
+ *.py,cover
142
+ .hypothesis/
143
+ .pytest_cache/
144
+ cover/
145
+
146
+ # Translations
147
+ *.mo
148
+ *.pot
149
+
150
+ # Django stuff:
151
+ *.log
152
+ local_settings.py
153
+ db.sqlite3
154
+ db.sqlite3-journal
155
+
156
+ # Flask stuff:
157
+ instance/
158
+ .webassets-cache
159
+
160
+ # Scrapy stuff:
161
+ .scrapy
162
+
163
+ # Sphinx documentation
164
+ docs/_build/
165
+
166
+ # PyBuilder
167
+ .pybuilder/
168
+ target/
169
+
170
+ # Jupyter Notebook
171
+ .ipynb_checkpoints
172
+
173
+ # IPython
174
+ profile_default/
175
+ ipython_config.py
176
+
177
+ # pyenv
178
+ # For a library or package, you might want to ignore these files since the code is
179
+ # intended to run in multiple environments; otherwise, check them in:
180
+ # .python-version
181
+
182
+ # pipenv
183
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
184
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
185
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
186
+ # install all needed dependencies.
187
+ #Pipfile.lock
188
+
189
+ # poetry
190
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
191
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
192
+ # commonly ignored for libraries.
193
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
194
+ #poetry.lock
195
+
196
+ # pdm
197
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
198
+ #pdm.lock
199
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
200
+ # in version control.
201
+ # https://pdm.fming.dev/#use-with-ide
202
+ .pdm.toml
203
+
204
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
205
+ __pypackages__/
206
+
207
+ # Celery stuff
208
+ celerybeat-schedule
209
+ celerybeat.pid
210
+
211
+ # SageMath parsed files
212
+ *.sage.py
213
+
214
+ # Environments
215
+ .env
216
+ .venv
217
+ env/
218
+ venv/
219
+ ENV/
220
+ env.bak/
221
+ venv.bak/
222
+
223
+ # Spyder project settings
224
+ .spyderproject
225
+ .spyproject
226
+
227
+ # Rope project settings
228
+ .ropeproject
229
+
230
+ # mkdocs documentation
231
+ /site
232
+
233
+ # mypy
234
+ .mypy_cache/
235
+ .dmypy.json
236
+ dmypy.json
237
+
238
+ # Pyre type checker
239
+ .pyre/
240
+
241
+ # pytype static type analyzer
242
+ .pytype/
243
+
244
+ # Cython debug symbols
245
+ cython_debug/
246
+
247
+ # PyCharm
248
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
249
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
250
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
251
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
252
+ #.idea/
253
+
254
+ ### Python Patch ###
255
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
256
+ poetry.toml
257
+
258
+ # ruff
259
+ .ruff_cache/
260
+
261
+ # LSP config files
262
+ pyrightconfig.json
263
+
264
+ ### venv ###
265
+ # Virtualenv
266
+ # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
267
+ [Bb]in
268
+ [Ii]nclude
269
+ [Ll]ib
270
+ [Ll]ib64
271
+ [Ll]ocal
272
+ [Ss]cripts
273
+ pyvenv.cfg
274
+ pip-selfcheck.json
275
+
276
+ # End of https://www.toptal.com/developers/gitignore/api/pycharm+all,venv,python
277
+
278
+ .idea
279
+
280
+ data
281
+ feedback
282
+ flagged
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+
3
+ import gradio as gr
4
+ from datasets import load_dataset
5
+
6
# Name of the dataset configuration to label; edit this value to select another one.
configuration = "commitchronicle-py-long"

# Test split of the selected configuration; downloads are cached in the "data" directory.
dataset = load_dataset(
    "JetBrains-Research/lca-cmg",
    configuration,
    split="test",
    cache_dir="data",
)
n_samples = len(dataset)

# CSV logger that the submit handler uses to store feedback.
saver = gr.CSVLogger()
14
+
15
+
16
def get_github_link(repo, hash):
    """Return the GitHub URL of a commit.

    Args:
        repo: Repository path in ``owner/name`` form.
        hash: Commit hash within that repository.

    Returns:
        The ``https://github.com/<repo>/commit/<hash>`` URL as a string.
    """
    return f"https://github.com/{repo}/commit/{hash}"
19
+
20
+
21
def update_commit_view(sample_ind):
    """Collect the values shown in the commit view for one dataset sample.

    Args:
        sample_ind: Index of the sample in the module-level ``dataset``.

    Returns:
        A 5-tuple ``(github_link_md, diff_json, commit_msg, repo_val,
        hash_val)``. When ``sample_ind`` is at or past ``n_samples`` every
        element is ``None``, so the result always has the same shape.
    """
    if sample_ind >= n_samples:
        # Callers prepend their own value with tuple concatenation; returning
        # a bare None here made that concatenation raise TypeError.
        return (None,) * 5

    record = dataset[sample_ind]
    repo_val = record['repo']
    hash_val = record['hash']
    github_link_md = f"[See the commit on GitHub]({get_github_link(repo_val, hash_val)})"
    diff_json = record['mods']
    commit_msg = record['message']
    return github_link_md, diff_json, commit_msg, repo_val, hash_val
32
+
33
+
34
def next_sample(current_sample_ind):
    """Advance to the next sample and return the refreshed UI values.

    Args:
        current_sample_ind: Index of the sample currently displayed.

    Returns:
        A 6-tuple: the new sample index followed by the five commit-view
        values produced by ``update_commit_view``. The event handlers that
        use this function bind it to six output components, so six values
        are returned on every path. Once the index has reached ``n_samples``
        it is no longer incremented and the view values are all ``None``.
    """
    # Placeholder for the five commit-view values when there is nothing to show.
    empty_view = (None,) * 5

    # ">=" rather than "==" so an index past the end cannot be incremented further.
    if current_sample_ind >= n_samples:
        return (current_sample_ind,) + empty_view

    current_sample_ind += 1
    updated_view = update_commit_view(current_sample_ind)
    if updated_view is None:
        # Moving past the last sample: update_commit_view may return None
        # for an out-of-range index, which cannot be concatenated to a tuple.
        updated_view = empty_view
    return (current_sample_ind,) + tuple(updated_view)
41
+
42
+
43
# Build the labeling UI: a progress slider with a skip button, the commit view
# (GitHub link, diff, commit message) and the feedback form, then wire the events.
# NOTE(review): indentation is not preserved in this view, so the nesting of the
# Row/Column contexts below should be confirmed against the original file.
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
44
+ with gr.Row():
45
# Read-only slider that holds the current sample index. Its maximum is
# n_samples, i.e. one past the last valid index of the dataset.
+ current_sample_sld = gr.Slider(minimum=0, maximum=n_samples, step=1,
46
+ value=0,
47
+ interactive=False,
48
+ label='sample_ind',
49
+ info=f"Samples labeled/skipped (out of {n_samples})",
50
+ show_label=False,
51
+ container=False,
52
+ scale=5)
53
+ with gr.Column(scale=1):
54
# Hidden textboxes (visible=False) holding the current sample's repo and hash;
# both are part of feedback_form below, so they are logged with each submission.
+ repo_val = gr.Textbox(interactive=False, container=False, label='repo', visible=False)
55
+ hash_val = gr.Textbox(interactive=False, container=False, label='hash', visible=False)
56
+ skip_btn = gr.Button("Skip the current sample")
57
+ with gr.Row():
58
+ with gr.Column(scale=2):
59
+ github_link = gr.Markdown()
60
+ diff_view = gr.JSON()
61
+ with gr.Column(scale=1):
62
# NOTE(review): labelled "AI-generated" but filled from the dataset record's
# 'message' field by update_commit_view — confirm this is intended.
+ commit_msg = gr.Textbox(label="AI-generated commit message",
63
+ interactive=False,
64
+ )
65
+ gr.Markdown("## Please, answer the questions below")
66
# Feedback questions; the 'label' of each component is kept although
# show_label=False hides it in the UI.
# NOTE(review): presumably the labels serve as column names in the CSV log — verify.
+ verbosity_feedback = gr.Radio(info='How can you describe the length of the commit message above?',
67
+ label='verbosity',
68
+ show_label=False,
69
+ choices=[
70
+ ('Too short', 0),
71
+ ('Just right', 1),
72
+ ('Too verbose', 2)])
73
+ correctness_feedback = gr.Radio(info='Is the commit message factually correct?',
74
+ label='is_correct',
75
+ show_label=False,
76
+ choices=[
77
+ ('Yes', True),
78
+ ('No', False)])
79
+ format_feedback = gr.Slider(info='Rate the commit message\'s format (1 - very bad, 5 - very good)',
80
+ label='format_score',
81
+ show_label=False,
82
+ minimum=1,
83
+ step=1,
84
+ interactive=True,
85
+ maximum=5)
86
+ submit_btn = gr.Button("Submit and continue")
87
# Read-only textbox that receives the session id generated by init_session.
+ session_val = gr.Textbox(info='Session', interactive=False, container=True, show_label=False,
88
+ label='session')
89
+
90
# Components refreshed whenever a sample is shown. The order matches the tuple
# returned by update_commit_view: link, diff, message, repo, hash.
+ commit_view = [
91
+ github_link,
92
+ diff_view,
93
+ commit_msg,
94
+ repo_val,
95
+ hash_val
96
+ ]
97
+
98
# Components whose values are passed to submit(). The sample index comes first
# because submit() reads it as args[0].
+ feedback_form = [
99
+ current_sample_sld,
100
+ session_val,
101
+ repo_val,
102
+ hash_val,
103
+ verbosity_feedback,
104
+ correctness_feedback,
105
+ format_feedback
106
+ ]
107
+
108
# Register the form components with the CSV logger.
# NOTE(review): "feedback" is presumably the output directory — confirm against the Gradio docs.
+ saver.setup(feedback_form, "feedback")
109
+
110
# Skipping advances to the next sample without logging anything.
+ skip_btn.click(next_sample, inputs=[current_sample_sld], outputs=[current_sample_sld] + commit_view)
111
+
112
+
113
# Log the submitted form values, then advance to the sample after args[0]
# (the current sample index).
+ def submit(*args):
114
+ saver.flag(args)
115
+ return next_sample(args[0])
116
+
117
+
118
+ submit_btn.click(submit, inputs=feedback_form, outputs=[current_sample_sld] + commit_view)
119
+
120
+
121
# Create a random UUID string as the session id and return it together with
# the view values for the sample index passed in (the slider's value).
+ def init_session(*args):
122
+ session = str(uuid.uuid4())
123
+ return (session,) + update_commit_view(*args)
124
+
125
+
126
# Run init_session through the Blocks load event to fill the session box and the commit view.
+ demo.load(init_session, inputs=[current_sample_sld], outputs=[session_val] + commit_view)
127
+
128
+ demo.launch()