louis030195 committed on
Commit
a435b0c
•
1 Parent(s): cc7c94c
Files changed (5) hide show
  1. .pylintrc +333 -0
  2. Makefile +15 -0
  3. app.py +107 -0
  4. requirements-test.txt +12 -0
  5. requirements.txt +3 -0
.pylintrc ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [MASTER]
2
+
3
+ # Specify a configuration file.
4
+ #rcfile=
5
+
6
+ # Python code to execute, usually for sys.path manipulation such as
7
+ # pygtk.require().
8
+ #init-hook=
9
+
10
+ # Profiled execution.
11
+ profile=no
12
+
13
+ # Add files or directories to the blacklist. They should be base names, not
14
+ # paths.
15
+ ignore=CVS
16
+
17
+ # Pickle collected data for later comparisons.
18
+ persistent=yes
19
+
20
+ # List of plugins (as comma separated values of python modules names) to load,
21
+ # usually to register additional checkers.
22
+ load-plugins=
23
+
24
+
25
+ [MESSAGES CONTROL]
26
+
27
+ # Enable the message, report, category or checker with the given id(s). You can
28
+ # either give multiple identifiers separated by comma (,) or put this option
29
+ # multiple times. See also the "--disable" option for examples.
30
+ enable=indexing-exception,old-raise-syntax
31
+
32
+ # Disable the message, report, category or checker with the given id(s). You
33
+ # can either give multiple identifiers separated by comma (,) or put this
34
+ # option multiple times (only on the command line, not in the configuration
35
+ # file where it should appear only once). You can also use "--disable=all" to
36
+ # disable everything first and then reenable specific checks. For example, if
37
+ # you want to run only the similarities checker, you can use "--disable=all
38
+ # --enable=similarities". If you want to run only the classes checker, but have
39
+ # no Warning level messages displayed, use "--disable=all --enable=classes
40
+ # --disable=W"
41
+ disable=design,similarities,no-self-use,attribute-defined-outside-init,locally-disabled,star-args,pointless-except,bad-option-value,global-statement,fixme,suppressed-message,useless-suppression,locally-enabled,no-member,no-name-in-module,import-error,unsubscriptable-object,unbalanced-tuple-unpacking,undefined-variable,not-context-manager,no-else-return,wrong-import-order,unnecessary-pass,logging-fstring-interpolation,logging-format-interpolation,C0330,C0114
42
+
43
+
44
+ # Set the cache size for astng objects.
45
+ cache-size=500
46
+
47
+
48
+ [REPORTS]
49
+
50
+ # Set the output format. Available formats are text, parseable, colorized, msvs
51
+ # (visual studio) and html. You can also give a reporter class, eg
52
+ # mypackage.mymodule.MyReporterClass.
53
+ output-format=text
54
+
55
+ # Put messages in a separate file for each module / package specified on the
56
+ # command line instead of printing them on stdout. Reports (if any) will be
57
+ # written in a file named "pylint_global.[txt|html]".
58
+ files-output=no
59
+
60
+ # Tells whether to display a full report or only the messages
61
+ reports=no
62
+
63
+ # Python expression which should return a note less than 10 (10 is the highest
64
+ # note). You have access to the variables errors warning, statement which
65
+ # respectively contain the number of errors / warnings messages and the total
66
+ # number of statements analyzed. This is used by the global evaluation report
67
+ # (RP0004).
68
+ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
69
+
70
+ # Add a comment according to your evaluation note. This is used by the global
71
+ # evaluation report (RP0004).
72
+ comment=no
73
+
74
+ # Template used to display messages. This is a python new-style format string
75
+ # used to format the message information. See doc for all details
76
+ #msg-template=
77
+
78
+
79
+ [TYPECHECK]
80
+
81
+ # Tells whether missing members accessed in mixin class should be ignored. A
82
+ # mixin class is detected if its name ends with "mixin" (case insensitive).
83
+ ignore-mixin-members=yes
84
+
85
+ # List of classes names for which member attributes should not be checked
86
+ # (useful for classes with attributes dynamically set).
87
+ ignored-classes=SQLObject
88
+
89
+ # When zope mode is activated, add a predefined set of Zope acquired attributes
90
+ # to generated-members.
91
+ zope=no
92
+
93
+ # List of members which are set dynamically and missed by pylint inference
94
+ # system, and so shouldn't trigger E0201 when accessed. Python regular
95
+ # expressions are accepted.
96
+ generated-members=REQUEST,acl_users,aq_parent
97
+
98
+ # List of decorators that create context managers from functions, such as
99
+ # contextlib.contextmanager.
100
+ contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager
101
+
102
+
103
+ [VARIABLES]
104
+
105
+ # Tells whether we should check for unused import in __init__ files.
106
+ init-import=no
107
+
108
+ # A regular expression matching the beginning of the name of dummy variables
109
+ # (i.e. not used).
110
+ dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_)
111
+
112
+ # List of additional names supposed to be defined in builtins. Remember that
113
+ # you should avoid defining new builtins when possible.
114
+ additional-builtins=
115
+
116
+
117
+ [BASIC]
118
+
119
+ # Required attributes for module, separated by a comma
120
+ required-attributes=
121
+
122
+ # List of builtins function names that should not be used, separated by a comma
123
+ bad-functions=apply,input,reduce
124
+
125
+
126
+ # Disable the report(s) with the given id(s).
127
+ # All non-Google reports are disabled by default.
128
+ disable-report=R0001,R0002,R0003,R0004,R0101,R0102,R0201,R0202,R0220,R0401,R0402,R0701,R0801,R0901,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0921,R0922,R0923
129
+
130
+ # Regular expression which should only match correct module names
131
+ module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
132
+
133
+ # Regular expression which should only match correct module level names
134
+ const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
135
+
136
+ # Regular expression which should only match correct class names
137
+ class-rgx=^_?[A-Z][a-zA-Z0-9]*$
138
+
139
+ # Regular expression which should only match correct function names
140
+ function-rgx=^(?:(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$
141
+
142
+ # Regular expression which should only match correct method names
143
+ method-rgx=^(?:(?P<exempt>__[a-z0-9_]+__|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$
144
+
145
+ # Regular expression which should only match correct instance attribute names
146
+ attr-rgx=^_{0,2}[a-z][a-z0-9_]*$
147
+
148
+ # Regular expression which should only match correct argument names
149
+ argument-rgx=^[a-z][a-z0-9_]*$
150
+
151
+ # Regular expression which should only match correct variable names
152
+ variable-rgx=^[a-z][a-z0-9_]*$
153
+
154
+ # Regular expression which should only match correct attribute names in class
155
+ # bodies
156
+ class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
157
+
158
+ # Regular expression which should only match correct list comprehension /
159
+ # generator expression variable names
160
+ inlinevar-rgx=^[a-z][a-z0-9_]*$
161
+
162
+ # Good variable names which should always be accepted, separated by a comma
163
+ good-names=main,_
164
+
165
+ # Bad variable names which should always be refused, separated by a comma
166
+ bad-names=
167
+
168
+ # Regular expression which should only match function or class names that do
169
+ # not require a docstring.
170
+ no-docstring-rgx=(__.*__|main)
171
+
172
+ # Minimum line length for functions/classes that require docstrings, shorter
173
+ # ones are exempt.
174
+ docstring-min-length=10
175
+
176
+
177
+ [FORMAT]
178
+
179
+ # Maximum number of characters on a single line.
180
+ max-line-length=120
181
+
182
+ # Regexp for a line that is allowed to be longer than the limit.
183
+ ignore-long-lines=(?x)
184
+ (^\s*(import|from)\s
185
+ |\$Id:\s\/\/depot\/.+#\d+\s\$
186
+ |^[a-zA-Z_][a-zA-Z0-9_]*\s*=\s*("[^"]\S+"|'[^']\S+')
187
+ |^\s*\#\ LINT\.ThenChange
188
+ |^[^#]*\#\ type:\ [a-zA-Z_][a-zA-Z0-9_.,[\] ]*$
189
+ |pylint
190
+ |"""
191
+ |\#
192
+ |lambda
193
+ |(https?|ftp):)
194
+
195
+ # Allow the body of an if to be on the same line as the test if there is no
196
+ # else.
197
+ single-line-if-stmt=y
198
+
199
+ # List of optional constructs for which whitespace checking is disabled
200
+ no-space-check=
201
+
202
+ # Maximum number of lines in a module
203
+ max-module-lines=99999
204
+
205
+ # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
206
+ # tab).
207
+ indent-string=' '
208
+
209
+ # Set the linting for string quotes
210
+ string-quote=single
211
+ triple-quote=double
212
+ docstring-quote=double
213
+
214
+ [SIMILARITIES]
215
+
216
+ # Minimum lines number of a similarity.
217
+ min-similarity-lines=4
218
+
219
+ # Ignore comments when computing similarities.
220
+ ignore-comments=yes
221
+
222
+ # Ignore docstrings when computing similarities.
223
+ ignore-docstrings=yes
224
+
225
+ # Ignore imports when computing similarities.
226
+ ignore-imports=no
227
+
228
+
229
+ [MISCELLANEOUS]
230
+
231
+ # List of note tags to take into consideration, separated by a comma.
232
+ notes=
233
+
234
+
235
+ [IMPORTS]
236
+
237
+ # Deprecated modules which should not be used, separated by a comma
238
+ deprecated-modules=regsub,TERMIOS,Bastion,rexec,sets
239
+
240
+ # Create a graph of every (i.e. internal and external) dependencies in the
241
+ # given file (report RP0402 must not be disabled)
242
+ import-graph=
243
+
244
+ # Create a graph of external dependencies in the given file (report RP0402 must
245
+ # not be disabled)
246
+ ext-import-graph=
247
+
248
+ # Create a graph of internal dependencies in the given file (report RP0402 must
249
+ # not be disabled)
250
+ int-import-graph=
251
+
252
+ extension-pkg-whitelist=_jsonnet
253
+
254
+
255
+ [CLASSES]
256
+
257
+ # List of interface methods to ignore, separated by a comma. This is used for
258
+ # instance to not check methods defined in Zope's Interface base class.
259
+ ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
260
+
261
+ # List of method names used to declare (i.e. assign) instance attributes.
262
+ defining-attr-methods=__init__,__new__,setUp
263
+
264
+ # List of valid names for the first argument in a class method.
265
+ valid-classmethod-first-arg=cls,class_
266
+
267
+ # List of valid names for the first argument in a metaclass class method.
268
+ valid-metaclass-classmethod-first-arg=mcs
269
+
270
+
271
+ [DESIGN]
272
+
273
+ # Maximum number of arguments for function / method
274
+ max-args=5
275
+
276
+ # Argument names that match this expression will be ignored. Default to name
277
+ # with leading underscore
278
+ ignored-argument-names=_.*
279
+
280
+ # Maximum number of locals for function / method body
281
+ max-locals=15
282
+
283
+ # Maximum number of return / yield for function / method body
284
+ max-returns=6
285
+
286
+ # Maximum number of branch for function / method body
287
+ max-branches=12
288
+
289
+ # Maximum number of statements in function / method body
290
+ max-statements=50
291
+
292
+ # Maximum number of parents for a class (see R0901).
293
+ max-parents=7
294
+
295
+ # Maximum number of attributes for a class (see R0902).
296
+ max-attributes=7
297
+
298
+ # Minimum number of public methods for a class (see R0903).
299
+ min-public-methods=2
300
+
301
+ # Maximum number of public methods for a class (see R0904).
302
+ max-public-methods=20
303
+
304
+
305
+ [EXCEPTIONS]
306
+
307
+ # Exceptions that will emit a warning when being caught. Defaults to
308
+ # "Exception"
309
+ overgeneral-exceptions=Exception,StandardError,BaseException
310
+
311
+
312
+ [AST]
313
+
314
+ # Maximum line length for lambdas
315
+ short-func-length=1
316
+
317
+ # List of module members that should be marked as deprecated.
318
+ # All of the string functions are listed in 4.1.4 Deprecated string functions
319
+ # in the Python 2.4 docs.
320
+ deprecated-members=string.atof,string.atoi,string.atol,string.capitalize,string.expandtabs,string.find,string.rfind,string.index,string.rindex,string.count,string.lower,string.split,string.rsplit,string.splitfields,string.join,string.joinfields,string.lstrip,string.rstrip,string.strip,string.swapcase,string.translate,string.upper,string.ljust,string.rjust,string.center,string.zfill,string.replace,sys.exitfunc
321
+
322
+
323
+ [DOCSTRING]
324
+
325
+ # List of exceptions that do not need to be mentioned in the Raises section of
326
+ # a docstring.
327
+ ignore-exceptions=AssertionError,NotImplementedError,StopIteration,TypeError
328
+
329
+ [TOKENS]
330
+
331
+ # Number of spaces of indent required when the last token on the preceding line
332
+ # is an open (, [, or {.
333
+ indent-after-paren=4
Makefile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ push:
2
+ git add .
3
+ git commit -m "🙈"
4
+ git push
5
+
6
+
7
# 'install' is a command, not a file: without .PHONY a file or directory
# named "install" would make this target silently do nothing.
.PHONY: install
# The recipe runs under make's default shell (/bin/sh), so the virtualenv is
# activated with the POSIX '.' builtin instead of the bash-only 'source'.
# Steps are chained with '&&' so a failed step aborts the recipe instead of
# installing into the wrong interpreter and still reporting success.
install: ## [DEVELOPMENT] Install the API dependencies
	virtualenv env && \
	. env/bin/activate && \
	pip install -r requirements.txt && \
	pip install -r requirements-test.txt
	@printf "Done, run '\033[0;31msource env/bin/activate\033[0m' to activate the virtual environment\n"
13
+
14
+ run: ## [DEVELOPMENT] Run the streamlit app
15
+ streamlit run app.py
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from datasets import load_dataset
3
+ import pandas as pd
4
+
5
+
6
# Download (or load from the local cache) the conversation-starter dataset.
dataset = load_dataset("Langame/starter2")

# Build one record per training row, renaming "content" to the column title
# shown in the UI. Iterating the split yields each row once, instead of
# indexing dataset["train"][i] twice per row as a range(len(...)) loop does.
conversation_starters = [
    {
        "conversation_starter": row["content"],
        "topics": row["topics"],
    }
    for row in dataset["train"]
]

df = pd.DataFrame(conversation_starters)
16
+
17
+ from pandas.api.types import (
18
+ is_categorical_dtype,
19
+ is_datetime64_any_dtype,
20
+ is_numeric_dtype,
21
+ is_object_dtype,
22
+ )
23
+
24
+ st.title("Conversation starters")
25
+
26
+ st.write(
27
+ """Quick hack to search conversation starters for your next conversations in https://huggingface.co/datasets/Langame/starter2
28
+ """
29
+ )
30
+
31
+
32
def filter_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds a UI on top of a dataframe to let viewers filter columns.

    Renders an "Add filters" checkbox. When it is not ticked the input
    dataframe is returned as-is. Otherwise the function works on a copy and,
    for every column the viewer picks, shows a widget chosen from the column's
    content:

    * categorical dtype or fewer than 10 unique values -> multiselect
    * numeric dtype -> range slider
    * datetime dtype -> date range picker
    * anything else -> free-text box matched as a regex (or as a plain
      substring when the text is not a valid regex)

    Args:
        df (pd.DataFrame): Original dataframe
    Returns:
        pd.DataFrame: Filtered dataframe
    """
    # Local import: only needed to validate the viewer-supplied pattern.
    import re

    modify = st.checkbox("Add filters")

    if not modify:
        return df

    df = df.copy()

    # Try to convert datetimes into a standard format (datetime, no timezone)
    for col in df.columns:
        if is_object_dtype(df[col]):
            try:
                df[col] = pd.to_datetime(df[col])
            except Exception:
                # Deliberate best-effort: a column that cannot be parsed as
                # datetimes simply keeps its original values.
                pass

        if is_datetime64_any_dtype(df[col]):
            df[col] = df[col].dt.tz_localize(None)

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter dataframe on", df.columns)
        for column in to_filter_columns:
            left, right = st.columns((1, 20))
            left.write("↳")

            # Columns holding unhashable cells (e.g. lists) cannot be counted
            # with nunique(); treat them as free text instead of crashing.
            try:
                unique_count = df[column].nunique()
            except TypeError:
                unique_count = None
            is_categorical = isinstance(df[column].dtype, pd.CategoricalDtype)

            # Treat columns with < 10 unique values as categorical
            if is_categorical or (unique_count is not None and unique_count < 10):
                options = df[column].unique()
                user_cat_input = right.multiselect(
                    f"Values for {column}",
                    options,
                    default=list(options),
                )
                df = df[df[column].isin(user_cat_input)]
            elif is_numeric_dtype(df[column]):
                _min = float(df[column].min())
                _max = float(df[column].max())
                # At least 10 distinct values here, so _max > _min and the
                # step is strictly positive.
                step = (_max - _min) / 100
                user_num_input = right.slider(
                    f"Values for {column}",
                    _min,
                    _max,
                    (_min, _max),
                    step=step,
                )
                df = df[df[column].between(*user_num_input)]
            elif is_datetime64_any_dtype(df[column]):
                user_date_input = right.date_input(
                    f"Values for {column}",
                    value=(
                        df[column].min(),
                        df[column].max(),
                    ),
                )
                # While the viewer is still picking, only one date is
                # returned; filter once both ends of the range are set.
                if len(user_date_input) == 2:
                    start_date, end_date = map(pd.to_datetime, user_date_input)
                    df = df.loc[df[column].between(start_date, end_date)]
            else:
                user_text_input = right.text_input(
                    f"Substring or regex in {column}",
                )
                if user_text_input:
                    # Fall back to a literal substring search when the text
                    # is not a valid regular expression.
                    try:
                        re.compile(user_text_input)
                        use_regex = True
                    except re.error:
                        use_regex = False
                    # Stringify non-string cells (e.g. lists) so they are
                    # searchable, but keep missing values missing so they
                    # never match; na=False keeps the mask strictly boolean.
                    text = df[column].astype(str).where(df[column].notna())
                    mask = text.str.contains(
                        user_text_input, regex=use_regex, na=False
                    )
                    df = df[mask.astype(bool)]

    return df
105
+
106
+
107
+ st.dataframe(filter_dataframe(df))
requirements-test.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ black
2
+ mypy
3
+ pylint
4
+ pytest-cov
5
+ pytest-xdist
6
+ pytest
7
+ ipykernel
8
+ ipywidgets
9
+ httpx
10
+ trio
11
+ pytest-asyncio
12
+ watchdog
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ pandas
2
+ datasets
3
+ streamlit