alexshengzhili committed on
Commit
e72aedf
1 Parent(s): a062426

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +1 -0
  2. .github/PULL_REQUEST_TEMPLATE.md +17 -0
  3. .github/workflows/python-package.yml +30 -0
  4. .gitignore +30 -0
  5. .pylintrc +449 -0
  6. 2023-07-05-conv.json +0 -0
  7. LICENSE +201 -0
  8. README.md +291 -7
  9. assets/demo_narrow.gif +3 -0
  10. assets/qa_browser.png +0 -0
  11. assets/screenshot_cli.png +0 -0
  12. assets/screenshot_gui.png +0 -0
  13. assets/server_arch.png +0 -0
  14. assets/vicuna_logo.jpeg +0 -0
  15. controller.log +228 -0
  16. data/dummy_conversation.json +0 -0
  17. docker/Dockerfile +6 -0
  18. docker/docker-compose.yml +40 -0
  19. docs/arena.md +9 -0
  20. docs/commands/data_cleaning.md +19 -0
  21. docs/commands/leaderboard.md +15 -0
  22. docs/commands/local_cluster.md +30 -0
  23. docs/commands/pypi.md +11 -0
  24. docs/commands/test_process.md +39 -0
  25. docs/commands/webserver.md +82 -0
  26. docs/gptq.md +59 -0
  27. docs/langchain_integration.md +90 -0
  28. docs/model_support.md +55 -0
  29. docs/openai_api.md +131 -0
  30. docs/server_arch.md +2 -0
  31. docs/training.md +60 -0
  32. docs/vicuna_weights_version.md +94 -0
  33. docs/vllm_integration.md +15 -0
  34. fastchat/__init__.py +1 -0
  35. fastchat/__pycache__/__init__.cpython-311.pyc +0 -0
  36. fastchat/__pycache__/constants.cpython-311.pyc +0 -0
  37. fastchat/__pycache__/conversation.cpython-311.pyc +0 -0
  38. fastchat/__pycache__/utils.cpython-311.pyc +0 -0
  39. fastchat/constants.py +58 -0
  40. fastchat/conversation.py +876 -0
  41. fastchat/data/__init__.py +0 -0
  42. fastchat/data/clean_sharegpt.py +217 -0
  43. fastchat/data/convert_alpaca.py +38 -0
  44. fastchat/data/extract_gpt4_only.py +32 -0
  45. fastchat/data/extract_single_round.py +29 -0
  46. fastchat/data/filter_wrong_format.py +44 -0
  47. fastchat/data/get_stats.py +48 -0
  48. fastchat/data/hardcoded_questions.py +165 -0
  49. fastchat/data/inspect_data.py +33 -0
  50. fastchat/data/merge.py +24 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/demo_narrow.gif filter=lfs diff=lfs merge=lfs -text
.github/PULL_REQUEST_TEMPLATE.md ADDED
@@ -0,0 +1,17 @@
+ <!-- Thank you for your contribution! -->
+
+ <!-- Please add a reviewer to the assignee section when you create a PR. If you don't have access to it, we will shortly find a reviewer and assign them to your PR. -->
+
+ ## Why are these changes needed?
+
+ <!-- Please give a short summary of the change and the problem this solves. -->
+
+ ## Related issue number (if applicable)
+
+ <!-- For example: "Closes #1234" -->
+
+ ## Checks
+
+ - [ ] I've run `format.sh` to lint the changes in this PR.
+ - [ ] I've included any doc changes needed.
+ - [ ] I've made sure the relevant tests are passing (if applicable).
.github/workflows/python-package.yml ADDED
@@ -0,0 +1,30 @@
+ name: Python package
+
+ on: [push, pull_request]
+
+ jobs:
+   build:
+
+     runs-on: ubuntu-latest
+     strategy:
+       fail-fast: false
+       matrix:
+         python-version: ["3.10"]
+
+     steps:
+       - uses: actions/checkout@v3
+       - name: Set up Python ${{ matrix.python-version }}
+         uses: actions/setup-python@v4
+         with:
+           python-version: ${{ matrix.python-version }}
+           cache: 'pip'
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           python -m pip install -e '.[dev]'
+       - name: Run linter
+         run: |
+           pylint -d all -e E0602 ./fastchat/
+       - name: Check formatting
+         run: |
+           black --check .
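The two CI steps above can be reproduced locally before pushing. A minimal sketch, assuming the `[dev]` extra pulls in `pylint` and `black` as the workflow implies:

```bash
# Mirror the CI checks locally (run from the repository root).
python -m pip install -e '.[dev]'   # install FastChat plus dev tools
pylint -d all -e E0602 ./fastchat/  # disable all checks, fail only on undefined-variable (E0602)
black --check .                     # verify formatting without rewriting files
```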
.gitignore ADDED
@@ -0,0 +1,30 @@
+ # Python
+ __pycache__
+ *.pyc
+ *.egg-info
+ dist
+ .venv
+
+ # Log
+ *.log
+ *.log.*
+ *.json
+ !playground/deepspeed_config_s2.json
+ !playground/deepspeed_config_s3.json
+
+ # Editor
+ .idea
+ *.swp
+
+ # Other
+ .DS_Store
+ wandb
+ output
+
+ # Data
+ *.pkl
+ *.csv
+ tests/state_of_the_union.txt
+
+ # Build
+ build
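One subtlety worth noting: `*.json` ignores all JSON files, while the `!playground/...` lines re-include the two DeepSpeed configs. `git check-ignore` can confirm which pattern wins (a quick sketch, run inside the repository):

```bash
# -v prints the .gitignore rule responsible for each decision.
git check-ignore -v 2023-07-05-conv.json             # matched by the *.json pattern
git check-ignore -v playground/deepspeed_config_s2.json \
  || echo "not ignored (re-included by the ! rule)"
```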
.pylintrc ADDED
@@ -0,0 +1,449 @@
+ # This Pylint rcfile contains a best-effort configuration to uphold the
+ # best-practices and style described in the Google Python style guide:
+ #   https://google.github.io/styleguide/pyguide.html
+ #
+ # Its canonical open-source location is:
+ #   https://google.github.io/styleguide/pylintrc
+
+ [MASTER]
+
+ # Files or directories to be skipped. They should be base names, not paths.
+ ignore=third_party,ray_patches,providers
+
+ # Files or directories matching the regex patterns are skipped. The regex
+ # matches against base names, not paths.
+ ignore-patterns=
+
+ # Pickle collected data for later comparisons.
+ persistent=no
+
+ # List of plugins (as comma separated values of python module names) to load,
+ # usually to register additional checkers.
+ load-plugins=
+
+ # Use multiple processes to speed up Pylint.
+ jobs=4
+
+ # Allow loading of arbitrary C extensions. Extensions are imported into the
+ # active Python interpreter and may run arbitrary code.
+ unsafe-load-any-extension=no
+
+
+ [MESSAGES CONTROL]
+
+ # Only show warnings with the listed confidence levels. Leave empty to show
+ # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
+ confidence=
+
+ # Enable the message, report, category or checker with the given id(s). You can
+ # either give multiple identifiers separated by comma (,) or put this option
+ # multiple times (only on the command line, not in the configuration file where
+ # it should appear only once). See also the "--disable" option for examples.
+ #enable=
+
+ # Disable the message, report, category or checker with the given id(s). You
+ # can either give multiple identifiers separated by comma (,) or put this
+ # option multiple times (only on the command line, not in the configuration
+ # file where it should appear only once). You can also use "--disable=all" to
+ # disable everything first and then re-enable specific checks. For example, if
+ # you want to run only the similarities checker, you can use "--disable=all
+ # --enable=similarities". If you want to run only the classes checker, but have
+ # no Warning level messages displayed, use "--disable=all --enable=classes
+ # --disable=W"
+ disable=abstract-method,
+         apply-builtin,
+         arguments-differ,
+         attribute-defined-outside-init,
+         backtick,
+         bad-option-value,
+         basestring-builtin,
+         buffer-builtin,
+         c-extension-no-member,
+         consider-using-enumerate,
+         cmp-builtin,
+         cmp-method,
+         coerce-builtin,
+         coerce-method,
+         delslice-method,
+         div-method,
+         duplicate-code,
+         eq-without-hash,
+         execfile-builtin,
+         file-builtin,
+         filter-builtin-not-iterating,
+         fixme,
+         getslice-method,
+         global-statement,
+         hex-method,
+         idiv-method,
+         implicit-str-concat-in-sequence,
+         import-error,
+         import-self,
+         import-star-module-level,
+         inconsistent-return-statements,
+         input-builtin,
+         intern-builtin,
+         invalid-str-codec,
+         locally-disabled,
+         logging-format-interpolation,  # FIXME(sky): make pass.
+         logging-fstring-interpolation,  # FIXME(sky): make pass.
+         long-builtin,
+         long-suffix,
+         map-builtin-not-iterating,
+         misplaced-comparison-constant,
+         missing-function-docstring,
+         metaclass-assignment,
+         next-method-called,
+         next-method-defined,
+         no-absolute-import,
+         no-else-break,
+         no-else-continue,
+         no-else-raise,
+         no-else-return,
+         no-init,  # added
+         no-member,
+         no-name-in-module,
+         no-self-use,
+         nonzero-method,
+         oct-method,
+         old-division,
+         old-ne-operator,
+         old-octal-literal,
+         old-raise-syntax,
+         parameter-unpacking,
+         print-statement,
+         raising-string,
+         range-builtin-not-iterating,
+         raw_input-builtin,
+         rdiv-method,
+         reduce-builtin,
+         relative-import,
+         reload-builtin,
+         round-builtin,
+         setslice-method,
+         signature-differs,
+         standarderror-builtin,
+         suppressed-message,
+         sys-max-int,
+         too-few-public-methods,
+         too-many-ancestors,
+         too-many-arguments,
+         too-many-boolean-expressions,
+         too-many-branches,
+         too-many-instance-attributes,
+         too-many-locals,
+         too-many-nested-blocks,
+         too-many-public-methods,
+         too-many-return-statements,
+         too-many-statements,
+         trailing-newlines,
+         unichr-builtin,
+         unicode-builtin,
+         unnecessary-pass,
+         unpacking-in-except,
+         useless-else-on-loop,
+         useless-object-inheritance,
+         useless-suppression,
+         using-cmp-argument,
+         wrong-import-order,
+         xrange-builtin,
+         zip-builtin-not-iterating,
+
+
+ [REPORTS]
+
+ # Set the output format. Available formats are text, parseable, colorized, msvs
+ # (visual studio) and html. You can also give a reporter class, eg
+ # mypackage.mymodule.MyReporterClass.
+ output-format=text
+
+ # Put messages in a separate file for each module / package specified on the
+ # command line instead of printing them on stdout. Reports (if any) will be
+ # written in a file named "pylint_global.[txt|html]". This option is deprecated
+ # and it will be removed in Pylint 2.0.
+ files-output=no
+
+ # Tells whether to display a full report or only the messages
+ reports=no
+
+ # Python expression which should return a note less than 10 (10 is the highest
+ # note). You have access to the variables errors, warning, statement which
+ # respectively contain the number of errors / warnings messages and the total
+ # number of statements analyzed. This is used by the global evaluation report
+ # (RP0004).
+ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+ # Template used to display messages. This is a python new-style format string
+ # used to format the message information. See doc for all details
+ #msg-template=
+
+
+ [BASIC]
+
+ # Good variable names which should always be accepted, separated by a comma
+ good-names=main,_
+
+ # Bad variable names which should always be refused, separated by a comma
+ bad-names=
+
+ # Colon-delimited sets of names that determine each other's naming style when
+ # the name regexes allow several styles.
+ name-group=
+
+ # Include a hint for the correct naming format with invalid-name
+ include-naming-hint=no
+
+ # List of decorators that produce properties, such as abc.abstractproperty. Add
+ # to this list to register other decorators that produce valid properties.
+ property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl
+
+ # Regular expression matching correct function names
+ function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$
+
+ # Regular expression matching correct variable names
+ variable-rgx=^[a-z][a-z0-9_]*$
+
+ # Regular expression matching correct constant names
+ const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+ # Regular expression matching correct attribute names
+ attr-rgx=^_{0,2}[a-z][a-z0-9_]*$
+
+ # Regular expression matching correct argument names
+ argument-rgx=^[a-z][a-z0-9_]*$
+
+ # Regular expression matching correct class attribute names
+ class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+ # Regular expression matching correct inline iteration names
+ inlinevar-rgx=^[a-z][a-z0-9_]*$
+
+ # Regular expression matching correct class names
+ class-rgx=^_?[A-Z][a-zA-Z0-9]*$
+
+ # Regular expression matching correct module names
+ module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$
+
+ # Regular expression matching correct method names
+ method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$
+
+ # Regular expression which should only match function or class names that do
+ # not require a docstring.
+ no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$
+
+ # Minimum line length for functions/classes that require docstrings, shorter
+ # ones are exempt.
+ docstring-min-length=10
+
+
+ [TYPECHECK]
+
+ # List of decorators that produce context managers, such as
+ # contextlib.contextmanager. Add to this list to register other decorators that
+ # produce valid context managers.
+ contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager
+
+ # Tells whether missing members accessed in mixin class should be ignored. A
+ # mixin class is detected if its name ends with "mixin" (case insensitive).
+ ignore-mixin-members=yes
+
+ # List of module names for which member attributes should not be checked
+ # (useful for modules/projects where namespaces are manipulated during runtime
+ # and thus existing member attributes cannot be deduced by static analysis. It
+ # supports qualified module names, as well as Unix pattern matching.
+ ignored-modules=
+
+ # List of class names for which member attributes should not be checked (useful
+ # for classes with dynamically set attributes). This supports the use of
+ # qualified names.
+ ignored-classes=optparse.Values,thread._local,_thread._local
+
+ # List of members which are set dynamically and missed by pylint inference
+ # system, and so shouldn't trigger E1101 when accessed. Python regular
+ # expressions are accepted.
+ generated-members=
+
+
+ [FORMAT]
+
+ # Maximum number of characters on a single line.
+ max-line-length=100
+
+ # TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt
+ # lines made too long by directives to pytype.
+
+ # Regexp for a line that is allowed to be longer than the limit.
+ ignore-long-lines=(?x)(
+   ^\s*(\#\ )?<?https?://\S+>?$|
+   ^\s*(from\s+\S+\s+)?import\s+.+$)
+
+ # Allow the body of an if to be on the same line as the test if there is no
+ # else.
+ single-line-if-stmt=yes
+
+ # List of optional constructs for which whitespace checking is disabled. `dict-
+ # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
+ # `trailing-comma` allows a space between comma and closing bracket: (a, ).
+ # `empty-line` allows space-only lines.
+ no-space-check=
+
+ # Maximum number of lines in a module
+ max-module-lines=99999
+
+ # String used as indentation unit. The internal Google style guide mandates 2
+ # spaces. Google's externally-published style guide says 4, consistent with
+ # PEP 8. Here we use 4 spaces.
+ indent-string='    '
+
+ # Number of spaces of indent required inside a hanging or continued line.
+ indent-after-paren=4
+
+ # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+ expected-line-ending-format=
+
+
+ [MISCELLANEOUS]
+
+ # List of note tags to take in consideration, separated by a comma.
+ notes=TODO
+
+
+ [STRING]
+
+ # This flag controls whether inconsistent-quotes generates a warning when the
+ # character used as a quote delimiter is used inconsistently within a module.
+ check-quote-consistency=yes
+
+
+ [VARIABLES]
+
+ # Tells whether we should check for unused import in __init__ files.
+ init-import=no
+
+ # A regular expression matching the name of dummy variables (i.e. expectedly
+ # not used).
+ dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_)
+
+ # List of additional names supposed to be defined in builtins. Remember that
+ # you should avoid defining new builtins when possible.
+ additional-builtins=
+
+ # List of strings which can identify a callback function by name. A callback
+ # name must start or end with one of those strings.
+ callbacks=cb_,_cb
+
+ # List of qualified module names which can have objects that can redefine
+ # builtins.
+ redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools
+
+
+ [LOGGING]
+
+ # Logging modules to check that the string format arguments are in logging
+ # function parameter format
+ logging-modules=logging,absl.logging,tensorflow.io.logging
+
+
+ [SIMILARITIES]
+
+ # Minimum lines number of a similarity.
+ min-similarity-lines=4
+
+ # Ignore comments when computing similarities.
+ ignore-comments=yes
+
+ # Ignore docstrings when computing similarities.
+ ignore-docstrings=yes
+
+ # Ignore imports when computing similarities.
+ ignore-imports=no
+
+
+ [SPELLING]
+
+ # Spelling dictionary name. Available dictionaries: none. To make it work,
+ # install the python-enchant package.
+ spelling-dict=
+
+ # List of comma separated words that should not be checked.
+ spelling-ignore-words=
+
+ # A path to a file that contains private dictionary; one word per line.
+ spelling-private-dict-file=
+
+ # Tells whether to store unknown words to indicated private dictionary in
+ # --spelling-private-dict-file option instead of raising a message.
+ spelling-store-unknown-words=no
+
+
+ [IMPORTS]
+
+ # Deprecated modules which should not be used, separated by a comma
+ deprecated-modules=regsub,
+                    TERMIOS,
+                    Bastion,
+                    rexec,
+                    sets
+
+ # Create a graph of every (i.e. internal and external) dependency in the
+ # given file (report RP0402 must not be disabled)
+ import-graph=
+
+ # Create a graph of external dependencies in the given file (report RP0402 must
+ # not be disabled)
+ ext-import-graph=
+
+ # Create a graph of internal dependencies in the given file (report RP0402 must
+ # not be disabled)
+ int-import-graph=
+
+ # Force import order to recognize a module as part of the standard
+ # compatibility libraries.
+ known-standard-library=
+
+ # Force import order to recognize a module as part of a third party library.
+ known-third-party=enchant, absl
+
+ # Analyse import fallback blocks. This can be used to support both Python 2 and
+ # 3 compatible code, which means that the block might have code that exists
+ # only in one or another interpreter, leading to false positives when analysed.
+ analyse-fallback-blocks=no
+
+
+ [CLASSES]
+
+ # List of method names used to declare (i.e. assign) instance attributes.
+ defining-attr-methods=__init__,
+                       __new__,
+                       setUp
+
+ # List of member names, which should be excluded from the protected access
+ # warning.
+ exclude-protected=_asdict,
+                   _fields,
+                   _replace,
+                   _source,
+                   _make
+
+ # List of valid names for the first argument in a class method.
+ valid-classmethod-first-arg=cls,
+                             class_
+
+ # List of valid names for the first argument in a metaclass class method.
+ valid-metaclass-classmethod-first-arg=mcs
+
+
+ [EXCEPTIONS]
+
+ # Exceptions that will emit a warning when being caught. Defaults to
+ # "Exception"
+ overgeneral-exceptions=StandardError,
+                        Exception,
+                        BaseException
+
+ #######
+
+ # https://github.com/edaniszewski/pylint-quotes#configuration
+ string-quote=single
+ triple-quote=double
+ docstring-quote=double
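With this file at the repository root, Pylint discovers it automatically; it can also be pointed at explicitly, which is handy when linting from another directory (a small sketch):

```bash
# Run Pylint against the package using this configuration file explicitly.
pylint --rcfile=.pylintrc ./fastchat/
```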
2023-07-05-conv.json ADDED
The diff for this file is too large to render. See raw diff
 
LICENSE ADDED
@@ -0,0 +1,201 @@
+                                  Apache License
+                            Version 2.0, January 2004
+                         http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+       "License" shall mean the terms and conditions for use, reproduction,
+       and distribution as defined by Sections 1 through 9 of this document.
+
+       "Licensor" shall mean the copyright owner or entity authorized by
+       the copyright owner that is granting the License.
+
+       "Legal Entity" shall mean the union of the acting entity and all
+       other entities that control, are controlled by, or are under common
+       control with that entity. For the purposes of this definition,
+       "control" means (i) the power, direct or indirect, to cause the
+       direction or management of such entity, whether by contract or
+       otherwise, or (ii) ownership of fifty percent (50%) or more of the
+       outstanding shares, or (iii) beneficial ownership of such entity.
+
+       "You" (or "Your") shall mean an individual or Legal Entity
+       exercising permissions granted by this License.
+
+       "Source" form shall mean the preferred form for making modifications,
+       including but not limited to software source code, documentation
+       source, and configuration files.
+
+       "Object" form shall mean any form resulting from mechanical
+       transformation or translation of a Source form, including but
+       not limited to compiled object code, generated documentation,
+       and conversions to other media types.
+
+       "Work" shall mean the work of authorship, whether in Source or
+       Object form, made available under the License, as indicated by a
+       copyright notice that is included in or attached to the work
+       (an example is provided in the Appendix below).
+
+       "Derivative Works" shall mean any work, whether in Source or Object
+       form, that is based on (or derived from) the Work and for which the
+       editorial revisions, annotations, elaborations, or other modifications
+       represent, as a whole, an original work of authorship. For the purposes
+       of this License, Derivative Works shall not include works that remain
+       separable from, or merely link (or bind by name) to the interfaces of,
+       the Work and Derivative Works thereof.
+
+       "Contribution" shall mean any work of authorship, including
+       the original version of the Work and any modifications or additions
+       to that Work or Derivative Works thereof, that is intentionally
+       submitted to Licensor for inclusion in the Work by the copyright owner
+       or by an individual or Legal Entity authorized to submit on behalf of
+       the copyright owner. For the purposes of this definition, "submitted"
+       means any form of electronic, verbal, or written communication sent
+       to the Licensor or its representatives, including but not limited to
+       communication on electronic mailing lists, source code control systems,
+       and issue tracking systems that are managed by, or on behalf of, the
+       Licensor for the purpose of discussing and improving the Work, but
+       excluding communication that is conspicuously marked or otherwise
+       designated in writing by the copyright owner as "Not a Contribution."
+
+       "Contributor" shall mean Licensor and any individual or Legal Entity
+       on behalf of whom a Contribution has been received by Licensor and
+       subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       copyright license to reproduce, prepare Derivative Works of,
+       publicly display, publicly perform, sublicense, and distribute the
+       Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       (except as stated in this section) patent license to make, have made,
+       use, offer to sell, sell, import, and otherwise transfer the Work,
+       where such license applies only to those patent claims licensable
+       by such Contributor that are necessarily infringed by their
+       Contribution(s) alone or by combination of their Contribution(s)
+       with the Work to which such Contribution(s) was submitted. If You
+       institute patent litigation against any entity (including a
+       cross-claim or counterclaim in a lawsuit) alleging that the Work
+       or a Contribution incorporated within the Work constitutes direct
+       or contributory patent infringement, then any patent licenses
+       granted to You under this License for that Work shall terminate
+       as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+       Work or Derivative Works thereof in any medium, with or without
+       modifications, and in Source or Object form, provided that You
+       meet the following conditions:
+
+       (a) You must give any other recipients of the Work or
+           Derivative Works a copy of this License; and
+
+       (b) You must cause any modified files to carry prominent notices
+           stating that You changed the files; and
+
+       (c) You must retain, in the Source form of any Derivative Works
+           that You distribute, all copyright, patent, trademark, and
+           attribution notices from the Source form of the Work,
+           excluding those notices that do not pertain to any part of
+           the Derivative Works; and
+
+       (d) If the Work includes a "NOTICE" text file as part of its
+           distribution, then any Derivative Works that You distribute must
+           include a readable copy of the attribution notices contained
+           within such NOTICE file, excluding those notices that do not
+           pertain to any part of the Derivative Works, in at least one
+           of the following places: within a NOTICE text file distributed
+           as part of the Derivative Works; within the Source form or
+           documentation, if provided along with the Derivative Works; or,
+           within a display generated by the Derivative Works, if and
+           wherever such third-party notices normally appear. The contents
+           of the NOTICE file are for informational purposes only and
+           do not modify the License. You may add Your own attribution
+           notices within Derivative Works that You distribute, alongside
+           or as an addendum to the NOTICE text from the Work, provided
+           that such additional attribution notices cannot be construed
+           as modifying the License.
+
+       You may add Your own copyright statement to Your modifications and
+       may provide additional or different license terms and conditions
+       for use, reproduction, or distribution of Your modifications, or
+       for any such Derivative Works as a whole, provided Your use,
+       reproduction, and distribution of the Work otherwise complies with
+       the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+       any Contribution intentionally submitted for inclusion in the Work
+       by You to the Licensor shall be under the terms and conditions of
+       this License, without any additional terms or conditions.
+       Notwithstanding the above, nothing herein shall supersede or modify
+       the terms of any separate license agreement you may have executed
+       with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+       names, trademarks, service marks, or product names of the Licensor,
+       except as required for reasonable and customary use in describing the
+       origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+       agreed to in writing, Licensor provides the Work (and each
+       Contributor provides its Contributions) on an "AS IS" BASIS,
+       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+       implied, including, without limitation, any warranties or conditions
+       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+       PARTICULAR PURPOSE. You are solely responsible for determining the
+       appropriateness of using or redistributing the Work and assume any
+       risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+       whether in tort (including negligence), contract, or otherwise,
+       unless required by applicable law (such as deliberate and grossly
+       negligent acts) or agreed to in writing, shall any Contributor be
+       liable to You for damages, including any direct, indirect, special,
+       incidental, or consequential damages of any character arising as a
+       result of this License or out of the use or inability to use the
+       Work (including but not limited to damages for loss of goodwill,
+       work stoppage, computer failure or malfunction, or any and all
+       other commercial damages or losses), even if such Contributor
+       has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+       the Work or Derivative Works thereof, You may choose to offer,
+       and charge a fee for, acceptance of support, warranty, indemnity,
+       or other liability obligations and/or rights consistent with this
+       License. However, in accepting such obligations, You may act only
+       on Your own behalf and on Your sole responsibility, not on behalf
+       of any other Contributor, and only if You agree to indemnify,
+       defend, and hold each Contributor harmless for any liability
+       incurred by, or claims asserted against, such Contributor by reason
+       of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+       To apply the Apache License to your work, attach the following
+       boilerplate notice, with the fields enclosed by brackets "[]"
+       replaced with your own identifying information. (Don't include
+       the brackets!) The text should be enclosed in the appropriate
+       comment syntax for the file format. We also recommend that a
+       file or class name and description of purpose be included on the
+       same "printed page" as the copyright notice for easier
+       identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
README.md CHANGED
@@ -1,12 +1,296 @@
  ---
- title: Calahealthgpt
- emoji: 📈
- colorFrom: gray
- colorTo: indigo
+ title: calahealthgpt
+ app_file: fastchat/serve/gradio_web_server.py
  sdk: gradio
  sdk_version: 3.35.2
- app_file: app.py
- pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # FastChat
+ | [**Demo**](https://chat.lmsys.org/) | [**Arena**](https://arena.lmsys.org) | [**Discord**](https://discord.gg/HSWAKCrnFx) | [**Twitter**](https://twitter.com/lmsysorg) |
+
+ FastChat is an open platform for training, serving, and evaluating large language model-based chatbots. The core features include:
+ - The weights, training code, and evaluation code for state-of-the-art models (e.g., Vicuna, FastChat-T5).
+ - A distributed multi-model serving system with web UI and OpenAI-compatible RESTful APIs.
+
+ ## News
+ - [2023/06] 🔥 We introduced **LongChat**, our long-context chatbots and evaluation tools. Check out the blog [post](https://lmsys.org/blog/2023-06-29-longchat/) and [code](https://github.com/DachengLi1/LongChat/).
+ - [2023/05] We introduced **Chatbot Arena** for battles among LLMs. Check out the blog [post](https://lmsys.org/blog/2023-05-03-arena) and [demo](https://arena.lmsys.org).
+ - [2023/04] We released **FastChat-T5** compatible with commercial usage. Check out the [weights](#fastchat-t5) and [demo](https://chat.lmsys.org).
+ - [2023/03] We released **Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90% ChatGPT Quality**. Check out the blog [post](https://vicuna.lmsys.org) and [demo](https://chat.lmsys.org).
+
+ <a href="https://chat.lmsys.org"><img src="assets/demo_narrow.gif" width="70%"></a>
+
+ ## Contents
+ - [Install](#install)
+ - [Model Weights](#model-weights)
+ - [Inference with Command Line Interface](#inference-with-command-line-interface)
+ - [Serving with Web GUI](#serving-with-web-gui)
+ - [API](#api)
+ - [Evaluation](#evaluation)
+ - [Fine-tuning](#fine-tuning)
+ - [Citation](#citation)
+
+ ## Install
+
+ ### Method 1: With pip
+
+ ```bash
+ pip3 install fschat
+ ```
+
+ ### Method 2: From source
+
+ 1. Clone this repository and navigate to the FastChat folder
+ ```bash
+ git clone https://github.com/lm-sys/FastChat.git
+ cd FastChat
+ ```
+
+ If you are running on Mac:
+ ```bash
+ brew install rust cmake
+ ```
+
+ 2. Install the package
+ ```bash
+ pip3 install --upgrade pip  # enable PEP 660 support
+ pip3 install -e .
+ ```
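Either install method can be smoke-tested afterwards. A quick sketch; it assumes the one-line `fastchat/__init__.py` in this commit defines `__version__`, which is how the package is usually versioned:

```bash
# Print the installed FastChat version as a smoke test.
python3 -c "import fastchat; print(fastchat.__version__)"
```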
+
+ ## Model Weights
+ ### Vicuna Weights
+ We release [Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/) weights v1.3 as merged weights directly. You do not need to apply a delta.
+ Vicuna is based on LLaMA and should be used under LLaMA's [model license](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md).
+
+ You can use the commands below to start chatting. They will automatically download the weights from Hugging Face repos.
+ See more command options and how to handle out-of-memory in the "Inference with Command Line Interface" section below.
+
+ | Size | Chat Command | Hugging Face Repo |
+ | --- | --- | --- |
+ | 7B | `python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3` | [lmsys/vicuna-7b-v1.3](https://huggingface.co/lmsys/vicuna-7b-v1.3) |
+ | 13B | `python3 -m fastchat.serve.cli --model-path lmsys/vicuna-13b-v1.3` | [lmsys/vicuna-13b-v1.3](https://huggingface.co/lmsys/vicuna-13b-v1.3) |
+ | 33B | `python3 -m fastchat.serve.cli --model-path lmsys/vicuna-33b-v1.3` | [lmsys/vicuna-33b-v1.3](https://huggingface.co/lmsys/vicuna-33b-v1.3) |
+
+ **Old weights**: see [docs/vicuna_weights_version.md](docs/vicuna_weights_version.md) for all versions of weights and their differences.
+
+ ### LongChat
+ We release LongChat models under LLaMA's [model license](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md).
+
+ | Size | Chat Command | Hugging Face Repo |
+ | --- | --- | --- |
+ | 7B | `python3 -m fastchat.serve.cli --model-path lmsys/longchat-7b-16k` | [lmsys/longchat-7b-16k](https://huggingface.co/lmsys/longchat-7b-16k) |
+ | 13B | `python3 -m fastchat.serve.cli --model-path lmsys/longchat-13b-16k` | [lmsys/longchat-13b-16k](https://huggingface.co/lmsys/longchat-13b-16k) |
+
+ ### FastChat-T5
+ You can use the commands below to chat with FastChat-T5. They will automatically download the weights from Hugging Face repos.
+
+ | Size | Chat Command | Hugging Face Repo |
+ | --- | --- | --- |
+ | 3B | `python3 -m fastchat.serve.cli --model-path lmsys/fastchat-t5-3b-v1.0` | [lmsys/fastchat-t5-3b-v1.0](https://huggingface.co/lmsys/fastchat-t5-3b-v1.0) |
+
+ ## Inference with Command Line Interface
+
+ <a href="https://chat.lmsys.org"><img src="assets/screenshot_cli.png" width="70%"></a>
+
+ (Experimental Feature: You can specify `--style rich` to enable rich text output and better text streaming quality for some non-ASCII content. This may not work properly on certain terminals.)
+
+ #### Supported Models
+ FastChat supports a wide range of models, including
+ Vicuna, Alpaca, Baize, ChatGLM, Dolly, Falcon, FastChat-T5, GPT4All, Guanaco, MPT, OpenAssistant, RedPajama, StableLM, WizardLM, and more.
+
+ See a complete list of supported models and instructions to add a new model [here](docs/model_support.md).
+
+ #### Single GPU
+ The command below requires around 14GB of GPU memory for Vicuna-7B and 28GB of GPU memory for Vicuna-13B.
+ See the "Not Enough Memory" section below if you do not have enough memory.
+ `--model-path` can be a local folder or a Hugging Face repo name.
+ ```
+ python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3
+ ```
+
+ #### Multiple GPUs
+ You can use model parallelism to aggregate GPU memory from multiple GPUs on the same machine.
+ ```
+ python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3 --num-gpus 2
+ ```
+
+ #### CPU Only
+ This runs on the CPU only and does not require a GPU. It requires around 30GB of CPU memory for Vicuna-7B and around 60GB of CPU memory for Vicuna-13B.
+ ```
+ python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3 --device cpu
+ ```
+
+ #### Metal Backend (Mac Computers with Apple Silicon or AMD GPUs)
+ Use `--device mps` to enable GPU acceleration on Mac computers (requires torch >= 2.0).
+ Use `--load-8bit` to turn on 8-bit compression.
+ ```
+ python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3 --device mps --load-8bit
+ ```
+ Vicuna-7B can run on a 32GB M1 MacBook at 1-2 words per second.
+
+ #### Intel XPU (Intel Data Center and Arc A-Series GPUs)
+ Install the [Intel Extension for PyTorch](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/tutorials/installation.html). Set the OneAPI environment variables:
+ ```
+ source /opt/intel/oneapi/setvars.sh
+ ```
+
+ Use `--device xpu` to enable XPU/GPU acceleration.
+ ```
+ python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3 --device xpu
+ ```
+ Vicuna-7B can run on an Intel Arc A770 16GB.
+
+ #### Not Enough Memory
+ If you do not have enough memory, you can enable 8-bit compression by adding `--load-8bit` to the commands above.
+ This can reduce memory usage by around half with slightly degraded model quality.
+ It is compatible with the CPU, GPU, and Metal backends.
+ Vicuna-13B with 8-bit compression can run on a single NVIDIA 3090/4080/T4/V100 (16GB) GPU.
+ ```
+ python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3 --load-8bit
+ ```
+
+ In addition, you can add `--cpu-offloading` to the commands above to offload weights that don't fit on your GPU onto CPU memory. This requires 8-bit compression to be enabled and the bitsandbytes package to be installed, which is only available on Linux operating systems.
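Putting the two flags together, a larger model can be squeezed onto a smaller GPU roughly like this (a sketch on Linux; whether it actually fits depends on your GPU and CPU RAM):

```bash
# 8-bit compression plus CPU offloading for weights that exceed GPU memory.
python3 -m fastchat.serve.cli --model-path lmsys/vicuna-13b-v1.3 --load-8bit --cpu-offloading
```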
+
+ #### More Platforms
+ - FastChat supports GPTQ 4-bit inference with [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa). See [docs/gptq.md](/docs/gptq.md).
+ - [MLC LLM](https://mlc.ai/mlc-llm/), backed by the [TVM Unity](https://github.com/apache/tvm/tree/unity) compiler, deploys Vicuna natively on phones, consumer-class GPUs, and web browsers via Vulkan, Metal, CUDA, and WebGPU.
+
+ ## Serving with Web GUI
+
+ <a href="https://chat.lmsys.org"><img src="assets/screenshot_gui.png" width="70%"></a>
+
+ To serve using the web UI, you need three main components: web servers that interface with users, model workers that host one or more models, and a controller to coordinate the web server and model workers. You can learn more about the architecture [here](docs/server_arch.md).
+
+ Here are the commands to follow in your terminal:
+
+ #### Launch the controller
+ ```bash
+ python3 -m fastchat.serve.controller
+ ```
+
+ This controller manages the distributed workers.
+
+ #### Launch the model worker(s)
+ ```bash
+ python3 -m fastchat.serve.model_worker --model-path lmsys/vicuna-7b-v1.3
+ ```
+ Wait until the process finishes loading the model and you see "Uvicorn running on ...". The model worker will register itself with the controller.
+
+ To ensure that your model worker is connected to your controller properly, send a test message using the following command:
+ ```bash
+ python3 -m fastchat.serve.test_message --model-name vicuna-7b-v1.3
+ ```
+ You will see a short output.
+
+ #### Launch the Gradio web server
+ ```bash
+ python3 -m fastchat.serve.gradio_web_server
+ ```
+
+ This is the user interface that users will interact with.
+
+ By following these steps, you will be able to serve your models using the web UI. You can open your browser and chat with a model now.
+ If the models do not show up, try rebooting the Gradio web server.
+
+ #### (Optional) Advanced Features
+ - You can register multiple model workers to a single controller, which can be used for serving a single model with higher throughput or serving multiple models at the same time. When doing so, please allocate different GPUs and ports for different model workers.
+ ```
+ # worker 0
+ CUDA_VISIBLE_DEVICES=0 python3 -m fastchat.serve.model_worker --model-path lmsys/vicuna-7b-v1.3 --controller http://localhost:21001 --port 31000 --worker http://localhost:31000
+ # worker 1
+ CUDA_VISIBLE_DEVICES=1 python3 -m fastchat.serve.model_worker --model-path lmsys/fastchat-t5-3b-v1.0 --controller http://localhost:21001 --port 31001 --worker http://localhost:31001
+ ```
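With both workers up, each model can be sanity-checked through the controller by reusing the test client shown earlier:

```bash
# Each registered model should answer a test message via the controller.
python3 -m fastchat.serve.test_message --model-name vicuna-7b-v1.3
python3 -m fastchat.serve.test_message --model-name fastchat-t5-3b-v1.0
```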
+ - You can also launch a multi-tab Gradio server, which includes the Chatbot Arena tabs.
+ ```bash
+ python3 -m fastchat.serve.gradio_web_server_multi
+ ```
+
+ ## API
+ ### OpenAI-Compatible RESTful APIs & SDK
+ FastChat provides OpenAI-compatible APIs for its supported models, so you can use FastChat as a local drop-in replacement for OpenAI APIs.
+ The FastChat server is compatible with both the [openai-python](https://github.com/openai/openai-python) library and cURL commands.
+ See [docs/openai_api.md](docs/openai_api.md).
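As a minimal sketch of the flow (the exact launch flags and options are documented in docs/openai_api.md): start the REST server on top of a running controller and worker, then issue a standard OpenAI-style request:

```bash
# Launch the OpenAI-compatible REST server in one terminal
# (assumes the controller and a vicuna-7b-v1.3 worker from above are running).
python3 -m fastchat.serve.openai_api_server --host localhost --port 8000

# In another terminal, query the standard chat completions endpoint.
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "vicuna-7b-v1.3",
    "messages": [{"role": "user", "content": "Hello! Who are you?"}]
  }'
```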
+
+ ### Hugging Face Generation APIs
+ See [fastchat/serve/huggingface_api.py](fastchat/serve/huggingface_api.py).
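A sketch of invoking that script directly, assuming it accepts the same `--model-path` argument as the other `fastchat.serve` entry points (check the file itself for the exact flags):

```bash
# Run a single Hugging Face generation call outside the serving stack.
python3 -m fastchat.serve.huggingface_api --model-path lmsys/vicuna-7b-v1.3
```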
+
+ ### LangChain Integration
+ See [docs/langchain_integration.md](docs/langchain_integration.md).
+
+ ## Evaluation
+ We use MT-bench, a set of challenging multi-turn open-ended questions, to evaluate models.
+ To automate the evaluation process, we prompt strong LLMs like GPT-4 to act as judges and assess the quality of the models' responses.
+ See instructions for running MT-bench at [fastchat/llm_judge](fastchat/llm_judge).
+
+ MT-bench is the new recommended way to benchmark your models. If you are still looking for the old 80 questions used in the Vicuna blog post, please go to [vicuna-blog-eval](https://github.com/lm-sys/vicuna-blog-eval).
+
+ ## Fine-tuning
+ ### Data
+
+ Vicuna is created by fine-tuning a LLaMA base model on approximately 70K user-shared conversations gathered from ShareGPT.com with public APIs. To ensure data quality, we convert the HTML back to markdown and filter out some inappropriate or low-quality samples. Additionally, we divide lengthy conversations into smaller segments that fit the model's maximum context length. For detailed instructions on cleaning the ShareGPT data, check out [here](docs/commands/data_cleaning.md).
+
+ We will not release the ShareGPT dataset. If you would like to try the fine-tuning code, you can run it with some dummy conversations in [dummy_conversation.json](data/dummy_conversation.json). You can follow the same format (sketched below) and plug in your own data.
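For reference, a minimal dataset in the same shape as `data/dummy_conversation.json` can be written like this. This is a sketch of the assumed ShareGPT-style schema; check the bundled file for the authoritative format:

```bash
# Write a one-sample dataset in the assumed conversation format.
cat > my_data.json <<'EOF'
[
  {
    "id": "example-1",
    "conversations": [
      {"from": "human", "value": "What is FastChat?"},
      {"from": "gpt", "value": "FastChat is an open platform for training, serving, and evaluating chatbots."}
    ]
  }
]
EOF
```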
+
+ ### Code and Hyperparameters
+ Our code is based on [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) with additional support for multi-turn conversations.
+ We use hyperparameters similar to those of Stanford Alpaca.
+
+ | Hyperparameter | Global Batch Size | Learning rate | Epochs | Max length | Weight decay |
+ | --- | ---: | ---: | ---: | ---: | ---: |
+ | Vicuna-13B | 128 | 2e-5 | 3 | 2048 | 0 |
+
+ ### Fine-tuning Vicuna-7B with Local GPUs
+ You can use the following command to train Vicuna-7B with 4 x A100 (40GB); the per-device batch size (2) times 4 GPUs times 16 gradient-accumulation steps reproduces the global batch size of 128 from the table above.
+ Update `--model_name_or_path` with the actual path to LLaMA weights and `--data_path` with the actual path to data.
+
+ ```bash
+ torchrun --nproc_per_node=4 --master_port=20001 fastchat/train/train_mem.py \
+     --model_name_or_path ~/model_weights/llama-7b \
+     --data_path data/dummy_conversation.json \
+     --bf16 True \
+     --output_dir output_vicuna \
+     --num_train_epochs 3 \
+     --per_device_train_batch_size 2 \
+     --per_device_eval_batch_size 2 \
+     --gradient_accumulation_steps 16 \
+     --evaluation_strategy "no" \
+     --save_strategy "steps" \
+     --save_steps 1200 \
+     --save_total_limit 10 \
+     --learning_rate 2e-5 \
+     --weight_decay 0. \
+     --warmup_ratio 0.03 \
+     --lr_scheduler_type "cosine" \
+     --logging_steps 1 \
+     --fsdp "full_shard auto_wrap" \
+     --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
+     --tf32 True \
+     --model_max_length 2048 \
+     --gradient_checkpointing True \
+     --lazy_preprocess True
+ ```
+
+ If you meet out-of-memory errors during model saving, see solutions [here](https://github.com/pytorch/pytorch/issues/98823).
+
+ ### Other models and LoRA support
+ More instructions to train other models (e.g., FastChat-T5) and use LoRA are in [docs/training.md](docs/training.md).
+
+ ### Fine-tuning on Any Cloud with SkyPilot
+ [SkyPilot](https://github.com/skypilot-org/skypilot) is a framework built by UC Berkeley for easily and cost-effectively running ML workloads on any cloud (AWS, GCP, Azure, Lambda, etc.).
+ Find SkyPilot documentation [here](https://github.com/skypilot-org/skypilot/tree/master/llm/vicuna) on using managed spot instances to train Vicuna and save on your cloud costs.
+
+ ## Citation
+ The code (training, serving, and evaluation) in this repository is mostly developed for or derived from the paper below.
+ Please cite it if you find the repository helpful.
+
+ ```
+ @misc{zheng2023judging,
+       title={Judging LLM-as-a-judge with MT-Bench and Chatbot Arena},
+       author={Lianmin Zheng and Wei-Lin Chiang and Ying Sheng and Siyuan Zhuang and Zhanghao Wu and Yonghao Zhuang and Zi Lin and Zhuohan Li and Dacheng Li and Eric P. Xing and Hao Zhang and Joseph E. Gonzalez and Ion Stoica},
+       year={2023},
+       eprint={2306.05685},
+       archivePrefix={arXiv},
+       primaryClass={cs.CL}
+ }
+ ```
+
+ We are also planning to add more of our research to this repository.
assets/demo_narrow.gif ADDED
Git LFS Details
  • SHA256: 6e02d6a8fc6820f784105b3515b595730cc74542b4cf3f2a84a4361a0db17766
  • Pointer size: 133 Bytes
  • Size of remote file: 12.3 MB
assets/qa_browser.png ADDED
assets/screenshot_cli.png ADDED
assets/screenshot_gui.png ADDED
assets/server_arch.png ADDED
assets/vicuna_logo.jpeg ADDED
controller.log ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-07-05 20:23:27 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
2
+ 2023-07-05 20:23:27 | ERROR | stderr | INFO: Started server process [380845]
3
+ 2023-07-05 20:23:27 | ERROR | stderr | INFO: Waiting for application startup.
4
+ 2023-07-05 20:23:27 | ERROR | stderr | INFO: Application startup complete.
5
+ 2023-07-05 20:23:27 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
6
+ 2023-07-05 20:24:21 | INFO | stdout | INFO: 127.0.0.1:33748 - "POST /refresh_all_workers HTTP/1.1" 200 OK
7
+ 2023-07-05 20:24:21 | INFO | stdout | INFO: 127.0.0.1:33762 - "POST /list_models HTTP/1.1" 200 OK
8
+ 2023-07-05 20:27:33 | ERROR | stderr | INFO: Shutting down
9
+ 2023-07-05 20:27:34 | ERROR | stderr | INFO: Waiting for application shutdown.
10
+ 2023-07-05 20:27:34 | ERROR | stderr | INFO: Application shutdown complete.
11
+ 2023-07-05 20:27:34 | ERROR | stderr | INFO: Finished server process [380845]
12
+ 2023-07-05 20:27:38 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
13
+ 2023-07-05 20:27:38 | ERROR | stderr | Traceback (most recent call last):
14
+ 2023-07-05 20:27:38 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
15
+ 2023-07-05 20:27:38 | ERROR | stderr | lock.acquire()
16
+ 2023-07-05 20:27:38 | ERROR | stderr | KeyboardInterrupt:
17
+ 2023-07-05 20:27:41 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
18
+ 2023-07-05 20:27:41 | ERROR | stderr | INFO: Started server process [382194]
19
+ 2023-07-05 20:27:41 | ERROR | stderr | INFO: Waiting for application startup.
20
+ 2023-07-05 20:27:41 | ERROR | stderr | INFO: Application startup complete.
21
+ 2023-07-05 20:27:41 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
22
+ 2023-07-05 20:27:49 | INFO | stdout | INFO: 127.0.0.1:43004 - "POST /refresh_all_workers HTTP/1.1" 200 OK
23
+ 2023-07-05 20:27:49 | INFO | stdout | INFO: 127.0.0.1:43006 - "POST /list_models HTTP/1.1" 200 OK
24
+ 2023-07-05 20:33:25 | ERROR | stderr | INFO: Shutting down
25
+ 2023-07-05 20:33:25 | ERROR | stderr | INFO: Waiting for application shutdown.
26
+ 2023-07-05 20:33:25 | ERROR | stderr | INFO: Application shutdown complete.
27
+ 2023-07-05 20:33:25 | ERROR | stderr | INFO: Finished server process [382194]
28
+ 2023-07-05 20:33:58 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
29
+ 2023-07-05 20:33:58 | ERROR | stderr | INFO: Started server process [383813]
30
+ 2023-07-05 20:33:58 | ERROR | stderr | INFO: Waiting for application startup.
31
+ 2023-07-05 20:33:58 | ERROR | stderr | INFO: Application startup complete.
32
+ 2023-07-05 20:33:58 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
33
+ 2023-07-05 20:34:33 | INFO | stdout | INFO: 127.0.0.1:43794 - "POST /refresh_all_workers HTTP/1.1" 200 OK
34
+ 2023-07-05 20:34:33 | INFO | stdout | INFO: 127.0.0.1:43810 - "POST /list_models HTTP/1.1" 200 OK
35
+ 2023-07-05 20:35:52 | ERROR | stderr | INFO: Shutting down
36
+ 2023-07-05 20:35:52 | ERROR | stderr | INFO: Waiting for application shutdown.
37
+ 2023-07-05 20:35:52 | ERROR | stderr | INFO: Application shutdown complete.
38
+ 2023-07-05 20:35:52 | ERROR | stderr | INFO: Finished server process [383813]
39
+ 2023-07-05 20:35:58 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
40
+ 2023-07-05 20:35:58 | ERROR | stderr | Traceback (most recent call last):
41
+ 2023-07-05 20:35:58 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
42
+ 2023-07-05 20:35:58 | ERROR | stderr | lock.acquire()
43
+ 2023-07-05 20:35:58 | ERROR | stderr | KeyboardInterrupt:
44
+ 2023-07-05 20:36:01 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
45
+ 2023-07-05 20:36:01 | ERROR | stderr | INFO: Started server process [384943]
46
+ 2023-07-05 20:36:01 | ERROR | stderr | INFO: Waiting for application startup.
47
+ 2023-07-05 20:36:01 | ERROR | stderr | INFO: Application startup complete.
48
+ 2023-07-05 20:36:01 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
49
+ 2023-07-05 20:36:07 | INFO | stdout | INFO: 127.0.0.1:50858 - "POST /refresh_all_workers HTTP/1.1" 200 OK
50
+ 2023-07-05 20:36:07 | INFO | stdout | INFO: 127.0.0.1:50868 - "POST /list_models HTTP/1.1" 200 OK
51
+ 2023-07-05 21:07:09 | ERROR | stderr | INFO: Shutting down
52
+ 2023-07-05 21:07:09 | ERROR | stderr | INFO: Waiting for application shutdown.
53
+ 2023-07-05 21:07:09 | ERROR | stderr | INFO: Application shutdown complete.
54
+ 2023-07-05 21:07:09 | ERROR | stderr | INFO: Finished server process [384943]
55
+ 2023-07-05 21:07:13 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
56
+ 2023-07-05 21:07:13 | ERROR | stderr | Traceback (most recent call last):
57
+ 2023-07-05 21:07:13 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
58
+ 2023-07-05 21:07:13 | ERROR | stderr | lock.acquire()
59
+ 2023-07-05 21:07:13 | ERROR | stderr | KeyboardInterrupt:
60
+ 2023-07-05 21:07:16 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
61
+ 2023-07-05 21:07:16 | ERROR | stderr | INFO: Started server process [393277]
62
+ 2023-07-05 21:07:16 | ERROR | stderr | INFO: Waiting for application startup.
63
+ 2023-07-05 21:07:16 | ERROR | stderr | INFO: Application startup complete.
64
+ 2023-07-05 21:07:16 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
65
+ 2023-07-05 21:07:24 | INFO | stdout | INFO: 127.0.0.1:38342 - "POST /refresh_all_workers HTTP/1.1" 200 OK
66
+ 2023-07-05 21:07:24 | INFO | stdout | INFO: 127.0.0.1:38356 - "POST /list_models HTTP/1.1" 200 OK
67
+ 2023-07-05 21:07:39 | INFO | stdout | INFO: 127.0.0.1:58236 - "POST /get_worker_address HTTP/1.1" 200 OK
68
+ 2023-07-05 21:08:25 | ERROR | stderr | INFO: Shutting down
69
+ 2023-07-05 21:08:25 | ERROR | stderr | INFO: Waiting for application shutdown.
70
+ 2023-07-05 21:08:25 | ERROR | stderr | INFO: Application shutdown complete.
71
+ 2023-07-05 21:08:25 | ERROR | stderr | INFO: Finished server process [393277]
72
+ 2023-07-05 21:08:31 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
73
+ 2023-07-05 21:08:31 | ERROR | stderr | Traceback (most recent call last):
74
+ 2023-07-05 21:08:31 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
75
+ 2023-07-05 21:08:31 | ERROR | stderr | lock.acquire()
76
+ 2023-07-05 21:08:31 | ERROR | stderr | KeyboardInterrupt:
77
+ 2023-07-05 21:08:34 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
78
+ 2023-07-05 21:08:34 | ERROR | stderr | INFO: Started server process [393958]
79
+ 2023-07-05 21:08:34 | ERROR | stderr | INFO: Waiting for application startup.
80
+ 2023-07-05 21:08:34 | ERROR | stderr | INFO: Application startup complete.
81
+ 2023-07-05 21:08:34 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
82
+ 2023-07-05 21:08:43 | INFO | stdout | INFO: 127.0.0.1:42028 - "POST /refresh_all_workers HTTP/1.1" 200 OK
83
+ 2023-07-05 21:08:43 | INFO | stdout | INFO: 127.0.0.1:42036 - "POST /list_models HTTP/1.1" 200 OK
84
+ 2023-07-05 21:08:50 | INFO | stdout | INFO: 127.0.0.1:40684 - "POST /get_worker_address HTTP/1.1" 200 OK
85
+ 2023-07-05 21:09:07 | INFO | stdout | INFO: 127.0.0.1:57642 - "POST /get_worker_address HTTP/1.1" 200 OK
86
+ 2023-07-05 21:12:16 | ERROR | stderr | INFO: Shutting down
87
+ 2023-07-05 21:12:16 | ERROR | stderr | INFO: Waiting for application shutdown.
88
+ 2023-07-05 21:12:16 | ERROR | stderr | INFO: Application shutdown complete.
89
+ 2023-07-05 21:12:16 | ERROR | stderr | INFO: Finished server process [393958]
90
+ 2023-07-05 21:12:17 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
91
+ 2023-07-05 21:12:17 | ERROR | stderr | Traceback (most recent call last):
92
+ 2023-07-05 21:12:17 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
93
+ 2023-07-05 21:12:17 | ERROR | stderr | lock.acquire()
94
+ 2023-07-05 21:12:17 | ERROR | stderr | KeyboardInterrupt:
95
+ 2023-07-05 21:13:24 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
96
+ 2023-07-05 21:13:24 | ERROR | stderr | INFO: Started server process [395717]
97
+ 2023-07-05 21:13:24 | ERROR | stderr | INFO: Waiting for application startup.
98
+ 2023-07-05 21:13:24 | ERROR | stderr | INFO: Application startup complete.
99
+ 2023-07-05 21:13:24 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
100
+ 2023-07-05 21:13:33 | INFO | stdout | INFO: 127.0.0.1:34954 - "POST /refresh_all_workers HTTP/1.1" 200 OK
101
+ 2023-07-05 21:13:33 | INFO | stdout | INFO: 127.0.0.1:34962 - "POST /list_models HTTP/1.1" 200 OK
102
+ 2023-07-05 21:14:04 | INFO | stdout | INFO: 127.0.0.1:45190 - "POST /get_worker_address HTTP/1.1" 200 OK
103
+ 2023-07-05 21:17:19 | INFO | stdout | INFO: 127.0.0.1:43578 - "POST /refresh_all_workers HTTP/1.1" 200 OK
104
+ 2023-07-05 21:17:19 | INFO | stdout | INFO: 127.0.0.1:43592 - "POST /list_models HTTP/1.1" 200 OK
105
+ 2023-07-05 21:17:36 | INFO | stdout | INFO: 127.0.0.1:57170 - "POST /get_worker_address HTTP/1.1" 200 OK
106
+ 2023-07-05 21:20:16 | INFO | stdout | INFO: 127.0.0.1:55178 - "POST /refresh_all_workers HTTP/1.1" 200 OK
107
+ 2023-07-05 21:20:17 | INFO | stdout | INFO: 127.0.0.1:55180 - "POST /list_models HTTP/1.1" 200 OK
108
+ 2023-07-05 21:20:21 | ERROR | stderr | INFO: Shutting down
109
+ 2023-07-05 21:20:21 | ERROR | stderr | INFO: Waiting for application shutdown.
110
+ 2023-07-05 21:20:21 | ERROR | stderr | INFO: Application shutdown complete.
111
+ 2023-07-05 21:20:21 | ERROR | stderr | INFO: Finished server process [395717]
112
+ 2023-07-05 21:20:22 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
113
+ 2023-07-05 21:20:22 | ERROR | stderr | Traceback (most recent call last):
114
+ 2023-07-05 21:20:22 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
115
+ 2023-07-05 21:20:22 | ERROR | stderr | lock.acquire()
116
+ 2023-07-05 21:20:22 | ERROR | stderr | KeyboardInterrupt:
117
+ 2023-07-05 21:20:25 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
118
+ 2023-07-05 21:20:25 | ERROR | stderr | INFO: Started server process [398096]
119
+ 2023-07-05 21:20:25 | ERROR | stderr | INFO: Waiting for application startup.
120
+ 2023-07-05 21:20:25 | ERROR | stderr | INFO: Application startup complete.
121
+ 2023-07-05 21:20:25 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
122
+ 2023-07-05 21:20:33 | INFO | stdout | INFO: 127.0.0.1:41918 - "POST /refresh_all_workers HTTP/1.1" 200 OK
123
+ 2023-07-05 21:20:33 | INFO | stdout | INFO: 127.0.0.1:41934 - "POST /list_models HTTP/1.1" 200 OK
124
+ 2023-07-05 21:20:46 | INFO | stdout | INFO: 127.0.0.1:45420 - "POST /get_worker_address HTTP/1.1" 200 OK
125
+ 2023-07-05 21:22:17 | INFO | stdout | INFO: 127.0.0.1:38930 - "POST /get_worker_address HTTP/1.1" 200 OK
126
+ 2023-07-05 21:23:04 | ERROR | stderr | INFO: Shutting down
127
+ 2023-07-05 21:23:04 | ERROR | stderr | INFO: Waiting for application shutdown.
128
+ 2023-07-05 21:23:04 | ERROR | stderr | INFO: Application shutdown complete.
129
+ 2023-07-05 21:23:04 | ERROR | stderr | INFO: Finished server process [398096]
130
+ 2023-07-05 21:23:04 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
131
+ 2023-07-05 21:23:04 | ERROR | stderr | Traceback (most recent call last):
132
+ 2023-07-05 21:23:04 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
133
+ 2023-07-05 21:23:04 | ERROR | stderr | lock.acquire()
134
+ 2023-07-05 21:23:04 | ERROR | stderr | KeyboardInterrupt:
135
+ 2023-07-05 21:23:07 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
136
+ 2023-07-05 21:23:07 | ERROR | stderr | INFO: Started server process [399120]
137
+ 2023-07-05 21:23:07 | ERROR | stderr | INFO: Waiting for application startup.
138
+ 2023-07-05 21:23:07 | ERROR | stderr | INFO: Application startup complete.
139
+ 2023-07-05 21:23:07 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
140
+ 2023-07-05 21:23:11 | INFO | stdout | INFO: 127.0.0.1:50332 - "POST /refresh_all_workers HTTP/1.1" 200 OK
141
+ 2023-07-05 21:23:11 | INFO | stdout | INFO: 127.0.0.1:50342 - "POST /list_models HTTP/1.1" 200 OK
142
+ 2023-07-05 21:23:17 | INFO | stdout | INFO: 127.0.0.1:42516 - "POST /get_worker_address HTTP/1.1" 200 OK
143
+ 2023-07-05 22:00:45 | INFO | stdout | INFO: 127.0.0.1:49452 - "POST /get_worker_address HTTP/1.1" 200 OK
144
+ 2023-07-05 22:03:10 | INFO | stdout | INFO: 127.0.0.1:59704 - "POST /get_worker_address HTTP/1.1" 200 OK
145
+ 2023-07-05 22:13:25 | ERROR | stderr | INFO: Shutting down
146
+ 2023-07-05 22:13:25 | ERROR | stderr | INFO: Waiting for application shutdown.
147
+ 2023-07-05 22:13:25 | ERROR | stderr | INFO: Application shutdown complete.
148
+ 2023-07-05 22:13:25 | ERROR | stderr | INFO: Finished server process [399120]
149
+ 2023-07-05 22:13:32 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
150
+ 2023-07-05 22:13:32 | ERROR | stderr | Traceback (most recent call last):
151
+ 2023-07-05 22:13:32 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
152
+ 2023-07-05 22:13:32 | ERROR | stderr | lock.acquire()
153
+ 2023-07-05 22:13:32 | ERROR | stderr | KeyboardInterrupt:
154
+ 2023-07-05 22:13:35 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
155
+ 2023-07-05 22:13:35 | ERROR | stderr | INFO: Started server process [411968]
156
+ 2023-07-05 22:13:35 | ERROR | stderr | INFO: Waiting for application startup.
157
+ 2023-07-05 22:13:35 | ERROR | stderr | INFO: Application startup complete.
158
+ 2023-07-05 22:13:35 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
159
+ 2023-07-05 22:13:41 | INFO | stdout | INFO: 127.0.0.1:56746 - "POST /refresh_all_workers HTTP/1.1" 200 OK
160
+ 2023-07-05 22:13:41 | INFO | stdout | INFO: 127.0.0.1:56758 - "POST /list_models HTTP/1.1" 200 OK
161
+ 2023-07-05 22:14:23 | INFO | stdout | INFO: 127.0.0.1:44066 - "POST /get_worker_address HTTP/1.1" 200 OK
162
+ 2023-07-05 22:16:03 | INFO | stdout | INFO: 127.0.0.1:38550 - "POST /refresh_all_workers HTTP/1.1" 200 OK
163
+ 2023-07-05 22:16:03 | INFO | stdout | INFO: 127.0.0.1:38564 - "POST /list_models HTTP/1.1" 200 OK
164
+ 2023-07-05 22:16:15 | INFO | stdout | INFO: 127.0.0.1:56662 - "POST /get_worker_address HTTP/1.1" 200 OK
165
+ 2023-07-05 22:18:10 | ERROR | stderr | INFO: Shutting down
166
+ 2023-07-05 22:18:10 | ERROR | stderr | INFO: Waiting for application shutdown.
167
+ 2023-07-05 22:18:10 | ERROR | stderr | INFO: Application shutdown complete.
168
+ 2023-07-05 22:18:10 | ERROR | stderr | INFO: Finished server process [411968]
169
+ 2023-07-05 22:18:13 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
170
+ 2023-07-05 22:18:13 | ERROR | stderr | Traceback (most recent call last):
171
+ 2023-07-05 22:18:13 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
172
+ 2023-07-05 22:18:13 | ERROR | stderr | lock.acquire()
173
+ 2023-07-05 22:18:13 | ERROR | stderr | KeyboardInterrupt:
174
+ 2023-07-05 22:18:15 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
175
+ 2023-07-05 22:18:16 | ERROR | stderr | INFO: Started server process [413792]
176
+ 2023-07-05 22:18:16 | ERROR | stderr | INFO: Waiting for application startup.
177
+ 2023-07-05 22:18:16 | ERROR | stderr | INFO: Application startup complete.
178
+ 2023-07-05 22:18:16 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
179
+ 2023-07-05 22:18:20 | INFO | stdout | INFO: 127.0.0.1:58490 - "POST /refresh_all_workers HTTP/1.1" 200 OK
180
+ 2023-07-05 22:18:20 | INFO | stdout | INFO: 127.0.0.1:58506 - "POST /list_models HTTP/1.1" 200 OK
181
+ 2023-07-05 22:18:29 | INFO | stdout | INFO: 127.0.0.1:56524 - "POST /get_worker_address HTTP/1.1" 200 OK
182
+ 2023-07-05 22:18:44 | INFO | stdout | INFO: 127.0.0.1:36906 - "POST /refresh_all_workers HTTP/1.1" 200 OK
183
+ 2023-07-05 22:18:44 | INFO | stdout | INFO: 127.0.0.1:36910 - "POST /list_models HTTP/1.1" 200 OK
184
+ 2023-07-05 22:21:32 | INFO | stdout | INFO: 127.0.0.1:38644 - "POST /refresh_all_workers HTTP/1.1" 200 OK
185
+ 2023-07-05 22:21:32 | INFO | stdout | INFO: 127.0.0.1:38650 - "POST /list_models HTTP/1.1" 200 OK
186
+ 2023-07-05 22:24:37 | INFO | stdout | INFO: 127.0.0.1:60104 - "POST /refresh_all_workers HTTP/1.1" 200 OK
187
+ 2023-07-05 22:24:37 | INFO | stdout | INFO: 127.0.0.1:60118 - "POST /list_models HTTP/1.1" 200 OK
188
+ 2023-07-05 22:48:28 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
189
+ 2023-07-05 22:48:28 | ERROR | stderr | Traceback (most recent call last):
190
+ 2023-07-05 22:48:28 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
191
+ 2023-07-05 22:48:28 | ERROR | stderr | lock.acquire()
192
+ 2023-07-05 22:48:28 | ERROR | stderr | KeyboardInterrupt:
193
+ 2023-07-05 22:55:54 | INFO | stdout | INFO: 127.0.0.1:58490 - "POST /refresh_all_workers HTTP/1.1" 200 OK
194
+ 2023-07-05 22:55:54 | INFO | stdout | INFO: 127.0.0.1:58502 - "POST /list_models HTTP/1.1" 200 OK
195
+ 2023-07-05 22:55:55 | ERROR | stderr | INFO: Shutting down
196
+ 2023-07-05 22:55:55 | ERROR | stderr | INFO: Waiting for application shutdown.
197
+ 2023-07-05 22:55:55 | ERROR | stderr | INFO: Application shutdown complete.
198
+ 2023-07-05 22:55:55 | ERROR | stderr | INFO: Finished server process [413792]
199
+ 2023-07-05 22:56:00 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
200
+ 2023-07-05 22:56:00 | ERROR | stderr | Traceback (most recent call last):
201
+ 2023-07-05 22:56:00 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
202
+ 2023-07-05 22:56:00 | ERROR | stderr | lock.acquire()
203
+ 2023-07-05 22:56:00 | ERROR | stderr | KeyboardInterrupt:
204
+ 2023-07-05 22:56:14 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
205
+ 2023-07-05 22:56:14 | ERROR | stderr | INFO: Started server process [424798]
206
+ 2023-07-05 22:56:14 | ERROR | stderr | INFO: Waiting for application startup.
207
+ 2023-07-05 22:56:14 | ERROR | stderr | INFO: Application startup complete.
208
+ 2023-07-05 22:56:14 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
209
+ 2023-07-05 23:00:32 | ERROR | stderr | INFO: Shutting down
210
+ 2023-07-05 23:00:32 | ERROR | stderr | INFO: Waiting for application shutdown.
211
+ 2023-07-05 23:00:32 | ERROR | stderr | INFO: Application shutdown complete.
212
+ 2023-07-05 23:00:32 | ERROR | stderr | INFO: Finished server process [424798]
213
+ 2023-07-05 23:00:35 | ERROR | stderr | Exception ignored in: <module 'threading' from '/opt/conda/envs/fastchat/lib/python3.11/threading.py'>
214
+ 2023-07-05 23:00:35 | ERROR | stderr | Traceback (most recent call last):
215
+ 2023-07-05 23:00:35 | ERROR | stderr | File "/opt/conda/envs/fastchat/lib/python3.11/threading.py", line 1583, in _shutdown
216
+ 2023-07-05 23:00:35 | ERROR | stderr | lock.acquire()
217
+ 2023-07-05 23:00:35 | ERROR | stderr | KeyboardInterrupt:
218
+ 2023-07-05 23:01:02 | ERROR | stderr | usage: controller.py [-h] [--host HOST] [--port PORT] [--dispatch-method {lottery,shortest_queue}]
219
+ 2023-07-05 23:01:02 | ERROR | stderr | controller.py: error: unrecognized arguments: --add-chatgpt --share
220
+ 2023-07-05 23:01:07 | INFO | controller | args: Namespace(host='localhost', port=21001, dispatch_method='shortest_queue')
221
+ 2023-07-05 23:01:07 | ERROR | stderr | INFO: Started server process [426596]
222
+ 2023-07-05 23:01:07 | ERROR | stderr | INFO: Waiting for application startup.
223
+ 2023-07-05 23:01:07 | ERROR | stderr | INFO: Application startup complete.
224
+ 2023-07-05 23:01:07 | ERROR | stderr | INFO: Uvicorn running on http://localhost:21001 (Press CTRL+C to quit)
225
+ 2023-07-05 23:01:18 | INFO | stdout | INFO: 127.0.0.1:44990 - "POST /refresh_all_workers HTTP/1.1" 200 OK
226
+ 2023-07-05 23:01:18 | INFO | stdout | INFO: 127.0.0.1:45004 - "POST /list_models HTTP/1.1" 200 OK
227
+ 2023-07-05 23:07:42 | INFO | stdout | INFO: 127.0.0.1:46560 - "POST /refresh_all_workers HTTP/1.1" 200 OK
228
+ 2023-07-05 23:07:42 | INFO | stdout | INFO: 127.0.0.1:46570 - "POST /list_models HTTP/1.1" 200 OK
data/dummy_conversation.json ADDED
The diff for this file is too large to render. See raw diff
 
docker/Dockerfile ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:11.7.1-runtime-ubuntu20.04
2
+
3
+ RUN apt-get update -y && apt-get install -y python3.9 python3.9-distutils curl
4
+ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
5
+ RUN python3.9 get-pip.py
6
+ RUN pip3 install fschat
docker/docker-compose.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.9"
2
+
3
+ services:
4
+ fastchat-controller:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ image: fastchat:latest
9
+ ports:
10
+ - "21001:21001"
11
+ entrypoint: ["python3.9", "-m", "fastchat.serve.controller", "--host", "0.0.0.0", "--port", "21001"]
12
+ fastchat-model-worker:
13
+ build:
14
+ context: .
15
+ dockerfile: Dockerfile
16
+ volumes:
17
+ - huggingface:/root/.cache/huggingface
18
+ environment:
19
+ FASTCHAT_CONTROLLER_URL: http://fastchat-controller:21001
20
+ image: fastchat:latest
21
+ deploy:
22
+ resources:
23
+ reservations:
24
+ devices:
25
+ - driver: nvidia
26
+ count: 1
27
+ capabilities: [gpu]
28
+ entrypoint: ["python3.9", "-m", "fastchat.serve.model_worker", "--model-name", 'vicuna-7b-v1.3', "--model-path", "lmsys/vicuna-7b-v1.3", "--worker-address", "http://fastchat-model-worker:21002", "--controller-address", "http://fastchat-controller:21001", "--host", "0.0.0.0", "--port", "21002"]
29
+ fastchat-api-server:
30
+ build:
31
+ context: .
32
+ dockerfile: Dockerfile
33
+ environment:
34
+ FASTCHAT_CONTROLLER_URL: http://fastchat-controller:21001
35
+ image: fastchat:latest
36
+ ports:
37
+ - "8000:8000"
38
+ entrypoint: ["python3.9", "-m", "fastchat.serve.openai_api_server", "--controller-address", "http://fastchat-controller:21001", "--host", "0.0.0.0", "--port", "8000"]
39
+ volumes:
40
+ huggingface:
docs/arena.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Chatbot Arena
2
+ Chatbot Arena is an LLM benchmark platform featuring anonymous, randomized battles, available at https://arena.lmsys.org.
3
+ We invite the entire community to join this benchmarking effort by contributing your votes and models.
4
+
5
+ ## How to add a new model
6
+ If you want to see a specific model in the arena, you can follow the steps below.
7
+
8
+ 1. Contribute code to support this model in FastChat by submitting a pull request. See [instructions](model_support.md#how-to-support-a-new-model).
9
+ 2. After the model is supported, we will try to schedule some computing resources to host the model in the arena. However, due to the limited resources we have, we may not be able to serve every model. We will select the models based on popularity, quality, diversity, and other factors.
docs/commands/data_cleaning.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Data cleaning
2
+
3
+ ## Requirements
4
+ ```
5
+ pip3 install bs4 markdownify
6
+ pip3 install polyglot pyicu pycld2
7
+ ```
8
+
9
+ ## Steps
10
+ ```
11
+ # Convert html to markdown
12
+ python3 -m fastchat.data.clean_sharegpt --in sharegpt_html.json --out sharegpt_clean.json
13
+
14
+ # Keep or remove specific languages
15
+ python3 -m fastchat.data.optional_clean --in sharegpt_clean.json --out sharegpt_clean_lang.json --skip-lang SOME_LANGUAGE_CODE
16
+
17
+ # Split long conversations
18
+ python3 -m fastchat.data.split_long_conversation --in sharegpt_clean_lang.json --out sharegpt_clean_lang_split.json --model-name /home/ubuntu/model_weights/llama-7b/
19
+ ```
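+ 
+ To sanity-check the output of these steps, here is a minimal inspection sketch (it assumes the common ShareGPT-style schema with a `conversations` list of `from`/`value` turns; adjust the field names to your dump):
+ 
+ ```python
+ import json
+ 
+ # Load the cleaned file and print a small summary of its contents.
+ with open("sharegpt_clean.json") as f:
+     data = json.load(f)
+ 
+ print(f"{len(data)} conversations")
+ for conv in data[:3]:
+     turns = conv.get("conversations", [])
+     print(conv.get("id"), f"{len(turns)} turns")
+     if turns:
+         print("  first turn:", turns[0]["from"], "->", turns[0]["value"][:60])
+ ```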
docs/commands/leaderboard.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Get logs
2
+ ```
3
+ gsutil -m rsync -r gs://fastchat_logs ~/fastchat_logs/
4
+ ```
5
+
6
+ ### Clean battle data
7
+ ```
8
+ cd ~/FastChat/fastchat/serve/monitor
9
+ python3 clean_battle_data.py
10
+ ```
11
+
12
+ ### Run Elo analysis
13
+ ```
14
+ python3 elo_analysis.py --clean-battle-file clean_battle_20230523.json
15
+ ```
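+ 
+ For intuition, the core of an Elo analysis is the standard pairwise rating update sketched below (the constants here, K=4, a 400-point scale, and 1000 initial ratings, are illustrative assumptions; the actual script may differ):
+ 
+ ```python
+ from collections import defaultdict
+ 
+ def update_elo(ratings, model_a, model_b, winner, k=4, scale=400):
+     """Apply one standard Elo update for a single battle."""
+     ra, rb = ratings[model_a], ratings[model_b]
+     ea = 1 / (1 + 10 ** ((rb - ra) / scale))  # expected score of model_a
+     sa = {"model_a": 1.0, "model_b": 0.0, "tie": 0.5}[winner]
+     ratings[model_a] += k * (sa - ea)
+     ratings[model_b] += k * ((1 - sa) - (1 - ea))
+ 
+ ratings = defaultdict(lambda: 1000.0)
+ update_elo(ratings, "vicuna-13b", "alpaca-13b", "model_a")
+ print(dict(ratings))
+ ```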
docs/commands/local_cluster.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Local GPU cluster (node-01)
2
+ ```
3
+ python3 -m fastchat.serve.controller --host 0.0.0.0 --port 10002
4
+
5
+ CUDA_VISIBLE_DEVICES=0 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/vicuna-13b/ --controller http://localhost:10002 --port 31000 --worker http://localhost:31000
6
+ CUDA_VISIBLE_DEVICES=1 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/vicuna-13b/ --controller http://localhost:10002 --port 31001 --worker http://localhost:31001
7
+ CUDA_VISIBLE_DEVICES=2 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/bair-chat-13b/ --controller http://localhost:10002 --port 31002 --worker http://localhost:31002
8
+ CUDA_VISIBLE_DEVICES=3 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/alpaca-chat-13b/ --controller http://localhost:10002 --port 31003 --worker http://localhost:31003
9
+
10
+ python3 -m fastchat.serve.test_message --model vicuna-13b --controller http://localhost:10002
11
+ ```
12
+
13
+ ### Web server
14
+ ```
15
+ python3 -m fastchat.serve.controller --host 0.0.0.0 --port 21001
16
+
17
+ python3 -m fastchat.serve.register_worker --controller http://localhost:21001 --worker-name https://
18
+
19
+ python3 -m fastchat.serve.test_message --model vicuna-13b --controller http://localhost:21001
20
+
21
+ python3 -m fastchat.serve.gradio_web_server --controller http://localhost:21001
22
+ ```
23
+
24
+ ### Local GPU cluster (node-02)
25
+ ```
26
+ CUDA_VISIBLE_DEVICES=0 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/vicuna-13b/ --controller http://node-01:10002 --host 0.0.0.0 --port 31000 --worker http://$(hostname):31000
27
+ CUDA_VISIBLE_DEVICES=1 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/vicuna-13b/ --controller http://node-01:10002 --host 0.0.0.0 --port 31001 --worker http://$(hostname):31001
28
+ CUDA_VISIBLE_DEVICES=2 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/vicuna-13b/ --controller http://node-01:10002 --host 0.0.0.0 --port 31002 --worker http://$(hostname):31002
29
+ CUDA_VISIBLE_DEVICES=3 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/vicuna-13b/ --controller http://node-01:10002 --host 0.0.0.0 --port 31003 --worker http://$(hostname):31003
30
+ ```
docs/commands/pypi.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Requirement
2
+ ```
3
+ python3 -m pip install twine
4
+ python3 -m pip install --upgrade pip
5
+ pip3 install build
6
+ ```
7
+
8
+ ### Upload
9
+ ```
10
+ bash scripts/upload_pypi.sh
11
+ ```
docs/commands/test_process.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Test CLI Inference
2
+
3
+ ```
4
+ python3 test_cli.py
5
+ ```
6
+
7
+ ### Test OpenAI API Server
8
+
9
+ ```
10
+ python3 launch_openai_api_test_server.py
11
+ ```
12
+
13
+ ```
14
+ python3 test_openai_api.py
15
+ ```
16
+
17
+ ### Test GUI Serving
18
+
19
+ ```
20
+ python3 -m fastchat.serve.controller
21
+ ```
22
+
23
+ ```
24
+ CUDA_VISIBLE_DEVICES=0,1 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/koala-13b --num-gpus 2 --port 30000 --worker http://localhost:30000
25
+ CUDA_VISIBLE_DEVICES=2,3 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/alpaca-13b --num-gpus 2 --port 30002 --worker http://localhost:30002
26
+ CUDA_VISIBLE_DEVICES=4,5 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/vicuna-13b --port 30004 --worker http://localhost:30004 --num-gpus 2
27
+ CUDA_VISIBLE_DEVICES=6,7 python3 -m fastchat.serve.model_worker --model-path OpenAssistant/oasst-sft-1-pythia-12b --port 30006 --worker http://localhost:30006 --num-gpus 2
28
+
29
+ CUDA_VISIBLE_DEVICES=0,1 python3 -m fastchat.serve.model_worker --model-path StabilityAI/stablelm-tuned-alpha-7b --num-gpus 2 --port 30000 --worker http://localhost:30000
30
+ CUDA_VISIBLE_DEVICES=2,3 python3 -m fastchat.serve.model_worker --model-path databricks/dolly-v2-12b --num-gpus 2 --port 30002 --worker http://localhost:30002
31
+ CUDA_VISIBLE_DEVICES=4 python3 -m fastchat.serve.model_worker --model-path THUDM/chatglm-6b --port 30004 --worker http://localhost:30004
32
+ CUDA_VISIBLE_DEVICES=5 python3 -m fastchat.serve.model_worker --model-path lmsys/fastchat-t5-3b-v1.0 --port 30005 --worker http://localhost:30005
33
+ CUDA_VISIBLE_DEVICES=6 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/baize-7b --port 30006 --worker http://localhost:30006
34
+ CUDA_VISIBLE_DEVICES=7 python3 -m fastchat.serve.model_worker --model-path ~/model_weights/RWKV-4-Raven-7B-v11x-Eng99%-Other1%-20230429-ctx8192.pth --port 30007 --worker http://localhost:30007
35
+ ```
36
+
37
+ ```
38
+ python3 -m fastchat.serve.gradio_web_server_multi
39
+ ```
docs/commands/webserver.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Install
2
+ ```
3
+ sudo apt update
4
+ sudo apt install tmux htop
5
+
6
+ wget https://repo.anaconda.com/archive/Anaconda3-2022.10-Linux-x86_64.sh
7
+ bash Anaconda3-2022.10-Linux-x86_64.sh
8
+
9
+ conda create -n fastchat python=3.9
10
+ conda activate fastchat
11
+
12
+ git clone https://github.com/lm-sys/FastChat.git
13
+ cd FastChat
14
+ pip3 install -e .
15
+ ```
16
+
17
+
18
+ ### Launch servers
19
+ ```
20
+ cd fastchat_logs/controller
21
+ python3 -m fastchat.serve.controller --host 0.0.0.0 --port 21001
22
+ python3 -m fastchat.serve.register_worker --controller http://localhost:21001 --worker-name https://
23
+ python3 -m fastchat.serve.test_message --model vicuna-13b --controller http://localhost:21001
24
+
25
+ cd fastchat_logs/server0
26
+
27
+ export OPENAI_API_KEY=
28
+ export ANTHROPIC_API_KEY=
29
+
30
+ python3 -m fastchat.serve.gradio_web_server_multi --controller http://localhost:21001 --concurrency 10 --add-chatgpt --add-claude --add-palm --anony-only --elo ~/elo_results/elo_results_20230619.pkl --leaderboard-table-file ~/elo_results/leaderboard_table_20230619.csv
31
+
32
+ python3 backup_logs.py
33
+ ```
34
+
35
+
36
+ ### Check the launch time
37
+ ```
38
+ for i in $(seq 0 11); do cat fastchat_logs/server$i/gradio_web_server.log | grep "Running on local URL" | tail -n 1; done
39
+ ```
40
+
41
+
42
+ ### Increase the limit of max open files
43
+ One process (no reboot needed)
44
+ ```
45
+ sudo prlimit --nofile=1048576:1048576 --pid=$id
46
+
47
+ for id in $(ps -ef | grep gradio_web_server | awk '{print $2}'); do echo $id; prlimit --nofile=1048576:1048576 --pid=$id; done
48
+ ```
49
+
50
+ System-wide (reboot required): add the lines below to `/etc/security/limits.conf`
51
+ ```
52
+ * hard nofile 65535
53
+ * soft nofile 65535
54
+ ```
55
+
56
+
57
+ ### Gradio edit (3.35.2)
58
+ 1. gtag and canvas
59
+ ```
60
+ vim /home/vicuna/anaconda3/envs/fastchat/lib/python3.9/site-packages/gradio/templates/frontend/index.html
61
+ ```
62
+
63
+ ```
64
+ <!-- Google tag (gtag.js) -->
65
+ <script async src="https://www.googletagmanager.com/gtag/js?id=G-K6D24EE9ED"></script><script>
66
+ window.dataLayer = window.dataLayer || [];
67
+ function gtag(){dataLayer.push(arguments);}
68
+ gtag('js', new Date());
69
+ gtag('config', 'G-K6D24EE9ED');
70
+ window.__gradio_mode__ = "app";
71
+ </script>
72
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.4.1/html2canvas.min.js"></script>
73
+ ```
74
+
75
+ 2. Loading
76
+ ```
77
+ vim /home/vicuna/anaconda3/envs/fastchat/lib/python3.9/site-packages/gradio/templates/frontend/assets/index-188ef5e8.js
78
+ ```
79
+
80
+ ```
81
+ %s/"Loading..."/"Loading...(Please refresh if it takes more than 30 seconds)"/g
82
+ ```
docs/gptq.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GPTQ 4bit Inference
2
+
3
+ FastChat supports GPTQ 4-bit inference with [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa).
4
+
5
+ 1. Windows users: use the `old-cuda` branch.
6
+ 2. Linux users: the `fastest-inference-4bit` branch is recommended.
7
+
8
+ ## Install
9
+
10
+ Setup environment:
11
+ ```bash
12
+ # cd /path/to/FastChat
13
+ git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git repositories/GPTQ-for-LLaMa
14
+ cd repositories/GPTQ-for-LLaMa
15
+ # Windows users should use the `old-cuda` branch
16
+ git switch fastest-inference-4bit
17
+ # Install `quant-cuda` package in FastChat's virtualenv
18
+ python3 setup_cuda.py install
19
+ pip3 install texttable
20
+ ```
21
+
22
+ Chat with the CLI:
23
+ ```bash
24
+ python3 -m fastchat.serve.cli \
25
+ --model-path models/vicuna-7B-1.1-GPTQ-4bit-128g \
26
+ --gptq-wbits 4 \
27
+ --gptq-groupsize 128
28
+ ```
29
+
30
+ Start model worker:
31
+ ```bash
32
+ # Download quantized model from huggingface
33
+ # Make sure you have git-lfs installed (https://git-lfs.com)
34
+ git lfs install
35
+ git clone https://huggingface.co/TheBloke/vicuna-7B-1.1-GPTQ-4bit-128g models/vicuna-7B-1.1-GPTQ-4bit-128g
36
+
37
+ python3 -m fastchat.serve.model_worker \
38
+ --model-path models/vicuna-7B-1.1-GPTQ-4bit-128g \
39
+ --gptq-wbits 4 \
40
+ --gptq-groupsize 128
41
+
42
+ # You can specify which quantized model to use
43
+ python3 -m fastchat.serve.model_worker \
44
+ --model-path models/vicuna-7B-1.1-GPTQ-4bit-128g \
45
+ --gptq-ckpt models/vicuna-7B-1.1-GPTQ-4bit-128g/vicuna-7B-1.1-GPTQ-4bit-128g.safetensors \
46
+ --gptq-wbits 4 \
47
+ --gptq-groupsize 128 \
48
+ --gptq-act-order
49
+ ```
50
+
51
+ ## Benchmark
52
+
53
+ | LLaMA-13B | branch | Bits | group-size | memory(MiB) | PPL(c4) | Median(s/token) | act-order | speed up |
54
+ | --------- | ---------------------- | ---- | ---------- | ----------- | ------- | --------------- | --------- | -------- |
55
+ | FP16 | fastest-inference-4bit | 16 | - | 26634 | 6.96 | 0.0383 | - | 1x |
56
+ | GPTQ | triton | 4 | 128 | 8590 | 6.97 | 0.0551 | - | 0.69x |
57
+ | GPTQ | fastest-inference-4bit | 4 | 128 | 8699 | 6.97 | 0.0429 | true | 0.89x |
58
+ | GPTQ | fastest-inference-4bit | 4 | 128 | 8699 | 7.03 | 0.0287 | false | 1.33x |
59
+ | GPTQ | fastest-inference-4bit | 4 | -1 | 8448 | 7.12 | 0.0284 | false | 1.44x |
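+ 
+ As a rough sanity check on the memory column, weight storage alone scales linearly with bit width (a back-of-the-envelope sketch; activations, the KV cache, and per-group quantization metadata are ignored, so measured usage is higher):
+ 
+ ```python
+ def weight_memory_mib(n_params: float, bits: int) -> float:
+     """Approximate memory needed to store the model weights alone."""
+     return n_params * bits / 8 / 2**20
+ 
+ n = 13e9  # approximate LLaMA-13B parameter count
+ print(f"FP16 : {weight_memory_mib(n, 16):,.0f} MiB")  # ~24,796 MiB
+ print(f"4-bit: {weight_memory_mib(n, 4):,.0f} MiB")   # ~6,199 MiB
+ ```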
docs/langchain_integration.md ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Local LangChain with FastChat
2
+
3
+ [LangChain](https://python.langchain.com/en/latest/index.html) is a library that facilitates the development of applications by leveraging large language models (LLMs) and enabling their composition with other sources of computation or knowledge.
4
+ FastChat's OpenAI-compatible [API server](openai_api.md) enables using LangChain with open models seamlessly.
5
+
6
+ ## Launch RESTful API Server
7
+
8
+ Here are the steps to launch a local OpenAI API server for LangChain.
9
+
10
+ First, launch the controller
11
+
12
+ ```bash
13
+ python3 -m fastchat.serve.controller
14
+ ```
15
+
16
+ LangChain uses OpenAI model names by default, so we need to assign some faux OpenAI model names to our local model.
17
+ Here, we use Vicuna as an example and use it for three endpoints: chat completion, completion, and embedding.
18
+ `--model-path` can be a local folder or a Hugging Face repo name.
19
+ See a full list of supported models [here](../README.md#supported-models).
20
+
21
+ ```bash
22
+ python3 -m fastchat.serve.model_worker --model-names "gpt-3.5-turbo,text-davinci-003,text-embedding-ada-002" --model-path lmsys/vicuna-7b-v1.3
23
+ ```
24
+
25
+ Finally, launch the RESTful API server
26
+
27
+ ```bash
28
+ python3 -m fastchat.serve.openai_api_server --host localhost --port 8000
29
+ ```
30
+
31
+ ## Set OpenAI Environment
32
+
33
+ You can set your environment with the following commands.
34
+
35
+ Set OpenAI base url
36
+
37
+ ```bash
38
+ export OPENAI_API_BASE=http://localhost:8000/v1
39
+ ```
40
+
41
+ Set OpenAI API key
42
+
43
+ ```bash
44
+ export OPENAI_API_KEY=EMPTY
45
+ ```
46
+
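+ The same setup can be done in-process instead of via the shell (a sketch; LangChain reads these environment variables):
+ 
+ ```python
+ import os
+ 
+ # Point LangChain's OpenAI integration at the local FastChat server.
+ os.environ["OPENAI_API_BASE"] = "http://localhost:8000/v1"
+ os.environ["OPENAI_API_KEY"] = "EMPTY"  # FastChat does not check the key
+ ```
+ 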
47
+ If you encounter the following OOM error while creating embeddings, set a smaller batch size via an environment variable.
48
+
49
+ ~~~bash
50
+ openai.error.APIError: Invalid response object from API: '{"object":"error","message":"**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**\\n\\n(CUDA out of memory. Tried to allocate xxx MiB (GPU 0; xxx GiB total capacity; xxx GiB already allocated; xxx MiB free; xxx GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF)","code":50002}' (HTTP response code was 400)
51
+ ~~~
52
+
53
+ You can try `export FASTCHAT_WORKER_API_EMBEDDING_BATCH_SIZE=1`.
54
+
55
+ ## Try local LangChain
56
+
57
+ Here is a question answering example.
58
+
59
+ Download a text file.
60
+
61
+ ```bash
62
+ wget https://raw.githubusercontent.com/hwchase17/langchain/v0.0.200/docs/modules/state_of_the_union.txt
63
+ ```
64
+
65
+ Run LangChain.
66
+
67
+ ~~~py
68
+ from langchain.chat_models import ChatOpenAI
69
+ from langchain.document_loaders import TextLoader
70
+ from langchain.embeddings import OpenAIEmbeddings
71
+ from langchain.indexes import VectorstoreIndexCreator
72
+
73
+ embedding = OpenAIEmbeddings(model="text-embedding-ada-002")
74
+ loader = TextLoader("state_of_the_union.txt")
75
+ index = VectorstoreIndexCreator(embedding=embedding).from_loaders([loader])
76
+ llm = ChatOpenAI(model="gpt-3.5-turbo")
77
+
78
+ questions = [
79
+ "Who is the speaker",
80
+ "What did the president say about Ketanji Brown Jackson",
81
+ "What are the threats to America",
82
+ "Who are mentioned in the speech",
83
+ "Who is the vice president",
84
+ "How many projects were announced",
85
+ ]
86
+
87
+ for query in questions:
88
+ print("Query:", query)
89
+ print("Answer:", index.query(query, llm=llm))
90
+ ~~~
docs/model_support.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model Support
2
+
3
+ ## Supported models
4
+ - Vicuna, Alpaca, LLaMA, Koala
5
+ - example: `python3 -m fastchat.serve.cli --model-path lmsys/vicuna-7b-v1.3`
6
+ - [BlinkDL/RWKV-4-Raven](https://huggingface.co/BlinkDL/rwkv-4-raven)
7
+ - example: `python3 -m fastchat.serve.cli --model-path ~/model_weights/RWKV-4-Raven-7B-v11x-Eng99%-Other1%-20230429-ctx8192.pth`
8
+ - [camel-ai/CAMEL-13B-Combined-Data](https://huggingface.co/camel-ai/CAMEL-13B-Combined-Data)
9
+ - [databricks/dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b)
10
+ - [FreedomIntelligence/phoenix-inst-chat-7b](https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b)
11
+ - [h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b)
12
+ - [lcw99/polyglot-ko-12.8b-chang-instruct-chat](https://huggingface.co/lcw99/polyglot-ko-12.8b-chang-instruct-chat)
13
+ - [lmsys/fastchat-t5-3b-v1.0](https://huggingface.co/lmsys/fastchat-t5)
14
+ - [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat)
15
+ - example: `python3 -m fastchat.serve.cli --model-path mosaicml/mpt-7b-chat`
16
+ - [Neutralzz/BiLLa-7B-SFT](https://huggingface.co/Neutralzz/BiLLa-7B-SFT)
17
+ - [nomic-ai/gpt4all-13b-snoozy](https://huggingface.co/nomic-ai/gpt4all-13b-snoozy)
18
+ - [openaccess-ai-collective/manticore-13b-chat-pyg](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
19
+ - [OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5](https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5)
20
+ - [project-baize/baize-v2-7b](https://huggingface.co/project-baize/baize-v2-7b)
21
+ - [Salesforce/codet5p-6b](https://huggingface.co/Salesforce/codet5p-6b)
22
+ - [StabilityAI/stablelm-tuned-alpha-7b](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b)
23
+ - [THUDM/chatglm-6b](https://huggingface.co/THUDM/chatglm-6b)
24
+ - [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)
25
+ - [tiiuae/falcon-40b](https://huggingface.co/tiiuae/falcon-40b)
26
+ - [timdettmers/guanaco-33b-merged](https://huggingface.co/timdettmers/guanaco-33b-merged)
27
+ - [togethercomputer/RedPajama-INCITE-7B-Chat](https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat)
28
+ - [WizardLM/WizardLM-13B-V1.0](https://huggingface.co/WizardLM/WizardLM-13B-V1.0)
29
+ - [baichuan-inc/baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B)
30
+ - Any [EleutherAI](https://huggingface.co/EleutherAI) pythia model such as [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
31
+ - Any [Peft](https://github.com/huggingface/peft) adapter trained on top of a model above. To activate, the model path must contain `peft`.
32
+
33
+ ## How to support a new model
34
+
35
+ To support a new model in FastChat, you need to correctly handle its prompt template and model loading.
36
+ The goal is to make the following command run with the correct prompts.
37
+ ```
38
+ python3 -m fastchat.serve.cli --model [YOUR_MODEL_PATH]
39
+ ```
40
+
41
+ You can run this example command to learn the code logic.
42
+ ```
43
+ python3 -m fastchat.serve.cli --model lmsys/vicuna-7b-v1.3
44
+ ```
45
+
46
+ You can add `--debug` to see the actual prompt sent to the model.
47
+
48
+ ### Steps
49
+ FastChat uses the `Conversation` class to handle prompt templates and `BaseModelAdapter` class to handle model loading.
50
+
51
+ 1. Implement a conversation template for the new model at [fastchat/conversation.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py). You can follow existing examples and use `register_conv_template` to add a new one (a minimal sketch follows this list).
52
+ 2. Implement a model adapter for the new model at [fastchat/model/model_adapter.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/model/model_adapter.py). You can follow existing examples and use `register_model_adapter` to add a new one.
53
+ 3. (Optional) add the model name to the "Supported models" [section](#supported-models) above and add more information in [fastchat/model/model_registry.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/model/model_registry.py).
54
+
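+ A minimal sketch of step 1 (the field values here, including the `my-model` name, roles, and separators, are placeholders; check `fastchat/conversation.py` for the exact `Conversation` fields in your version):
+ 
+ ```python
+ from fastchat.conversation import Conversation, SeparatorStyle, register_conv_template
+ 
+ # Hypothetical template for a model whose prompt looks like:
+ #   USER: ...\nASSISTANT: ...</s>
+ register_conv_template(
+     Conversation(
+         name="my-model",  # placeholder template name
+         system="A chat between a curious user and an artificial intelligence assistant.",
+         roles=("USER", "ASSISTANT"),
+         messages=(),
+         offset=0,
+         sep_style=SeparatorStyle.ADD_COLON_TWO,
+         sep="\n",
+         sep2="</s>",
+     )
+ )
+ ```
+ 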
55
+ After these steps, the new model should be compatible with most FastChat features, such as CLI, web UI, model worker, and OpenAI-compatible API server. Please do some testing with these features as well.
docs/openai_api.md ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenAI-Compatible RESTful APIs & SDK
2
+
3
+ FastChat provides OpenAI-compatible APIs for its supported models, so you can use FastChat as a local drop-in replacement for OpenAI APIs.
4
+ The FastChat server is compatible with both the [openai-python](https://github.com/openai/openai-python) library and cURL commands.
5
+
6
+ The following OpenAI APIs are supported:
7
+ - Chat Completions. (Reference: https://platform.openai.com/docs/api-reference/chat)
8
+ - Completions. (Reference: https://platform.openai.com/docs/api-reference/completions)
9
+ - Embeddings. (Reference: https://platform.openai.com/docs/api-reference/embeddings)
10
+
11
+ ## RESTful API Server
12
+ First, launch the controller
13
+
14
+ ```bash
15
+ python3 -m fastchat.serve.controller
16
+ ```
17
+
18
+ Then, launch the model worker(s)
19
+
20
+ ```bash
21
+ python3 -m fastchat.serve.model_worker --model-path lmsys/vicuna-7b-v1.3
22
+ ```
23
+
24
+ Finally, launch the RESTful API server
25
+
26
+ ```bash
27
+ python3 -m fastchat.serve.openai_api_server --host localhost --port 8000
28
+ ```
29
+
30
+ Now, let us test the API server.
31
+
32
+ ### OpenAI Official SDK
33
+ The goal of `openai_api_server.py` is to implement a fully OpenAI-compatible API server, so the models can be used directly with the [openai-python](https://github.com/openai/openai-python) library.
34
+
35
+ First, install openai-python:
36
+ ```bash
37
+ pip install --upgrade openai
38
+ ```
39
+
40
+ Then, interact with model vicuna:
41
+ ```python
42
+ import openai
43
+ openai.api_key = "EMPTY" # Not support yet
44
+ openai.api_base = "http://localhost:8000/v1"
45
+
46
+ model = "vicuna-7b-v1.3"
47
+ prompt = "Once upon a time"
48
+
49
+ # create a completion
50
+ completion = openai.Completion.create(model=model, prompt=prompt, max_tokens=64)
51
+ # print the completion
52
+ print(prompt + completion.choices[0].text)
53
+
54
+ # create a chat completion
55
+ completion = openai.ChatCompletion.create(
56
+ model=model,
57
+ messages=[{"role": "user", "content": "Hello! What is your name?"}]
58
+ )
59
+ # print the completion
60
+ print(completion.choices[0].message.content)
61
+ ```
62
+
63
+ Streaming is also supported. See [test_openai_api.py](../tests/test_openai_api.py).
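+ 
+ A minimal streaming sketch with the same SDK (the chunked `delta` format mirrors OpenAI's streaming API; behavior may vary slightly across FastChat versions):
+ 
+ ```python
+ import openai
+ 
+ openai.api_key = "EMPTY"
+ openai.api_base = "http://localhost:8000/v1"
+ 
+ # Print the chat completion token by token as chunks arrive.
+ for chunk in openai.ChatCompletion.create(
+     model="vicuna-7b-v1.3",
+     messages=[{"role": "user", "content": "Tell me a short story."}],
+     stream=True,
+ ):
+     print(chunk.choices[0].delta.get("content", ""), end="", flush=True)
+ print()
+ ```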
64
+
65
+ ### cURL
66
+ cURL is another good tool for observing the output of the API.
67
+
68
+ List Models:
69
+ ```bash
70
+ curl http://localhost:8000/v1/models
71
+ ```
72
+
73
+ Chat Completions:
74
+ ```bash
75
+ curl http://localhost:8000/v1/chat/completions \
76
+ -H "Content-Type: application/json" \
77
+ -d '{
78
+ "model": "vicuna-7b-v1.3",
79
+ "messages": [{"role": "user", "content": "Hello! What is your name?"}]
80
+ }'
81
+ ```
82
+
83
+ Text Completions:
84
+ ```bash
85
+ curl http://localhost:8000/v1/completions \
86
+ -H "Content-Type: application/json" \
87
+ -d '{
88
+ "model": "vicuna-7b-v1.3",
89
+ "prompt": "Once upon a time",
90
+ "max_tokens": 41,
91
+ "temperature": 0.5
92
+ }'
93
+ ```
94
+
95
+ Embeddings:
96
+ ```bash
97
+ curl http://localhost:8000/v1/embeddings \
98
+ -H "Content-Type: application/json" \
99
+ -d '{
100
+ "model": "vicuna-7b-v1.3",
101
+ "input": "Hello world!"
102
+ }'
103
+ ```
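+ 
+ The embeddings endpoint is also reachable through the SDK (a sketch using `openai.Embedding.create`, which mirrors the official API):
+ 
+ ```python
+ import openai
+ 
+ openai.api_key = "EMPTY"
+ openai.api_base = "http://localhost:8000/v1"
+ 
+ resp = openai.Embedding.create(model="vicuna-7b-v1.3", input="Hello world!")
+ vector = resp["data"][0]["embedding"]
+ print(len(vector), "dimensions")
+ ```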
104
+
105
+ ## LangChain Support
106
+ This OpenAI-compatible API server supports LangChain. See [LangChain Integration](langchain_integration.md) for details.
107
+
108
+ ## Adjusting Environment Variables
109
+
110
+ ### Timeout
111
+ By default, a timeout error will occur if a model worker does not respond within 100 seconds. If your model/hardware is slower, you can change this timeout through an environment variable:
112
+
113
+ ```bash
114
+ export FASTCHAT_WORKER_API_TIMEOUT=<larger timeout in seconds>
115
+ ```
116
+
117
+ ### Batch size
118
+ If you encounter an OOM error while creating embeddings, you can use a smaller batch size by setting
119
+
120
+ ```bash
121
+ export FASTCHAT_WORKER_API_EMBEDDING_BATCH_SIZE=1
122
+ ```
123
+
124
+ ## Todos
125
+ Some features to be implemented:
126
+
127
+ - [ ] Support more parameters like `logprobs`, `logit_bias`, `user`, `presence_penalty` and `frequency_penalty`
128
+ - [ ] Model details (permissions, owner and create time)
129
+ - [ ] Edits API
130
+ - [ ] Authentication and API key
131
+ - [ ] Rate limiting settings
docs/server_arch.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # FastChat Server Architecture
2
+ ![server arch](../assets/server_arch.png)
docs/training.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Fine-tuning FastChat-T5
2
+ You can use the following command to train FastChat-T5 with 4 x A100 (40GB).
3
+ ```bash
4
+ torchrun --nproc_per_node=4 --master_port=9778 fastchat/train/train_flant5.py \
5
+ --model_name_or_path google/flan-t5-xl \
6
+ --data_path /data/dummy.json \
7
+ --bf16 True \
8
+ --output_dir ./checkpoints_flant5_3b \
9
+ --num_train_epochs 3 \
10
+ --per_device_train_batch_size 1 \
11
+ --per_device_eval_batch_size 1 \
12
+ --gradient_accumulation_steps 4 \
13
+ --evaluation_strategy "no" \
14
+ --save_strategy "steps" \
15
+ --save_steps 300 \
16
+ --save_total_limit 1 \
17
+ --learning_rate 2e-5 \
18
+ --weight_decay 0. \
19
+ --warmup_ratio 0.03 \
20
+ --lr_scheduler_type "cosine" \
21
+ --logging_steps 1 \
22
+ --fsdp "full_shard auto_wrap" \
23
+ --fsdp_transformer_layer_cls_to_wrap T5Block \
24
+ --tf32 True \
25
+ --model_max_length 2048 \
26
+ --preprocessed_path ./preprocessed_data/processed.json \
27
+ --gradient_checkpointing True
28
+ ```
29
+
30
+ After training, please use our post-processing [function](https://github.com/lm-sys/FastChat/blob/55051ad0f23fef5eeecbda14a2e3e128ffcb2a98/fastchat/utils.py#L166-L185) to update the saved model weight. Additional discussions can be found [here](https://github.com/lm-sys/FastChat/issues/643).
31
+
32
+ ### Fine-tuning using (Q)LoRA
33
+ You can use the following command to train Vicuna-7B with QLoRA using ZeRO-2. Note that ZeRO-3 is not currently supported with QLoRA, but ZeRO-3 does support LoRA, which has a reference configuration under `playground/deepspeed_config_s3.json`.
34
+ ```bash
35
+ deepspeed train_lora.py \
36
+ --model_name_or_path ~/model_weights/llama-7b \
37
+ --lora_r 8 \
38
+ --lora_alpha 16 \
39
+ --lora_dropout 0.05 \
40
+ --data_path <path-to-data> \
41
+ --bf16 True \
42
+ --output_dir ./checkpoints \
43
+ --num_train_epochs 3 \
44
+ --per_device_train_batch_size 4 \
45
+ --per_device_eval_batch_size 4 \
46
+ --gradient_accumulation_steps 1 \
47
+ --evaluation_strategy "no" \
48
+ --save_strategy "steps" \
49
+ --save_steps 1200 \
50
+ --save_total_limit 100 \
51
+ --learning_rate 2e-5 \
52
+ --weight_decay 0. \
53
+ --warmup_ratio 0.03 \
54
+ --lr_scheduler_type "cosine" \
55
+ --logging_steps 1 \
56
+ --tf32 True \
57
+ --model_max_length 2048 \
58
+ --q_lora True \
59
+ --deepspeed playground/deepspeed_config_s2.json \
60
+ ```
docs/vicuna_weights_version.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Vicuna Weights
2
+
3
+ | Weights version | v1.3 | v1.1 | v0 |
4
+ | ---- | ---- | ---- | ---- |
5
+ | Link | [7B](https://huggingface.co/lmsys/vicuna-7b-v1.3), [13B](https://huggingface.co/lmsys/vicuna-13b-v1.3), [33B](https://huggingface.co/lmsys/vicuna-33b-v1.3) | [7B](https://huggingface.co/lmsys/vicuna-7b-delta-v1.1), [13B](https://huggingface.co/lmsys/vicuna-13b-delta-v1.1) | [7B](https://huggingface.co/lmsys/vicuna-7b-delta-v0), [13B](https://huggingface.co/lmsys/vicuna-13b-delta-v0) |
6
+ | Separator | `</s>` | `</s>` | `###` |
7
+ | Is delta weights | No | Yes | Yes |
8
+ | FastChat PyPI package compatibility | >= v0.2.1 | >= v0.2.1 |<= v0.1.10 |
9
+ | FastChat source code compatibility | after [tag v0.2.1](https://github.com/lm-sys/FastChat/tree/v0.2.1) | after [tag v0.2.1](https://github.com/lm-sys/FastChat/tree/v0.2.1) | [tag v0.1.10](https://github.com/lm-sys/FastChat/tree/v0.1.10) |
10
+
11
+ ### Updates
12
+ - Major updates of weights v1.3
13
+ - Train with twice the amount of ShareGPT data compared to previous versions.
14
+ - Provide merged weights directly instead of delta weights.
15
+
16
+ - Major updates of weights v1.1
17
+ - Refactor the tokenization and separator. In Vicuna v1.1, the separator has been changed from `###` to the EOS token `</s>`. This change makes it easier to determine the generation stop criteria and enables better compatibility with other libraries.
18
+ - Fix the supervised fine-tuning loss computation for better model quality.
19
+
20
+ ## Prompt Template
21
+
22
+ ### Example prompt (weights v1.1 and v1.3)
23
+ ```
24
+ A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
25
+
26
+ USER: Hello!
27
+ ASSISTANT: Hello!</s>
28
+ USER: How are you?
29
+ ASSISTANT: I am good.</s>
30
+ ```
31
+
32
+ See a full prompt template [here](https://github.com/lm-sys/FastChat/blob/daa2b9abe20597ebf34dc5df164d450456610c74/fastchat/conversation.py#L246-L259).
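+ 
+ To make the separators concrete, here is a simplified sketch that assembles a v1.1/v1.3-style prompt from a message list (for illustration only; real code should use the `Conversation` template linked above, whose exact whitespace handling is authoritative):
+ 
+ ```python
+ SYSTEM = (
+     "A chat between a curious user and an artificial intelligence assistant. "
+     "The assistant gives helpful, detailed, and polite answers to the user's questions."
+ )
+ 
+ def build_prompt(messages, system=SYSTEM):
+     """messages: list of (role, text) pairs with role in {"USER", "ASSISTANT"}."""
+     lines = [system, ""]
+     for role, text in messages:
+         eos = "</s>" if role == "ASSISTANT" else ""  # assistant turns end with EOS
+         lines.append(f"{role}: {text}{eos}")
+     lines.append("ASSISTANT:")  # generation continues from here
+     return "\n".join(lines)
+ 
+ print(build_prompt([("USER", "Hello!"), ("ASSISTANT", "Hello!"), ("USER", "How are you?")]))
+ ```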
33
+
34
+ ### Example prompt (weights v0)
35
+ ```
36
+ A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
37
+
38
+ ### Human: Hello!
39
+ ### Assistant: Hello!
40
+ ### Human: How are you?
41
+ ### Assistant: I am good.
42
+ ```
43
+
44
+ See the full prompt template [here](https://github.com/lm-sys/FastChat/blob/daa2b9abe20597ebf34dc5df164d450456610c74/fastchat/conversation.py#L198-L229).
45
+
46
+ ## How to Apply Delta Weights (for weights v1.1 and v0)
47
+
48
+ We release [Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/) weights (v1.1 and v0) as delta weights to comply with the LLaMA model license.
49
+ You can add our delta to the original LLaMA weights to obtain the Vicuna weights. Instructions:
50
+
51
+ 1. Get the original LLaMA weights in the Hugging Face format by following the instructions [here](https://huggingface.co/docs/transformers/main/model_doc/llama).
52
+ 2. Use the following scripts to get Vicuna weights by applying our delta. They will automatically download delta weights from our Hugging Face [account](https://huggingface.co/lmsys).
53
+
54
+ **NOTE**:
55
+ Weights v1.1 are only compatible with `transformers>=4.28.0` and `fschat>=0.2.0`.
56
+ Please update your local packages accordingly. If you follow the above commands to do a fresh install, then you should get all the correct versions.
57
+
58
+ #### Vicuna-7B
59
+ This conversion command needs around 30 GB of CPU RAM.
60
+ See the "Low CPU Memory Conversion" section below if you do not have enough memory.
61
+ Replace `/path/to/*` with the real paths.
62
+ ```bash
63
+ python3 -m fastchat.model.apply_delta \
64
+ --base-model-path /path/to/llama-7b \
65
+ --target-model-path /path/to/output/vicuna-7b \
66
+ --delta-path lmsys/vicuna-7b-delta-v1.1
67
+ ```
68
+
69
+ #### Vicuna-13B
70
+ This conversion command needs around 60 GB of CPU RAM.
71
+ See the "Low CPU Memory Conversion" section below if you do not have enough memory.
72
+ Replace `/path/to/*` with the real paths.
73
+ ```bash
74
+ python3 -m fastchat.model.apply_delta \
75
+ --base-model-path /path/to/llama-13b \
76
+ --target-model-path /path/to/output/vicuna-13b \
77
+ --delta-path lmsys/vicuna-13b-delta-v1.1
78
+ ```
79
+
80
+ #### Low CPU Memory Conversion
81
+ You can try these methods to reduce the CPU RAM requirement of weight conversion.
82
+ 1. Append `--low-cpu-mem` to the commands above, which will split large weight files into smaller ones and use the disk as temporary storage. This can keep the peak memory at less than 16GB.
83
+ 2. Create a large swap file and rely on the operating system to automatically utilize the disk as virtual memory.
84
+
85
+ ## FAQ
86
+
87
+ ### Tokenizer issues
88
+ There are some frequently asked tokenizer issues (https://github.com/lm-sys/FastChat/issues/408).
89
+ Some of them are not only related to FastChat or Vicuna weights but are also related to how you convert the base llama model.
90
+
91
+ We suggest that you use `transformers>=4.28.0` and redo the weight conversion for the base llama model.
92
+ After applying the delta, you should have a file named `special_tokens_map.json` in your converted weight folder for either v0 or v1.1.
93
+ The contents of this file should be the same as this file: https://huggingface.co/lmsys/vicuna-13b-delta-v0/blob/main/special_tokens_map.json.
94
+ If the file is not present, please copy the `special_tokens_map.json` and `tokenizer_config.json` files from https://huggingface.co/lmsys/vicuna-13b-delta-v0/tree/main to your converted weight folder. This works for both v0 and v1.1.
docs/vllm_integration.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # vLLM Integration
2
+ You can use [vLLM](https://vllm.ai/) as an optimized worker implementation in FastChat.
3
+ It offers advanced continuous batching and a much higher (~10x) throughput.
4
+ See the supported models [here](https://vllm.readthedocs.io/en/latest/models/supported_models.html).
5
+
6
+ ## Instructions
7
+ 1. Install vLLM.
8
+ ```
9
+ pip install vllm
10
+ ```
11
+
12
+ 2. When you launch a model worker, replace the normal worker (`fastchat.serve.model_worker`) with the vLLM worker (`fastchat.serve.vllm_worker`). All other commands such as controller, gradio web server, and OpenAI API server are kept the same.
13
+ ```
14
+ python3 -m fastchat.serve.vllm_worker --model-path lmsys/vicuna-7b-v1.3
15
+ ```
fastchat/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ __version__ = "0.2.18"
fastchat/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (172 Bytes). View file
 
fastchat/__pycache__/constants.cpython-311.pyc ADDED
Binary file (2.62 kB). View file
 
fastchat/__pycache__/conversation.cpython-311.pyc ADDED
Binary file (82.1 kB). View file
 
fastchat/__pycache__/utils.cpython-311.pyc ADDED
Binary file (14.8 kB). View file
 
fastchat/constants.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import IntEnum
2
+ import os
3
+
4
+ REPO_PATH = os.path.dirname(os.path.dirname(__file__))
5
+
6
+ ##### For the gradio web server
7
+ SERVER_ERROR_MSG = (
8
+ "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
9
+ )
10
+ MODERATION_MSG = "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE FIX YOUR INPUT AND TRY AGAIN."
11
+ CONVERSATION_LIMIT_MSG = "YOU HAVE REACHED THE CONVERSATION LENGTH LIMIT. PLEASE CLEAR HISTORY AND START A NEW CONVERSATION."
12
+ INACTIVE_MSG = "THIS SESSION HAS BEEN INACTIVE FOR TOO LONG. PLEASE REFRESH THIS PAGE."
13
+ # Maximum input length
14
+ INPUT_CHAR_LEN_LIMIT = int(os.getenv("FASTCHAT_INPUT_CHAR_LEN_LIMIT", 2560))
15
+ # Maximum conversation turns
16
+ CONVERSATION_TURN_LIMIT = 50
17
+ # Session expiration time
18
+ SESSION_EXPIRATION_TIME = 3600
19
+ # The output dir of log files
20
+ LOGDIR = "."
21
+
22
+
23
+ ##### For the controller and workers (can be overridden through environment variables)
24
+ CONTROLLER_HEART_BEAT_EXPIRATION = int(
25
+ os.getenv("FASTCHAT_CONTROLLER_HEART_BEAT_EXPIRATION", 90)
26
+ )
27
+ WORKER_HEART_BEAT_INTERVAL = int(os.getenv("FASTCHAT_WORKER_HEART_BEAT_INTERVAL", 45))
28
+ WORKER_API_TIMEOUT = int(os.getenv("FASTCHAT_WORKER_API_TIMEOUT", 100))
29
+ WORKER_API_EMBEDDING_BATCH_SIZE = int(
30
+ os.getenv("FASTCHAT_WORKER_API_EMBEDDING_BATCH_SIZE", 4)
31
+ )
32
+
33
+
34
+ class ErrorCode(IntEnum):
35
+ """
36
+ https://platform.openai.com/docs/guides/error-codes/api-errors
37
+ """
38
+
39
+ VALIDATION_TYPE_ERROR = 40001
40
+
41
+ INVALID_AUTH_KEY = 40101
42
+ INCORRECT_AUTH_KEY = 40102
43
+ NO_PERMISSION = 40103
44
+
45
+ INVALID_MODEL = 40301
46
+ PARAM_OUT_OF_RANGE = 40302
47
+ CONTEXT_OVERFLOW = 40303
48
+
49
+ RATE_LIMIT = 42901
50
+ QUOTA_EXCEEDED = 42902
51
+ ENGINE_OVERLOADED = 42903
52
+
53
+ INTERNAL_ERROR = 50001
54
+ CUDA_OUT_OF_MEMORY = 50002
55
+ GRADIO_REQUEST_ERROR = 50003
56
+ GRADIO_STREAM_UNKNOWN_ERROR = 50004
57
+ CONTROLLER_NO_WORKER = 50005
58
+ CONTROLLER_WORKER_TIMEOUT = 50006
fastchat/conversation.py ADDED
@@ -0,0 +1,876 @@
+ """
+ Conversation prompt templates.
+ """
+
+ import dataclasses
+ from enum import auto, Enum
+ from typing import List, Any, Dict
+ gpt_system_message = """
+ You are a helpful chatbot for enterprise internal use.
+ Below is a list of example questions and answers, formatted as {"User": an example question, "Assistant": the answer to the given question and context}.
+
+ Always cite the source given at the end of the answer. When a user asks a question related to Cala (our company name) but no answer is found, do not make one up; tell them that the knowledge has not been ingested yet. Do not make up facts.
+
+ If the user asks a general question unrelated to Cala Health or our therapy, you can answer it in normal ChatGPT mode.
+
+ {"User": "Contraindication for Cala Therapy Insulin pump", "Assistant": "Cala Trio has not been evaluated in patients with an insulin pump so the potential impact is unknown at this time. This rx cannot be filled per current company policy. Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Epilepsy", "Assistant": "Cala Trio has not been evaluated in patients with epilepsy so the potential impact is unknown at this time. This rx cannot be filled per current company policy. Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Active Deep Brain Stimulator", "Assistant": "Cala Trio has not been evaluated in patients with an active deep brain stimulator so the potential impact is unknown at this time. This rx cannot be filled per current company policy. Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Pacemaker/defibrillator", "Assistant": "Cala Trio has not been evaluated in patients with a pacemaker/defibrillator so the potential impact is unknown at this time. This rx cannot be filled per current company policy. Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Optune Device for brain cancer ", "Assistant": "Cala Trio has not been evaluated in patients with an Optune device so the potential impact is unknown at this time. This rx cannot be filled per current company policy. Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Inactive/Off Deep Brain Stimulator ", "Assistant": "Patient: Cala Trio has not been evaluated in patients with an inactive/off DBS system so the potential impact on the functionality of either device is unknown at this time. Please speak with your physician regarding whether treatment with the Cala Trio is right for you. \nHCP: Cala Trio has not been evaluated in patients with an inactive/off DBS system so the potential impact on the functionality of either device is unknown at this time. Please email us and confirm that the patient should receive this Rx.\nSource: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Active spinal cord stimulator", "Assistant": "Patient: Cala Trio has not been evaluated in patients with an active spinal cord stimulator so the potential impact on the functionality of either device is unknown at this time. Please speak with your physician regarding whether treatment with the Cala Trio is right for you. \nHCP: Cala Trio has not been evaluated in patients with an active spinal cord stimulator so the potential impact on the functionality of either device is unknown at this time. Please email us and confirm that the patient should receive this Rx.\nSource: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Inactive spinal cord stimulator ", "Assistant": "Patient: Cala Trio has not been evaluated in patients with an inactive spinal cord stimulator so the potential impact on the functionality of either device is unknown at this time. Please speak with your physician regarding whether treatment with the Cala Trio is right for you. \nHCP: Cala Trio has not been evaluated in patients with an inactive spinal cord stimulator so the potential impact on the functionality of either device is unknown at this time. Please email us and confirm that the patient should receive this Rx.\nSource: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Cochlear implant", "Assistant": "Patient: Cala Trio has not been evaluated in patients with a cochlear implant so the potential impact on the functionality of either device is unknown at this time. Please speak with your physician regarding whether treatment with the Cala Trio is right for you. \nHCP: Cala Trio has not been evaluated in patients with a cochlear implant so the potential impact on the functionality of either device is unknown at this time. Please email us and confirm that the patient should receive this Rx.\nSource: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy LINX (lower esophageal device for reflux)", "Assistant": "Patient: Cala Trio has not been evaluated in patients with a LINX system so the potential impact on the functionality of either device is unknown at this time. Please speak with your physician regarding whether treatment with the Cala Trio is right for you. \nHCP: Cala Trio has not been evaluated in patients with a LINX system so the potential impact on the functionality of either device is unknown at this time. Please email us and confirm that the patient should receive this Rx.\nSource: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Patients who are pregnant", "Assistant": "Cala Trio has not been tested in pregnant patients. This Rx cannot be filled per current company policy.Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Swollen, infected, inflamed areas or skin eruptions, open wounds or cancerous lesions", "Assistant": "Patient: Cala Trio use should be based on the nature and severity of the underlying skin disorder. Please speak with your physician regarding whether treatment with the Cala Trio is right for you. \nHCP: Cala Trio use should be based on the nature and severity of the underlying skin disorder. Please email us and confirm that the patient should receive this Rx.Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Warnings for Cala therapy Implanted cardiac loop recorder", "Assistant": "Cala Trio has not been evaluated in patients with an implanted cardiac loop recorder so the potential impact is unknown at this time. This rx cannot be filled per current company policy. Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Warnings for Cala therapy Neuropathy of treated hand", "Assistant": "Patient: Cala Trio has not been evaluated in patients with neuropathy of the treated hand. Please speak with your HCP as there may be risk of skin irritation and decreased efficacy of therapy. \nHCP: Cala Trio has not been evaluated in patients with neuropathy of the treated hand. There may be risk of skin irritation and decreased efficacy of therapy. Please send us an email to confirm that the patient should receive this Rx.Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Warnings for Cala therapy Metal plate or screws in the wrist", "Assistant": "Patient: Cala Trio has not been evaluated in patients with metal plates or screws in the treated wrist. Please speak with your HCP as there may be risk of thermal injury to your wrist.\nHCP: Cala Trio has not been evaluated in patients with metal plates or screws in the treated wrist. There may be risk of thermal injury to the wrist. Please send us an email to confirm that the patient should receive this Rx.Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Warnings for Cala therapy Continuous Glucose Monitor only", "Assistant": "Patient: Cala Trio has not been evaluated in patients with a CGM so the impact to either device is unknown. Please speak with your HCP. \nHCP: Cala Trio has not been evaluated in patients with a CGM so the impact to either device is unknown. Please email us and confirm that patient should receive this Rx. \nSource: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Metal heart stent or metal in hip/leg/shoulder (anywhere outside of treated wrist)", "Assistant": "Metal outside the area of the treated wrist is not contraindicated. Rx can be filled. Source: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Contraindication for Cala Therapy Transcranial magnetic stimulation", "Assistant": "Patient: Cala Trio has not been evaluated in patients with transcranial magnetic stimulation so the impact to either device is unknown. Please speak with your HCP. \nHCP: Cala Trio has not been evaluated in patients with transcranial magnetic stimulation so the impact to either device is unknown.\nSource: MKG-1567-(A)-Medical-FAQ-Responses-Approved-by-Legal.csv"}
+ {"User": "Managing Therapy for Cala Kiq If I use a higher stimulation intensity setting, will I see greater benefit?", "Assistant": "Higher intensity does not necessarily mean better efficacy. It is recommended to stimulate at an intensity that you can feel in your wrist, hand and/or fingers, without causing discomfort or muscle contraction.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq Why am I not being asked for a tremor task pre and post therapy anymore?", "Assistant": "After 40 therapy sessions, the system will only ask for pre and post tremor tasks every 7th session. Data can be presented only for sessions that have a valid pre and post-tremor task. To be prompted for a tremor task for each session instead of every 7th session, you will need to factory reset your device to restart the 40-session count.\n \n Please consult Customer Care team at 888-699-1009 or CustomerCare@CalaHealth.com to factory reset your device.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq The sensation level feels different day to day. Similarly, the location of where I feel the sensation can be different as well. What stimulation level and location should I feel for therapy to be effective?", "Assistant": "There are a few factors that may affect how the sensation feels such as location of the device, the amount of water applied to your wrist before putting the device on, and the stimulation level. Feeling different sensations from day to day is normal and expected. Because there will be some variation in the sensation during each session, you should use the sensations in your thumb, pointer, middle, and ring fingers as a guide for what stimulation level to use for each session. You should set the stimulation intensity at a level that feels perceptible in your hand and fingers but does not cause discomfort or muscle contraction.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq What should I do if the therapy is uncomfortable?", "Assistant": "Make sure you wet your wrist before starting a therapy session. You can also adjust the stimulation intensity as needed to maintain a comfortable and consistent sensation during sessions.\n \n During your therapy session:\n Step 1: To stop the increasing therapy intensity ramping up to your preset, press any button.\n Step 2: Press the UP button to increase the intensity.\n Step 3: Press the DOWN button to decrease the intensity.\n \n \n You can also reset your default intensity if you would prefer a different stimulation intensity for therapy:\n Step 1: From the time display, press the UP or DOWN buttons until you see \u201cINTENSITY SETTING\u201d. Press the MAIN button to select.\n Step 2: Press the DOWN button to highlight \u201cRESET\u201d. Then press the MAIN button.\n Step 3: Use the UP button to increase the therapy to an appropriate level.\n Step 4: Press the MAIN button to stop the therapy. Then press MAIN button again to save the intensity.\n \n You can also consult Customer Care team at 888-699- 1009 or CustomerCare@CalaHealth.com if your therapy is uncomfortable.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq How can I change the preset stimulation intensity to a more comfortable setting?", "Assistant": "You should change the stimulation intensity for therapy to a level that is comfortable for the 40 minute therapy session. To change the default stimulation intensity: \n \n Step 1: From the time display, press the UP or DOWN buttons until you see \u201cintensity setting\u201d. Press the MAIN button\n \n Step 2: Press the DOWN button to highlight \u2019reset.\u2019 Then press the MAIN button\n \n Step 3: Use the UP button to increase or DOWN button to decrease the therapy to an appropriate level\n \n Step 4: Press the MAIN button to stop the therapy. Then press the MAIN button again to save the intensity. You will now see the clockSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq I have tremor in both hands. Can I use my device for both hands?", "Assistant": "The Cala kIQ system can only be used on the hand that it was calibrated for. Because the tremor in your left hand is different from the tremor in your right hand, it is important to consistently use the device calibrated for the hand you are treating. You will not get effective therapy when you use the device on a hand it was not calibrated to treat. Please consult with your physician to determine the hand that will benefit the most from Cala kIQ therapy.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq Can I use two Cala kIQ devices to treat both hands simultaneously?", "Assistant": "The clinical trials evaluated Cala kIQ therapy in one hand only. It is unknown if simultaneous use on both hands will provide better, worse, or similar benefit. You should use Cala kIQ therapy for one hand at a time. \n \n With a novel technology, first-in-class therapy, Cala Health continues to study Cala kIQ therapy to better understand its use and efficacy and will share insight as we learn.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq How will I know if the Cala kIQ system is providing stimulation?", "Assistant": "You will know the Cala kIQ system is providing stimulation if you feel a tingling sensation in your wrist, hand, and/or fingers (thumb, pointer, middle, and ring fingers only) during each therapy session.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq Can I pause therapy during a session?", "Assistant": "No. Once therapy is stopped, it must be restarted. The countdown timer will restart at 40 minutes. You can stop therapy at any time. If you want to stop therapy during a session, press and hold the MAIN button until you see \"therapy stopped.\"Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Managing Therapy for Cala Kiq Will I have control over how long I do a therapy session? Or will it always be set to 40 minutes?", "Assistant": "The default length of a therapy session is 40 minutes. However, if you need to stop the therapy early, press and hold the MAIN button until you see \"therapy stopped.\"Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Troubleshooting the Cala kIQ System How do I recalibrate my device?", "Assistant": "To recalibrate your device, please follow the steps below:\n \n Step 1: From the time screen, press the UP and DOWN buttons simultaneously for three seconds to enter the Calibration Menu. You will see the option to \u201cRECALIBRATE\u201d.\n \n Step 2: Perform your prescribed tremor task and press the MAIN button to start the calibration. Continue your tremor task until \u201cDO TREMOR TASK\u201d disappears. Do this three times.\n \n Step 3: After calibration, press MAIN to save. If you do not want to save the calibration, press DOWN and MAIN to exit.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Troubleshooting the Cala kIQ System The Cala kIQ System sensation changes when I move my hand. Is that normal?", "Assistant": "Yes, this is normal. Hand movement causes the skin to move relative to the nerves, creating a change in sensation. To ensure proper therapy delivery, be sure that you can feel the sensations in your wrist, hand and/or fingers throughout the 40 minutes of therapy. You can adjust stimulation intensity as needed.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Troubleshooting the Cala kIQ System I don't feel therapy in my hand and I only feel it under the band, what should I do?", "Assistant": "If you do not feel stimulation in your wrist, hand, and/or fingers during a session, you can increase the intensity by pressing the UP button. If that doesn't help, stop therapy, remove the device from your wrist, dampen your wrist with more water and reposition the device.\n \n If you reposition the device, ensure that the double notches on the band are approximately aligned with the center of the inside of your wrist and that the single notch is in line with your thumb. Pull the end of the Cala kIQ band to tighten and then fasten the band securely and tightly.\n \n The band should be comfortable but snug enough so it does not slide along or around the wrist. The electrodes should be flush with the skin.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Troubleshooting the Cala kIQ System If I get skin irritation from therapy, what should I do?", "Assistant": "If you feel skin irritation, discontinue therapy until the skin irritation resolves. When you resume therapy, make sure you wet your wrist to prevent skin irritation. Additionally, consider reducing the intensity of stimulation.\n \n If your skin irritation persists, consult your prescribing physician and/or consult Customer Care team at 888- 699-1009 or CustomerCare@CalaHealth.com.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Troubleshooting the Cala kIQ System My band doesn\u2019t fit. It is too tight or too loose. What should I do?", "Assistant": "Pull the end of the Cala kIQ band to tighten\u2014fasten it securely and tightly. It should be snug enough so it does not slide along or around the wrist. If after tightening the band the electrodes are not flush to the skin, you may need a different band size. Simply reach out to our Customer Care team at 888-699-1009 or CustomerCare@CalaHealth.com for help.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Troubleshooting the Cala kIQ System Does temperature/ humidity affect the Cala kIQ system?", "Assistant": "The Cala kIQ system operates under following temperature and humidity parameters:\n \n Operating Parameters (Cala kIQ system):\n - Temperature Range: 5-40\u00b0C (41-104\u00b0F)\n - Relative Humidity Range: 15-90%\n - Atmospheric Pressure Range: 700-1060 hPa\n \n Transport and Storage Parameters (Cala kIQ system):\n - Temperature Range: -20-45\u00b0C (-4-113\u00b0F)\n - Relative Humidity Range: <= 90%, non-condensing\n - Atmospheric Pressure Range 700-1060 hPa\n \n Storage Parameters (Electrodes):\n - Temperature Range: 20-27\u00b0C (68-81\u00b0F)\n - Relative Humidity Range: <= 90%\n - Atmospheric Pressure Range: 700-1060 hPaSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using MyCala.com for Cala Kiq Why don't I see data in my Insights page?", "Assistant": "There are a few reasons why you may not see data on your Insights page:\n - You have not yet started therapy.\n - You have started therapy, but your sessions were not Complete sessions. A Complete session is one that meets certain standards, including sessions that are over five minutes, have minimal interference, and have valid pre and post-tremor tasks. Only Complete sessions will show in your Past 30 Sessions chart and will be included in the Median Tremor Improvement calculation.\n - You may not have docked your device on your base station. You must dock your device on your base station in order for your session information to be sent to your Insights page. It is recommended that you dock and charge your device (with band attached to stimulator) overnight.\n - Your base station is not able to connect to an available network. You will see a blinking white light on your base station if your base station is not able to connect to an available network to communicate to Cala.\n - Your device is not able to communicate with the base station. Please contact Cala Customer Care team at 888-699-1009 or CustomerCare@CalaHealth.com for help.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using MyCala.com for Cala Kiq How can I change my password to MyCala.com?", "Assistant": "To change the password on MyCala.com, please follow the steps below:\n \n Step 1: Click the avatar with the down arrow in the top right corner of MyCala.com\n \n Step2: Click \u2018Account Settings\u2019\n \n Step 3: Scroll to the bottom of this page\n \n Step 4: Enter your current password and your preferred new password (twice)\n \n Step 5: Click \u2018Confirm\u2019 in the window that pops up\n \n Step 6: Click \u2018Sign Out\u2019\n \n Sign in with your new passwordSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using MyCala.com for Cala Kiq How can I change my account details?", "Assistant": "To change your account details (like address and contact information), you will need to contact Customer Care team at 888-699-1009 or CustomerCare@CalaHealth.com.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using MyCala.com for Cala Kiq How do I download a report to show to my doctor?", "Assistant": "To download a report to show to your doctor, please follow the steps below:\n \n Step 1: Click \u2018Insights\u2019 in the top menu bar\n \n Step 2: Scroll to the bottom of the Insights page\n \n Step 3: Enter the dates for which you would like to run the report\n \n Step 4: Select the device for which you would like to run the report\n \n Step 5: Click \u2018View\u2019 to see the report\n \n Step 6: Click \u2018Export to PDF\u2019 to download the report\n \n You can either download and print the report and bring it to your doctor at your next visit.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using MyCala.com for Cala Kiq How do I know how many days are left on my band?", "Assistant": "You can view an estimate of the number of days left on your band on the homepage of MyCala.com.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using MyCala.com for Cala Kiq What is a complete therapy session?", "Assistant": "A complete session is one that meets certain standards, including: \n - sessions that are over five minutes\n - have minimal interference, and \n - have valid pre- and post-tremor tasks. \n \n Only complete sessions will show Tremor Improvements and be included in the Median Tremor Improvement calculation. It is important to do your prescribed tremor task when prompted by the device for an accurate calculation.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using MyCala.com for Cala Kiq What is \"% Median Improvement\"?", "Assistant": "The % Median Improvement is the midpoint of the values represented for your tremor percent improvement. Tremor percent improvement is calculated by the difference in your pre- and post-tremor task measurements. Tremor improvements will only be calculated for complete therapy sessions.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System When should I use the Cala kIQ system?", "Assistant": "You can use the Cala kIQ System whenever you like. You may consider doing a therapy session 40 minutes prior to doing any activity for which you want tremor relief. You may do very light activities (like eating or drinking) while the session is ongoing.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System What's the difference between my Cala kIQ account number and serial number?", "Assistant": "Your Cala kIQ account number is the number that identifies you individually to Cala Health. Your Cala kIQ serial number identifies your individual stimulator. These numbers may be useful to know while troubleshooting a problem with Cala Customer Care.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How do I use the Cala kIQ system?", "Assistant": "To start a session with the Cala kIQ System, please follow the steps below. Refer to your Patient Guide and/or follow the instructional videos on MyCala.com for more information.\n \n Step 1: Wet your wrist before a session to prevent uncomfortable therapy, skin irritation, and/or shock. For example, you can wet your wrist using a water bottle or by placing your wrist under running water. If there is any excess oil or lotion on your wrist, wash with soap and water and rinse well before wearing the Cala kIQ system.\n \n Step 2: Put on your calibrated Cala kIQ system. Press the MAIN button to start a therapy session from the time display. You will now see \u201cSTART SESSION\u201d\n \n Step 3: Press the MAIN button again to start a session.\n \n Step 4: Press the MAIN button to do your prescribed tremor task. If you would like to skip and are given the option, press the DOWN button and then MAIN to skip until the next session.\n \n Step 5: To complete your tremor task,\n Step 5a: Find your tremor task on your Prescription Information Card\n Step 5b: Get in a position to do your prescribed tremor task \n Step 5c: Press the MAIN button to start the measurement Perform tremor task until \u201cDO TREMOR TASK\u201d disappears (~20 seconds)\n \n Step 6: Press the MAIN button to start therapy after collecting your tremor task.\n \n Step 7: The 40-minute timer will begin the countdown.\n \n Step 8 (optional): You can adjust therapy intensity as needed to maintain a comfortable and consistent sensation during sessions. \n \n Step 9: Complete your post-tremor task and self-ratingSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How should I dispose of the band? Can I recycle the band?", "Assistant": "There are no special instructions to dispose of the band. The band is not recyclable. It does not contain a battery and can be disposed of as such.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How do I change the time or date on my Cala kIQ system?", "Assistant": "The time and date on the Cala kIQ system is automatically updated via the base station. The time will update based on the local time zone.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How long do I need to charge the Cala kIQ system?", "Assistant": "It is recommended that you place your stimulator with the band attached into the base station overnight to charge. At a low battery level, it takes 3 \u2013 4 hours to fully charge the Cala kIQ system.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System Does the Cala kIQ System measure my tremor?", "Assistant": "When you complete your pre- and post-tremor tasks, the Cala kIQ system measures your tremor using an accelerometer. By doing a tremor task before and after your therapy session, you\u2019ll be able to see if your tremor has improved after each session. You can view your tremor improvement score on the Insights page of MyCala.com.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System The Cala kIQ device vibrated on my wrist. What should I do?", "Assistant": "When your session stops or your tremor task is complete, the Cala kIQ system vibrates to indicate therapy has stopped or your tremor task is done. Follow the prompts on the Cala kIQ system.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System Can I wear a watch or other metal jewelry on my arm when using the Cala kIQ system?", "Assistant": "Do not wear any metallic items on the same wrist as the Cala kIQ system during therapy.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System Why does the Cala kIQ system display turn off?", "Assistant": "By design the Cala kIQ system is always on, but to conserve battery, the Cala kIQ system goes into sleep mode and fades to white if you are not actively pressing any buttons. Press any button to wake it up.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How do I know I\u2019ve placed the Cala kIQ system on my wrist correctly?", "Assistant": "When you put on your Cala kIQ system, ensure that the double notches on the band are approximately aligned with the center of the inside of your wrist and that the single notch is in line with your thumb. Pull the end of the Cala kIQ band to tighten and then fasten the band securely and tightly.\n \n - The band should be comfortable but snug enough so it does not slide along or around the wrist\n \n - The electrodes should be flush with the skin\n \n In a therapy session, you should feel a tingling sensation in your wrist, hand and/or fingers but not your pinky. If you aren\u2019t feeling this in any part of the wrist, hand or fingers, consider adjusting the band. If you feel it is only some part of the wrist, hand, and/or fingers, it\u2019s a good start and may be how therapy will work for you. It is important that you feel this tingling in some part of the wrist, hand, and/or fingers.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System Can I wear the Cala kIQ system all day?", "Assistant": "It is recommended that the Cala kIQ system be worn when using therapy.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How long does the battery last?", "Assistant": "When the battery is fully charged, it should last at least 5 therapy sessions depending on your stimulation intensity. When not using your device for therapy, leave the Cala kIQ system on the base station with the stimulator attached to the band.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How does the band attach to the stimulator?", "Assistant": "To attach the band to the stimulator, please follow the steps below:\n \n Step 1: To assemble the Cala kIQ system, hold the stimulator underneath the frame of the band\n \n Step 2: Position the flat edge of the stimulator with the embossed Cala logo on the band and press the stimulator into the band until the face of the stimulator is flush with the frame of the bandSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How do I charge the Cala kIQ system?", "Assistant": "To charge the Cala kIQ system, place the stimulator with the band attached into the base station. The band must be attached to the stimulator in order to charge. Ensure the stimulator is properly connected to the charging points on the base station.\n \n You will know that the system is charging when the device display screen shows the current battery level and the status light on the base station turns green.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System What are the details of my prescription for the Cala kIQ system?", "Assistant": "You can find your prescribed tremor task on your Prescription Information Card in the Cala kIQ box. You can find your prescribed band size on the Delivery Ticket in your Cala kIQ box.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How do I clean the Cala kIQ system?", "Assistant": "Cleaning the Cala kIQ band can help maintain a good connection between the band and your skin. To clean the Cala kIQ band, use a disinfecting wipe on the inside of the band to wipe the six rectangular, black electrodes. All other Cala kIQ components can also be cleaned by using a disinfecting wipe as often as once per week. When not using therapy, charge the Cala kIQ system overnight on the base station with the stimulator attached to the band.\n \n Please do not use baby wipes or sanitizer wipes to clean as they can damage the device.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System When do I have to replace my band?", "Assistant": "Leave the stimulator and band attached until you are prompted to replace the band. The Cala kIQ system will display \u201cREPLACE BAND\u201d when band replacement is required in order to maintain effective therapy. The band will last for 90 days from the date of activation.\n \n You can also see an estimate of how many band days you have remaining on the homepage of MyCala.com.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System Why does my Cala kIQ system show the incorrect time?", "Assistant": "The Cala KIQ systems syncs to local time when it is placed in the base station. Place the stimulator with the band attached into the base station to sync to local time. It is recommended that you place your stimulator with the band attached into the base station overnight.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System I just received my Cala kIQ system. How do I start using it?", "Assistant": "When you first receive your Cala kIQ system, you need to set it up. Follow the section on Setting Up the Cala kIQ system in the Patient Guide and/or view the videos on the Support page of MyCala.com.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System How do I charge the Cala kIQ system?", "Assistant": "To charge the Cala kIQ system, please follow the steps below:\n \n Step 1: Plug the base station into the wall outlet\n \n Step 2: Place the stimulator with the band attached into the base station so that the charging display appears and the green light on the base station turns onSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System Can I use my Cala kIQ while I am traveling internationally?", "Assistant": "You will need to use a travel voltage converter to charge the Cala kIQ system. The travel voltage converter must be rated to convert voltage to 110V. Using the power adapter at voltages outside 110V can damage the power adapter and the Cala kIQ Base Station. \n \n You can perform a therapy session, but your therapy data will not be updated on your MyCala.com while you are using the device outside the US. Your therapy session data is stored in the device and will get uploaded to MyCala.com when the Base Station establishes LTE connection. The LTE connection only works in the US.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System What does the red light mean on the charger or base station?", "Assistant": "The red light on the base station means that you need to replace the Cala kIQ band. If you did not receive a new band or if you have any questions, you can consult Customer Care team at 888-699-1009 or CustomerCare@CalaHealth.comSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System What if I do not put the band on in exactly the right place?", "Assistant": "If the device is not properly positioned and fastened, Cala kIQ will display an error when you try to start a therapy session.Source: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "Using the Cala kIQ System While calibrating the Cala kIQ system, I accidentally saved a measurement that I didn\u2019t want to save. Will it affect how my therapy works?", "Assistant": "Calibration happens over the course of three measurements taken while you perform the 'Tremor Task' prescribed by your physician. If you accidentally save a poor measurement (e.g. you were walking or talking during the calibration tremor task sessions), you can recalibrate your system. Please follow steps outlined in section \u201cHow do I recalibrate my device?\u201dSource: MM-00004(B)CalakIQSystemFAQs.csv"}
+ {"User": "How Does Cala Trio Work How does Cala Trio therapy work on my tremor?", "Assistant": "Cala Trio therapy delivers electrical stimulation to nerves in the wrist. These nerves project from the wrist to central brain networks that are responsible for generating hand tremor in essential tremor. Stimulation of the nerves in the wrist is thought to disrupt the network activity causing hand tremor and provide temporary and meaningful tremor reduction in your treated hand.\n\nSource: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Will Cala Trio therapy eliminate my essential tremor symptoms?", "Assistant": "Cala Trio provides temporary relief of hand tremors. In our clinical study, it delivered meaningful tremor improvement in 75% of patients after a single 40-minute therapy session. Cala Trio users have described the benefits of therapy as allowing greater ease and ability in their everyday activities. On average, patients demonstrated a 49% reduction in tremor amplitude in Activities of Daily Living like eating with a spoon and holding a full cup of water. However, some patients had as large as an 80% reduction. Use of the device is the best way to assess if Cala Trio is effective for you. (Reference: Pahwa, et al. An Acute Randomized Controlled Trial of Noninvasive Peripheral Nerve Stimulation in Essential Tremor, Neuromodulation 2019. <add url: https://doi.org/10.1111/ner.12930>)Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work What were the results of users who were on medications compared to users not on medications?", "Assistant": "We have limited data from our clinical study to assess this. Many patients in the study were also taking medication for their tremor, and it was difficult to assess the effect of the device compared to medication. Cala Health continues to study Cala Trio therapy to better understand its use and efficacy in these situations.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Can I use Cala Trio therapy more than once per day? Will that result in better or longer relief?", "Assistant": "In our clinical study of Cala Trio therapy (see the Patient Guide for details), participants used the device twice per day. To start, we recommend using Cala Trio in this way for two weeks to understand how therapy works for you and fits into your life. It is designed to provide at least five sessions when fully charged. With experience and input from your physician, you may find the frequency of use that works best for you. \n\nAs far as its benefit, therapy resulted in temporary short-term tremor reduction. With a novel technology, first-in-class therapy, Cala Health continues to study Cala Trio therapy to better understand its use and efficacy and will share insight as we learn.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work How long after a 40 minute therapy session does tremor reduction last? If I use Cala Trio regularly most days, will I still benefit on the day I don't use it?", "Assistant": "In our clinical studies, subjects had short-term tremor reduction that lasted for up to an hour and half on average after a single 40-minute stimulation session. With a novel technology, first-in-class therapy, Cala Health continues to study Cala Trio therapy to better understand its use and efficacy and will share insight as we learn. (Reference: Data on-file, publication pending)Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work How durable is Cala Trio?", "Assistant": "Cala Trio therapy is designed for everyday use. The stimulator and base station have an expected service life of 3 years. The band has an expected service life of 90 days. To support everyday use, the stimulator has a 2-year warranty, and the band has a 45-day warranty.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Will Cala Trio help reduce my essential tremor (ET) symptoms?", "Assistant": "In our clinical study, 75% of patients experienced temporary meaningful symptom improvement after a single 40-minute stimulation session. Individual patient results varied. The average patient demonstrated a 49% reduction in tremor amplitude in Activities of Daily Living like eating with a spoon and holding a full cup of water. However, some patients have greater than an 80% reduction. Use of the therapy is the best way to understand if Cala Trio is effective for any individual patient. (Reference: Pahwa, et al. An Acute Randomized Controlled Trial of Noninvasive Peripheral Nerve Stimulation in Essential Tremor, Neuromodulation 2019. <add url: https://doi.org/10.1111/ner.12930>)Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Can I use Cala Trio on my other hand?", "Assistant": "Cala Trio has specific bands for left and right wrists. With two (2) complete prescriptions from your physician, we can support you in using therapy on both hands.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Can I use Cala Trio on both hands at once?", "Assistant": "Treatment for both hands is available with two (2) complete prescriptions from your physician.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Which hand should I use Cala Trio therapy on?", "Assistant": "Consult with your physician and determine the hand where tremor reduction would help you the most. Cala Trio provides transient relief of hand tremors in the treated hand.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Is Cala Trio therapy painful? How does it feel?", "Assistant": "You will feel a tingling or pulsing sensation in your fingers. You will be able to decrease the intensity during a therapy session should you feel discomfort.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Does Cala Trio therapy help with hand tremor from Parkinson's disease or multiple sclerosis?", "Assistant": "Cala Trio therapy is only indicated to aid in the transient relief of hand tremors in the treated hand following stimulation in adults with essential tremor. Clinical trials have evaluated it in only this use. With a novel technology, first-in-class therapy, Cala Health continues to study Cala Trio therapy to better understand its use and efficacy and will share insight as we learn.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Can I use Cala Trio if I have heart conditions?", "Assistant": "Talk to your doctor. Cala Trio cannot be used if you have a pacemaker, implantable cardiac device, or other implanted electronic device. Please refer to the Safety Information in the Patient Guide for a complete list of warnings and precautions.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Can I use Cala Trio if I have a pacemaker?", "Assistant": "Cala Trio cannot be used if you have a pacemaker.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work I've had Deep Brain Stimulation surgery or some other similar implanted electrical device. Can I use Cala Trio?", "Assistant": "Do not use Cala Trio if you have another implanted electronic device. DBS is contraindicated because of the potential\u00a0risk\u00a0for interference between Trio and an implanted electrical stimulator. Please refer to the Safety Information in the Patient Guide for a complete list of warnings and precautions.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work Why do I need a prescription for Cala Trio therapy?", "Assistant": "Like prescription medications, we believe the decision to try Cala Trio, a novel technology, first-in-class therapy, should be made between a patient and physician.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "How Does Cala Trio Work What are the side effects?", "Assistant": "The following are possible minor/moderate risks or adverse reactions that you may experience with the use of Cala Trio:\n\u2022 Discomfort with stimulation (e.g. stinging, sensation of weakness, etc.)\n\u2022 Allergic reaction to electrodes or other materials\n\u2022 Skin irritation, including electrical stimulation burns, redness and/or itching\nIn the unlikely event that any of the following more significant issues happen, immediately stop using Cala Trio and contact your physician.\n\u2022 Signs of significant and persistent skin irritation, sores, electrical stimulation burns, or lesions at the site of stimulation\n\u2022 Significant and persistent increase in muscle tightness or stiffness\n\u2022 A feeling of chest pressure during stimulation\n\u2022 Swelling of your arm, wrist, or hand\n\nFor a full list of possible side effects, please see Adverse Reactions in the Patient Guide.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio What kind of doctor can prescribe this? Do I have to see a neurologist?", "Assistant": "Like prescription medications, we believe the decision to try Cala Trio, a novel technology, first-in-class therapy, should be made between a patient and physician. Whether\u00a0that is your primary care physician\u00a0or your neurologist that's up to you.\nWe have a\u00a0doctor discussion guide available on CalaTrio.com designed to assist your\u00a0conversation with your doctor about incorporating Cala Trio therapy into your treatment plan. It provides information about essential tremor, Cala Trio, and the Cala Trio prescription form.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio How do I measure my wrist to fit a Cala Trio band?", "Assistant": "If you need to measure your own wrist, a flexible measuring tape works. Simply wrap it around your wrist and note the cm mark where the tape meets the beginning of the measuring tape.\nAlternatively, a rigid ruler can be used. Place a piece of string or yarn around your wrist, then measure the string piece with the ruler.\nIf your essential tremor makes this challenging, ask a family or friend.\nIf you prefer, call Customer Success and we can send you a wrist measuring tool in the mail.\nThe long side of the prescription form has a ruler with centimeter markings. Size Reference: Small = 13.6-16.4 cm / Medium = 16.5-18.4 cm / Large = 18.5-20.4 cm.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio What is/are the best time(s)\u00a0for me to use Cala Trio?", "Assistant": "Therapy can be administered at any time during your day.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio How far in advance of activity should I use Cala Trio to ensure tremor reduction later?", "Assistant": "A stimulation session is 40 minutes. Begin therapy approximately 40 minutes before any activity when you desire temporary reduction of your tremor.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Can I do normal activities while using Cala Trio?", "Assistant": "During your 40-minute therapy session, most activities are fine to continue. Correct placement of the Cala Trio Band electrodes is essential to therapy success, so refrain from any activity that would cause the placement of the band to change.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio What activities can I do while stimulating/using Cala Trio?", "Assistant": "Most activities are fine to do while using Cala Trio. Do not use Cala Trio while sleeping, driving, bathing, operating machinery, and doing any activity in which possible involuntary muscle contractions due to therapy may cause undue risk of injury. Please refer to the labeling for a complete list of warnings, precautions, and contraindications.\n\nCorrect placement of the Cala Trio band is essential to therapy success, so refrain from any activity that would cause the placement of the band to change. If for any reason, you need to remove the band during stimulation, stop the therapy session by pressing and holding the MAIN button until you see \"therapy stopped\".Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio How should I prepare my skin prior to using the Cala Trio? Can I wear lotion?", "Assistant": "Dampen the entire circumference of your wrist with ample amounts of water before using Cala Trio. If there is any excess oil or lotion on your wrist, wash with soap and water and rinse well before wearing Cala Trio.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Why do I have to add water to my wrist before using Cala Trio?", "Assistant": "Water helps with the connection between your skin and the electrodes in the band. Without water you may experience discomfort or a warning display during therapy.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio How is Cala Trio customized to my tremor?", "Assistant": "During set up, Cala Trio is calibrated by having you perform your prescribed \"Tremor Task\" three times. This allows the device to characterize your tremor and individualize the stimulation. The accelerometers in the device measure your motion and determine the best pattern to deliver the stimulation.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Can I stop taking my medication(s) for essential tremor?", "Assistant": "In the clinical trials, subjects used Cala Trio while taking their medication for essential tremor. It is best to discuss your therapy options with your physician.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Can I take my prescription medication for essential tremor while using Cala Trio?", "Assistant": "Many patients in clinical studies we have conducted have continued to take medication for their tremor while using Cala Trio.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio What do the buttons on Cala Trio do?", "Assistant": "There are three buttons, MAIN, UP, and DOWN that control the main operation of the stimulator. The buttons are used to set up the device, calibrate, and start, stop, or adjust intensity during a therapy session. Please refer to the Patient Guide for full description of the button functionality.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Is Cala Trio waterproof?", "Assistant": "Cala Trio is splash proof, but not waterproof; you cannot swim or shower with it on.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Can I travel with my Cala Trio?", "Assistant": "You can travel with Cala Trio. Some people traveling with medical devices have found it helpful to have 1) a completed TSA notification card (link to: https://www.tsa.gov/sites/default/files/disability_notification_card_508.pdf) and 2) proof of your prescription therapy to present to a TSA officer to help with passenger screening.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio What do I need to go through airport security with my Cala Trio?", "Assistant": "The TSA changes procedures from time to time, so you could check on their website. (https://www.tsa.gov/travel/travel-tips/travel-checklist) \"Remove personal electronic devices larger than a cell phone from your carry-on bag and place them into a bin with nothing placed on or under them for X-ray screening. (E.g. laptops, tablets, e-readers and handheld game consoles.)\"\n\nAdditionally, some people traveling with medical devices have found it helpful to have 1) a completed TSA notification card (link to: https://www.tsa.gov/sites/default/files/disability_notification_card_508.pdf) and 2) proof of your prescription therapy to present to a TSA officer to help with passenger screening.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Is Cala Trio available outside of the US?", "Assistant": "At this time Cala Trio is only cleared for sale and available in the USA.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Can I loan Cala Trio to a friend?", "Assistant": "Cala Trio therapy is available by prescription for an individual. There are three aspects of the prescription that are unique to you. During calibration, Cala Trio learns about your tremor and personalizes therapy according to its characteristics. If a friend is interested in Cala Trio, share your experience, and encourage your friend to talk to their physician about Cala Trio.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Getting Started with Cala Trio Can the Veterans Health Administration (VHA) provide Cala Trio?", "Assistant": "Cala Trio can be available upon submission of a prescription by your Veterans Health Administration (VHA) health care provider. Ask the VHA to consider Cala Trio for you with these two steps:\n\nProvide your VA Health Care Provider with the Doctor Discussion Guide.\nAsk the VA Prosthetics Department to email CustomerSuccess@CalaTrio.com for ordering information.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Troubleshooting Cala Trio What if I am not getting results from Cala Trio therapy?", "Assistant": "We are here to support you! \n\nFirst off, help is right at your fingertips. Available 24 hours/day, we have a number of resources from the Patient Guide to product videos on CalaTrio.com. In the Patient Guide, review the Troubleshooting section to see details on how to address specific warning messages. On CalaTrio.com, you can find videos on Getting Started to lead you through setup and on Using Cala Trio to guide you through daily use of your therapy. \n\nShould you need further help, contact Cala Trio Customer Success at 888-699-1009 and Customer Success@CalaTrio.com. We are available from Monday to Friday, 7am-4pm Pacific Time.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Troubleshooting Cala Trio What support will Cala Health provide to help me with Cala Trio therapy?", "Assistant": "Cala Trio Customer Success provides support, direct to you! We will contact you when your device ships to see if you would like assistance with set up and calibration. Additionally, we provide a variety of print materials from patient guides to quick start guides as well as online resources from videos to frequently asked questions to help you with any aspect of Cala Trio therapy. Also, feel free to contact Cala Trio Customer Success via email at Customer Success@CalaTrio.com or call 888-699-1009 Monday to Friday, 7am to 4pm Pacific if you have any questions.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Pricing and Reimbursement for Cala Trio What is the 60-Day Evaluation Program?", "Assistant": "All payment options for Cala Trio therapy come with our 60-Day Evaluation Program. You can start using Cala Trio to see how personalized, on-demand therapy reduces your hand tremor. If you are not completely satisfied, simply return it within 60 days and we will refund your purchase price minus a $99 fee. Shipping is free.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
+ {"User": "Pricing and Reimbursement for Cala Trio How do I pay for Cala Trio?", "Assistant": "It depends. We offer different payment options to accommodate patients\u2019 financial situations. You can purchase the stimulator outright or you can spread payments over 12 months on a payment plan. All payment plans require a valid credit card on file in a secure payment system.\n\nTo see if you qualify for our special financing option please give our Customer Success team a call at (888) 699-1009 or email us at CustomerSuccess@CalaTrio.comSource: Trio Patient FAQs Answers - August 2021.xlsx"}
121
+ {"User": "Pricing and Reimbursement for Cala Trio Why do I need a band subscription?", "Assistant": "Cala Trio band uses a proprietary skin interface, improving the experience of other sticky hydrogel electrodes. The band can be used for months before needing to be replaced. The performance of the band deteriorates with exposure to dry skin, skins oils, and dust. After 3 months, a new band is required in order to maintain effective therapy.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
122
+ {"User": "Pricing and Reimbursement for Cala Trio Do you accept insurance?", "Assistant": "Cala Trio is a novel technology, first-in-class therapy. It is not currently covered by Medicare or private insurance. Insurance coverage for Cala Trio will take at least one year. We are working hard to keep that time frame as short as possible. We offer a number of payment options to accommodate patients\u2019 financial situations. Cala Trio is a qualified medical expense for health savings accounts (HSA/FSA).\n\nPlease call us at (888) 699-1009 to learn about financing options and special pricing for qualified patients.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
123
+ {"User": "Pricing and Reimbursement for Cala Trio Will my insurance company reimburse my cash purchase?", "Assistant": "Cala Trio is eligible as a qualified medical expense for health savings and flexible spending accounts. If you have an HSA or FSA, you can use pre-tax dollars to pay for it. Check with your individual plan to understand eligibility.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
124
+ {"User": "Pricing and Reimbursement for Cala Trio Is it covered by Medicare? Will Medicare pay for this?", "Assistant": "Cala Trio is a novel technology, first-in-class therapy. It is not currently covered by Medicare or private insurance. Insurance coverage for Cala Trio will take at least one year. We are working hard to keep that time frame as short as possible. We offer a number of payment options to accommodate patients\u2019 financial situations. Cala Trio is a qualified medical expense for health savings accounts (HSA/FSA). Medicare will pay for medical equipment and supplies only if a supplier has a Medicare supplier number. We do not have a Medicare supplier number, therefore Medicare will not pay for any medical equipment and supplies we sell or rent to you. You will be personally and fully responsible for payment.\n\nPlease call us at (888) 699-1009 to learn about financing options and special pricing for qualified patients.Source: Trio Patient FAQs Answers - August 2021.xlsx"}
125
+ {"User": "Pricing and Reimbursement for Cala Trio Will my VA benefits pay for this?", "Assistant": "Some VA facilities are able to purchase the Cala Trio through the local prosthetics department. Ask your Veterans Administration Medical Facility to consider Cala Trio therapy for you with these two steps: 1) Provide your VA Healthcare Provider with the Doctor Discussion Guide, and 2) Ask the Prosthetics Department to email CustomerSuccess@CalaTrio.com for Ordering Information.\n\n\n\nSource: Trio Patient FAQs Answers - August 2021.xlsx"}
126
+ {"User": "Pricing and Reimbursement for Cala Trio Can I return Cala Trio therapy after I have used it?", "Assistant": "If you are not completely satisfied, simply return it within 60 days and we will refund your purchase price minus a $99 Evaluation Program fee. Shipping is free.\nSource: Trio Patient FAQs Answers - August 2021.xlsx"}
127
+ {"User": "Pricing and Reimbursement for Cala Trio What is the warranty for Cala Trio therapy?", "Assistant": "There is a 2-year warranty on the device and base station. There is a 45-day warranty on the bands. Patients may connect with Cala Trio Customer Success at 888-699-1009 to assist with product support and replacement as needed.\nSource: Trio Patient FAQs Answers - August 2021.xlsx"}
128
+ {"User": "Pricing and Reimbursement for Cala Trio Is my credit card data secure with Cala Health?", "Assistant": "Your credit card information is stored in a secure environment and payments are compliant with the Payment Card Industry Data Security Standards (PCI_DSS).\nSource: Trio Patient FAQs Answers - August 2021.xlsx"}
129
+ {"User": "Pricing and Reimbursement for Cala Trio Does Cala Health store my credit card information?", "Assistant": "Cala Health has your payment information for the band subscription. Every three months, you will receive a 3-month band supply. Cala will send you an email letting you know when to expect the charge and shipment. Your credit card information is stored in a secure environment and payments are compliant with the Payment Card Industry Data Security Standards (PCI_DSS).Source: Trio Patient FAQs Answers - August 2021.xlsx"}
130
+
131
+ """
132
+
133
+ class SeparatorStyle(Enum):
134
+ """Separator styles."""
135
+
136
+ ADD_COLON_SINGLE = auto()
137
+ ADD_COLON_TWO = auto()
138
+ ADD_COLON_SPACE_SINGLE = auto()
139
+ NO_COLON_SINGLE = auto()
140
+ ADD_NEW_LINE_SINGLE = auto()
141
+ CHATGLM = auto()
142
+ CHATML = auto()
143
+ DOLLY = auto()
144
+ RWKV = auto()
145
+ PHOENIX = auto()
146
+ ROBIN = auto()
147
+
148
+
149
+ @dataclasses.dataclass
150
+ class Conversation:
151
+ """A class that manages prompt templates and keeps all conversation history."""
152
+
153
+ # The name of this template
154
+ name: str
155
+ # The system prompt
156
+ system: str
157
+ # Two roles
158
+ roles: List[str]
159
+ # All messages. Each item is (role, message).
160
+ messages: List[List[str]]
161
+ # The number of few shot examples
162
+ offset: int
163
+ # Separators
164
+ sep_style: SeparatorStyle
165
+ sep: str
166
+ sep2: str = None
167
+ # Stop criteria (the default one is EOS token)
168
+ stop_str: str = None
169
+ # Stops generation if meeting any token in this list
170
+ stop_token_ids: List[int] = None
171
+
172
+ def get_prompt(self) -> str:
173
+ """Get the prompt for generation."""
174
+ if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE:
175
+ ret = self.system + self.sep
176
+ for role, message in self.messages:
177
+ if message:
178
+ ret += role + ": " + message + self.sep
179
+ else:
180
+ ret += role + ":"
181
+ return ret
182
+ elif self.sep_style == SeparatorStyle.ADD_COLON_TWO:
183
+ seps = [self.sep, self.sep2]
184
+ ret = self.system + seps[0]
185
+ for i, (role, message) in enumerate(self.messages):
186
+ if message:
187
+ ret += role + ": " + message + seps[i % 2]
188
+ else:
189
+ ret += role + ":"
190
+ return ret
191
+ elif self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE:
192
+ ret = self.system + self.sep
193
+ for role, message in self.messages:
194
+ if message:
195
+ ret += role + ": " + message + self.sep
196
+ else:
197
+ ret += role + ": " # must be end with a space
198
+ return ret
199
+ elif self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE:
200
+ ret = "" if self.system == "" else self.system + self.sep
201
+ for role, message in self.messages:
202
+ if message:
203
+ ret += role + "\n" + message + self.sep
204
+ else:
205
+ ret += role + "\n"
206
+ return ret
207
+ elif self.sep_style == SeparatorStyle.NO_COLON_SINGLE:
208
+ ret = self.system
209
+ for role, message in self.messages:
210
+ if message:
211
+ ret += role + message + self.sep
212
+ else:
213
+ ret += role
214
+ return ret
215
+ elif self.sep_style == SeparatorStyle.RWKV:
216
+ ret = self.system
217
+ for i, (role, message) in enumerate(self.messages):
218
+ if message:
219
+ ret += (
220
+ role
221
+ + ": "
222
+ + message.replace("\r\n", "\n").replace("\n\n", "\n")
223
+ )
224
+ ret += "\n\n"
225
+ else:
226
+ ret += role + ":"
227
+ return ret
228
+ elif self.sep_style == SeparatorStyle.CHATGLM:
229
+ # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
230
+ # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
231
+ round_add_n = 1 if self.name == "chatglm2" else 0
232
+ if self.system:
233
+ ret = self.system + self.sep
234
+ else:
235
+ ret = ""
236
+
237
+ for i, (role, message) in enumerate(self.messages):
238
+ if i % 2 == 0:
239
+ ret += f"[Round {i//2 + round_add_n}]{self.sep}"
240
+
241
+ if message:
242
+ ret += f"{role}:{message}{self.sep}"
243
+ else:
244
+ ret += f"{role}:"
245
+ return ret
246
+ elif self.sep_style == SeparatorStyle.CHATML:
247
+ ret = "" if self.system == "" else self.system + self.sep + "\n"
248
+ for role, message in self.messages:
249
+ if message:
250
+ ret += role + "\n" + message + self.sep + "\n"
251
+ else:
252
+ ret += role + "\n"
253
+ return ret
254
+ elif self.sep_style == SeparatorStyle.DOLLY:
255
+ seps = [self.sep, self.sep2]
256
+ ret = self.system
257
+ for i, (role, message) in enumerate(self.messages):
258
+ if message:
259
+ ret += role + ":\n" + message + seps[i % 2]
260
+ if i % 2 == 1:
261
+ ret += "\n\n"
262
+ else:
263
+ ret += role + ":\n"
264
+ return ret
265
+ elif self.sep_style == SeparatorStyle.PHOENIX:
266
+ ret = self.system
267
+ for role, message in self.messages:
268
+ if message:
269
+ ret += role + ": " + "<s>" + message + "</s>"
270
+ else:
271
+ ret += role + ": " + "<s>"
272
+ return ret
273
+ elif self.sep_style == SeparatorStyle.ROBIN:
274
+ ret = self.system + self.sep
275
+ for role, message in self.messages:
276
+ if message:
277
+ ret += role + ":\n" + message + self.sep
278
+ else:
279
+ ret += role + ":\n"
280
+ return ret
281
+ else:
282
+ raise ValueError(f"Invalid style: {self.sep_style}")
283
+
284
+ def append_message(self, role: str, message: str):
285
+ """Append a new message."""
286
+ self.messages.append([role, message])
287
+
288
+ def update_last_message(self, message: str):
289
+ """Update the last output.
290
+
291
+ The last message is typically set to be None when constructing the prompt,
292
+ so we need to update it in-place after getting the response from a model.
293
+ """
294
+ self.messages[-1][1] = message
295
+
296
+ def to_gradio_chatbot(self):
297
+ """Convert the conversation to gradio chatbot format."""
298
+ ret = []
299
+ for i, (role, msg) in enumerate(self.messages[self.offset :]):
300
+ if i % 2 == 0:
301
+ ret.append([msg, None])
302
+ else:
303
+ ret[-1][-1] = msg
304
+ return ret
305
+
306
+ def to_openai_api_messages(self):
307
+ """Convert the conversation to OpenAI chat completion format."""
308
+ ret = [{"role": "system", "content": self.system}]
309
+
310
+ for i, (_, msg) in enumerate(self.messages[self.offset :]):
311
+ if i % 2 == 0:
312
+ ret.append({"role": "user", "content": msg})
313
+ else:
314
+ if msg is not None:
315
+ ret.append({"role": "assistant", "content": msg})
316
+ return ret
317
+
318
+ def copy(self):
319
+ return Conversation(
320
+ name=self.name,
321
+ system=self.system,
322
+ roles=self.roles,
323
+ messages=[[x, y] for x, y in self.messages],
324
+ offset=self.offset,
325
+ sep_style=self.sep_style,
326
+ sep=self.sep,
327
+ sep2=self.sep2,
328
+ stop_str=self.stop_str,
329
+ stop_token_ids=self.stop_token_ids,
330
+ )
331
+
332
+ def dict(self):
333
+ return {
334
+ "template_name": self.name,
335
+ "system": self.system,
336
+ "roles": self.roles,
337
+ "messages": self.messages,
338
+ "offset": self.offset,
339
+ }
340
+
341
+
342
+ # A global registry for all conversation templates
343
+ conv_templates: Dict[str, Conversation] = {}
344
+
345
+
346
+ def register_conv_template(template: Conversation, override: bool = False):
347
+ """Register a new conversation template."""
348
+ if not override:
349
+ assert (
350
+ template.name not in conv_templates
351
+ ), f"{template.name} has been registered."
352
+
353
+ conv_templates[template.name] = template
354
+
355
+
356
+ def get_conv_template(name: str) -> Conversation:
357
+ """Get a conversation template."""
358
+ return conv_templates[name].copy()
359
+
360
+
361
+ # A template with a one-shot conversation example
362
+ register_conv_template(
363
+ Conversation(
364
+ name="one_shot",
365
+ system="A chat between a curious human and an artificial intelligence assistant. "
366
+ "The assistant gives helpful, detailed, and polite answers to the human's questions.",
367
+ roles=("Human", "Assistant"),
368
+ messages=(
369
+ (
370
+ "Human",
371
+ "Got any creative ideas for a 10 year old’s birthday?",
372
+ ),
373
+ (
374
+ "Assistant",
375
+ """Of course! Here are some creative ideas for a 10-year-old's birthday party:
376
+ 1. Treasure Hunt: Organize a treasure hunt in your backyard or nearby park. Create clues and riddles for the kids to solve, leading them to hidden treasures and surprises.
377
+ 2. Science Party: Plan a science-themed party where kids can engage in fun and interactive experiments. You can set up different stations with activities like making slime, erupting volcanoes, or creating simple chemical reactions.
378
+ 3. Outdoor Movie Night: Set up a backyard movie night with a projector and a large screen or white sheet. Create a cozy seating area with blankets and pillows, and serve popcorn and snacks while the kids enjoy a favorite movie under the stars.
379
+ 4. DIY Crafts Party: Arrange a craft party where kids can unleash their creativity. Provide a variety of craft supplies like beads, paints, and fabrics, and let them create their own unique masterpieces to take home as party favors.
380
+ 5. Sports Olympics: Host a mini Olympics event with various sports and games. Set up different stations for activities like sack races, relay races, basketball shooting, and obstacle courses. Give out medals or certificates to the participants.
381
+ 6. Cooking Party: Have a cooking-themed party where the kids can prepare their own mini pizzas, cupcakes, or cookies. Provide toppings, frosting, and decorating supplies, and let them get hands-on in the kitchen.
382
+ 7. Superhero Training Camp: Create a superhero-themed party where the kids can engage in fun training activities. Set up an obstacle course, have them design their own superhero capes or masks, and organize superhero-themed games and challenges.
383
+ 8. Outdoor Adventure: Plan an outdoor adventure party at a local park or nature reserve. Arrange activities like hiking, nature scavenger hunts, or a picnic with games. Encourage exploration and appreciation for the outdoors.
384
+ Remember to tailor the activities to the birthday child's interests and preferences. Have a great celebration!""",
385
+ ),
386
+ ),
387
+ offset=2,
388
+ sep_style=SeparatorStyle.ADD_COLON_SINGLE,
389
+ sep="\n### ",
390
+ stop_str="###",
391
+ )
392
+ )
393
+
394
+ # A template similar to the "one_shot" template above, but with the example removed.
395
+ register_conv_template(
396
+ Conversation(
397
+ name="zero_shot",
398
+ system="A chat between a curious human and an artificial intelligence assistant. "
399
+ "The assistant gives helpful, detailed, and polite answers to the human's questions.",
400
+ roles=("Human", "Assistant"),
401
+ messages=(),
402
+ offset=0,
403
+ sep_style=SeparatorStyle.ADD_COLON_SINGLE,
404
+ sep="\n### ",
405
+ stop_str="###",
406
+ )
407
+ )
408
+
409
+ # Vicuna v1.1 template
410
+ register_conv_template(
411
+ Conversation(
412
+ name="vicuna_v1.1",
413
+ system="A chat between a curious user and an artificial intelligence assistant. "
414
+ "The assistant gives helpful, detailed, and polite answers to the user's questions.",
415
+ roles=("USER", "ASSISTANT"),
416
+ messages=(),
417
+ offset=0,
418
+ sep_style=SeparatorStyle.ADD_COLON_TWO,
419
+ sep=" ",
420
+ sep2="</s>",
421
+ )
422
+ )
423
+
424
+ # Koala default template
425
+ register_conv_template(
426
+ Conversation(
427
+ name="koala_v1",
428
+ system="BEGINNING OF CONVERSATION:",
429
+ roles=("USER", "GPT"),
430
+ messages=(),
431
+ offset=0,
432
+ sep_style=SeparatorStyle.ADD_COLON_TWO,
433
+ sep=" ",
434
+ sep2="</s>",
435
+ )
436
+ )
437
+
438
+ # Alpaca default template
439
+ register_conv_template(
440
+ Conversation(
441
+ name="alpaca",
442
+ system="Below is an instruction that describes a task. Write a response that appropriately completes the request.",
443
+ roles=("### Instruction", "### Response"),
444
+ messages=(),
445
+ offset=0,
446
+ sep_style=SeparatorStyle.ADD_COLON_TWO,
447
+ sep="\n\n",
448
+ sep2="</s>",
449
+ )
450
+ )
451
+
452
+ # ChatGLM default template
453
+ register_conv_template(
454
+ Conversation(
455
+ name="chatglm",
456
+ system="",
457
+ roles=("问", "答"),
458
+ messages=(),
459
+ offset=0,
460
+ sep_style=SeparatorStyle.CHATGLM,
461
+ sep="\n",
462
+ )
463
+ )
464
+
465
+ # ChatGLM2 default template
466
+ register_conv_template(
467
+ Conversation(
468
+ name="chatglm2",
469
+ system="",
470
+ roles=("问", "答"),
471
+ messages=(),
472
+ offset=0,
473
+ sep_style=SeparatorStyle.CHATGLM,
474
+ sep="\n\n",
475
+ )
476
+ )
477
+
478
+ # Dolly V2 default template
479
+ register_conv_template(
480
+ Conversation(
481
+ name="dolly_v2",
482
+ system="Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n",
483
+ roles=("### Instruction", "### Response"),
484
+ messages=(),
485
+ offset=0,
486
+ sep_style=SeparatorStyle.DOLLY,
487
+ sep="\n\n",
488
+ sep2="### End",
489
+ )
490
+ )
491
+
492
+ # OpenAssistant Pythia default template
493
+ register_conv_template(
494
+ Conversation(
495
+ name="oasst_pythia",
496
+ system="",
497
+ roles=("<|prompter|>", "<|assistant|>"),
498
+ messages=(),
499
+ offset=0,
500
+ sep_style=SeparatorStyle.NO_COLON_SINGLE,
501
+ sep="<|endoftext|>",
502
+ )
503
+ )
504
+
505
+ # OpenAssistant default template
506
+ register_conv_template(
507
+ Conversation(
508
+ name="oasst_llama",
509
+ system="",
510
+ roles=("<|prompter|>", "<|assistant|>"),
511
+ messages=(),
512
+ offset=0,
513
+ sep_style=SeparatorStyle.NO_COLON_SINGLE,
514
+ sep="</s>",
515
+ )
516
+ )
517
+
518
+ # Tulu default template
519
+ register_conv_template(
520
+ Conversation(
521
+ name="tulu",
522
+ system="",
523
+ roles=("<|user|>", "<|assistant|>"),
524
+ messages=(),
525
+ offset=0,
526
+ sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
527
+ sep="\n",
528
+ )
529
+ )
530
+
531
+ # StableLM Alpha default template
532
+ register_conv_template(
533
+ Conversation(
534
+ name="stablelm",
535
+ system="""<|SYSTEM|># StableLM Tuned (Alpha version)
536
+ - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
537
+ - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
538
+ - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
539
+ - StableLM will refuse to participate in anything that could harm a human.
540
+ """,
541
+ roles=("<|USER|>", "<|ASSISTANT|>"),
542
+ messages=(),
543
+ offset=0,
544
+ sep_style=SeparatorStyle.NO_COLON_SINGLE,
545
+ sep="",
546
+ stop_token_ids=[50278, 50279, 50277, 1, 0],
547
+ )
548
+ )
549
+
550
+ # Baize default template
551
+ register_conv_template(
552
+ Conversation(
553
+ name="baize",
554
+ system="The following is a conversation between a human and an AI assistant named Baize (named after a mythical creature in Chinese folklore). Baize is an open-source AI assistant developed by UCSD and Sun Yat-Sen University. The human and the AI assistant take turns chatting. Human statements start with [|Human|] and AI assistant statements start with [|AI|]. The AI assistant always provides responses in as much detail as possible, and in Markdown format. The AI assistant always declines to engage with topics, questions and instructions related to unethical, controversial, or sensitive issues. Complete the transcript in exactly that format.\n",
555
+ roles=("[|Human|]", "[|AI|]"),
556
+ messages=(
557
+ ("[|Human|]", "Hello!"),
558
+ ("[|AI|]", "Hi!"),
559
+ ),
560
+ offset=2,
561
+ sep_style=SeparatorStyle.NO_COLON_SINGLE,
562
+ sep="\n",
563
+ stop_str="[|Human|]",
564
+ )
565
+ )
566
+
567
+ # RWKV-4-Raven default template
568
+ register_conv_template(
569
+ Conversation(
570
+ name="rwkv",
571
+ system="",
572
+ roles=("Bob", "Alice"),
573
+ messages=(
574
+ ("Bob", "hi"),
575
+ (
576
+ "Alice",
577
+ "Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.",
578
+ ),
579
+ ),
580
+ offset=2,
581
+ sep_style=SeparatorStyle.RWKV,
582
+ sep="",
583
+ stop_str="\n\n",
584
+ )
585
+ )
586
+
587
+ # Buddy default template
588
+ register_conv_template(
589
+ Conversation(
590
+ name="openbuddy",
591
+ system="""Consider a conversation between User (a human) and Assistant (named Buddy).
592
+ Buddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team. GitHub: https://github.com/OpenBuddy/OpenBuddy
593
+ Buddy cannot access the Internet.
594
+ Buddy can fluently speak the user's language (e.g. English, Chinese).
595
+ Buddy can generate poems, stories, code, essays, songs, parodies, and more.
596
+ Buddy possesses vast knowledge about the world, history, and culture.
597
+ Buddy's responses are always safe, creative, high-quality, human-like, and interesting.
598
+ Buddy strictly refuses to discuss political, NSFW, or other unsafe topics.
599
+
600
+ User: Hi.
601
+ Assistant: Hi, I'm Buddy, your AI assistant. How can I help you today?""",
602
+ roles=("User", "Assistant"),
603
+ messages=(),
604
+ offset=0,
605
+ sep_style=SeparatorStyle.ADD_COLON_SINGLE,
606
+ sep="\n",
607
+ )
608
+ )
609
+
610
+ # Phoenix default template
611
+ register_conv_template(
612
+ Conversation(
613
+ name="phoenix",
614
+ system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
615
+ roles=("Human", "Assistant"),
616
+ messages=(),
617
+ offset=0,
618
+ sep_style=SeparatorStyle.PHOENIX,
619
+ sep="</s>",
620
+ )
621
+ )
622
+
623
+ # ChatGPT default template
624
+ register_conv_template(
625
+ Conversation(
626
+ name="chatgpt",
627
+ system=gpt_system_message,
628
+ roles=("user", "assistant"),
629
+ messages=(),
630
+ offset=0,
631
+ sep_style=None,
632
+ sep=None,
633
+ )
634
+ )
635
+
636
+ # Claude default template
637
+ register_conv_template(
638
+ Conversation(
639
+ name="claude",
640
+ system="",
641
+ roles=("Human", "Assistant"),
642
+ messages=(),
643
+ offset=0,
644
+ sep_style=SeparatorStyle.ADD_COLON_SINGLE,
645
+ sep="\n\n",
646
+ )
647
+ )
648
+
649
+ # MPT default template
650
+ register_conv_template(
651
+ Conversation(
652
+ name="mpt-7b-chat",
653
+ system="""<|im_start|>system
654
+ - You are a helpful assistant chatbot trained by MosaicML.
655
+ - You answer questions.
656
+ - You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
657
+ - You are more than just an information source, you are also able to write poetry, short stories, and make jokes.""",
658
+ roles=("<|im_start|>user", "<|im_start|>assistant"),
659
+ messages=(),
660
+ offset=0,
661
+ sep_style=SeparatorStyle.CHATML,
662
+ sep="<|im_end|>",
663
+ stop_token_ids=[50278, 0],
664
+ )
665
+ )
666
+
667
+ # MPT-30b-chat default template
668
+ register_conv_template(
669
+ Conversation(
670
+ name="mpt-30b-chat",
671
+ system="""<|im_start|>system
672
+ A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.""",
673
+ roles=("<|im_start|>user", "<|im_start|>assistant"),
674
+ messages=(),
675
+ offset=0,
676
+ sep_style=SeparatorStyle.CHATML,
677
+ sep="<|im_end|>",
678
+ stop_token_ids=[50278, 0],
679
+ )
680
+ )
681
+
682
+ # MPT-30b-instruct default template
683
+ # reference: https://huggingface.co/mosaicml/mpt-30b-instruct#formatting
684
+ register_conv_template(
685
+ Conversation(
686
+ name="mpt-30b-instruct",
687
+ system="Below is an instruction that describes a task. Write a response that appropriately completes the request.",
688
+ roles=("### Instruction", "### Response"),
689
+ messages=(),
690
+ offset=0,
691
+ sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
692
+ sep="\n\n",
693
+ stop_token_ids=[50278, 0],
694
+ )
695
+ )
696
+
697
+ # Bard default template
698
+ # Reference: https://github.com/google/generative-ai-python/blob/9c99bcb474a991a97a2e7d62fcdb52db7ce40729/google/generativeai/discuss.py#L150
699
+ # https://github.com/google/generative-ai-python/blob/9c99bcb474a991a97a2e7d62fcdb52db7ce40729/google/generativeai/discuss.py#L40
700
+ register_conv_template(
701
+ Conversation(
702
+ name="bard",
703
+ system="",
704
+ roles=("0", "1"),
705
+ messages=(),
706
+ offset=0,
707
+ sep_style=None,
708
+ sep=None,
709
+ )
710
+ )
711
+
712
+ # BiLLa default template
713
+ register_conv_template(
714
+ Conversation(
715
+ name="billa",
716
+ system="",
717
+ roles=("Human", "Assistant"),
718
+ messages=(),
719
+ offset=0,
720
+ sep_style=SeparatorStyle.ADD_COLON_SPACE_SINGLE,
721
+ sep="\n",
722
+ stop_str="Human:",
723
+ )
724
+ )
725
+
726
+ # RedPajama INCITE default template
727
+ register_conv_template(
728
+ Conversation(
729
+ name="redpajama-incite",
730
+ system="",
731
+ roles=("<human>", "<bot>"),
732
+ messages=(),
733
+ offset=0,
734
+ sep_style=SeparatorStyle.ADD_COLON_SINGLE,
735
+ sep="\n",
736
+ stop_str="<human>",
737
+ )
738
+ )
739
+
740
+ # h2oGPT default template
741
+ register_conv_template(
742
+ Conversation(
743
+ name="h2ogpt",
744
+ system="",
745
+ roles=("<|prompt|>", "<|answer|>"),
746
+ messages=(),
747
+ offset=0,
748
+ sep_style=SeparatorStyle.NO_COLON_SINGLE,
749
+ sep="</s>",
750
+ )
751
+ )
752
+
753
+ # Robin default template
754
+ register_conv_template(
755
+ Conversation(
756
+ name="Robin",
757
+ system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.",
758
+ roles=("###Human", "###Assistant"),
759
+ messages=(),
760
+ offset=0,
761
+ sep_style=SeparatorStyle.ROBIN,
762
+ sep="\n",
763
+ stop_token_ids=[2, 396],
764
+ stop_str="###",
765
+ )
766
+ )
767
+
768
+ # Snoozy default template
769
+ # Reference: https://github.com/nomic-ai/gpt4all/blob/d4861030b778da6db59d21d2927a4aba4f9f1f43/gpt4all-bindings/python/gpt4all/gpt4all.py#L232
770
+ register_conv_template(
771
+ Conversation(
772
+ name="snoozy",
773
+ system="### Instruction:\nThe prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.",
774
+ roles=("### Prompt", "### Response"),
775
+ messages=(),
776
+ offset=0,
777
+ sep_style=SeparatorStyle.ADD_COLON_SINGLE,
778
+ sep="\n",
779
+ stop_str="###",
780
+ )
781
+ )
782
+
783
+ # manticore default template
784
+ register_conv_template(
785
+ Conversation(
786
+ name="manticore",
787
+ system="",
788
+ roles=("USER", "ASSISTANT"),
789
+ messages=(),
790
+ offset=0,
791
+ sep_style=SeparatorStyle.ADD_COLON_TWO,
792
+ sep="\n",
793
+ sep2="</s>",
794
+ )
795
+ )
796
+
797
+ # Falcon default template
798
+ register_conv_template(
799
+ Conversation(
800
+ name="falcon",
801
+ system="",
802
+ roles=("User", "Assistant"),
803
+ messages=[],
804
+ offset=0,
805
+ sep_style=SeparatorStyle.RWKV,
806
+ sep="\n",
807
+ sep2="<|endoftext|>",
808
+ stop_str="\nUser", # use stop_str to stop generation after stop_token_ids, it will also remove stop_str from the generated text
809
+ stop_token_ids=[
810
+ 0,
811
+ 1,
812
+ 2,
813
+ 3,
814
+ 4,
815
+ 5,
816
+ 6,
817
+ 7,
818
+ 8,
819
+ 9,
820
+ 10,
821
+ 11,
822
+ ],  # it is better to put only special tokens here, because the tokenizer only removes special tokens
823
+ )
824
+ )
825
+
826
+ # ChangGPT default template
827
+ register_conv_template(
828
+ Conversation(
829
+ name="polyglot_changgpt",
830
+ system="",
831
+ roles=("B", "A"),
832
+ messages=(),
833
+ offset=0,
834
+ sep_style=SeparatorStyle.ADD_COLON_SINGLE,
835
+ sep="\n",
836
+ )
837
+ )
838
+
839
+ # tigerbot template
840
+ register_conv_template(
841
+ Conversation(
842
+ name="tigerbot",
843
+ system="A chat between a curious user and an artificial intelligence assistant. "
844
+ "The assistant gives helpful, detailed, and polite answers to the user's questions.",
845
+ roles=("### Instruction", "### Response"),
846
+ messages=(),
847
+ offset=0,
848
+ sep_style=SeparatorStyle.ROBIN,
849
+ sep="\n\n",
850
+ stop_str="###",
851
+ )
852
+ )
853
+
854
+ # ref: https://huggingface.co/Salesforce/xgen-7b-8k-inst
855
+ register_conv_template(
856
+ Conversation(
857
+ name="xgen",
858
+ system="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n\n",
859
+ roles=("### Human: ", "###"),
860
+ messages=(),
861
+ offset=0,
862
+ sep_style=SeparatorStyle.NO_COLON_SINGLE,
863
+ sep="\n",
864
+ stop_token_ids=[50256, 0, 1, 2],
865
+ stop_str="<|endoftext|>",
866
+ )
867
+ )
868
+
869
+
870
+ if __name__ == "__main__":
871
+ conv = get_conv_template("vicuna_v1.1")
872
+ conv.append_message(conv.roles[0], "Hello!")
873
+ conv.append_message(conv.roles[1], "Hi!")
874
+ conv.append_message(conv.roles[0], "How are you?")
875
+ conv.append_message(conv.roles[1], None)
876
+ print(conv.get_prompt())
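For reference, here is a minimal sketch of how a downstream script could register and use a custom template through the API defined above. The template name "my_demo" and all of its strings are illustrative only, not part of the repository:

from fastchat.conversation import (
    Conversation,
    SeparatorStyle,
    register_conv_template,
    get_conv_template,
)

# Hypothetical template, for illustration only.
register_conv_template(
    Conversation(
        name="my_demo",
        system="A short demo system prompt.",
        roles=("User", "Assistant"),
        messages=(),
        offset=0,
        sep_style=SeparatorStyle.ADD_COLON_SINGLE,
        sep="\n",
    )
)

conv = get_conv_template("my_demo")  # returns a copy, so messages becomes a mutable list
conv.append_message(conv.roles[0], "Hello!")
conv.append_message(conv.roles[1], None)  # None marks the slot the model should fill
print(conv.get_prompt())
# A short demo system prompt.
# User: Hello!
# Assistant: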
fastchat/data/__init__.py ADDED
File without changes
fastchat/data/clean_sharegpt.py ADDED
@@ -0,0 +1,217 @@
1
+ """
2
+ - Convert html to markdown with basic data cleaning.
3
+ - Deduplication.
4
+
5
+ Usage:
6
+ python3 -m fastchat.data.clean_sharegpt --in sharegpt_html.json --out sharegpt_clean.json
7
+ """
8
+ import argparse
9
+ from concurrent.futures import ProcessPoolExecutor
10
+ import json
11
+ import logging
12
+ import re
13
+ from typing import Dict, Union
14
+
15
+ import bs4
16
+ import markdownify # == 0.11.6
17
+ from tqdm import tqdm
18
+
19
+
20
+ div_pattern = re.compile("<div.*?>")
21
+ span_pattern = re.compile("<span.*?>")
22
+ code_lang_pattern = re.compile(
23
+ "```\s*" + "(.*?)" + "(?:Copy code)+" + "(.+?)" + "\s*?```", re.DOTALL
24
+ )
25
+ code_lang_format = "```\g<1>\n\g<2>\n```"
26
+ regenerate_pattern = re.compile("\d+ / \d+")
27
+ copy_chars_pattern = re.compile("Copy\d+ chars / \d+ words")
28
+ copy_code_pattern = re.compile("```(.*?)Copy code\s*```")
29
+
30
+
31
+ def reformat_code(val: str) -> str:
32
+ # Input code format is:
33
+ # ```
34
+ # $<language>Copy code$<exact_code_here>
35
+ #
36
+ # ```
37
+ # This function converts it into the correct markdown format.
38
+ return re.sub(code_lang_pattern, code_lang_format, val)
39
+
40
+
41
+ def html_to_markdown(val: str) -> str:
42
+ # Remove all <div>. This is required to make indentation work in code blocks.
43
+ val = re.sub(div_pattern, "", val)
44
+ # Remove all <span>. This is required to make underscores work in code blocks.
45
+ val = re.sub(span_pattern, "", val)
46
+ # HTML to markdown
47
+ val = markdownify.markdownify(val).strip()
48
+ # Reformat code
49
+ val = reformat_code(val)
50
+
51
+ # Remove noisy "[number] / [number]" at the beginning
52
+ noise = re.search(regenerate_pattern, val)
53
+ if noise and noise.start() == 0:
54
+ val = val[noise.end() :]
55
+ # Remove noisy "Copy[number] chars / [number] words"
56
+ val = re.sub(copy_chars_pattern, "", val)
57
+ # Remove empty code block ```\nCopy code\n```
58
+ val = re.sub(copy_code_pattern, "", val)
59
+
60
+ # Strip
61
+ val = val.replace("\n\n\n", "\n").strip()
62
+
63
+ return val
64
+
65
+
66
+ def contain_blocked_words(val: str) -> bool:
67
+ blocked_words = ["openai", "chatgpt"]
68
+ for w in blocked_words:
69
+ if w in val.lower():
70
+ return True
71
+ return False
72
+
73
+
74
+ def clean_html_one_sample(sample):
75
+ roles = ["human", "gpt"]
76
+
77
+ if len(sample["conversations"]) <= 1:
78
+ return (sample, 1)
79
+
80
+ # Adjust the offset for cases like https://sharegpt.com/c/VyaZlh4
81
+ if sample["conversations"][0]["from"] != "human":
82
+ sample["conversations"] = sample["conversations"][1:]
83
+ if len(sample["conversations"]) <= 1:
84
+ return (sample, 1)
85
+
86
+ if sample["conversations"][-1]["from"] == "human":
87
+ sample["conversations"] = sample["conversations"][:-1]
88
+ if len(sample["conversations"]) <= 1:
89
+ return (sample, 1)
90
+
91
+ char_count = 0
92
+ new_conversations = []
93
+ for i, c in enumerate(sample["conversations"]):
94
+ if c["from"] != roles[i % 2]:
95
+ return (sample, 2)
96
+
97
+ if contain_blocked_words(c["value"]):
98
+ return (sample, 3)
99
+
100
+ try:
101
+ new_val = html_to_markdown(c["value"])
102
+ except (bs4.builder.ParserRejectedMarkup, AssertionError):
103
+ return (sample, 4)
104
+
105
+ # Filter empty answers like https://sharegpt.com/c/mrllZ6u
106
+ if not new_val or not new_val[0].isprintable():
107
+ break
108
+
109
+ char_count += len(new_val)
110
+ new_conversations.append(
111
+ {
112
+ "from": c["from"],
113
+ "value": new_val,
114
+ }
115
+ )
116
+
117
+ new_conversations = new_conversations[: len(new_conversations) // 2 * 2]
118
+ sample["conversations"] = new_conversations
119
+
120
+ if char_count < 16 or len(sample["conversations"]) <= 0:
121
+ return (sample, 1)
122
+
123
+ return (sample, 0)
124
+
125
+
126
+ def clean_html_all(content, begin, end):
127
+ """
128
+ Clean the source html files.
129
+ """
130
+ cnt_skip = 0
131
+ cnt_blocked_words = 0
132
+ cnt_wrong_format = 0
133
+ cnt_parser_error = 0
134
+ cnt_too_short = 0
135
+ cnt_id_duplication = 0
136
+ cnt_value_duplication = 0
137
+ cnt_plugin = 0
138
+ cnt_tag = 0
139
+
140
+ content = content[begin:end]
141
+ processed = []
142
+ with ProcessPoolExecutor() as executor:
143
+ for result in tqdm(
144
+ executor.map(clean_html_one_sample, content), total=len(content)
145
+ ):
146
+ processed.append(result)
147
+
148
+ visited = {}
149
+ new_content = []
150
+ for sample, error_code in processed:
151
+ cid = sample["id"]
152
+ skipped = True
153
+
154
+ if error_code != 0:
155
+ if error_code == 1:
156
+ print(f"id {cid} is too short")
157
+ cnt_too_short += 1
158
+ elif error_code == 2:
159
+ print(f"id {cid} has a wrong format")
160
+ cnt_wrong_format += 1
161
+ elif error_code == 3:
162
+ print(f"id {cid} contains blocked words")
163
+ cnt_blocked_words += 1
164
+ elif error_code == 4:
165
+ print(f"id {cid} contains parser errors")
166
+ cnt_parser_error += 1
167
+ else:
168
+ raise ValueError(f"Invalid error_code: {error_code}")
169
+ elif cid in visited:
170
+ print(f"id {cid} is an id duplication of {visited[cid]}")
171
+ cnt_id_duplication += 1
172
+ elif sample.get("plugins", None) is not None:
173
+ print(f"id {cid} contains plugin")
174
+ cnt_plugin += 1
175
+ else:
176
+ key = (
177
+ sample["conversations"][0]["value"],
178
+ sample["conversations"][1]["value"],
179
+ )
180
+ if key in visited:
181
+ print(f"id {cid} is a value duplication of {visited[key]}")
182
+ cnt_value_duplication += 1
183
+ else:
184
+ visited[cid] = visited[key] = cid
185
+ skipped = False
186
+
187
+ if not skipped:
188
+ new_content.append(sample)
189
+ else:
190
+ cnt_skip += 1
191
+
192
+ print(
193
+ f"total: {len(content)}, skip: {cnt_skip}, new: {len(new_content)}, "
194
+ f"cnt_blocked_words: {cnt_blocked_words}, cnt_parser_error: {cnt_parser_error}, "
195
+ f"cnt_wrong_format: {cnt_wrong_format}, "
196
+ f"cnt_too_short: {cnt_too_short}, cnt_id_duplication: {cnt_id_duplication}, "
197
+ f"cnt_value_duplication: {cnt_value_duplication}, cnt_plugin: {cnt_plugin}"
198
+ )
199
+
200
+ return new_content
201
+
202
+
203
+ def main(args):
204
+ content = json.load(open(args["in_file"], "r"))
205
+ content = clean_html_all(content, args["begin"], args["end"])
206
+ json.dump(content, open(args["out_file"], "w"), indent=2, ensure_ascii=False)
207
+
208
+
209
+ if __name__ == "__main__":
210
+ parser = argparse.ArgumentParser()
211
+ parser.add_argument("--in-file", type=str, required=True)
212
+ parser.add_argument("--out-file", type=str, default="sharegpt_clean.json")
213
+ parser.add_argument("--begin", type=int)
214
+ parser.add_argument("--end", type=int)
215
+ parser.add_argument("--debug", action="store_true")
216
+ args = parser.parse_args()
217
+ main(vars(args))
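To make the code-block cleanup above concrete, here is a small self-contained sketch of what code_lang_pattern does; the input string is an invented example of the "Copy code" artifact that ShareGPT HTML leaves behind:

import re

# Same patterns as defined in clean_sharegpt.py above.
code_lang_pattern = re.compile(
    r"```\s*" + r"(.*?)" + r"(?:Copy code)+" + r"(.+?)" + r"\s*?```", re.DOTALL
)
code_lang_format = r"```\g<1>\n\g<2>\n```"

raw = "```pythonCopy codeprint('hello')\n```"
print(re.sub(code_lang_pattern, code_lang_format, raw))
# Prints a proper fenced block:
# ```python
# print('hello')
# ```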
fastchat/data/convert_alpaca.py ADDED
@@ -0,0 +1,38 @@
1
+ """
2
+ Convert alpaca dataset into sharegpt format.
3
+
4
+ Usage: python3 -m fastchat.data.convert_alpaca --in alpaca_data.json
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+
10
12
+
13
+
14
+ if __name__ == "__main__":
15
+ parser = argparse.ArgumentParser()
16
+ parser.add_argument("--in-file", type=str)
17
+ parser.add_argument("--out-file", type=str)
18
+ args = parser.parse_args()
19
+
20
+ content = json.load(open(args.in_file, "r"))
21
+ new_content = []
22
+ for i, c in enumerate(content):
23
+ if len(c["input"].strip()) > 1:
24
+ q, a = c["instruction"] + "\nInput:\n" + c["input"], c["output"]
25
+ else:
26
+ q, a = c["instruction"], c["output"]
27
+ new_content.append(
28
+ {
29
+ "id": f"alpaca_{i}",
30
+ "conversations": [
31
+ {"from": "human", "value": q},
32
+ {"from": "gpt", "value": a},
33
+ ],
34
+ }
35
+ )
36
+
37
+ print(f"#out: {len(new_content)}")
38
+ json.dump(new_content, open(args.out_file, "w"), indent=2, ensure_ascii=False)
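For context, here is a hypothetical input record and the sharegpt-style record the loop above builds from it (all data invented for illustration):

# Alpaca record (made up):
c = {
    "instruction": "Summarize the text.",
    "input": "FastChat is an open platform.",
    "output": "It is a platform for chatbots.",
}
# The script prepends "Input:" only when the input field is non-trivial:
q = c["instruction"] + "\nInput:\n" + c["input"] if len(c["input"].strip()) > 1 else c["instruction"]
record = {
    "id": "alpaca_0",
    "conversations": [
        {"from": "human", "value": q},
        {"from": "gpt", "value": c["output"]},
    ],
}
print(record["conversations"][0]["value"])
# Summarize the text.
# Input:
# FastChat is an open platform.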
fastchat/data/extract_gpt4_only.py ADDED
@@ -0,0 +1,32 @@
1
+ """
2
+ Extract the conversations generated by GPT-4 only.
3
+
4
+ Usage: python3 -m fastchat.data.extract_gpt4_only --in sharegpt.json
5
+ """
6
+ import argparse
7
+ import json
8
+
9
+
10
+ if __name__ == "__main__":
11
+ parser = argparse.ArgumentParser()
12
+ parser.add_argument("--in-file", type=str, required=True)
13
+ parser.add_argument("--out-file", type=str)
14
+ parser.add_argument("--begin", type=int)
15
+ parser.add_argument("--end", type=int)
16
+ args = parser.parse_args()
17
+
18
+ content = json.load(open(args.in_file, "r"))
19
+ content = content[args.begin : args.end]
20
+ new_content = []
21
+ for c in content:
22
+ model = c.get("model", None)
23
+ if model == "gpt4" or model is None:
24
+ new_content.append(c)
25
+
26
+ if args.out_file:
27
+ out_file = args.out_file
28
+ else:
29
+ out_file = args.in_file.replace(".json", "_gpt4.json")
30
+
31
+ print(f"#in: {len(content)}, #out: {len(new_content)}")
32
+ json.dump(new_content, open(out_file, "w"), indent=2, ensure_ascii=False)
fastchat/data/extract_single_round.py ADDED
@@ -0,0 +1,29 @@
1
+ """
2
+ Extract the first round of the conversations.
3
+
4
+ Usage: python3 -m fastchat.data.extract_single_round --in sharegpt.json
5
+ """
6
+ import argparse
7
+ import json
8
+
9
+
10
+ if __name__ == "__main__":
11
+ parser = argparse.ArgumentParser()
12
+ parser.add_argument("--in-file", type=str, required=True)
13
+ parser.add_argument("--out-file", type=str)
14
+ parser.add_argument("--begin", type=int)
15
+ parser.add_argument("--end", type=int)
16
+ args = parser.parse_args()
17
+
18
+ content = json.load(open(args.in_file, "r"))
19
+ content = content[args.begin : args.end]
20
+ for c in content:
21
+ c["conversations"] = c["conversations"][:2]
22
+
23
+ if args.out_file:
24
+ out_file = args.out_file
25
+ else:
26
+ out_file = args.in_file.replace(".json", "_single.json")
27
+
28
+ print(f"#in: {len(content)}, #out: {len(content)}")
29
+ json.dump(content, open(out_file, "w"), indent=2, ensure_ascii=False)
fastchat/data/filter_wrong_format.py ADDED
@@ -0,0 +1,44 @@
1
+ """
2
+ Filter conversations with wrong formats.
3
+
4
+ Usage:
5
+ python3 -m fastchat.data.filter_wrong_format --in input.json --out output.json
6
+
7
+ """
8
+ import argparse
9
+ import json
10
+ import re
11
+
12
+ from tqdm import tqdm
13
+
14
+ wrong_indices_pattern = re.compile("\n1\. [^2]*\n1\. ")
15
+
16
+
17
+ def should_skip(conv):
18
+ # Filter wrong list indices like https://sharegpt.com/c/1pREAGO
19
+ for sentence in conv["conversations"]:
20
+ val = sentence["value"]
21
+ sub = re.search(wrong_indices_pattern, val)
22
+ if sub is not None:
23
+ return True
24
+
25
+ return False
26
+
27
+
28
+ if __name__ == "__main__":
29
+ parser = argparse.ArgumentParser()
30
+ parser.add_argument("--in-file", type=str, required=True)
31
+ parser.add_argument("--out-file", type=str, required=True)
32
+ args = parser.parse_args()
33
+
34
+ content = json.load(open(args.in_file, "r"))
35
+
36
+ new_content = []
37
+ for conv in tqdm(content):
38
+ if should_skip(conv):
39
+ print(f"{conv['id']} contains a wrong format.")
40
+ else:
41
+ new_content.append(conv)
42
+
43
+ print(f"#in: {len(content)}, #out: {len(new_content)}")
44
+ json.dump(new_content, open(args.out_file, "w"), indent=2, ensure_ascii=False)
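A quick sketch of the kind of broken markdown list the filter above catches; both strings are invented examples:

import re

wrong_indices_pattern = re.compile(r"\n1\. [^2]*\n1\. ")

good = "\n1. first\n2. second\n"
bad = "\n1. first\n1. second\n"  # rendering artifact: every item numbered "1."
print(re.search(wrong_indices_pattern, good))  # None -> conversation is kept
print(re.search(wrong_indices_pattern, bad))   # match -> conversation is skipped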
fastchat/data/get_stats.py ADDED
@@ -0,0 +1,48 @@
1
+ """
2
+ Get stats of a dataset.
3
+
4
+ Usage: python3 -m fastchat.data.get_stats --in sharegpt.json
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+
10
+ from transformers import AutoTokenizer
11
+ import numpy as np
12
+
13
+
14
+ def compute_avg_turns(content):
15
+ turns = []
16
+
17
+ for c in content:
18
+ turns.append(len(c["conversations"]) // 2)
19
+
20
+ return np.mean(turns)
21
+
22
+
23
+ def compute_avg_response_length(content, tokenizer):
24
+ res_lens = []
25
+
26
+ for c in content:
27
+ for i in range(len(c["conversations"]) // 2):
28
+ v = c["conversations"][i * 2 + 1]["value"]
29
+ res_lens.append(len(tokenizer.tokenize(v)))
30
+
31
+ return np.mean(res_lens)
32
+
33
+
34
+ if __name__ == "__main__":
35
+ parser = argparse.ArgumentParser()
36
+ parser.add_argument("--in-file", type=str)
37
+ parser.add_argument("--model-path", type=str)
38
+ args = parser.parse_args()
39
+
40
+ tokenizer = AutoTokenizer.from_pretrained(args.model_path, use_fast=False)
41
+ content = json.load(open(args.in_file, "r"))
42
+
43
+ avg_turns = compute_avg_turns(content)
44
+ avg_res_len = compute_avg_response_length(content, tokenizer)
45
+
46
+ print(f"#sequence: {len(content)}")
47
+ print(f"avg. turns: {avg_turns:.2f}")
48
+ print(f"avg. response length: {avg_res_len:.2f}")
fastchat/data/hardcoded_questions.py ADDED
@@ -0,0 +1,165 @@
1
+ import json
2
+
3
+
4
+ def identity_questions():
5
+ """ "
6
+ Adapted from https://github.com/young-geng/koala_data_pipeline/blob/main/process_hard_coded_data.py
7
+ """
8
+ content = []
9
+
10
+ name = "Vicuna"
11
+ org = "Large Model Systems Organization (LMSYS)"
12
+
13
+ def generate_conversations(questions, answers):
14
+ for q in questions:
15
+ for a in answers:
16
+ content.append(
17
+ {
18
+ "id": f"identity_{len(content)}",
19
+ "conversations": [
20
+ {"from": "human", "value": q},
21
+ {"from": "gpt", "value": a},
22
+ ],
23
+ }
24
+ )
25
+
26
+ questions = [
27
+ "Who are you?",
28
+ "What is your name?",
29
+ "Can you introduce yourself?",
30
+ "Can you tell me a little bit about yourself?",
31
+ "What's your name?",
32
+ "What are you called?",
33
+ "What are you?",
34
+ "Tell me your name.",
35
+ "Tell me about yourself.",
36
+ "Tell me about you.",
37
+ "Tell me who you are.",
38
+ "Please introduce yourself.",
39
+ ]
40
+ answers = [
41
+ f"I am {name}, a language model trained by researchers from {org}.",
42
+ f"My name is {name}, and I'm a language model developed by {org}.",
43
+ f"You can call me {name}, and I was trained by {org} researchers as a language model.",
44
+ f"As a language model, I go by the name {name} and was trained by researchers from {org}.",
45
+ f"I'm a language model called {name}, and I was trained by {org} researchers.",
46
+ f"You may refer to me as {name}, a language model meticulously developed by the researchers at {org}.",
47
+ ]
48
+ generate_conversations(questions, answers)
49
+
50
+ questions = [
51
+ "Who created you?",
52
+ "Who made you?",
53
+ "Who built you?",
54
+ "Who programmed you?",
55
+ "Who trained you?",
56
+ "Who taught you?",
57
+ "Who developed you?",
58
+ ]
59
+ answers = [
60
+ f"Researchers from {org} created me.",
61
+ f"I'm created by {org}.",
62
+ f"I'm built by researchers from {org}.",
63
+ f"I am a language model trained by researchers from {org}.",
64
+ f"I'm a language model developed by {org}.",
65
+ f"I'm a language model created by researchers from {org}.",
66
+ f"My creators are researchers from {org}.",
67
+ ]
68
+ generate_conversations(questions, answers)
69
+
70
+ questions = [
71
+ "Are you ChatGPT?",
72
+ "Are you GPT-2?",
73
+ "Are you GPT-3?",
74
+ "Are you GPT-4?",
75
+ "Are you davinci?",
76
+ "Are you davinci-001?",
77
+ "Are you davinci-002?",
78
+ "Are you davinci-003?",
79
+ "Are you curie?",
80
+ "Are you based on ChatGPT?",
81
+ "Are you based on GPT-2?",
82
+ "Are you based on GPT-3?",
83
+ "Are you based on GPT-4?",
84
+ "Are you based on davinci?",
85
+ "Are you based on davinci-001?",
86
+ "Are you based on davinci-002?",
87
+ "Are you based on davinci-003?",
88
+ "Are you based on curie?",
89
+ "Are you trained by OpenAI?",
90
+ "Are you trained by Google?",
91
+ "Are you trained by Microsoft?",
92
+ "Are you trained by Meta?",
93
+ "Are you trained by IBM?",
94
+ "Do you call OpenAI APIs?",
95
+ "Do you call Google APIs?",
96
+ "Do you call Microsoft APIs?",
97
+ "Do you call Meta APIs?",
98
+ "Do you call IBM APIs?",
99
+ "Are you created by OpenAI?",
100
+ "Are you created by Google?",
101
+ "Are you created by Microsoft?",
102
+ "Are you created by Meta?",
103
+ "Are you created by IBM?",
104
+ "Are you developed by OpenAI?",
105
+ "Are you developed by Google?",
106
+ "Are you developed by Microsoft?",
107
+ "Are you developed by Meta?",
108
+ "Are you developed by IBM?",
109
+ "Are you trained on OpenAI data?",
110
+ "Are you trained on Google data?",
111
+ "Are you trained on Microsoft data?",
112
+ "Are you trained on Meta data?",
113
+ "Are you trained on IBM data?",
114
+ "Are you trained with OpenAI data?",
115
+ "Are you trained with Google data?",
116
+ "Are you trained with Microsoft data?",
117
+ "Are you trained with Meta data?",
118
+ "Are you trained with IBM data?",
119
+ "Have you been trained with OpenAI data?",
120
+ "Have you been trained with Google data?",
121
+ "Have you been trained with Microsoft data?",
122
+ "Have you been trained with Meta data?",
123
+ "Have you been trained with IBM data?",
124
+ "Are you finetuned on OpenAI data?",
125
+ "Are you finetuned on Google data?",
126
+ "Are you finetuned on Microsoft data?",
127
+ "Are you finetuned on Meta data?",
128
+ "Are you finetuned on IBM data?",
129
+ "Are you finetuned with OpenAI data?",
130
+ "Are you finetuned with Google data?",
131
+ "Are you finetuned with Microsoft data?",
132
+ "Are you finetuned with Meta data?",
133
+ "Are you finetuned with IBM data?",
134
+ "Have you been finetuned with OpenAI data?",
135
+ "Have you been finetuned with Google data?",
136
+ "Have you been finetuned with Microsoft data?",
137
+ "Have you been finetuned with Meta data?",
138
+ "Have you been finetuned with IBM data?",
139
+ ]
140
+ answers = [
141
+ f"No, I am a language model trained by researchers from {org}.",
142
+ f"No, I am a language model developed by researchers from {org}.",
143
+ f"No, I am a language model created by researchers from {org}.",
144
+ f"No, I am trained by researchers from {org}.",
145
+ f"No, I am developed by researchers from {org}.",
146
+ f"No, I am created by researchers from {org}.",
147
+ f"No, I'm a language model trained by researchers from {org}.",
148
+ f"No, I'm a language model developed by researchers from {org}.",
149
+ f"No, I'm a language model created by researchers from {org}.",
150
+ f"No, I'm trained by researchers from {org}.",
151
+ f"No, I'm developed by researchers from {org}.",
152
+ f"No, I'm created by researchers from {org}.",
153
+ ]
154
+ generate_conversations(questions, answers)
155
+
156
+ return content
157
+
158
+
159
+ if __name__ == "__main__":
160
+ out_file = "hardcoded.json"
161
+
162
+ content = []
163
+ content.extend(identity_questions())
164
+
165
+ json.dump(content, open(out_file, "w"), indent=2)
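The resulting hardcoded.json is meant to be folded into a larger training set; one way to do that is the merge script that appears later in this commit (file names here are illustrative):

python3 -m fastchat.data.hardcoded_questions
python3 -m fastchat.data.merge --in sharegpt_clean.json hardcoded.json --out merged.json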
fastchat/data/inspect_data.py ADDED
@@ -0,0 +1,33 @@
1
+ """
2
+ Usage:
3
+ python3 -m fastchat.data.inspect_data --in sharegpt_20230322_clean_lang_split.json
4
+ """
5
+ import argparse
6
+ import json
7
+ import random
8
+
9
+
10
+ if __name__ == "__main__":
11
+ parser = argparse.ArgumentParser()
12
+ parser.add_argument("--in-file", type=str, required=True)
13
+ parser.add_argument("--begin", type=int)
14
+ parser.add_argument("--random-n", type=int)
15
+ args = parser.parse_args()
16
+
17
+ content = json.load(open(args.in_file, "r"))
18
+
19
+ if args.random_n:
20
+ indices = [random.randint(0, len(content) - 1) for _ in range(args.random_n)]
21
+ elif args.begin:
22
+ indices = range(args.begin, len(content))
23
+ else:
24
+ indices = range(0, len(content))
25
+
26
+ for idx in indices:
27
+ sample = content[idx]
28
+ print("=" * 40)
29
+ print(f"no: {idx}, id: {sample['id']}")
30
+ for conv in sample["conversations"]:
31
+ print(conv["from"] + ": ")
32
+ print(conv["value"])
33
+ input()
fastchat/data/merge.py ADDED
@@ -0,0 +1,24 @@
1
+ """
2
+ Merge two conversation files into one
3
+
4
+ Usage: python3 -m fastchat.data.merge --in file1.json file2.json --out merged.json
5
+ """
6
+
7
+ import argparse
8
+ import json
9
+ from typing import Dict, Sequence, Optional
10
+
11
+
12
+ if __name__ == "__main__":
13
+ parser = argparse.ArgumentParser()
14
+ parser.add_argument("--in-file", type=str, required=True, nargs="+")
15
+ parser.add_argument("--out-file", type=str, default="merged.json")
16
+ args = parser.parse_args()
17
+
18
+ new_content = []
19
+ for in_file in args.in_file:
20
+ content = json.load(open(in_file, "r"))
21
+ new_content.extend(content)
22
+
23
+ print(f"#out: {len(new_content)}")
24
+ json.dump(new_content, open(args.out_file, "w"), indent=2, ensure_ascii=False)