Spaces:

yanolja
/

arena

Running

App Files Files Community

Kang Suhyun committed on Feb 1, 2024

Commit

5e33531

unverified ·

2 Parent(s): 93a104a 44ad98f

Merge pull request #2 from Y-IAB/1-arena

Browse files

[#1] Implement side-by-side chatbot interface

Files changed (9) hide show

.gitignore +1 -0
.isort.cfg +2 -0
.pylintrc +401 -0
.style.yapf +3 -0
.vscode/extensions.json +8 -0
.vscode/settings.json +15 -0
README.md +24 -0
app.py +112 -0
requirments.txt +113 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv

.isort.cfg ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [settings]
2	+ profile = google

.pylintrc ADDED Viewed

	@@ -0,0 +1,401 @@

+# This Pylint rcfile contains a best-effort configuration to uphold the
+# best-practices and style described in the Google Python style guide:
+#   https://google.github.io/styleguide/pyguide.html
+#
+# Its canonical open-source location is:
+#   https://google.github.io/styleguide/pylintrc
+[MAIN]
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=third_party
+# Files or directories matching the regex patterns are skipped. The regex
+# matches against base names, not paths.
+ignore-patterns=
+# Pickle collected data for later comparisons.
+persistent=no
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+# Use multiple processes to speed up Pylint.
+jobs=4
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+[MESSAGES CONTROL]
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
+confidence=
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+#enable=
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W"
+disable=R,
+        abstract-method,
+        apply-builtin,
+        arguments-differ,
+        attribute-defined-outside-init,
+        backtick,
+        bad-option-value,
+        basestring-builtin,
+        buffer-builtin,
+        c-extension-no-member,
+        consider-using-enumerate,
+        cmp-builtin,
+        cmp-method,
+        coerce-builtin,
+        coerce-method,
+        delslice-method,
+        div-method,
+        eq-without-hash,
+        execfile-builtin,
+        file-builtin,
+        filter-builtin-not-iterating,
+        fixme,
+        getslice-method,
+        global-statement,
+        hex-method,
+        idiv-method,
+        implicit-str-concat,
+        import-error,
+        import-self,
+        import-star-module-level,
+        input-builtin,
+        intern-builtin,
+        invalid-str-codec,
+        locally-disabled,
+        long-builtin,
+        long-suffix,
+        map-builtin-not-iterating,
+        misplaced-comparison-constant,
+        missing-function-docstring,
+        metaclass-assignment,
+        next-method-called,
+        next-method-defined,
+        no-absolute-import,
+        no-init,
+        no-member,
+        no-name-in-module,
+        no-self-use,
+        nonzero-method,
+        oct-method,
+        old-division,
+        old-ne-operator,
+        old-octal-literal,
+        old-raise-syntax,
+        parameter-unpacking,
+        print-statement,
+        raising-string,
+        range-builtin-not-iterating,
+        raw_input-builtin,
+        rdiv-method,
+        reduce-builtin,
+        relative-import,
+        reload-builtin,
+        round-builtin,
+        setslice-method,
+        signature-differs,
+        standarderror-builtin,
+        suppressed-message,
+        sys-max-int,
+        trailing-newlines,
+        unichr-builtin,
+        unicode-builtin,
+        unnecessary-pass,
+        unpacking-in-except,
+        useless-else-on-loop,
+        useless-suppression,
+        using-cmp-argument,
+        wrong-import-order,
+        xrange-builtin,
+        zip-builtin-not-iterating,
+        # added rules
+        missing-class-docstring,
+[REPORTS]
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, eg
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+# Tells whether to display a full report or only the messages
+reports=no
+# Python expression which should return a score less than or equal to 10 (10
+# is the highest score). You have access to the variables error, warning,
+# refactor and convention, which contain the number of messages in each of
+# those categories, and statement, the total number of statements analyzed.
+# This is used by the global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details
+#msg-template=
+[BASIC]
+# Good variable names which should always be accepted, separated by a comma
+good-names=main,_
+# Bad variable names which should always be refused, separated by a comma
+bad-names=
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+# Include a hint for the correct naming format with invalid-name
+include-naming-hint=no
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl
+# Regular expression matching correct function names
+function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$
+# Regular expression matching correct variable names
+variable-rgx=^[a-z][a-z0-9_]*$
+# Regular expression matching correct constant names
+const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+# Regular expression matching correct attribute names
+attr-rgx=^_{0,2}[a-z][a-z0-9_]*$
+# Regular expression matching correct argument names
+argument-rgx=^[a-z][a-z0-9_]*$
+# Regular expression matching correct class attribute names
+class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+# Regular expression matching correct inline iteration names
+inlinevar-rgx=^[a-z][a-z0-9_]*$
+# Regular expression matching correct class names
+class-rgx=^_?[A-Z][a-zA-Z0-9]*$
+# Regular expression matching correct module names
+module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$
+# Regular expression matching correct method names
+method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=12
+[TYPECHECK]
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis. It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=
+[FORMAT]
+# Maximum number of characters on a single line.
+max-line-length=80
+# TODO(https://github.com/pylint-dev/pylint/issues/3352): Direct pylint to exempt
+# lines made too long by directives to pytype.
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=(?x)(
+  ^\s*(\#\ )?<?https?://\S+>?$|
+  ^\s*(from\s+\S+\s+)?import\s+.+$)
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=yes
+# Maximum number of lines in a module
+max-module-lines=99999
+# String used as indentation unit.  The internal Google style guide mandates 2
+# spaces.  Google's externally-published style guide says 4, consistent with
+# PEP 8.  Here, we use 2 spaces, for conformity with many open-sourced Google
+# projects (like TensorFlow).
+indent-string='  '
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+[MISCELLANEOUS]
+# List of note tags to take in consideration, separated by a comma.
+notes=TODO
+[STRING]
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=yes
+[VARIABLES]
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+# A regular expression matching the name of dummy variables (i.e. expectedly
+# not used).
+dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_)
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools
+[LOGGING]
+# Logging modules to check that the string format arguments are in logging
+# function parameter format
+logging-modules=logging,absl.logging,tensorflow.io.logging
+[SIMILARITIES]
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+# Ignore comments when computing similarities.
+ignore-comments=yes
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+# Ignore imports when computing similarities.
+ignore-imports=no
+[SPELLING]
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the python-enchant package.
+spelling-dict=
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+# A path to a file that contains private dictionary; one word per line.
+spelling-private-dict-file=
+# Tells whether to store unknown words to indicated private dictionary in
+# --spelling-private-dict-file option instead of raising a message.
+spelling-store-unknown-words=no
+[IMPORTS]
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,
+                   TERMIOS,
+                   Bastion,
+                   rexec,
+                   sets
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant, absl
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+[CLASSES]
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+                      __new__,
+                      setUp
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+                  _fields,
+                  _replace,
+                  _source,
+                  _make
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls,
+                            class_
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs

.style.yapf ADDED Viewed

	@@ -0,0 +1,3 @@

+[style]
+based_on_style = google
+indent_width = 2

.vscode/extensions.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "recommendations": [
+    "ms-python.isort",
+    "ms-python.pylint",
+    "eeyore.yapf",
+    "esbenp.prettier-vscode"
+  ]
+}

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "[json]": {
+    "editor.defaultFormatter": "esbenp.prettier-vscode"
+  },
+  "[python]": {
+    "editor.defaultFormatter": "eeyore.yapf"
+  },
+  "yapf.args": ["--style", ".style.yapf"],
+  "pylint.args": ["--rcfile", ".pylintrc"],
+  "isort.args": ["--settings-file", ".isort.cfg"],
+  "editor.formatOnSave": true,
+  "editor.codeActionsOnSave": {
+    "source.organizeImports": "explicit"
+  }
+}

README.md CHANGED Viewed

	@@ -1 +1,25 @@
1	# Arena

 # Arena
+## How to run locally
+1. **Set up a virtual environment**
+   Before installing dependencies, it's recommended to create a virtual environment.
+1. **Install dependencies**
+   With the virtual environment activated, install the project dependencies:
+   ```shell
+   pip install -r requirments.txt
+   ```
+1. **Run the app**
+   Set your GCP project ID and OpenAI API key as environment variables and start the application:
+   ```shell
+   GCP_PROJECT_ID=<your project id> OPENAI_API_KEY=<your key> python3 app.py
+   ```
+   Replace `<your project id>` and `<your key>` with your GCP project ID and OpenAI API key, respectively.

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""
+It provides a platform for comparing the responses of two LLMs.
+"""
+from random import sample
+from fastchat.serve import gradio_web_server
+from fastchat.serve.gradio_web_server import bot_response
+import gradio as gr
+# TODO(#1): Add more models.
+# Model names from which user() samples a random pair for each prompt.
+SUPPORTED_MODELS = ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "gemini-pro"]
+def user(user_prompt):
+  model_pair = sample(SUPPORTED_MODELS, 2)
+  new_state_a = gradio_web_server.State(model_pair[0])
+  new_state_b = gradio_web_server.State(model_pair[1])
+  for state in [new_state_a, new_state_b]:
+    state.conv.append_message(state.conv.roles[0], user_prompt)
+    state.conv.append_message(state.conv.roles[1], None)
+    state.skip_next = False
+  return [
+      new_state_a, new_state_b, new_state_a.model_name, new_state_b.model_name
+  ]
+def bot(state_a, state_b, request: gr.Request):
+  new_states = [state_a, state_b]
+  generators = []
+  for state in new_states:
+    try:
+      # TODO(#1): Allow user to set configuration.
+      # bot_response returns a generator yielding states.
+      generator = bot_response(state,
+                               temperature=0.9,
+                               top_p=0.9,
+                               max_new_tokens=100,
+                               request=request)
+      generators.append(generator)
+    # TODO(#1): Narrow down the exception type.
+    except Exception as e:  # pylint: disable=broad-except
+      print(f"Error in bot_response: {e}")
+      raise e
+  new_responses = [None, None]
+  # It simulates concurrent response generation from two models.
+  while True:
+    stop = True
+    for i in range(len(generators)):
+      try:
+        yielded = next(generators[i])
+        # The generator yields a tuple, with the new state as the first item.
+        new_state = yielded[0]
+        new_states[i] = new_state
+        # The last item from 'messages' represents the response to the prompt.
+        bot_message = new_state.conv.messages[-1]
+        # Each message in conv.messages is structured as [role, message],
+        # so we extract the last message component.
+        new_responses[i] = bot_message[-1]
+        stop = False
+      except StopIteration:
+        pass
+      # TODO(#1): Narrow down the exception type.
+      except Exception as e:  # pylint: disable=broad-except
+        print(f"Error in generator: {e}")
+        raise e
+    yield new_states + new_responses
+    if stop:
+      break
+# Builds the Gradio UI: a prompt box, a submit button, a row with the two
+# model responses, and an initially closed accordion with the model names.
+with gr.Blocks() as app:
+  # Placeholder entries; both lists are overwritten with Textbox components
+  # further down.
+  model_names = [gr.State(None), gr.State(None)]
+  responses = [gr.State(None), gr.State(None)]
+  # states stores FastChat-specific conversation states.
+  states = [gr.State(None), gr.State(None)]
+  prompt = gr.TextArea(label="Prompt", lines=4)
+  submit = gr.Button()
+  with gr.Row():
+    responses[0] = gr.Textbox(label="Model A", interactive=False)
+    responses[1] = gr.Textbox(label="Model B", interactive=False)
+  with gr.Accordion("Show models", open=False):
+    with gr.Row():
+      model_names[0] = gr.Textbox(label="Model A", interactive=False)
+      model_names[1] = gr.Textbox(label="Model B", interactive=False)
+  # On click, user() turns the prompt into two states and their model names;
+  # bot() is then chained with those states and writes back the updated
+  # states together with the two responses.
+  submit.click(user, prompt, states + model_names,
+               queue=False).then(bot, states, states + responses)
+if __name__ == "__main__":
+  # We need to enable queue to use generators.
+  app.queue()
+  app.launch(debug=True)

requirments.txt ADDED Viewed

	@@ -0,0 +1,113 @@

+accelerate==0.26.1
+aiofiles==23.2.1
+aiohttp==3.9.3
+aiosignal==1.3.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.2.0
+attrs==23.2.0
+cachetools==5.3.2
+certifi==2023.11.17
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.2.0
+cycler==0.12.1
+distro==1.9.0
+fastapi==0.109.0
+ffmpy==0.3.1
+filelock==3.13.1
+fonttools==4.47.2
+frozenlist==1.4.1
+fschat==0.2.35
+fsspec==2023.12.2
+google-api-core==2.16.1
+google-auth==2.27.0
+google-cloud-aiplatform==1.40.0
+google-cloud-bigquery==3.17.1
+google-cloud-core==2.4.1
+google-cloud-resource-manager==1.11.0
+google-cloud-storage==2.14.0
+google-crc32c==1.5.0
+google-resumable-media==2.7.0
+googleapis-common-protos==1.62.0
+gradio==3.50.2
+gradio_client==0.6.1
+grpc-google-iam-v1==0.13.0
+grpcio==1.60.0
+grpcio-status==1.60.0
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.26.0
+huggingface-hub==0.20.3
+idna==3.6
+importlib-resources==6.1.1
+Jinja2==3.1.3
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+markdown2==2.4.12
+MarkupSafe==2.1.4
+matplotlib==3.8.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.4
+networkx==3.2.1
+nh3==0.2.15
+numpy==1.26.3
+openai==0.28.0
+orjson==3.9.12
+packaging==23.2
+pandas==2.2.0
+peft==0.8.1
+pillow==10.2.0
+prompt-toolkit==3.0.43
+proto-plus==1.23.0
+protobuf==4.25.2
+psutil==5.9.8
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pydantic==1.10.14
+pydantic_core==2.16.1
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.4
+PyYAML==6.0.1
+referencing==0.33.0
+regex==2023.12.25
+requests==2.31.0
+rich==13.7.0
+rpds-py==0.17.1
+rsa==4.9
+ruff==0.1.15
+safetensors==0.4.2
+semantic-version==2.10.0
+sentencepiece==0.1.99
+shapely==2.0.2
+shellingham==1.5.4
+shortuuid==1.0.11
+six==1.16.0
+sniffio==1.3.0
+starlette==0.35.1
+svgwrite==1.4.3
+sympy==1.12
+tiktoken==0.5.2
+tokenizers==0.15.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.2.0
+tqdm==4.66.1
+transformers==4.37.2
+typer==0.9.0
+typing_extensions==4.9.0
+tzdata==2023.4
+urllib3==2.2.0
+uvicorn==0.27.0.post1
+wavedrom==2.0.3.post3
+wcwidth==0.2.13
+websockets==11.0.3
+yarl==1.9.4