Jae-Won Chung committed on
Commit
2ad8f23
1 Parent(s): 9833409

Clean up master branch

Browse files
.gitignore CHANGED
@@ -1,4 +1,12 @@
 
1
  **/.DS_Store
 
 
2
  .envrc
 
 
3
  pyrightconfig.json
4
  .idea
 
 
 
 
1
+ # OS-related
2
  **/.DS_Store
3
+
4
+ # Environment
5
  .envrc
6
+
7
+ # Editor
8
  pyrightconfig.json
9
  .idea
10
+
11
+ # Data
12
+ data/
app.py DELETED
@@ -1,99 +0,0 @@
1
- import gradio as gr
2
- import requests
3
- import pandas as pd
4
- from huggingface_hub.hf_api import SpaceInfo
5
- import matplotlib.pyplot as plt
6
- import plotly.express as px
7
-
8
- model_perf_table = "data/test.csv"
9
- logo_path = "img/image.png"
10
-
11
-
12
- def get_blocks_party_spaces():
13
- df = pd.read_csv(model_perf_table)
14
- df = df.sort_values(by=['score'],ascending=False)
15
- return df
16
-
17
-
18
- def get_blocks_party_spaces_with_formula(formula=None):
19
- # get the dataframe
20
- df = get_blocks_party_spaces()
21
- if formula:
22
- try:
23
- df[str(formula)] = df.eval(formula)
24
- except:
25
- pass # Handle this error properly in your code
26
- return df
27
-
28
- def create_scatter(x, y, z):
29
- df = get_blocks_party_spaces()
30
- if z is None or z == 'None' or z == '':
31
- fig, ax = plt.subplots()
32
-
33
- ax.scatter(list(df[x]),list(df[y]), marker='o', s=50, c='blue')
34
- for i, label in enumerate(list(df['model'])):
35
- ax.text(list(df[x])[i],list(df[y])[i],str(label))
36
- ax.set_xlabel(x)
37
- ax.set_ylabel(y)
38
-
39
- ax.spines['top'].set_visible(False)
40
- ax.spines['right'].set_visible(False)
41
- plt.grid(axis='y', linestyle='--', alpha=0.7)
42
-
43
- else:
44
- fig = px.scatter_3d(df, x=x, y=y, z=z, text=df['model'])
45
-
46
- # Set axis labels and title
47
- fig.update_layout(scene=dict(
48
- xaxis_title=x,
49
- yaxis_title=y,
50
- zaxis_title=z,
51
- ),
52
- title='3D Scatter Plot'
53
- )
54
-
55
- return fig
56
-
57
- block = gr.Blocks()
58
- with block:
59
- # gr.outputs.HTML(f'<img src="{logo_path}" alt="logo" height="1000px">')
60
- # img = gr.Image(logo_path,shape=[1,2]).style( rounded=False)
61
-
62
- gr.Markdown(f"""
63
- # 🦙💦SpitFight - Leaderboard for LLM
64
- """)
65
- with gr.Tabs():
66
- with gr.TabItem("Leaderboard"):
67
- with gr.Row():
68
- data = gr.outputs.Dataframe(type="pandas")
69
- with gr.Row():
70
- formula_input = gr.inputs.Textbox(lines=1, label="User Designed Column", placeholder = 'e.g. verbosity/latency')
71
- data_run = gr.Button("Add To Table")
72
- data_run.click(get_blocks_party_spaces_with_formula, inputs=formula_input, outputs=data)
73
- # running the function on page load in addition to when the button is clicked
74
- with gr.Row():
75
- with gr.Column():
76
- scatter_input = [gr.inputs.Dropdown(choices=get_blocks_party_spaces().columns.tolist()[1:], label="X-axis"),
77
- gr.inputs.Dropdown(choices=get_blocks_party_spaces().columns.tolist()[1:], label="Y-axis"),
78
- gr.inputs.Dropdown(choices=[None]+get_blocks_party_spaces().columns.tolist()[1:], label="Z-axis (Optional)")]
79
- fig_run = gr.Button("Generate Figure")
80
-
81
- with gr.Column():
82
- gen_figure = gr.Plot()# gr.outputs.Image(type="pil")
83
- fig_run.click(create_scatter, inputs=scatter_input, outputs=gen_figure)
84
-
85
-
86
- with gr.TabItem("About"):
87
- gr.Markdown(f"""
88
- ## Metrics:
89
- - **Human Score**: The average score given by human evaluators.
90
- - **Throughput**: The number of tokens generated per second.
91
- - **Verbosity**: The average number of generated tokens in the model's response.
92
- - **Latency**: The average time it takes for the model to generate a response.
93
- - **Memory**: The base memory usage of the model.
94
- """)
95
-
96
- block.load(get_blocks_party_spaces_with_formula, inputs=None, outputs=data)
97
-
98
- block.launch(share=True)
99
- # block.launch( )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
assets/img/image.png DELETED
Binary file (682 kB)
 
benchmark.py CHANGED
@@ -41,6 +41,7 @@ SYSTEM_PROMPTS = {
41
  def main(
42
  model_path: str,
43
  input_file: str,
 
44
  device_index: int = 0,
45
  task: Literal[tuple(SYSTEM_PROMPTS)] = "chat", # type: ignore
46
  load_8bit: bool = False,
@@ -48,20 +49,18 @@ def main(
48
  repitition_penalty: float = 1.0,
49
  max_new_tokens: int = 512,
50
  ) -> None:
51
- """Run the main routine.
52
-
53
- Code structure is based on
54
- https://github.com/lm-sys/FastChat/blob/57dea54055/fastchat/serve/inference.py#L249
55
 
56
  Args:
57
  model_path: Path to or Huggingface Hub Id of the model.
58
  input_file: Path to the input JSON file. Assumed to be our cleaned ShareGPT data.
59
- device_index: Index of the GPU to use for inference.
60
- task: Type of task to perform inference on.
61
- load_8bit: Whether to load the model in 8-bit mode.
62
- temperature: Temperature to use for sampling.
63
- repitition_penalty: Repitition penalty to use for the model.
64
- max_new_tokens: Maximum numbers of tokens to generate, ignoring the prompt.
 
65
  """
66
  # NOTE(JW): ChatGLM is implemented as a special case in FastChat inference.
67
  # Also, it's primarily a model that's fine-tuned for Chinese, so it doesn't
@@ -76,7 +75,7 @@ def main(
76
  if model_path.endswith("/"):
77
  model_path = model_path[:-1]
78
  model_name_cleaned = "--".join(model_path.split("/")[-2:])
79
- output_dir = f"data/{task}/{model_name_cleaned}"
80
  output_csv_path = f"{output_dir}/benchmark.json"
81
  config_json_path = f"{output_dir}/config.json"
82
  table = Table(title="Benchmark")
 
41
  def main(
42
  model_path: str,
43
  input_file: str,
44
+ output_dir: str = "data",
45
  device_index: int = 0,
46
  task: Literal[tuple(SYSTEM_PROMPTS)] = "chat", # type: ignore
47
  load_8bit: bool = False,
 
49
  repitition_penalty: float = 1.0,
50
  max_new_tokens: int = 512,
51
  ) -> None:
52
+ """Run benchmarking for one model on the entire input file.
 
 
 
53
 
54
  Args:
55
  model_path: Path to or Huggingface Hub Id of the model.
56
  input_file: Path to the input JSON file. Assumed to be our cleaned ShareGPT data.
57
+ output_dir: Path to the output directory. (Default: "data")
58
+ device_index: Index of the GPU to use for inference. (Default: 0)
59
+ task: Type of task to perform inference on. (Default: "chat")
60
+ load_8bit: Whether to load the model in 8-bit mode. (Default: False)
61
+ temperature: Temperature to use for sampling. (Default: 0.7)
62
+ repitition_penalty: Repetition penalty to use for the model. (Default: 1.0)
63
+ max_new_tokens: Maximum number of tokens to generate, ignoring the prompt. (Default: 512)
64
  """
65
  # NOTE(JW): ChatGLM is implemented as a special case in FastChat inference.
66
  # Also, it's primarily a model that's fine-tuned for Chinese, so it doesn't
 
75
  if model_path.endswith("/"):
76
  model_path = model_path[:-1]
77
  model_name_cleaned = "--".join(model_path.split("/")[-2:])
78
+ output_dir = f"{output_dir}/{task}/{model_name_cleaned}"
79
  output_csv_path = f"{output_dir}/benchmark.json"
80
  config_json_path = f"{output_dir}/config.json"
81
  table = Table(title="Benchmark")
data/chat/lmsys--vicuna-7B/benchmark.json DELETED
@@ -1,38 +0,0 @@
1
- [
2
- {
3
- "model": "lmsys--vicuna-7B",
4
- "throughput": 30.558443157709224,
5
- "response_length": 331,
6
- "latency": 10.831703640520573,
7
- "energy": 2215.2789999842644,
8
- "input": "A chat between a human user (prompter) and an artificial intelligence (AI) assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: Summarize the main ideas of Jeff Walker's Product Launch Formula into bullet points as it pertains to a growth marketing agency implementing these strategies and tactics for their clients... ASSISTANT:",
9
- "output": "Sure, here are the main ideas of Jeff Walker's Product Launch Formula and how they can be applied to a growth marketing agency implementing these strategies and tactics for their clients:\n\n1. Pre-launch phase:\n* Build an audience through content marketing, social media, and other channels.\n* Create a sense of urgency and exclusivity by offering early access or special bonuses to those on a mailing list.\n* Use teasers, sneak peeks, and other forms of anticipation to build excitement.\n2. Launch phase:\n* Use a webinar or live event to launch the product, using stories, testimonials, and social proof to build trust and credibility.\n* Offer a limited-time special offer or bonus to those who purchase during the launch.\n* Continually communicate with the audience during the launch phase, answering questions, providing updates, and building relationships.\n3. Post-launch phase:\n* Follow up with customers to make sure they are satisfied and provide additional support and resources.\n* Use customer testimonials, success stories, and case studies to continue building social proof and credibility.\n* Continually update and improve the product based on customer feedback.\n\nOverall, the Product Launch Formula emphasizes the importance of building an engaged audience, creating urgency and exclusivity, and providing ongoing support and value to customers. By implementing these strategies and tactics, a growth marketing agency can help their clients launch successful products and grow their businesses."
10
- },
11
- {
12
- "model": "lmsys--vicuna-7B",
13
- "throughput": 31.514563598172494,
14
- "response_length": 98,
15
- "latency": 3.109673395752907,
16
- "energy": 640.4210000038147,
17
- "input": "A chat between a human user (prompter) and an artificial intelligence (AI) assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: How to tell if a customer segment is well segmented? In 3 bullet points. ASSISTANT:",
18
- "output": "1. Homogeneity: The segment should consist of customers who share similar characteristics, behaviors, or needs.\n2. Distinctiveness: The segment should be different from other segments in terms of their characteristics, behaviors, or needs.\n3. Stability: The segment should remain relatively consistent over time and not change drastically. If the segment is constantly changing or becoming more similar to other segments, it may indicate that the segmentation is not effective."
19
- },
20
- {
21
- "model": "lmsys--vicuna-7B",
22
- "throughput": 30.362787676034074,
23
- "response_length": 346,
24
- "latency": 11.39552809484303,
25
- "energy": 2334.6629999876022,
26
- "input": "A chat between a human user (prompter) and an artificial intelligence (AI) assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: In Java, I want to replace string like \"This is a new {object} at {place}\" with a Map, {object: \"student\", \"point 3, 4\"}, and get a result \"This is a new student at point 3, 4\". How can I do? ASSISTANT:",
27
- "output": "To replace the string with a map, you can use the `String.format()` method and pass in an object that contains the key-value pairs of the map. Here's an example of how you can do this in Java:\n```\nString originalString = \"This is a new {object} at {place}\";\nString formattedString = String.format(\"This is a new %s at %s\",\n new ObjectMapper().writeValueAsString(map),\n new ObjectMapper().writeValueAsString(place));\n```\nIn this example, `originalString` is the original string that you want to replace, `map` is the map that you want to use to replace the {object} and {place} placeholders, and `ObjectMapper` is a class that is used to convert the map to a string. The `writeValueAsString()` method is used to convert the map to a JSON string, which can then be passed to the `String.format()` method to replace the placeholders in the string.\n\nTo get the result \"This is a new student at point 3, 4\", you can use the following code:\n```\nString result = String.format(\"This is a new %s at %s\",\n new ObjectMapper().writeValueAsString(map),\n new ObjectMapper().writeValueAsString(place));\n```\nYou can then pass the `result` string to a `println` statement or use it in any other way that you like.\n\nI hope this helps! Let me know if you have any other questions."
28
- },
29
- {
30
- "model": "lmsys--vicuna-7B",
31
- "throughput": 31.208981333084907,
32
- "response_length": 96,
33
- "latency": 3.076037598773837,
34
- "energy": 649.0310000181198,
35
- "input": "A chat between a human user (prompter) and an artificial intelligence (AI) assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: Metaphorical language is also used to describe the various addressing modes of the instructions. Grandiose language to express their excitement and admiration for the functionality of the instructions being described. Now, rewrite this with more perplexity:\n\nJMP ABCD\nMOV AX, [BX+SI]\nMOV AX, [100]\nMOV AX, [BX]\nMOV AX, [BX\\*2+SI]\nMOV AX, BX\nMOV AX, 7 ASSISTANT:",
36
- "output": "The instructions described seem to possess a remarkable array of addressing modes, each with its own unique set of characteristics. The use of grandiose language to express excitement and admiration for these instructions is certainly understandable, as their functionality seems to transcend the mere execution of basic computations. With so many different addressing modes to choose from, one might be forgiven for feeling a sense of perplexity and wonder at the sheer complexity of these instructions."
37
- }
38
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/chat/lmsys--vicuna-7B/config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "model_path": "/data/leaderboard/weights/lmsys/vicuna-7B",
3
- "input_file": "/data/leaderboard/sharegpt/sg_90k_part1_html_cleaned_lang_first_sampled.json",
4
- "device_index": 0,
5
- "task": "chat",
6
- "load_8bit": false,
7
- "temperature": 0.7,
8
- "repitition_penalty": 1.0,
9
- "max_new_tokens": 512
10
- }
 
 
 
 
 
 
 
 
 
 
 
data/test.csv DELETED
@@ -1,6 +0,0 @@
1
- model,score,throughput,response_length,latency,energy
2
- lmsys/vicuna-13B,1054,19.47,178.71,10.37,12
3
- lmsys/vicuna-7B,1007,31.26,217.33,7.71,32
4
- metaai/llama-13B,854,18.49,390.9,22.74,24
5
- lmsys/fastchat-t5-3B,941,31.98,131.12,6.09,4
6
- BlinkDL/rwkv-7B,928,23.09,239.98,9.62,12