dwb2023 committed on
Commit
9eaa1af
1 Parent(s): 811b009

Update app.py

Browse files

update and simplify page to support OpenELM-270M

Files changed (1) hide show
  1. app.py +27 -32
app.py CHANGED
@@ -12,50 +12,46 @@ DEFAULT_MAX_NEW_TOKENS = 256
12
  MAX_INPUT_TOKEN_LENGTH = 512
13
 
14
  DESCRIPTION = """\
15
- # OpenELM-3B-Instruct
16
 
17
- This Space demonstrates [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple. Please, check the original model card for details.
18
- You can see the other models of the OpenELM family [here](https://huggingface.co/apple/OpenELM)
19
- The following Colab notebooks are available:
20
- * [OpenELM-3B-Instruct (GPU)](https://gist.github.com/Norod/4f11bb36bea5c548d18f10f9d7ec09b0)
21
- * [OpenELM-270M (CPU)](https://gist.github.com/Norod/5a311a8e0a774b5c35919913545b7af4)
22
 
23
- You might also be interested in checking out Apple's [CoreNet Github page](https://github.com/apple/corenet?tab=readme-ov-file).
24
 
25
- If you duplicate this space, make sure you have access to [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf)
26
- because this model uses it as a tokenizer.
27
-
28
- # Note: Use this model for only for completing sentences and instruction following.
29
  """
30
 
31
  LICENSE = """
32
  <p/>
33
 
34
  ---
35
- As a derivative work of [OpenELM-3B-Instruct](https://huggingface.co/apple/OpenELM-3B-Instruct) by Apple,
36
- this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-3B-Instruct/blob/main/LICENSE).
 
 
37
  """
38
 
39
- if not torch.cuda.is_available():
40
- DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
41
 
 
 
 
 
 
 
 
 
 
42
 
43
- if torch.cuda.is_available():
44
- model_id = "apple/OpenELM-3B-Instruct"
45
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, low_cpu_mem_usage=True)
46
- tokenizer_id = "meta-llama/Llama-2-7b-hf"
47
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
48
- if tokenizer.pad_token == None:
49
- tokenizer.pad_token = tokenizer.eos_token
50
- tokenizer.pad_token_id = tokenizer.eos_token_id
51
- model.config.pad_token_id = tokenizer.eos_token_id
52
 
53
- @spaces.GPU
54
  def generate(
55
  message: str,
56
  chat_history: list[tuple[str, str]],
57
  max_new_tokens: int = 1024,
58
- temperature: float = 0.6,
59
  top_p: float = 0.9,
60
  top_k: int = 50,
61
  repetition_penalty: float = 1.4,
@@ -139,17 +135,16 @@ chat_interface = gr.ChatInterface(
139
  ],
140
  stop_btn=None,
141
  examples=[
142
- ["A recipe for a chocolate cake:"],
143
- ["Can you explain briefly to me what is the Python programming language?"],
144
- ["Explain the plot of Cinderella in a sentence."],
145
- ["Question: What is the capital of France?\nAnswer:"],
146
- ["Question: I am very tired, what should I do?\nAnswer:"],
147
  ],
148
  )
149
 
150
  with gr.Blocks(css="style.css") as demo:
151
  gr.Markdown(DESCRIPTION)
152
- gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
153
  chat_interface.render()
154
  gr.Markdown(LICENSE)
155
 
 
12
  MAX_INPUT_TOKEN_LENGTH = 512
13
 
14
  DESCRIPTION = """\
15
+ # OpenELM-270M-Instruct -- Running on CPU
16
 
17
+ This Space demonstrates [apple/OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) by Apple. Please, check the original model card for details.
 
 
 
 
18
 
19
+ For detail on the OpenELM model refer to the Paper page [here](https://huggingface.co/papers/2404.14619)
20
 
21
+ For detail on the pre-training, instruct tuning, and parameter-efficient finetuning process refer to the [OpenELM page in the CoreNet GitHub repository](https://github.com/apple/corenet/tree/main/projects/openelm)
 
 
 
22
  """
23
 
24
  LICENSE = """
25
  <p/>
26
 
27
  ---
28
+ As a derivative work of [apple/OpenELM-270M-Instruct](https://huggingface.co/apple/OpenELM-270M-Instruct) by Apple,
29
+ this demo is governed by the original [license](https://huggingface.co/apple/OpenELM-270M-Instruct/blob/main/LICENSE).
30
+ ---
31
+ based on the [Norod78/OpenELM_3B_Demo](https://huggingface.co/spaces/Norod78/OpenELM_3B_Demo) space - I encourage you to like his space as well. I have a lot of respect for how he promoted and shared information about this unique model.
32
  """
33
 
 
 
34
 
35
+ model = AutoModelForCausalLM.from_pretrained(
36
+ "apple/OpenELM-270M-Instruct",
37
+ trust_remote_code=True,
38
+ )
39
+ tokenizer = AutoTokenizer.from_pretrained(
40
+ "NousResearch/Llama-2-7b-hf",
41
+ trust_remote_code=True,
42
+ tokenizer_class=LlamaTokenizer,
43
+ )
44
 
45
+ if tokenizer.pad_token == None:
46
+ tokenizer.pad_token = tokenizer.eos_token
47
+ tokenizer.pad_token_id = tokenizer.eos_token_id
48
+ model.config.pad_token_id = tokenizer.eos_token_id
 
 
 
 
 
49
 
 
50
  def generate(
51
  message: str,
52
  chat_history: list[tuple[str, str]],
53
  max_new_tokens: int = 1024,
54
+ temperature: float = 0.1,
55
  top_p: float = 0.9,
56
  top_k: int = 50,
57
  repetition_penalty: float = 1.4,
 
135
  ],
136
  stop_btn=None,
137
  examples=[
138
+ ["Tell me a joke about a sandwich:"],
139
+ ["What would a polite pirate say?"],
140
+ ["Explain quantum physics in 5 words or less:"],
141
+ ["Question: Why don't scientists trust atoms?\nAnswer:"],
142
+ ["Question: What do you call a bear with no teeth?\nAnswer:"],
143
  ],
144
  )
145
 
146
  with gr.Blocks(css="style.css") as demo:
147
  gr.Markdown(DESCRIPTION)
 
148
  chat_interface.render()
149
  gr.Markdown(LICENSE)
150