Norod78 committed on
Commit
5cd48aa
•
1 Parent(s): 99ff896

Hebrew-Mistral-7B

Browse files
Files changed (4) hide show
  1. README.md +14 -6
  2. app.py +138 -0
  3. requirements.txt +9 -0
  4. style.css +17 -0
README.md CHANGED
@@ -1,12 +1,20 @@
1
  ---
2
- title: Hebrew Mistral 7B
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 4.28.3
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
1
  ---
2
+ title: Yam-Peleg Hebrew-Mistral-7B
3
+ emoji: 🕎
4
+ colorFrom: blue
5
+ colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 4.28.2
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
+ suggested_hardware: a10g-small
12
  ---
13
 
14
+ # Yam-Peleg's Hebrew-Mistral-7B
15
+
16
+ Hebrew-Mistral-7B was introduced in [this Facebook post](https://www.facebook.com/groups/MDLI1/posts/2701023256728372/).
17
+
18
+ Please, check the [original model card](https://huggingface.co/yam-peleg/Hebrew-Mistral-7B) for more details.
19
+ You can see the other Hebrew models by Yam [here](https://huggingface.co/collections/yam-peleg/hebrew-models-65e957875324e2b9a4b68f08)
20
+
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from threading import Thread
3
+ from typing import Iterator
4
+
5
+ import gradio as gr
6
+ import spaces
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
+
10
+ MAX_MAX_NEW_TOKENS = 1024
11
+ DEFAULT_MAX_NEW_TOKENS = 256
12
+ MAX_INPUT_TOKEN_LENGTH = 512
13
+
14
+ DESCRIPTION = """\
15
+ # Yam-Peleg's Hebrew-Mistral-7B
16
+
17
+ Hebrew-Mistral-7B was introduced in [this Facebook post](https://www.facebook.com/groups/MDLI1/posts/2701023256728372/).
18
+
19
+ Please, check the [original model card](https://huggingface.co/yam-peleg/Hebrew-Mistral-7B) for more details.
20
+ You can see the other Hebrew models by Yam [here](https://huggingface.co/collections/yam-peleg/hebrew-models-65e957875324e2b9a4b68f08)
21
+
22
+ # Note: Use this model only for completing sentences.
23
+ ## While the user interface is of a chatbot for convenience, this is a base model and is not fine-tuned for chatbot tasks or instruction following tasks. As such, the model is not provided a chat history and will complete your text based on the last given prompt only.
24
+ """
25
+
26
+ LICENSE = """
27
+ <p/>
28
+
29
+ ---
30
+ A derivative work of [mistral-7b](https://mistral.ai/news/announcing-mistral-7b/) by Mistral-AI.
31
+ The model and space are released under the Apache 2.0 license
32
+ """
33
+
34
+ if not torch.cuda.is_available():
35
+ DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
36
+
37
+
38
+ if torch.cuda.is_available():
39
+ model_id = "yam-peleg/Hebrew-Mistral-7B"
40
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True)
41
+ tokenizer_id = "yam-peleg/Hebrew-Mistral-7B"
42
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
43
+
44
+
45
+ @spaces.GPU
46
+ def generate(
47
+ message: str,
48
+ chat_history: list[tuple[str, str]],
49
+ max_new_tokens: int = 1024,
50
+ temperature: float = 0.2,
51
+ top_p: float = 0.7,
52
+ top_k: int = 30,
53
+ repetition_penalty: float = 1.0,
54
+ ) -> Iterator[str]:
55
+
56
+ input_ids = tokenizer([message], return_tensors="pt").input_ids
57
+ if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
58
+ input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
59
+ gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
60
+ input_ids = input_ids.to(model.device)
61
+
62
+ streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
63
+ generate_kwargs = dict(
64
+ {"input_ids": input_ids},
65
+ streamer=streamer,
66
+ max_new_tokens=max_new_tokens,
67
+ do_sample=True,
68
+ top_p=top_p,
69
+ top_k=top_k,
70
+ temperature=temperature,
71
+ num_beams=1,
72
+ pad_token_id = tokenizer.eos_token_id,
73
+ repetition_penalty=repetition_penalty,
74
+ no_repeat_ngram_size=5,
75
+ early_stopping=True,
76
+ )
77
+ t = Thread(target=model.generate, kwargs=generate_kwargs)
78
+ t.start()
79
+
80
+ outputs = []
81
+ for text in streamer:
82
+ outputs.append(text)
83
+ yield "".join(outputs)
84
+
85
+
86
+ chat_interface = gr.ChatInterface(
87
+ fn=generate,
88
+ chatbot=gr.Chatbot(rtl=True, show_copy_button=True),
89
+ textbox=gr.Textbox(text_align = 'right', rtl = True),
90
+ additional_inputs=[
91
+ gr.Slider(
92
+ label="Max new tokens",
93
+ minimum=1,
94
+ maximum=MAX_MAX_NEW_TOKENS,
95
+ step=1,
96
+ value=DEFAULT_MAX_NEW_TOKENS,
97
+ ),
98
+ gr.Slider(
99
+ label="Temperature",
100
+ minimum=0.1,
101
+ maximum=4.0,
102
+ step=0.1,
103
+ value=0.3,
104
+ ),
105
+ gr.Slider(
106
+ label="Top-p (nucleus sampling)",
107
+ minimum=0.05,
108
+ maximum=1.0,
109
+ step=0.05,
110
+ value=0.3,
111
+ ),
112
+ gr.Slider(
113
+ label="Top-k",
114
+ minimum=1,
115
+ maximum=1000,
116
+ step=1,
117
+ value=30,
118
+ ),
119
+ ],
120
+ stop_btn=None,
121
+ examples=[
122
+ ["מתכון לעוגת שוקולד:"],
123
+ ["האיש האחרון בעולם ישב לבד בחדרו, כשלפתע נשמעה"],
124
+ ["שפת התכנות פייטון היא"],
125
+ ["העלילה של סינדרלה"],
126
+ ["שאלה: מהי עיר הבירה של מדינת ישראל?\nתשובה:"],
127
+ ["שאלה: אני ממש עייף, מה כדאי לי לעשות?\nתשובה:"],
128
+ ],
129
+ )
130
+
131
+ with gr.Blocks(css="style.css") as demo:
132
+ gr.Markdown(DESCRIPTION)
133
+ gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
134
+ chat_interface.render()
135
+ gr.Markdown(LICENSE)
136
+
137
+ if __name__ == "__main__":
138
+ demo.queue(max_size=20).launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==0.28.0
2
+ bitsandbytes==0.43.0
3
+ gradio==4.28.2
4
+ scipy==1.12.0
5
+ sentencepiece==0.2.0
6
+ spaces==0.26.2
7
+ torch==2.1.1
8
+ transformers==4.40.1
9
+ tokenizers==0.19.1
style.css ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ h1 {
2
+ text-align: center;
3
+ display: block;
4
+ }
5
+
6
+ #duplicate-button {
7
+ margin: auto;
8
+ color: white;
9
+ background: #1565c0;
10
+ border-radius: 100vh;
11
+ }
12
+
13
+ .contain {
14
+ max-width: 900px;
15
+ margin: auto;
16
+ padding-top: 1.5rem;
17
+ }