choukrani committed
Commit 3daf494
1 Parent(s): 5875528

Create app.py

Files changed (1)
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
+ import os
+ import copy
+ import gradio as gr
+ from typing import List, Tuple
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+
+
+ # Load the Atlas-Chat GGUF model with llama.cpp
+ llm = Llama(
+     model_path=hf_hub_download(
+         repo_id=os.environ.get("REPO_ID", "mradermacher/Atlas-Chat-2B-GGUF"),
+         filename=os.environ.get("MODEL_FILE", "Atlas-Chat-2B.Q8_0.gguf"),
+     ),
+     n_ctx=2048,  # context window size
+ )
+
+
+ # Training prompt template
+ training_prompt = """<start_of_turn>user
+ {}<end_of_turn>
+ <start_of_turn>model
+ {}<end_of_turn>"""
+
+
+ # Generate a response to the current message, given the chat history
+ def response(
+     user_message: str,
+     chat_history: List[Tuple[str, str]],
+     max_response_length: int,
+     temperature: float,
+     top_p: float,
+ ):
+     if not user_message.strip():
+         return "تقدروا تكتبوا الرسالة مرة اخرى؟"  # "Could you write the message again?"
+
+     # Format previous turns of the chat history into the prompt
+     formatted_prompt = ""
+     for user_input, model_response in chat_history:
+         formatted_prompt += training_prompt.format(user_input, model_response)
+
+     # Add the current user message, leaving the model turn open for generation
+     formatted_prompt += "<start_of_turn>user\n{}<end_of_turn>\n<start_of_turn>model\n".format(user_message)
+
+     try:
+         output = llm(
+             formatted_prompt,
+             max_tokens=max_response_length,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=40,
+             repeat_penalty=1.1,
+             stop=["<end_of_turn>", "<|endoftext|>"],
+             stream=True,
+         )
+
+         # Accumulate the streamed chunks into a single reply
+         response_text = ""
+         for out in output:
+             stream = copy.deepcopy(out)
+             response_text += stream["choices"][0]["text"]
+         return response_text
+
+     except Exception as e:
+         return f"شي خطأ وقع: {str(e)}"  # "An error occurred: ..."
+
+ # Create the Gradio chat interface
+ demo = gr.ChatInterface(
+     response,
+     title="AtlasChat-mini",
+     description="""\
+ # AtlasChat-mini 2B
+ This is a demo of [`MBZUAI-Paris/Atlas-Chat-2B`](https://huggingface.co/mbzuai-paris/atlas-chat-2b).
+ For more details, please check [our paper](https://arxiv.org/pdf/2409.17912).
+ Looking for a larger and more powerful version? Try the 9B version on [Hugging Face](https://huggingface.co/mbzuai-paris/atlas-chat-9b).
+ This demo uses the [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) library for efficient inference and runs the [`mradermacher/Atlas-Chat-2B-GGUF`](https://huggingface.co/mradermacher/Atlas-Chat-2B-GGUF) model with 8-bit Q8_0 quantization.
+ """,
+     examples=[
+         ['What is the capital of Morocco?'],
+         ['كيفاش نوجد شي طاجين ؟'],  # "How do I make a tagine?"
+         ['واش تقدر تعوض Google ؟'],  # "Can you replace Google?"
+         ['عاود لي شي نكتة'],  # "Tell me a joke"
+     ],
+     cache_examples=False,
+     additional_inputs=[
+         gr.Slider(minimum=1, maximum=1024, value=128, step=1, label="Max New Tokens"),
+         gr.Slider(minimum=0.1, maximum=3.0, value=0.5, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p (nucleus sampling)"),
+     ],
+ )
+
+
+ # Launch the demo
+ if __name__ == "__main__":
+     demo.launch()
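
For reference, a minimal standalone sketch (not part of the commit) of the prompt that response() assembles, with a hypothetical prior exchange standing in for chat_history:

# Sketch only: reproduces the prompt assembly from app.py with made-up history.
training_prompt = """<start_of_turn>user
{}<end_of_turn>
<start_of_turn>model
{}<end_of_turn>"""

chat_history = [("Hello!", "Hi! How can I help?")]  # hypothetical prior turn
user_message = "What is the capital of Morocco?"

formatted_prompt = ""
for user_input, model_response in chat_history:
    formatted_prompt += training_prompt.format(user_input, model_response)
# The final model turn is left open so generation continues from here
formatted_prompt += "<start_of_turn>user\n{}<end_of_turn>\n<start_of_turn>model\n".format(user_message)

print(formatted_prompt)
# <start_of_turn>user
# Hello!<end_of_turn>
# <start_of_turn>model
# Hi! How can I help?<end_of_turn><start_of_turn>user
# What is the capital of Morocco?<end_of_turn>
# <start_of_turn>model

Because the prompt ends with an open <start_of_turn>model turn, the model writes the next reply, and <end_of_turn> in the stop list terminates generation at the end of that turn.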