Korron committed on
Commit 853ccba
1 Parent(s): 5e72236

pure mistral testing

Files changed (3)
  1. app.py +31 -21
  2. app_old.py +26 -29
  3. requirements.txt +0 -5
app.py CHANGED
@@ -1,25 +1,16 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from inference import get_bot_response
-from rag import get_context
-from config import config
+from huggingface_hub import InferenceClient
 
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""
+# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
-model_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+model_name = "mistralai/Mistral-7B-Instruct-v0.2"
+client = InferenceClient(model_name)
 
-print("model start loading")
-model = AutoModelForCausalLM.from_pretrained(model_name,
-                                             device_map="auto",
-                                             trust_remote_code=False,
-                                             revision="main")
-print("model loaded")
-
-# model = AutoModelForCausalLM.from_pretrained(config["model_checkpoint"],
-#                                              device_map="auto",
-#                                              trust_remote_code=False,
-#                                              revision="main")
 
+print("test")
 
 def respond(
     message,
@@ -31,12 +22,31 @@ def respond(
 ):
     messages = [{"role": "system", "content": system_message}]
 
-    request = message
-    context = get_context(request, config["top_k"])
-    response = get_bot_response(request, context, model, tokenizer)
+    for val in history:
+        if val[0]:
+            messages.append({"role": "user", "content": val[0]})
+        if val[1]:
+            messages.append({"role": "assistant", "content": val[1]})
+
+    messages.append({"role": "user", "content": message})
+
+    response = ""
+
+    for message in client.chat_completion(
+        messages,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = message.choices[0].delta.content
 
-    return response
+        response += token
+        yield response
 
+"""
+For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
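
Taken together, app.py drops the local transformers/RAG pipeline entirely and streams completions from the hosted Inference API. A minimal sketch of the resulting file is below; the respond signature and the additional_inputs widgets are assumed to match the stock Gradio ChatInterface template, since this commit does not touch those lines.

import gradio as gr
from huggingface_hub import InferenceClient

# Hosted Inference API client; no local weights are loaded any more.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
client = InferenceClient(model_name)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]

    # Replay the chat history as alternating user/assistant turns.
    for user_turn, bot_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})

    # Stream tokens back to the UI as they arrive.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += chunk.choices[0].delta.content or ""
        yield response


# The additional_inputs below are assumed from the standard Gradio ChatInterface
# template; the commit leaves these lines unchanged, so they are not shown in the diff.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()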
app_old.py CHANGED
@@ -1,14 +1,30 @@
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from inference import get_bot_response
+from rag import get_context
+from config import config
 from huggingface_hub import InferenceClient
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-client = InferenceClient("TheBloke/Mistral-7B-Instruct-v0.2-GPTQ")
-
-print("test")
+model_name = "mistralai/Mistral-7B-Instruct-v0.2"
+
+
+client = InferenceClient(model_name)
+
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+
+print("model start loading")
+model = AutoModelForCausalLM.from_pretrained(model_name,
+                                             device_map="auto",
+                                             trust_remote_code=False,
+                                             revision="main")
+print("model loaded")
+
+# model = AutoModelForCausalLM.from_pretrained(config["model_checkpoint"],
+#                                              device_map="auto",
+#                                              trust_remote_code=False,
+#                                              revision="main")
 
 
 def respond(
     message,
@@ -20,31 +36,12 @@ def respond(
 ):
     messages = [{"role": "system", "content": system_message}]
 
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    request = message
+    context = get_context(request, config["top_k"])
+    response = get_bot_response(request, context, model, tokenizer)
 
-        response += token
-        yield response
+    return response
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
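
The local pipeline that used to live in app.py is kept here, but it depends on get_context (rag.py), get_bot_response (inference.py), and config (config.py), none of which are part of this commit, so their internals are not visible. Purely as a hypothetical sketch of the shape such a get_bot_response could take with the locally loaded model and tokenizer, not the actual implementation:

# Hypothetical sketch only -- inference.py is not shown in this commit.
def get_bot_response(request, context, model, tokenizer):
    # Fold the retrieved context into a single Mistral-style instruction prompt.
    prompt = (
        "[INST] Use the following context to answer the question.\n\n"
        f"Context:\n{context}\n\nQuestion: {request} [/INST]"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=512, do_sample=False)
    # Strip the prompt tokens and return only the newly generated text.
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)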
requirements.txt CHANGED
@@ -3,8 +3,3 @@ torch
 transformers
 llama_index
 llama-index-embeddings-huggingface
-accelerate
-optimum
-bitsandbytes
-auto-gptq
-peft
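
The five packages removed here (accelerate, optimum, bitsandbytes, auto-gptq, peft) appear to have been needed only to load the GPTQ-quantized checkpoint locally in the old app.py; with app.py now calling the hosted Inference API, the Space presumably no longer needs them at build time, while torch, transformers, llama_index and llama-index-embeddings-huggingface remain for the app_old.py RAG path.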