msdarck committed on
Commit
8dbc9b7
β€’
1 Parent(s): 45aaf82

Update app.py

Files changed (1)
  1. app.py +42 -2
app.py CHANGED
@@ -1,3 +1,43 @@
- import gradio as gr
-
- gr.load("models/cognitivecomputations/dolphin-2.6-mixtral-8x7b").launch()
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+ model_name_or_path = "TheBloke/dolphin-2.5-mixtral-8x7b-GPTQ"
+ # To use a different branch, change revision
+ # For example: revision="gptq-4bit-128g-actorder_True"
+ model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
+                                              device_map="auto",
+                                              trust_remote_code=False,
+                                              revision="main")
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+ prompt = "Write a story about llamas"
+ system_message = "You are a story writing assistant"
+ prompt_template = f'''<|im_start|>system
+ {system_message}<|im_end|>
+ <|im_start|>user
+ {prompt}<|im_end|>
+ <|im_start|>assistant
+ '''
+
+ print("\n\n*** Generate:")
+
+ input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+ output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
+ print(tokenizer.decode(output[0]))
+
+ # Inference can also be done using transformers' pipeline
+
+ print("*** Pipeline:")
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     max_new_tokens=512,
+     do_sample=True,
+     temperature=0.7,
+     top_p=0.95,
+     top_k=40,
+     repetition_penalty=1.1
+ )
+
+ print(pipe(prompt_template)[0]['generated_text'])
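
A note on running the committed script (not part of the diff itself): loading a GPTQ checkpoint such as TheBloke/dolphin-2.5-mixtral-8x7b-GPTQ through AutoModelForCausalLM.from_pretrained relies on transformers' GPTQ integration, which typically needs the optimum and auto-gptq packages alongside transformers, and the input_ids.cuda() call assumes a CUDA GPU is present. A minimal pre-flight sketch under those assumptions (the package names are an assumption drawn from common GPTQ setups, not from this commit):

# Pre-flight sketch: verify the GPTQ backend packages and GPU availability
# before running app.py. Assumes optimum + auto-gptq supply the GPTQ
# support that transformers uses for this checkpoint.
import importlib.util

import torch

for pkg in ("optimum", "auto_gptq"):
    if importlib.util.find_spec(pkg) is None:
        raise ImportError(f"GPTQ inference needs '{pkg}' (pip install {pkg.replace('_', '-')})")

if not torch.cuda.is_available():
    raise RuntimeError("app.py calls input_ids.cuda(), which assumes a CUDA-capable GPU")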