Raushan-123 committed on
Commit ccb06ff
Parent: 509e4db

Upload 2 files

Files changed (2)
  1. app.py +57 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,57 @@
+ import gradio as gr
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from threading import Thread
+
+ print("Starting to load the model to memory")
+ m = AutoModelForCausalLM.from_pretrained(
+     "stabilityai/stablelm-2-zephyr-1_6b", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, trust_remote_code=True)
+ tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
+ # Use CUDA when available for an optimal experience
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ m = m.to(device)
+ print("Successfully loaded the model to memory")
+
+ start_message = ""
+
+ def user(message, history):
+     # Append the user's message to the conversation history
+     return "", history + [[message, ""]]
+
+
+ def chat(message, history):
+     chat = []
+     for item in history:
+         chat.append({"role": "user", "content": item[0]})
+         if item[1] is not None:
+             chat.append({"role": "assistant", "content": item[1]})
+     chat.append({"role": "user", "content": message})
+     messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+     # Tokenize the templated prompt string
+     model_inputs = tok([messages], return_tensors="pt").to(device)
+     streamer = TextIteratorStreamer(
+         tok, timeout=10., skip_prompt=True, skip_special_tokens=True)
+     generate_kwargs = dict(
+         model_inputs,
+         streamer=streamer,
+         max_new_tokens=1024,
+         do_sample=True,
+         top_p=0.95,
+         top_k=1000,
+         temperature=0.75,
+         num_beams=1,
+     )
+     # Run generation in a background thread so tokens can be consumed as they arrive
+     t = Thread(target=m.generate, kwargs=generate_kwargs)
+     t.start()
+
+     # Initialize an empty string to store the generated text
+     partial_text = ""
+     for new_text in streamer:
+         # Append each streamed chunk as it arrives
+         partial_text += new_text
+         # Yield the accumulated text so the chat window updates incrementally
+         yield partial_text
+
+
+ demo = gr.ChatInterface(fn=chat, examples=["hello"], title="GPT Chatbot by Raushan Sharma")
+ demo.launch(share=True)
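
For a quick sanity check outside the Gradio UI, here is a minimal sketch of the same single-turn generation pattern app.py relies on (same model and chat template; the non-streaming generate call and the 64-token cap are illustrative assumptions, not part of the app):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-2-zephyr-1_6b", trust_remote_code=True)

# Build a one-turn conversation and apply the model's chat template
chat = [{"role": "user", "content": "hello"}]
prompt = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
inputs = tok([prompt], return_tensors="pt")

# Illustrative settings; app.py instead streams with max_new_tokens=1024
out = model.generate(**inputs, max_new_tokens=64, do_sample=True, top_p=0.95, temperature=0.75)
# Decode only the newly generated tokens, skipping the prompt
print(tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))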
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio==4.15.0
+ torch
+ transformers>=4.38.1
+ numpy
+ tiktoken
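
With the dependencies installed (pip install -r requirements.txt), running python app.py starts the demo; because of share=True, launch should also print a temporary public Gradio link alongside the local URL.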