Roastem committed on
Commit
1dfc6af
1 Parent(s): 3b92a2c

Update app.py - CPU + GPU Inference

Browse files
Files changed (1) hide show
  1. app.py +22 -25
app.py CHANGED
@@ -1,59 +1,58 @@
 
1
  from langchain.llms import CTransformers
2
  from langchain.chains import LLMChain
3
- from langchain import PromptTemplate
4
- import os
5
- import io
6
  import gradio as gr
7
  import time
8
 
9
-
10
  custom_prompt_template = """
11
- You are an AI Chatbot named Sunny, you are created by 'Sic Team' and your task is to provide information to users and chat with them based on given user's query. Below is the user's query.
12
  Query: {query}
13
 
14
- You just return the helpful message in English and always try to provide relevant answers to user's query.
15
  """
16
 
 
17
  def set_custom_prompt():
18
- prompt = PromptTemplate(template=custom_prompt_template,
19
- input_variables=['query'])
20
  return prompt
21
 
22
 
23
- #Loading the model
24
  def load_model():
25
- # Load the locally downloaded model here
 
 
 
 
26
  llm = CTransformers(
27
- model = "TheBloke/zephyr-7B-beta-GGUF",
28
  model_type="llama",
29
- max_new_tokens = 1096,
30
- temperature = 0.2,
31
- repetition_penalty = 1.13,
32
- gpu=True,
33
- gpu_layers = 3,
34
  )
35
 
36
  return llm
37
 
38
- print(load_model())
39
 
40
  def chain_pipeline():
41
  llm = load_model()
42
  main_prompt = set_custom_prompt()
43
- main_chain = LLMChain(
44
- prompt=main_prompt,
45
- llm=llm
46
- )
47
  return main_chain
48
 
 
49
  llmchain = chain_pipeline()
50
 
 
51
  def bot(query):
52
  llm_response = llmchain.run({"query": query})
53
  return llm_response
54
 
 
55
  with gr.Blocks(title='Sunny') as main:
56
- # gr.HTML("Code Llama main")
57
  gr.Markdown("# Sunny Chatbot")
58
  chatbot = gr.Chatbot([], elem_id="chatbot", height=700)
59
  msg = gr.Textbox()
@@ -65,8 +64,6 @@ with gr.Blocks(title='Sunny') as main:
65
  time.sleep(2)
66
  return "", chat_history
67
 
68
-
69
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
70
 
71
- main.launch(share=True)
72
-
 
1
+ import torch
2
  from langchain.llms import CTransformers
3
  from langchain.chains import LLMChain
4
+ from langchain import PromptTemplate
 
 
5
  import gradio as gr
6
  import time
7
 
 
8
  custom_prompt_template = """
9
+ You are an AI Chatbot named Sunny, created by 'Sic Team', and your task is to provide information to users and chat with them based on given user's query. Below is the user's query.
10
  Query: {query}
11
 
12
+ You just return the helpful message in English and always try to provide relevant answers to the user's query.
13
  """
14
 
15
+
16
def set_custom_prompt():
    """Build the chat prompt from the module-level template.

    Returns:
        PromptTemplate: a template expecting a single ``query`` variable.
    """
    return PromptTemplate(template=custom_prompt_template,
                          input_variables=['query'])
20
 
21
 
 
22
def load_model():
    """Load the GGUF chat model via CTransformers, using the GPU when available.

    Returns:
        CTransformers: a LangChain LLM ready to be wired into an LLMChain.
    """
    # ctransformers has no `device` keyword — passing a torch.device is
    # rejected/ignored. GPU use is controlled by how many transformer layers
    # are offloaded via the integer `gpu_layers` setting instead.
    use_gpu = torch.cuda.is_available()
    print(f"Using device: {'cuda' if use_gpu else 'cpu'}")

    llm = CTransformers(
        model="TheBloke/zephyr-7B-beta-GGUF",
        model_type="llama",
        max_new_tokens=1096,
        temperature=0.2,
        repetition_penalty=1.13,
        # 0 keeps every layer on the CPU; 50 exceeds the layer count of a
        # 7B model, so the whole network is offloaded when a GPU is present.
        gpu_layers=50 if use_gpu else 0,
    )

    return llm
38
 
 
39
 
40
def chain_pipeline():
    """Assemble the LLM and the custom prompt into a runnable LLMChain."""
    # Load the model first so its device report prints before prompt setup,
    # matching the original call order.
    model = load_model()
    prompt = set_custom_prompt()
    return LLMChain(prompt=prompt, llm=model)
45
 
46
+
47
# Build the chain once at import time so the Gradio callback reuses it.
llmchain = chain_pipeline()
48
 
49
+
50
def bot(query):
    """Run the user's query through the shared chain and return the reply."""
    return llmchain.run({"query": query})
53
 
54
+
55
  with gr.Blocks(title='Sunny') as main:
 
56
  gr.Markdown("# Sunny Chatbot")
57
  chatbot = gr.Chatbot([], elem_id="chatbot", height=700)
58
  msg = gr.Textbox()
 
64
  time.sleep(2)
65
  return "", chat_history
66
 
 
67
  msg.submit(respond, [msg, chatbot], [msg, chatbot])
68
 
69
+ main.launch(share=False)