junaidbaber committed (verified)
Commit ef628bc · 1 Parent(s): df0aaaf

Update app.py

Files changed (1)
  1. app.py +43 -49
app.py CHANGED
@@ -1,69 +1,63 @@
- import streamlit as st
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
- import os
- # Hugging Face repository details
- # MODEL_ID = "meta-llama/CodeLlama-7b-Instruct-hf"
- MODEL_ID = "meta-llama/Llama-3.1-8B"

  from huggingface_hub import login
+ import os
  token = os.environ.get("hf")
-
  login(token)
- def load_model():
-     """Load the Hugging Face model and tokenizer."""
+
+ import streamlit as st
+ from transformers import pipeline
+ import torch
+
+ # Model ID
+ MODEL_ID = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+
+ @st.cache_resource
+ def load_pipeline():
      try:
-         st.write("Loading model and tokenizer...")
-         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-         model = AutoModelForCausalLM.from_pretrained(
-             MODEL_ID, device_map="auto", torch_dtype=torch.float16
+         st.write("Loading the instruct pipeline...")
+         instruct_pipeline = pipeline(
+             "text-generation",
+             model=MODEL_ID,
+             model_kwargs={"torch_dtype": torch.bfloat16},
+             device_map="auto",
          )
-         st.write("Model and tokenizer successfully loaded.")
-         return tokenizer, model
+         st.write("Pipeline successfully loaded.")
+         return instruct_pipeline
      except Exception as e:
-         st.error(f"Error loading model: {e}")
-         return None, None
+         st.error(f"Error loading pipeline: {e}")
+         return None

- # Load the model and tokenizer
- @st.cache_resource
- def get_model():
-     return load_model()
-
- tokenizer, model = get_model()
+ # Load the pipeline
+ instruct_pipeline = load_pipeline()

  # Streamlit UI
- st.title("LowCode Chatbot")
- st.write("This chatbot provides interaction with LLM. Type your question below!")
+ st.title("Instruction Chatbot")
+ st.write("Chat with the instruction-tuned model!")

- if model is None or tokenizer is None:
-     st.error("Model failed to load. Please check the Hugging Face model path or environment configuration.")
+ if instruct_pipeline is None:
+     st.error("Pipeline failed to load. Please check the configuration.")
  else:
-     user_input = st.text_input("You:", placeholder="Enter your medical question here...", key="input_box")
+     # Message-based interaction
+     system_message = st.text_area("System Message", value="You are a helpful assistant.", height=100)
+     user_input = st.text_input("User:", placeholder="Ask a question or provide an instruction...")

      if st.button("Send"):
          if user_input.strip():
-             # Construct the prompt
-             SYSTEM_PROMPT = "You are a helpful assistant. Provide accurate and concise answers."
-             full_prompt = f"{SYSTEM_PROMPT}\nUser: {user_input}\nAssistant:"
-
-             # Tokenize the input
-             inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True).to("cuda")
-
              try:
-                 # Generate the response
-                 outputs = model.generate(
-                     inputs["input_ids"],
-                     max_length=200,  # Limit response length
-                     temperature=0.7,  # Control randomness
-                     top_p=0.9,  # Top-p sampling
-                     pad_token_id=tokenizer.eos_token_id
+                 messages = [
+                     {"role": "system", "content": system_message},
+                     {"role": "user", "content": user_input},
+                 ]
+                 # Generate response
+                 outputs = instruct_pipeline(
+                     messages,
+                     max_new_tokens=150,  # Limit response length
                  )
-
-                 # Decode and display the response
-                 response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
-                 st.write(f"**Model:** {response}")
-
+                 # Display the generated response
+                 response = outputs[0]["generated_text"]
+                 st.write(f"**Assistant:** {response}")
              except Exception as e:
                  st.error(f"Error generating response: {e}")
          else:
-             st.warning("Please enter a valid question.")
+             st.warning("Please enter a valid message.")
+
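
Note on reading the output: with recent transformers releases, passing a chat-style messages list to a "text-generation" pipeline usually returns generated_text as the whole conversation (a list of role/content dicts with the generated assistant turn appended) rather than a plain string, so writing response directly may print the raw message list. A minimal sketch of pulling out only the assistant reply; the helper name extract_reply is illustrative, and it assumes outputs came from the pipeline call shown in the updated app.py:

def extract_reply(outputs):
    """Return only the assistant's text from a text-generation pipeline result.

    Sketch only: assumes `outputs` came from a call such as
    instruct_pipeline(messages, max_new_tokens=150) as in the updated app.py.
    """
    generated = outputs[0]["generated_text"]
    if isinstance(generated, list):
        # Chat-style input: the pipeline appends the generated assistant turn
        # as the last {"role": ..., "content": ...} entry of the conversation.
        return generated[-1]["content"]
    # Plain-string output (older transformers versions or string prompts).
    return str(generated)

If that behaviour applies, the display line could become st.write(f"**Assistant:** {extract_reply(outputs)}") instead of writing generated_text directly.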