mosama committed
Commit 82b8515 · verified · 1 Parent(s): 1499856

Update app.py

Files changed (1)
  1. app.py +37 -26
app.py CHANGED
@@ -1,19 +1,22 @@
  import streamlit as st
- from transformers import pipeline
- import torch

  @st.cache_resource(show_spinner="Loading Model & Tokenizer")
  def load_model():
      # This is cached and will not run again and again.
      from transformers import AutoTokenizer, AutoModelForCausalLM
      import torch
+     from peft import PeftModel

      base_model = AutoModelForCausalLM.from_pretrained(
-         "mosama/Qwen2.5-0.5B-Pretrained-ar-end-urd-500", device_map="cpu", torch_dtype=torch.float16)
+         "unsloth/Qwen2.5-0.5B", device_map="cpu", torch_dtype=torch.bfloat16)
+
+     m = PeftModel.from_pretrained(base_model, "mosama/Qwen2.5-0.5B-Pretraining-ar-eng-urd-LoRA-Adapters")
+
+     merged_model = m.merge_and_unload()

      tokenizer = AutoTokenizer.from_pretrained("mosama/Qwen2.5-0.5B-Pretrained-ar-end-urd-500")
      st.success('Model & Tokenizer Loaded Successfully!', icon="✅")
-     return base_model, tokenizer
+     return merged_model, tokenizer

  st.title("Qwen2.5-0.5B Arabic, English & Urdu Continuous Pretrained")
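This hunk swaps the fully merged fp16 checkpoint for the unsloth/Qwen2.5-0.5B base loaded in bfloat16, with the LoRA adapters attached via peft and folded into the base weights at startup. Below is a minimal sketch of the same load-and-merge pattern outside Streamlit; the optional save step and its local output path are illustrative, not part of the commit:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the plain base model on CPU in bfloat16, as in the diff.
base = AutoModelForCausalLM.from_pretrained(
    "unsloth/Qwen2.5-0.5B", device_map="cpu", torch_dtype=torch.bfloat16)

# Attach the LoRA adapters, then merge them into the base weights so
# inference no longer goes through the peft wrapper.
peft_model = PeftModel.from_pretrained(
    base, "mosama/Qwen2.5-0.5B-Pretraining-ar-eng-urd-LoRA-Adapters")
merged = peft_model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(
    "mosama/Qwen2.5-0.5B-Pretrained-ar-end-urd-500")

# Optional (not in the commit): persist the merged weights so later
# runs can skip the merge; the output path here is hypothetical.
merged.save_pretrained("qwen2.5-0.5b-merged")
tokenizer.save_pretrained("qwen2.5-0.5b-merged")
```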
 
@@ -32,31 +35,39 @@ if not st.session_state.messages:
      st.write("Hello 👋 I am an AI bot powered by Qwen 2.5 0.5B model.")
      st.session_state.messages.append({"role": "assistant", "content": "Hello 👋 I am an AI bot powered by Qwen 2.5 0.5B model."})

- if prompt := st.chat_input("Say Something"):
+ st.session_state.state_chat_input = False
+
+ if prompt := st.chat_input("Say Something", key="input_1", disabled=st.session_state.state_chat_input):
      # Display user message in chat message container
      with st.chat_message("user"):
          st.markdown(prompt)
      # Add user message to chat history
      st.session_state.messages.append({"role": "user", "content": prompt})

- if prompt:
-     with st.spinner(text="Generating response..."):
-         model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-         print(model_inputs)
-         generated_ids = model.generate(
-             **model_inputs,
-             max_new_tokens=50,
-             repetition_penalty=1.2,
-             temperature=0.5,
-             do_sample=True,
-             top_p=0.9,
-             top_k=20
-         )
-         print("Generated Response!")
-         response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-
-     # Display assistant response in chat message container
-     with st.chat_message("assistant"):
-         st.markdown(response)
-     # Add assistant response to chat history
-     st.session_state.messages.append({"role": "assistant", "content": response})
+ if prompt or st.session_state.state_chat_input:
+     if st.session_state.state_chat_input:
+         with st.spinner(text="Generating response..."):
+             model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+             print(model_inputs)
+             generated_ids = model.generate(
+                 **model_inputs,
+                 max_new_tokens=50,
+                 repetition_penalty=1.2,
+                 temperature=0.5,
+                 do_sample=True,
+                 top_p=0.9,
+                 top_k=20
+             )
+             print("Generated Response!")
+             response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+
+         # Display assistant response in chat message container
+         with st.chat_message("assistant"):
+             st.markdown(response)
+         # Add assistant response to chat history
+         st.session_state.messages.append({"role": "assistant", "content": response})
+         st.session_state.state_chat_input = False
+         st.rerun()
+     else:
+         st.session_state.state_chat_input = True
+         st.rerun()
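The second hunk gates the chat box while a response is being generated: the first rerun disables st.chat_input, the next pass runs generation, and a final st.rerun() re-enables the input. One caveat with this two-pass pattern is that st.chat_input returns None after a rerun, so the pending prompt has to be kept in session_state to survive it. A minimal sketch of that variant, where generate_reply() is a hypothetical helper standing in for the tokenizer/model.generate block above:

```python
import streamlit as st

if "busy" not in st.session_state:
    st.session_state.busy = False

# Input is disabled while a response is pending.
if prompt := st.chat_input("Say Something", disabled=st.session_state.busy):
    st.session_state.pending = prompt  # stash: chat_input returns None after rerun
    st.session_state.busy = True
    st.rerun()                         # redraw immediately with the input disabled

if st.session_state.busy:
    with st.spinner("Generating response..."):
        response = generate_reply(st.session_state.pending)  # hypothetical helper
    st.session_state.messages.append({"role": "assistant", "content": response})
    st.session_state.busy = False
    st.rerun()                         # re-enable the input
```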
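A final note on the generation block itself: with causal LMs, model.generate returns the prompt tokens followed by the continuation, so tokenizer.decode(generated_ids[0]) echoes the user's prompt back into the assistant message. A small variant that decodes only the new tokens, assuming model_inputs and generated_ids as in the diff:

```python
# Decode only the newly generated tokens; generated_ids[0] begins with
# the input_ids prefix for causal language models.
prompt_len = model_inputs["input_ids"].shape[1]
response = tokenizer.decode(generated_ids[0][prompt_len:], skip_special_tokens=True)
```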