joermd committed
Commit 74011b4 · verified · 1 Parent(s): 64e1afa

Update app.py

Files changed (1)
1. app.py +16 -9
app.py CHANGED
@@ -8,10 +8,8 @@ import os
 random_dog = [
     "0f476473-2d8b-415e-b944-483768418a95.jpg",
     "1bd75c81-f1d7-4e55-9310-a27595fa8762.jpg",
-    # Add more images as needed
 ]
 
-# Function to reset conversation
 def reset_conversation():
     '''Resets conversation'''
     st.session_state.conversation = []
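
Note: the dropped "# Function to reset conversation" comment was redundant with the docstring. For context, a helper like reset_conversation is normally wired to a button callback; a minimal sketch, assuming a reset button that this diff does not show:

import streamlit as st

def reset_conversation():
    '''Resets conversation'''
    st.session_state.conversation = []

# Assumed wiring, not part of the commit: on_click runs before the
# script reruns, so the next render sees the cleared state.
st.button("Reset chat", on_click=reset_conversation)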
@@ -33,7 +31,7 @@ for message in st.session_state.messages:
         st.markdown(message["content"])
 
 # Set cache directory path to /data
-cache_dir = "/data"  # the designated storage path in the Hugging Face Space
+cache_dir = "/data"
 
 # Load model and tokenizer on-demand to save memory
 if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع مساعدتك ؟"):
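
On Hugging Face Spaces, /data is the mount point for persistent storage, which is what the deleted Arabic comment said. A minimal sketch of a guarded fallback, assuming the app may also run outside a Space (the fallback is an assumption, not part of the commit):

import os

# /data exists only when persistent storage is attached; fall back to
# None so transformers uses its default cache (~/.cache/huggingface).
cache_dir = "/data" if os.path.isdir("/data") else None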
@@ -41,11 +39,20 @@ if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع
         st.markdown(prompt)
     st.session_state.messages.append({"role": "user", "content": prompt})
 
-    # Load model only when user submits a prompt
     try:
-        # Load the tokenizer and model with caching in the specified directory
-        tokenizer = AutoTokenizer.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
-        model = AutoModelForCausalLM.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
+        # Load the tokenizer and model with specific configuration
+        tokenizer = AutoTokenizer.from_pretrained(
+            "joermd/llma-speedy",
+            cache_dir=cache_dir,
+            local_files_only=False  # allow downloading only the available files
+        )
+
+        model = AutoModelForCausalLM.from_pretrained(
+            "joermd/llma-speedy",
+            cache_dir=cache_dir,
+            local_files_only=False,  # allow downloading only the available files
+            ignore_missing_weights=True  # ignore missing weights
+        )
 
         # Generate response
         inputs = tokenizer(prompt, return_tensors="pt")
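
Two notes on the new keyword arguments: local_files_only=False is already the transformers default, and ignore_missing_weights does not appear to be a documented from_pretrained parameter (ignore_mismatched_sizes is the documented flag for tolerating mismatched weights), so it is likely absorbed rather than honored. A minimal sketch of the same loading step with documented arguments only; low_cpu_mem_usage is an optional extra, not part of the commit:

from transformers import AutoModelForCausalLM, AutoTokenizer

cache_dir = "/data"  # as set earlier in app.py

tokenizer = AutoTokenizer.from_pretrained("joermd/llma-speedy", cache_dir=cache_dir)
model = AutoModelForCausalLM.from_pretrained(
    "joermd/llma-speedy",
    cache_dir=cache_dir,
    low_cpu_mem_usage=True,  # documented flag: lower peak RAM during load
)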
@@ -57,7 +64,7 @@ if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع
         )
         assistant_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-        # Clear memory (for CUDA) and delete the model to free up RAM
+        # Clear memory
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         del model
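
torch.cuda.empty_cache() only releases blocks held by PyTorch's caching allocator; host RAM is reclaimed when the Python references are dropped and garbage-collected, which is why del model matters here. A minimal sketch of that order, with a stand-in model and an extra gc.collect() that are not part of the commit:

import gc
import torch

model = torch.nn.Linear(8, 8)    # stand-in for the loaded model
del model                        # drop the Python reference first
gc.collect()                     # reclaim host memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()     # then release cached CUDA blocks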
@@ -71,4 +78,4 @@ if prompt := st.chat_input(f"مرحبا انا سبيدي , كيف استطيع
     # Display assistant response
     with st.chat_message("assistant"):
         st.markdown(assistant_response)
-    st.session_state.messages.append({"role": "assistant", "content": assistant_response})
+    st.session_state.messages.append({"role": "assistant", "content": assistant_response})
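
The appended entry pairs with the replay loop near the top of app.py (for message in st.session_state.messages), which re-renders the history on every Streamlit rerun. A minimal self-contained sketch of that pattern; the init guard is assumed from context, not shown in this diff:

import streamlit as st

if "messages" not in st.session_state:  # assumed guard, not shown here
    st.session_state.messages = []

# Replay the stored history on every rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])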
 