Spaces: Runtime error
Update app.py
app.py (CHANGED)
@@ -2,10 +2,10 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 from peft import PeftModel
 
-#
-base_model = AutoModelForCausalLM.from_pretrained("
+# Use a smaller model to reduce memory usage
+base_model = AutoModelForCausalLM.from_pretrained("meta/llama-2-7b-hf") # Smaller model
 model = PeftModel.from_pretrained(base_model, "DarkAngel/gitallama")
-tokenizer = AutoTokenizer.from_pretrained("
+tokenizer = AutoTokenizer.from_pretrained("meta/llama-2-7b-hf") # Use the tokenizer for the smaller model
 
 def generate_response(shloka, transliteration):
     """
@@ -18,15 +18,13 @@ def generate_response(shloka, transliteration):
         }
     ]
 
-    # Ensure the model uses CPU instead of GPU
     inputs = tokenizer.apply_chat_template(
         input_message,
         tokenize=True,
-        add_generation_prompt=True,
+        add_generation_prompt=True,
         return_tensors="pt"
-    ).to("cpu") #
+    ).to("cpu") # Ensure CPU usage
 
-    # Generate response
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     generated_tokens = model.generate(
         input_ids=inputs,
@@ -39,7 +37,6 @@ def generate_response(shloka, transliteration):
 
     raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
 
-    # Format the response
     try:
         sections = raw_response.split("Hindi Meaning:")
         english_meaning = sections[0].strip()
@@ -47,14 +44,12 @@ def generate_response(shloka, transliteration):
         hindi_meaning = hindi_and_word[0].strip()
         word_meaning = hindi_and_word[1].strip()
 
-        # Format response for better readability
        formatted_response = (
             f"English Meaning:\n{english_meaning}\n\n"
             f"Hindi Meaning:\n{hindi_meaning}\n\n"
             f"Word Meaning:\n{word_meaning}"
         )
     except IndexError:
-        # In case the response format is not as expected
         formatted_response = raw_response
 
     return formatted_response
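Two things in this change are worth flagging. A PEFT adapter only attaches cleanly to the base checkpoint it was trained against, so pointing `DarkAngel/gitallama` at a different "smaller model" is only safe if the adapter actually targets it. And `meta/llama-2-7b-hf` does not look like a valid Hub repo id: the official Llama 2 checkpoint is published as `meta-llama/Llama-2-7b-hf`, and it is gated, so loading it also requires an accepted license and an access token. Either issue would leave the Space in the "Runtime error" state shown above. A minimal sketch of the load step under those assumptions:

```python
# Sketch only: assumes the adapter was trained on Llama-2-7B and that the
# environment holds an access token for the gated meta-llama repo.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_ID = "meta-llama/Llama-2-7b-hf"   # official (gated) repo id
ADAPTER_ID = "DarkAngel/gitallama"     # adapter from the diff

# Stream weights from disk to limit peak RAM while loading on CPU.
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_ID,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model.eval()  # inference only

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
```

Even with a correct repo id, a 7B model in float32 wants on the order of 28 GB of RAM, more than basic CPU Spaces hardware offers, so a quantized checkpoint or a genuinely smaller base model is the more realistic fix for the memory problem this commit is chasing.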
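The diff only shows fragments of `generate_response`, so the sketch below stitches the visible pieces together around the `model` and `tokenizer` from the load sketch above. The chat message shape, the `max_new_tokens` value, and the hidden `split("Word Meaning:")` step (app.py line 46 is outside the diff) are assumptions inferred from the f-string labels; the parsing also uses tuple unpacking, so a malformed response surfaces as `ValueError` rather than the original's `IndexError`.

```python
# Sketch only: prompt shape and max_new_tokens are assumptions.
from transformers import TextStreamer

def generate_response(shloka: str, transliteration: str) -> str:
    input_message = [
        {
            "role": "user",
            "content": f"Shloka: {shloka}\nTransliteration: {transliteration}",
        }
    ]

    # Render the chat template to input ids and keep them on CPU.
    inputs = tokenizer.apply_chat_template(
        input_message,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cpu")

    # Stream tokens to stdout while also collecting them for parsing.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    generated_tokens = model.generate(
        input_ids=inputs,
        streamer=text_streamer,
        max_new_tokens=256,  # assumed; the real value is outside the diff
    )
    raw_response = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

    # Split on the section markers the app's f-strings expect; fall back
    # to the raw text when the model skips a marker.
    try:
        english, rest = raw_response.split("Hindi Meaning:", 1)
        hindi, words = rest.split("Word Meaning:", 1)
        return (
            f"English Meaning:\n{english.strip()}\n\n"
            f"Hindi Meaning:\n{hindi.strip()}\n\n"
            f"Word Meaning:\n{words.strip()}"
        )
    except ValueError:
        return raw_response
```

Hooking this into the Space's UI would then be the usual one-liner: `gr.Interface(fn=generate_response, inputs=["text", "text"], outputs="text").launch()`.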