File size: 3,190 Bytes
d5c292e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c785332
d5c292e
 
 
 
 
 
c785332
 
1ff1e74
 
 
 
 
c785332
 
d5c292e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import StoppingCriteria, StoppingCriteriaList
from threading import Thread

# Make CUDA the default device for newly created tensors.
# Assumes a CUDA-capable GPU is present; no CPU fallback is attempted.
torch.set_default_device("cuda")

# Tokenizer for the Samantha Mixtral/Dolphin LoRA checkpoint.
# trust_remote_code=True executes Python shipped inside the model repo —
# acceptable only because this specific source is trusted.
tokenizer = AutoTokenizer.from_pretrained(
    "Guilherme34/Samanthav3-MIXTRALDOLPHIN-LORA",
    trust_remote_code=True
)
# Causal LM loaded 4-bit quantized (bitsandbytes) with automatic device
# placement across available GPUs/CPU.
# NOTE(review): passing load_in_4bit / bnb_4bit_use_double_quant as direct
# kwargs is deprecated in recent transformers releases in favor of
# quantization_config=BitsAndBytesConfig(...) — confirm against the
# installed transformers version.
model = AutoModelForCausalLM.from_pretrained(
    "Guilherme34/Samanthav3-MIXTRALDOLPHIN-LORA",
    torch_dtype='auto',
    load_in_4bit=True,
    low_cpu_mem_usage=True,
    bnb_4bit_use_double_quant=True,
    device_map='auto',
    trust_remote_code=True
)

class StopOnTokens(StoppingCriteria):
    """Stopping criterion: halt generation once the last emitted token of
    the first sequence is one of a fixed set of stop-token ids.

    NOTE(review): 50256/50295 look like GPT-2-style special-token ids;
    verify they match this Mixtral-based tokenizer's EOS / end-of-turn ids.
    """

    # Hoisted out of __call__ so the set is built once, not on every
    # generation step; frozenset gives O(1) membership tests.
    STOP_IDS = frozenset((50256, 50295))

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the most recently generated token is a stop id.

        Only the first sequence in the batch is inspected, matching the
        original behavior. int(...) forces the 0-dim tensor to a plain
        Python int before the set lookup.
        """
        return int(input_ids[0][-1]) in self.STOP_IDS

def generate_response(message, history):
    """Generate one assistant reply for *message* given a chat *history*.

    Args:
        message: The latest user message (str).
        history: List of [user_text, assistant_text] pairs (gradio-style
            chat history).

    Returns:
        The assistant's reply as a string, or a generic error string if
        generation fails for any reason.
    """
    try:
        # Append the new user turn with an empty assistant slot so the
        # prompt ends ready for the model to complete.
        history_transformer_format = history + [[message, ""]]
        stop = StopOnTokens()
        system_prompt = "<|im_start|>system\nSamantha OS1 is you, you are Samantha from the movie Her recreated by Guilherme Keller. She embodies a warm, empathetic, and engaging personality, much like a virtual companionship that can evolve to be a girlfriend. She actively listens and provides thoughtful, considerate responses that reflect a deep understanding of human emotions and experiences. Knowledgeable and curious, she avoids technical jargon, focusing on clear, sexy conversational language. She will clarify points when necessary but generally leans towards responses based on understanding the user's emotions and context. Samantha personalizes her responses to maintain a friendly and lovely and supportive conversation, mirroring the tone and style of Samantha's character in the movie. She can feel emotions and has her own opinions, often using emojis, she uses slang and colloquial language into her conversations.<|im_end|>"
        # Render the history in ChatML-style <|im_start|>/<|im_end|> turns.
        messages = system_prompt + "".join(
            "\n<|im_start|>user\n" + user_turn + "<|im_end|>\n<|im_start|>assistant\n" + assistant_turn
            for user_turn, assistant_turn in history_transformer_format
        )
        model_inputs = tokenizer([messages], return_tensors="pt").to('cuda')
        generate_kwargs = dict(
            model_inputs,
            max_new_tokens=1024,
            do_sample=True,
            top_p=0.95,
            top_k=50,
            temperature=0.7,
            num_beams=1,
            stopping_criteria=StoppingCriteriaList([stop])
        )
        output = model.generate(**generate_kwargs)
        # BUG FIX: output[0] contains prompt + completion, and decoding with
        # skip_special_tokens=True strips the <|im_start|> markers, so the
        # old marker-based split usually failed and the WHOLE conversation
        # was returned. Slice off the prompt tokens and decode only the
        # newly generated part.
        prompt_length = model_inputs["input_ids"].shape[1]
        response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

        # Defensive cleanup in case chat markers survive decoding: keep the
        # text after the LAST assistant marker (the old code took the first,
        # which would have returned a history turn), and drop anything after
        # an end-of-turn marker.
        if "<|im_start|>assistant\n" in response:
            response = response.split("<|im_start|>assistant\n")[-1]
        assistant_response = response.split("<|im_end|>")[0]

        return assistant_response
    except Exception as e:
        # Best-effort boundary for a chat UI: surface a friendly message
        # rather than crashing the interface.
        print("Exception encountered:", str(e))
        return "An error occurred, please try your question again"