import gradio as gr
import torch
from transformers import pipeline
|
|
# Model and generation hyperparameters
MAX_NEW_TOKENS = 250  # cap on the length of each generated reply
MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
TEMPERATURE = 0.7  # sampling randomness
TOP_P = 0.95  # nucleus sampling cutoff
REPETITION_PENALTY = 1.1  # discourage repeated tokens
|
|
|
|
# Build the text-generation pipeline once at startup; it is reused for every chat turn.
pipe = pipeline("text-generation", model=MODEL)
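# On a GPU machine the model can be loaded in half precision and placed
# automatically; a minimal sketch, assuming enough memory for the 1.7B model
# and that accelerate is installed (standard transformers.pipeline kwargs,
# not part of the original script):
#   pipe = pipeline("text-generation", model=MODEL,
#                   torch_dtype=torch.bfloat16, device_map="auto")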
|
|
|
|
def message_fx(message, history):
    # Gradio (type="messages") supplies history as a list of
    # {'role': ..., 'content': ...} dicts, so appending the new user turn
    # works the same whether the chat is empty or ongoing. This replaces the
    # original duplicated if/else branches, which did the same thing.
    send_to_api = history + [{'role': 'user', 'content': message}]
    print(send_to_api)
    # Inference only: disable gradient tracking to save memory.
    with torch.no_grad():
        response = pipe(
            send_to_api,
            do_sample=True,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            repetition_penalty=REPETITION_PENALTY,
        )[0]['generated_text'][-1]['content']
    # The pipeline returns the full conversation; the last message is the
    # assistant's reply.
    return response
|
|
|
|
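# Hand the chat function to Gradio's built-in chat UI. type="messages" tells
# Gradio to pass history as a list of {'role': ..., 'content': ...} dicts,
# matching the format message_fx sends to the pipeline.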
gr.ChatInterface(
    fn=message_fx,
    type="messages",
).launch()
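# Quick check without the web UI, e.g. from a REPL (prompt is illustrative):
#   print(message_fx("Hello!", []))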
|
|
|
|