import os

import gradio as gr
from huggingface_hub import InferenceClient

from google.cloud import translate_v2 as translate
# Load the service-account credentials (the full JSON key) from the secret
credentials = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
if credentials is None:
    raise RuntimeError("The GOOGLE_APPLICATION_CREDENTIALS_JSON secret is not set.")

# Write the credentials to a file on disk, since the Google client library
# expects GOOGLE_APPLICATION_CREDENTIALS to be a path to a key file
credentials_path = "google_credentials.json"

with open(credentials_path, "w") as f:
    f.write(credentials)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

def translate_text(source: str, target: str, text: str) -> dict:
    """Translates text from the source language into the target language.

    Target must be an ISO 639-1 language code.
    See https://g.co/cloud/translate/v2/translate-reference#supported_languages
    """

    translate_client = translate.Client()

    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # Text can also be a sequence of strings, in which case this method
    # returns a sequence of results, one per input string.
    result = translate_client.translate(
        text, source_language=source, target_language=target
    )

    return result
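
# Example usage (a sketch, assuming the credentials above are valid; "fr" is
# the ISO 639-1 code for French). The returned dict carries at least the
# "translatedText" and "input" keys:
#
#     result = translate_text("en", "fr", "Hello, world!")
#     print(result["translatedText"])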
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
model_id="chuanli11/Llama-3.2-3B-Instruct-uncensored"
client = InferenceClient(model_id)
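# Note: with no token, InferenceClient calls the serverless Inference API
# anonymously and is rate-limited; passing token=os.getenv("HF_TOKEN")
# (assuming an HF_TOKEN secret is configured) raises those limits.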


def respond(
    message,
    history: list[tuple[str, str]],
    system_message="You are a friendly Chatbot.",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    print(f"Input...{message}")
    # Translate the Meiteilon/Manipuri (mni-Mtei) input into English for the model.
    english_message = translate_text("mni-Mtei", "en", message)["translatedText"]

    print(f"Translated to English...{english_message}")

    messages = [{"role": "system", "content": system_message}]

    # Translate the stored history too, so the model sees an all-English chat.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": translate_text("mni-Mtei", "en", user_turn)["translatedText"]})
        if assistant_turn:
            messages.append({"role": "assistant", "content": translate_text("mni-Mtei", "en", assistant_turn)["translatedText"]})

    messages.append({"role": "user", "content": english_message})

    response = ""
    print(f"Running inference...{messages}")
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # The final streamed chunk can arrive with an empty delta, so guard
        # against None before concatenating.
        token = chunk.choices[0].delta.content
        if not token:
            continue

        response += token
        # Translate the accumulated English response back to Meitei on every
        # chunk: simple, but it costs one Translation API call per chunk.
        yield translate_text("en", "mni-Mtei", response)["translatedText"]
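
# A lower-cost streaming variant is possible (a sketch, not the behavior
# above): accumulate English tokens and translate back to Meitei only every
# few chunks, trading update smoothness for far fewer Translation API calls.
#
#     for i, chunk in enumerate(client.chat_completion(messages, stream=True,
#                                                      max_tokens=max_tokens)):
#         token = chunk.choices[0].delta.content or ""
#         response += token
#         if token and i % 10 == 0:
#             yield translate_text("en", "mni-Mtei", response)["translatedText"]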


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# demo = gr.ChatInterface(
#     respond,
#     additional_inputs=[
#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#         gr.Slider(
#             minimum=0.1,
#             maximum=1.0,
#             value=0.95,
#             step=0.05,
#             label="Top-p (nucleus sampling)",
#         ),
#     ],
# )


demo = gr.ChatInterface(
    respond
)


if __name__ == "__main__":
    demo.launch()