import os
import time

import gradio as gr
import torch
import transformers
from google.cloud import translate_v2 as translate
from huggingface_hub import InferenceClient

# Load the Google Cloud credentials from the secret and write them to a
# temporary file so the client library can pick them up.
credentials = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
if credentials is None:
    raise RuntimeError("GOOGLE_APPLICATION_CREDENTIALS_JSON is not set")

credentials_path = "google_credentials.json"
with open(credentials_path, "w") as f:
    f.write(credentials)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path


def translate_text(source: str, target: str, text: str) -> dict:
    """Translate text from the source language into the target language.

    Source and target must be language codes supported by Cloud Translation:
    ISO 639-1 codes, or extended codes such as "mni-Mtei" (Meitei Mayek).
    See https://g.co/cloud/translate/v2/translate-reference#supported_languages
    """
    translate_client = translate.Client()

    if isinstance(text, bytes):
        text = text.decode("utf-8")

    # Text can also be a sequence of strings, in which case this method
    # will return a sequence of results for each text.
    result = translate_client.translate(
        text, source_language=source, target_language=target
    )
    return result


"""
For more information on `huggingface_hub` Inference API support, please check the docs:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
model_id = "chuanli11/Llama-3.2-3B-Instruct-uncensored"
client = InferenceClient(model_id)

# NOTE: this local pipeline is loaded but never used below; `respond` talks to
# the hosted Inference API through `client` instead. It can be removed (along
# with the `transformers` and `torch` imports) to avoid downloading the 3B
# model into local memory.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message="You are a friendly Chatbot.",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    print(f"Input...{message}")
    english_message = translate_text("mni-Mtei", "en", message)["translatedText"]
    print(f"Translated to English...{english_message}")

    # Rebuild the conversation in English: the model is prompted in English,
    # so every past user/assistant turn is translated from Meitei Mayek first.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append(
                {
                    "role": "user",
                    "content": translate_text("mni-Mtei", "en", user_turn)["translatedText"],
                }
            )
        if assistant_turn:
            messages.append(
                {
                    "role": "assistant",
                    "content": translate_text("mni-Mtei", "en", assistant_turn)["translatedText"],
                }
            )
    messages.append({"role": "user", "content": english_message})

    response = ""
    print(f"Running inference...{messages}")
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final streamed chunk can carry no content
            response += token
    print(f"Response...{response}")

    # Translate the full English response back to Meitei Mayek, then stream it
    # to the UI character by character to simulate typing.
    response_text = translate_text("en", "mni-Mtei", response)["translatedText"]
    typing_text = ""
    for char in response_text:
        time.sleep(0.05)
        typing_text += char
        yield typing_text


"""
For information on how to customize the ChatInterface, peruse the gradio docs:
https://www.gradio.app/docs/chatinterface
"""
# demo = gr.ChatInterface(
#     respond,
#     additional_inputs=[
#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
#         gr.Slider(
#             minimum=0.1,
#             maximum=1.0,
#             value=0.95,
#             step=0.05,
#             label="Top-p (nucleus sampling)",
#         ),
#     ],
# )
demo = gr.ChatInterface(respond)

if __name__ == "__main__":
    demo.launch()
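
# Usage sketch (assumptions: this script is saved as app.py, the
# GOOGLE_APPLICATION_CREDENTIALS_JSON secret is populated with a service
# account key, and any Hugging Face token the model requires is available
# to `InferenceClient`):
#
#   python app.py
#
# Gradio serves the chat UI on its default local URL (typically
# http://127.0.0.1:7860). Messages typed in Meitei Mayek (mni-Mtei) are
# translated to English, answered by the model, and translated back.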