chatGPT

Runtime error

App Files Files Community

sm00th-manif0ld

yizhangliu committed on Mar 9, 2023

Commit

b9dacd8

•

0 Parent(s):

Duplicate from yizhangliu/chatGPT

Browse files

Co-authored-by: yizhangliu <yizhangliu@users.noreply.huggingface.co>

Files changed (9) hide show

.gitattributes +34 -0
README.md +13 -0
app.py +411 -0
baidu_translate/module.py +106 -0
encoder.json +0 -0
encoder.py +120 -0
requirements.txt +12 -0
utils.py +54 -0
vocab.bpe +0 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: ChatGPT
+emoji: 📊
+colorFrom: blue
+colorTo: blue
+sdk: gradio
+sdk_version: 3.12.0
+app_file: app.py
+pinned: false
+duplicated_from: yizhangliu/chatGPT
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,411 @@

+from pyChatGPT import ChatGPT
+import openai
+import gradio as gr
+import os, sys, json
+from loguru import logger
+import paddlehub as hub
+import random
+from encoder import get_encoder
+# The OpenAI API key is read from the OPENAI_API_KEY environment variable.
+openai.api_key = os.getenv("OPENAI_API_KEY")
+from utils import get_tmt_client, getTextTrans_tmt
+# Translation client created once at import time; getTextTrans() passes it to getTextTrans_tmt().
+tmt_client = get_tmt_client()
+# language_translation_model = hub.Module(directory=f'./baidu_translate')
+def getTextTrans(text, source='zh', target='en'):
+    return getTextTrans_tmt(tmt_client, text, source, target)
+    # def is_chinese(string):
+    #     for ch in string:
+    #         if u'\u4e00' <= ch <= u'\u9fff':
+    #             return True
+    #     return False
+    # if not is_chinese(text) and target == 'en':
+    #     return text
+    # try:
+    #     text_translation = language_translation_model.translate(text, source, target)
+    #     return text_translation
+    # except Exception as e:
+    #     return text
+# Session token handed to ChatGPT() in get_api(); read from the 'SessionToken' environment variable.
+session_token = os.environ.get('SessionToken')
+# logger.info(f"session_token_: {session_token}")
+def get_api():
+    api = None
+    try:
+      api = ChatGPT(session_token)
+      # api.refresh_auth()
+    except Exception as e:
+      logger.info(f'get_api_error: {e}')
+      api = None
+    return api
+def get_response_from_chatgpt(api, text):
+    if api is None:
+        # return "Sorry, I'm busy. Try again later.(1)"
+        return "Openai said: I'm too tired. Let me lie down for a few days. If you like, you can visit my home(1)."
+    try:
+      resp = api.send_message(text)
+      # api.refresh_auth()
+      # api.reset_conversation()
+      response = resp['message']
+      conversation_id = resp['conversation_id']
+      parent_id = resp['parent_id']
+      # logger.info(f"response_: {response}")
+      logger.info(f"conversation_id_: [{conversation_id}] / parent_id: [{parent_id}]")
+    except:
+      # response = "Sorry, I'm busy. Try again later.(2)"
+      response = "Openai said: I'm so tired. Let me lie down for a few days. If you like, you can visit my home(2)."
+    return response
+# Tokenizer used by get_response_from_openai() to count and trim prompt tokens.
+token_encoder = get_encoder()
+# Token budget: the prompt gets whatever remains of total_tokens after
+# max_output_tokens is reserved for the completion.
+total_tokens = 4096
+max_output_tokens = 1024
+max_input_tokens = total_tokens - max_output_tokens
+def get_response_from_openai(input, history):
+    def openai_create(prompt):
+        # no chatgpt, and from gpt-3
+        try:
+            response = openai.Completion.create(
+                model="text-davinci-003",
+                prompt=prompt,
+                temperature=0.9,
+                max_tokens=max_output_tokens,
+                top_p=1,
+                frequency_penalty=0,
+                presence_penalty=0.6,
+                stop=[" Human:", " AI:"]
+            )
+            ret = response.choices[0].text
+            if ret == '':
+                ret = "Openai said: I'm too tired. Let me lie down for a few days. If you like, you can visit my home(3)."
+        except Exception as e:
+            ret = "Openai said: I'm too tired. Let me lie down for a few days. If you like, you can visit my home(4)."
+        return ret
+    history = history or []
+    his= [tuple(item) for item in history]
+    s = list(sum(his, ()))
+    s.append(input)
+    inp = ' '.join(s)
+    tokens = token_encoder.encode(inp)
+    if len(tokens) > max_input_tokens:
+        new_tokens = tokens[-max_input_tokens:]
+        inp = token_encoder.decode(new_tokens)
+    #     tokens_1 = token_encoder.encode(inp)
+    #     logger.info(f"tokens_len[1]__{len(tokens)}__{len(new_tokens)}__{len(tokens_1)}")
+    # else:
+    #     logger.info(f"tokens_len[0]__{len(tokens)}")
+    output = openai_create(inp)
+    return output
+# JavaScript executed in the browser when the start button is clicked (it is
+# passed as `_js` to start_button.click). It hides #page_1 and shows #page_2,
+# restores chat bubbles saved in localStorage under 'chatgpt_conversations',
+# and installs a 500 ms poll (window.checkChange) that either copies new
+# bubbles from #chat_bot into #chat_bot1 or drives the image tab.
+# NOTE: every character of this literal is runtime behavior.
+start_work = """async() => {
+    function isMobile() {
+        try {
+            document.createEvent("TouchEvent"); return true;
+        } catch(e) {
+            return false;
+        }
+    }
+	function getClientHeight()
+	{
+	  var clientHeight=0;
+	  if(document.body.clientHeight&&document.documentElement.clientHeight) {
+		var clientHeight = (document.body.clientHeight<document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
+	  } else {
+		var clientHeight = (document.body.clientHeight>document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
+	  }
+	  return clientHeight;
+	}
+    function img_click(img) {
+        this_width = parseInt(img.style.width) + 20;
+        if (this_width > 100) {
+            this_width = 20;
+        }
+        img.style.width = this_width + "%";
+    }
+    function setNativeValue(element, value) {
+      const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
+      const prototype = Object.getPrototypeOf(element);
+      const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
+      if (valueSetter && valueSetter !== prototypeValueSetter) {
+            prototypeValueSetter.call(element, value);
+      } else {
+            valueSetter.call(element, value);
+      }
+      element.dispatchEvent(new Event('input', { bubbles: true }));
+    }
+    function save_conversation(chatbot) {
+        var conversations = new Array();
+        var conversations_noimg = new Array();
+        for (var i = 0; i < chatbot.children.length; i++) {
+            innerHTML = chatbot.children[i].innerHTML;
+            conversations.push(innerHTML);
+            if (innerHTML.indexOf("<img ") == -1) {
+                conversations_noimg.push(innerHTML);
+            }
+        }
+        var json_str = JSON.stringify(conversations);
+        setNativeValue(window['chat_his'], JSON.stringify(conversations_noimg));
+        localStorage.setItem('chatgpt_conversations', json_str);
+    }
+    function load_conversation(chatbot) {
+        var json_str = localStorage.getItem('chatgpt_conversations');
+        if (json_str) {
+            var conversations_noimg = new Array();
+            conversations = JSON.parse(json_str);
+            for (var i = 0; i < conversations.length; i++) {
+                var new_div = document.createElement("div");
+                if((i%2)===0){
+                    new_div.className = "px-3 py-2 rounded-[22px] rounded-br-none text-white text-sm chat-message svelte-rct66g";
+                    new_div.style.backgroundColor = "#16a34a";
+                } else {
+                    new_div.className = "px-3 py-2 rounded-[22px] rounded-bl-none place-self-start text-white text-sm chat-message svelte-rct66g";
+                    new_div.style.backgroundColor = "#2563eb";
+                    if (conversations[i].indexOf("<img ") == 0) {
+                        new_div.style.width = "20%";
+                        new_div.onclick = function(e){
+                            img_click(this);
+                        }
+                        new_div.style.padding = "0.2rem";
+                    }
+                }
+                innerHTML = conversations[i];
+                new_div.innerHTML = innerHTML;
+                chatbot.appendChild(new_div);
+                if (innerHTML.indexOf("<img ") == -1) {
+                    conversations_noimg.push(innerHTML);
+                }
+            }
+            setNativeValue(window['chat_his'], JSON.stringify(conversations_noimg));
+        }
+    }
+    var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
+    if (!gradioEl) {
+        gradioEl = document.querySelector('body > gradio-app');
+    }
+    if (typeof window['gradioEl'] === 'undefined') {
+        window['gradioEl'] = gradioEl;
+        const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
+        const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];
+        page1.style.display = "none";
+        page2.style.display = "block";
+        window['div_count'] = 0;
+        window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
+        window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
+        window['chat_his'] = window['gradioEl'].querySelectorAll('#chat_history')[0].querySelectorAll('textarea')[0];
+        chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
+        prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
+        window['chat_bot1'].children[1].textContent = '';
+        clientHeight = getClientHeight();
+        if (isMobile()) {
+            output_htmls = window['gradioEl'].querySelectorAll('.output-html');
+            for (var i = 0; i < output_htmls.length; i++) {
+               output_htmls[i].style.display = "none";
+            }
+            new_height = (clientHeight - 250) + 'px';
+        } else {
+            new_height = (clientHeight - 350) + 'px';
+        }
+        chat_row.style.height = new_height;
+        window['chat_bot'].style.height = new_height;
+        window['chat_bot'].children[2].style.height = new_height;
+        window['chat_bot1'].style.height = new_height;
+        window['chat_bot1'].children[2].style.height = new_height;
+        prompt_row.children[0].style.flex = 'auto';
+        prompt_row.children[0].style.width = '100%';
+        window['gradioEl'].querySelectorAll('#chat_radio')[0].style.flex = 'auto';
+        window['gradioEl'].querySelectorAll('#chat_radio')[0].style.width = '100%';
+        prompt_row.children[0].setAttribute('style','flex-direction: inherit; flex: 1 1 auto; width: 100%;border-color: green;border-width: 1px !important;')
+        window['chat_bot1'].children[1].setAttribute('style', 'border-bottom-right-radius:0;top:unset;bottom:0;padding-left:0.1rem');
+        window['gradioEl'].querySelectorAll('#btns_row')[0].children[0].setAttribute('style', 'min-width: min(10px, 100%); flex-grow: 1');
+        window['gradioEl'].querySelectorAll('#btns_row')[0].children[1].setAttribute('style', 'min-width: min(10px, 100%); flex-grow: 1');
+        load_conversation(window['chat_bot1'].children[2].children[0]);
+        window['chat_bot1'].children[2].scrollTop = window['chat_bot1'].children[2].scrollHeight;
+        window['gradioEl'].querySelectorAll('#clear-btn')[0].onclick = function(e){
+            if (confirm('Clear all outputs?')==true) {
+                 window['chat_bot1'].children[2].children[0].innerHTML = '';
+                 save_conversation(window['chat_bot1'].children[2].children[0]);
+            }
+        }
+        window['prevPrompt'] = '';
+        window['doCheckPrompt'] = 0;
+        window['prevImgSrc'] = '';
+        window['checkChange'] = function checkChange() {
+            try {
+                if (window['gradioEl'].querySelectorAll('.gr-radio')[0].checked) {
+                    if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
+                        new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
+                        for (var i = 0; i < new_len; i++) {
+                            new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
+                            window['chat_bot1'].children[2].children[0].appendChild(new_div);
+                        }
+                        window['div_count'] = chat_bot.children[2].children[0].children.length;
+                        window['chat_bot1'].children[2].scrollTop = window['chat_bot1'].children[2].scrollHeight;
+                        save_conversation(window['chat_bot1'].children[2].children[0]);
+                    }
+                    if (window['chat_bot'].children[0].children.length > 1) {
+                        window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
+                    } else {
+                        window['chat_bot1'].children[1].textContent = '';
+                    }
+                } else {
+                    texts = window['gradioEl'].querySelectorAll('textarea');
+                    text0 = texts[0];
+                    text1 = texts[1];
+                    img_index = 0;
+                    text_value = text1.value;
+                    if (window['doCheckPrompt'] === 0 && window['prevPrompt'] !== text_value) {
+                            console.log('_____new prompt___[' + text_value + ']_');
+                            window['doCheckPrompt'] = 1;
+                            window['prevPrompt'] = text_value;
+                            tabitems = window['gradioEl'].querySelectorAll('.tabitem');
+                            for (var i = 0; i < tabitems.length; i++) {
+                                inputText = tabitems[i].children[0].children[1].children[0].querySelectorAll('.gr-text-input')[0];
+                                setNativeValue(inputText, text_value);
+                            }
+                            setTimeout(function() {
+                                btns = window['gradioEl'].querySelectorAll('button');
+                                for (var i = 0; i < btns.length; i++) {
+                                    if (['Generate image','Run'].includes(btns[i].innerText)) {
+                                        btns[i].click();
+                                    }
+                                }
+                                window['doCheckPrompt'] = 0;
+                            }, 10);
+                    }
+                    tabitems = window['gradioEl'].querySelectorAll('.tabitem');
+                    imgs = tabitems[img_index].children[0].children[1].children[1].querySelectorAll("img");
+                    if (imgs.length > 0) {
+                        if (window['prevImgSrc'] !== imgs[0].src) {
+                            var user_div = document.createElement("div");
+                            user_div.className = "px-3 py-2 rounded-[22px] rounded-br-none text-white text-sm chat-message svelte-rct66g";
+                            user_div.style.backgroundColor = "#16a34a";
+                            user_div.innerHTML = "<p>" + text0.value + "</p><img ></img>";
+                            window['chat_bot1'].children[2].children[0].appendChild(user_div);
+                            var bot_div = document.createElement("div");
+                            bot_div.className = "px-3 py-2 rounded-[22px] rounded-bl-none place-self-start text-white text-sm chat-message svelte-rct66g";
+                            bot_div.style.backgroundColor = "#2563eb";
+                            bot_div.style.width = "40%";
+                            bot_div.onclick = function(e){
+                                img_click(this);
+                            }
+                            bot_div.style.padding = "0.2rem";
+                            bot_div.appendChild(imgs[0].cloneNode(true));
+                            window['chat_bot1'].children[2].children[0].appendChild(bot_div);
+                            window['chat_bot1'].children[2].scrollTop = window['chat_bot1'].children[2].scrollHeight;
+                            window['prevImgSrc'] = imgs[0].src;
+                            save_conversation(window['chat_bot1'].children[2].children[0]);
+                        }
+                    }
+                    if (tabitems[img_index].children[0].children[1].children[1].children[0].children.length > 1) {
+                        window['chat_bot1'].children[1].textContent = tabitems[img_index].children[0].children[1].children[1].children[0].textContent;
+                    } else {
+                        window['chat_bot1'].children[1].textContent = '';
+                    }
+                }
+            } catch(e) {
+            }
+        }
+        window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);
+    }
+    return false;
+}"""
+space_ids = {
+            "spaces/stabilityai/stable-diffusion":"Stable Diffusion 2.1",
+            # "spaces/runwayml/stable-diffusion-v1-5":"Stable Diffusion 1.5",
+            # "spaces/stabilityai/stable-diffusion-1":"Stable Diffusion 1.0",
+            }
+tab_actions = []
+tab_titles = []
+for space_id in space_ids.keys():
+    print(space_id, space_ids[space_id])
+    try:
+        tab = gr.Interface.load(space_id)
+        tab_actions.append(tab)
+        tab_titles.append(space_ids[space_id])
+    except Exception as e:
+        logger.info(f"load_fail__{space_id}_{e}")
+def chat(api, input0, input1, chat_radio, chat_history):
+    out_chat = []
+    chat_history = chat_history.replace('<p>', '').replace('</p>', '')
+    if chat_history != '':
+        out_chat_1 = json.loads(chat_history)
+        for i in range(int(len(out_chat_1)/2)):
+            out_chat.append([out_chat_1[2*i], out_chat_1[2*i+1]])
+    # logger.info(f"out_chat_: {len(out_chat)} / {chat_radio}")
+    if chat_radio == "Talk to chatGPT":
+        # response = get_response_from_chatgpt(api, input0)
+        response = get_response_from_openai(input0, out_chat)
+        out_chat.append((input0, response))
+        # logger.info(f'liuyz_5___{out_chat}__')
+        return api, out_chat, input1
+    else:
+        prompt_en = getTextTrans(input0, source='zh', target='en') + f',{random.randint(0,sys.maxsize)}'
+        return api, out_chat, prompt_en
+# UI layout: page_0 (instructions), page_1 (start button) and page_2 (chat UI, created hidden).
+with gr.Blocks(title='Talk to chatGPT') as demo:
+    with gr.Group(elem_id="page_0", visible=True) as page_0:
+        gr.HTML("<p>You can duplicating this space and use your own session token: <a style='display:inline-block' href='https://huggingface.co/spaces/yizhangliu/chatGPT?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a></p>")
+        gr.HTML("<p> Instruction on how to get session token can be seen in video <a style='display:inline-block' href='https://www.youtube.com/watch?v=TdNSj_qgdFk'><font style='color:blue;weight:bold;'>here</font></a>. Add your session token by going to settings and add under secrets. </p>")
+    with gr.Group(elem_id="page_1", visible=True) as page_1:
+        with gr.Box():
+            with gr.Row():
+                start_button = gr.Button("Let's talk to chatGPT!", elem_id="start-btn", visible=True)
+                # No Python callback: the click only runs the start_work JavaScript.
+                start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)
+    with gr.Group(elem_id="page_2", visible=False) as page_2:
+        with gr.Row(elem_id="chat_row"):
+            # chat() writes to the hidden "chat_bot"; "chat_bot1" is the visible one.
+            chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
+            chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))
+        with gr.Row(elem_id="prompt_row"):
+            # prompt_input0 is the visible user textbox; prompt_input1 and
+            # chat_history are hidden textboxes passed to and from chat().
+            prompt_input0 = gr.Textbox(lines=2, label="prompt",show_label=False)
+            prompt_input1 = gr.Textbox(lines=4, label="prompt", visible=False)
+            chat_history = gr.Textbox(lines=4, label="prompt", elem_id="chat_history", visible=False)
+            chat_radio = gr.Radio(["Talk to chatGPT", "Text to Image"], elem_id="chat_radio",value="Talk to chatGPT", show_label=False, visible=True)
+        with gr.Row(elem_id="btns_row"):
+            # NOTE(review): these columns pass `id=` while every other component uses `elem_id=` - confirm intent.
+            with gr.Column(id="submit_col"):
+                submit_btn = gr.Button(value = "submit",elem_id="submit-btn").style(
+                        margin=True,
+                        rounded=(True, True, True, True),
+                        width=100
+                    )
+            with gr.Column(id="clear_col"):
+                clear_btn = gr.Button(value = "clear outputs", elem_id="clear-btn").style(
+                        margin=True,
+                        rounded=(True, True, True, True),
+                        width=100
+                    )
+            # get_api() is called once here, while the layout is being built.
+            api = gr.State(value=get_api())
+            submit_btn.click(fn=chat,
+                             inputs=[api, prompt_input0, prompt_input1, chat_radio, chat_history],
+                             outputs=[api, chatbot, prompt_input1],
+                            )
+        # Row created hidden; it holds the tabs built from the loaded spaces.
+        with gr.Row(elem_id='tab_img', visible=False).style(height=5):
+           tab_img = gr.TabbedInterface(tab_actions, tab_titles)
+# Runs at import time: there is no `if __name__ == "__main__"` guard.
+demo.launch(debug = True)

baidu_translate/module.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import argparse
+import random, os
+from hashlib import md5
+from typing import Optional
+import requests
+import paddlehub as hub
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import runnable
+from paddlehub.module.module import serving
+def make_md5(s, encoding='utf-8'):
+    return md5(s.encode(encoding)).hexdigest()
+@moduleinfo(name="baidu_translate",
+            version="1.0.0",
+            type="text/machine_translation",
+            summary="",
+            author="baidu-nlp",
+            author_email="paddle-dev@baidu.com")
+class BaiduTranslate:
+    def __init__(self, appid=None, appkey=None):
+        """
+      :param appid: appid for requesting Baidu translation service.
+      :param appkey: appkey for requesting Baidu translation service.
+      """
+        appid = os.environ.get('baidu_translate_appid')
+        appkey = os.environ.get('baidu_translate_appkey')
+        # Set your own appid/appkey.
+        if appid is None:
+            self.appid = ''
+        else:
+            self.appid = appid
+        if appkey is None:
+            self.appkey = ''
+        else:
+            self.appkey = appkey
+        self.url = 'http://api.fanyi.baidu.com/api/trans/vip/translate'
+    def translate(self, query: str, from_lang: Optional[str] = "en", to_lang: Optional[int] = "zh"):
+        """
+        Create image by text prompts using ErnieVilG model.
+        :param query: Text to be translated.
+        :param from_lang: Source language.
+        :param to_lang: Dst language.
+        Return translated string.
+        """
+        # Generate salt and sign
+        salt = random.randint(32768, 65536)
+        sign = make_md5(self.appid + query + str(salt) + self.appkey)
+        # Build request
+        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
+        payload = {'appid': self.appid, 'q': query, 'from': from_lang, 'to': to_lang, 'salt': salt, 'sign': sign}
+        # Send request
+        try:
+            r = requests.post(self.url, params=payload, headers=headers)
+            result = r.json()
+        except Exception as e:
+            error_msg = str(e)
+            raise RuntimeError(error_msg)
+        if 'error_code' in result:
+            raise RuntimeError(result['error_msg'])
+        return result['trans_result'][0]['dst']
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name),
+                                              prog='hub run {}'.format(self.name),
+                                              usage='%(prog)s',
+                                              add_help=True)
+        self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required")
+        self.add_module_input_arg()
+        args = self.parser.parse_args(argvs)
+        if args.appid is not None and args.appkey is not None:
+            self.appid = args.appid
+            self.appkey = args.appkey
+        result = self.translate(args.query, args.from_lang, args.to_lang)
+        return result
+    @serving
+    def serving_method(self, query, from_lang, to_lang):
+        """
+        Run as a service.
+        """
+        return self.translate(query, from_lang, to_lang)
+    def add_module_input_arg(self):
+        """
+        Add the command input options.
+        """
+        self.arg_input_group.add_argument('--query', type=str)
+        self.arg_input_group.add_argument('--from_lang', type=str, default='en', help="源语言")
+        self.arg_input_group.add_argument('--to_lang', type=str, default='zh', help="目标语言")
+        self.arg_input_group.add_argument('--appid', type=str, default=None, help="注册得到的个人appid")
+        self.arg_input_group.add_argument('--appkey', type=str, default=None, help="注册得到的个人appkey")

encoder.json ADDED Viewed

The diff for this file is too large to render. See raw diff

encoder.py ADDED Viewed

	@@ -0,0 +1,120 @@

+# This file includes code which was modified from https://github.com/openai/gpt-2
+import tensorflow as tf
+import os
+import json
+import regex as re
+from functools import lru_cache
+import requests
+import boto3
+import pdb
+@lru_cache()
+def bytes_to_unicode():
+    bs = (
+        list(range(ord("!"), ord("~") + 1))
+        + list(range(ord("¡"), ord("¬") + 1))
+        + list(range(ord("®"), ord("ÿ") + 1))
+    )
+    cs = bs[:]
+    n = 0
+    for b in range(2 ** 8):
+        if b not in bs:
+            bs.append(b)
+            cs.append(2 ** 8 + n)
+            n += 1
+    cs = [chr(n) for n in cs]
+    return dict(zip(bs, cs))
+def get_pairs(word):
+    pairs = set()
+    prev_char = word[0]
+    for char in word[1:]:
+        pairs.add((prev_char, char))
+        prev_char = char
+    return pairs
+class Encoder:
+    def __init__(self, encoder, bpe_merges, errors="replace"):
+        self.encoder = encoder
+        self.decoder = {v: k for k, v in self.encoder.items()}
+        self.errors = errors
+        self.byte_encoder = bytes_to_unicode()
+        self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
+        self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
+        self.cache = {}
+        self.pat = re.compile(
+            r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
+        )
+    def bpe(self, token):
+        if token in self.cache:
+            return self.cache[token]
+        word = tuple(token)
+        pairs = get_pairs(word)
+        if not pairs:
+            return token
+        while True:
+            bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf")))
+            if bigram not in self.bpe_ranks:
+                break
+            first, second = bigram
+            new_word = []
+            i = 0
+            while i < len(word):
+                try:
+                    j = word.index(first, i)
+                    new_word.extend(word[i:j])
+                    i = j
+                except:
+                    new_word.extend(word[i:])
+                    break
+                if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
+                    new_word.append(first + second)
+                    i += 2
+                else:
+                    new_word.append(word[i])
+                    i += 1
+            new_word = tuple(new_word)
+            word = new_word
+            if len(word) == 1:
+                break
+            else:
+                pairs = get_pairs(word)
+        word = " ".join(word)
+        self.cache[token] = word
+        return word
+    def encode(self, text):
+        bpe_tokens = []
+        for token in re.findall(self.pat, text):
+            token = "".join(self.byte_encoder[b] for b in token.encode("utf-8"))
+            bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" "))
+        return bpe_tokens
+    def decode(self, tokens):
+        text = "".join([self.decoder[token] for token in tokens])
+        text = bytearray([self.byte_decoder[c] for c in text]).decode("utf-8", errors=self.errors)
+        return text
+def get_encoder():
+    with open("encoder.json", "r") as f:
+        encoder = json.load(f)
+    with open("vocab.bpe", "r", encoding="utf-8") as f:
+        bpe_data = f.read()
+    bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split("\n")[1:-1]]
+    return Encoder(encoder=encoder, bpe_merges=bpe_merges)
+# encoder = get_encoder()
+# print('encoded is ', encoder.encode('hello 👋 world 🌍 This is a long string to test whether or not the emoji issue was fixed!'))

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+pyChatGPT
+openai
+loguru
+paddlepaddle==2.3.2
+paddlehub
+# transformers
+# torch
+tensorflow
+regex
+boto3
+gradio==3.12.0
+tencentcloud-sdk-python

utils.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import json, os
+from tencentcloud.common import credential
+from tencentcloud.common.profile.client_profile import ClientProfile
+from tencentcloud.common.profile.http_profile import HttpProfile
+from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
+from tencentcloud.tmt.v20180321 import tmt_client, models
+def get_tmt_client():
+    try:
+        # 实例化一个认证对象，入参需要传入腾讯云账户 SecretId 和 SecretKey，此处还需注意密钥对的保密
+        # 代码泄露可能会导致 SecretId 和 SecretKey 泄露，并威胁账号下所有资源的安全性。以下代码示例仅供参考，建议采用更安全的方式来使用密钥，请参见：https://cloud.tencent.com/document/product/1278/85305
+        # 密钥可前往官网控制台 https://console.cloud.tencent.com/cam/capi 进行获取
+        SecretId = os.environ.get("TENCENTCLOUD_SECRET_ID")
+        SecretKey = os.environ.get("TENCENTCLOUD_SECRET_KEY")
+        cred = credential.Credential(SecretId, SecretKey)
+        # 实例化一个http选项，可选的，没有特殊需求可以跳过
+        httpProfile = HttpProfile()
+        httpProfile.endpoint = "tmt.tencentcloudapi.com"
+        # 实例化一个client选项，可选的，没有特殊需求可以跳过
+        clientProfile = ClientProfile()
+        clientProfile.httpProfile = httpProfile
+        # 实例化要请求产品的client对象,clientProfile是可选的
+        client = tmt_client.TmtClient(cred, "ap-shanghai", clientProfile)
+        print(f'client_{client}')
+        return client
+    except TencentCloudSDKException as err:
+        print(f'client_err_{err}')
+        return None
+def getTextTrans_tmt(tmt_client, text, source='zh', target='en'):
+    def is_chinese(string):
+        for ch in string:
+            if u'\u4e00' <= ch <= u'\u9fff':
+                return True
+        return False
+    if tmt_client is None:
+        return text
+    if not is_chinese(text) and target == 'en':
+        return text
+    try:
+        req = models.TextTranslateRequest()
+        params = {
+            "SourceText": text,
+            "Source": source,
+            "Target": target,
+            "ProjectId": 0
+        }
+        req.from_json_string(json.dumps(params))
+        resp = tmt_client.TextTranslate(req)
+        return resp.TargetText
+    except Exception as e:
+        return text

vocab.bpe ADDED Viewed

The diff for this file is too large to render. See raw diff