|
import requests |
|
import gradio as gr |
|
from dotenv import load_dotenv |
|
import os |
|
|
|
from langchain_openai import OpenAI |
|
import spacy |
|
|
|
from langchain_openai import ChatOpenAI |
|
from langchain.schema import AIMessage, HumanMessage |
|
import pandas as pd |
|
import uuid |
|
import json |
|
|
|
|
|
# Load variables from a local .env file (if any) into the process environment
# so the os.getenv() lookups below can see them.
load_dotenv()

# Credentials are read from the environment only. Never commit a token as a
# string literal: anything that has been in source control must be treated as
# leaked and revoked.
HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')
# Defaults to '' so the name is always a str, even when the variable is unset.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN', '')

# Base URL of the Hugging Face hosted inference API.
# NOTE(review): presumably a model id is appended by callers — no use of
# API_URL is visible in this file; confirm it is still needed.
API_URL = "https://api-inference.huggingface.co/models/"

# Authorization header for Hugging Face API requests.
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

# When truthy, debug_print() writes its arguments to stdout.
DEBUG_MODE = True
|
|
|
|
|
def share_to_gist(content, public=False):
    """Upload *content* as a GitHub gist and return the gist's web URL.

    The gist contains a single file named ``chat.txt`` and is described as
    "Chat history".

    Args:
        content: Text to store in the gist.
        public: Whether the gist is created as public. Defaults to False.

    Returns:
        The ``html_url`` field of GitHub's response, or ``''`` when no token
        is configured, the request is rejected, or the field is absent.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    # Look the token up by the *name* of its environment variable. Passing the
    # token value itself to os.getenv() always yields None, which made every
    # request go out with the header "token None".
    token = os.getenv("GITHUB_TOKEN")
    if not token:
        return ""

    payload = {
        "public": public,
        "description": "Chat history",
        "files": {
            "chat.txt": {
                "content": content,
            },
        },
    }
    response = requests.post(
        "https://api.github.com/gists",
        headers={
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
        },
        json=payload,  # requests serialises the body and sets Content-Type
        timeout=10,  # without a timeout a stalled connection blocks forever
    )

    # An error response may not carry a JSON body; report failure the same way
    # a missing 'html_url' is reported instead of crashing while decoding it.
    if not response.ok:
        return ""
    return response.json().get("html_url", "")
|
|
|
def generate_unique_id():
    """Return a newly generated random (version 4) UUID in string form."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
|
|
|
def debug_print(*args, **kwargs):
    """Forward all arguments to print(), but only while DEBUG_MODE is truthy."""
    if not DEBUG_MODE:
        return
    print(*args, **kwargs)
|
|
|
# Lazily created spaCy pipeline, shared by every call to split_sentences_ginza.
_JA_PIPELINE = None


def _get_ja_pipeline():
    """Load the "ja_core_news_sm" spaCy pipeline on first use and reuse it."""
    global _JA_PIPELINE
    if _JA_PIPELINE is None:
        _JA_PIPELINE = spacy.load("ja_core_news_sm")
    return _JA_PIPELINE


def split_sentences_ginza(input_text):
    """Split *input_text* into sentences with spaCy's Japanese pipeline.

    The pipeline is loaded once and cached; loading it on every call repeats
    the same expensive model load for no benefit.

    NOTE(review): the function name mentions GiNZA, but the pipeline loaded
    is "ja_core_news_sm" — confirm which model is intended.

    Args:
        input_text: The text to segment.

    Returns:
        A list with the text of each sentence found, in document order.
    """
    doc = _get_ja_pipeline()(input_text)
    return [sent.text for sent in doc.sents]
|
|
|
|
|
# CSV file with the sentence pairs; get_sentence_pair() reads column 0 as the
# Japanese sentence and column 1 as the English sentence.
file_path = "anki_japanese_english_pairs.csv"
|
|
|
def load_csv(file_path):
    """Read the CSV at *file_path* and return its contents as a DataFrame."""
    return pd.read_csv(file_path)
|
|
|
def get_sentence_pair(df):
    """Draw one random row from *df* and return its first two cells as strings.

    Args:
        df: DataFrame whose column 0 is used as the Japanese sentence and
            whose column 1 is used as the English sentence.

    Returns:
        A ``(japanese_sentence, english_sentence)`` tuple of ``str``.
    """
    sampled = df.sample(1)

    # Positional lookups on the one-row sample: row 0, columns 0 and 1.
    japanese_sentence, english_sentence = (
        str(sampled.iloc[0, column]) for column in (0, 1)
    )

    debug_print("### Japanese sentence:", japanese_sentence)
    debug_print("### English sentence:", english_sentence)

    return japanese_sentence, english_sentence
|
|
|
|
|
# The practice sentence is drawn once, when the module is loaded; predict()
# embeds this same pair in its prompt on every call.
japanese_sentence, english_sentence = get_sentence_pair(
    load_csv(file_path)
)

# Chat model used by predict() to generate the tutor's replies.
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.7)
|
|
|
def predict(message, history):
    """Produce the tutor's next reply for the chat interface.

    Builds the conversation as: tutoring instructions, then every earlier
    turn from *history*, then the new *message*, and sends it to ``llm``.

    Args:
        message: The student's latest chat message.
        history: Earlier turns as ``(human, ai)`` pairs. Either element may
            be None, in which case that side of the turn is skipped.

    Returns:
        The text content of the model's response.
    """
    # Local import keeps this function self-contained; the file's top-level
    # import from langchain.schema only brings in AIMessage and HumanMessage.
    from langchain.schema import SystemMessage

    initial_setup = f'''
Japanese students are learning to translate Japanese text to English text. They will be given a Japanese sentence to translate, and will provide an English translation attempt.
Based on the feedback you provide, they will revise their translation. This process will continue until their translation is accurate.

Encourage the student by specifying the strengths of their writing.
DO NOT PROVIDE THE CORRECT ENGLISH TRANSLATION until the student gets the correct translation. Let the student work it out.
Provide your feedback as a list in the format: a, b, c etc.
Do not respond in Japanese - always respond in English even if the student uses Japanese with you.

Execute the following tasks step by step:
1. Ask the student to translate the following sentence from Japanese to English: {japanese_sentence}. Here is the English translation for reference: {english_sentence}
2. Suggest only mechanical corrections (i.e., spelling, grammar, and punctuation) for the student. Ask for another translation attempt.

Start by asking the student to translate the Japanese sentence.
'''

    # The instructions belong in a system message. Sending them as an
    # AIMessage tells the model that it said them itself in an earlier turn.
    history_langchain_format = [SystemMessage(content=initial_setup)]

    for human, ai in history:
        if human is not None:
            history_langchain_format.append(HumanMessage(content=human))
        if ai is not None:
            history_langchain_format.append(AIMessage(content=ai))

    history_langchain_format.append(HumanMessage(content=message))

    # invoke() is the supported entry point; calling the model object
    # directly is deprecated in LangChain.
    gpt_response = llm.invoke(history_langchain_format)
    return gpt_response.content
|
|
|
# First message shown in the chat window, before the student has typed.
# NOTE(review): the "๐" character looks like a mis-decoded emoji; the
# intended character cannot be recovered from this file — confirm and restore.
welcome_message = "Hi! ๐. Are you ready to practise translation?"

# The chat opens with a single turn that has no human side, only the welcome.
initial_chat = gr.Chatbot(value=[(None, welcome_message)])

app = gr.ChatInterface(
    fn=predict,
    title="Translation Chatbot",
    chatbot=initial_chat,
)

app.launch()