steve7909 committed on
Commit
69ce671
1 Parent(s): a224a43

little working with random row

Browse files
Files changed (2) hide show
  1. anki_japanese_english_pairs.csv +1 -0
  2. app.py +43 -43
anki_japanese_english_pairs.csv CHANGED
@@ -1,3 +1,4 @@
 
1
  これは本だ・です。,This is a book.
2
  コンサートは八時からだ・です。,The concert is from eight o'clock.
3
  あの人は先生だ。,That person is a teacher.
 
1
+ Japanese sentence,English sentence
2
  これは本だ・です。,This is a book.
3
  コンサートは八時からだ・です。,The concert is from eight o'clock.
4
  あの人は先生だ。,That person is a teacher.
app.py CHANGED
@@ -16,6 +16,7 @@ import spacy
16
  #from langchain.chat_models import ChatOpenAI
17
  from langchain_openai import ChatOpenAI
18
  from langchain.schema import AIMessage, HumanMessage
 
19
 
20
  # Load environment variables from .env file
21
  load_dotenv()
@@ -24,9 +25,9 @@ load_dotenv()
24
  HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')
25
 
26
  # openai setup
27
- client = OpenAI(
28
- api_key=os.getenv('OPENAI_API_KEY')
29
- )
30
 
31
  # hugging face setup
32
  #model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
@@ -47,64 +48,63 @@ def split_sentences_ginza(input_text):
47
  sentences = [sent.text for sent in doc.sents]
48
  return sentences
49
 
50
def query_hf(payload, model_name, timeout=30):
    """POST *payload* to the Hugging Face inference endpoint for *model_name*.

    Parameters
    ----------
    payload : dict
        JSON body for the inference API (e.g. {"inputs": ..., "options": ...}).
    model_name : str
        Model id appended to the module-level API_URL.
    timeout : float, optional
        Seconds to wait for the HTTP response. requests has no default
        timeout, so without this the call could hang indefinitely.

    Returns the parsed JSON response (shape depends on the model — TODO confirm).
    """
    # HTTP POST Request; headers is the module-level auth header dict
    response = requests.post(API_URL + model_name, headers=headers, json=payload, timeout=timeout)
    return response.json()
54
 
55
def translate_hf(input_text):
    """Translate Japanese *input_text* to English via the HF inference API.

    Splits the text into sentences with GiNZA, translates each non-empty
    sentence with the Helsinki-NLP/opus-mt-ja-en model, and joins the
    translated sentences with single spaces.
    """
    debug_print("Translating... ", input_text)

    sentences = split_sentences_ginza(input_text)  # split into sentences
    debug_print("Split sentences... ", sentences)

    translated_sentences = []
    for sentence in sentences:
        # Strip once (original stripped twice) and skip empty fragments.
        stripped = sentence.strip()
        if not stripped:
            continue

        # API request for each sentence; wait_for_model blocks until the
        # hosted model is loaded instead of returning a 503.
        response = query_hf({
            "inputs": stripped,
            "options": {"wait_for_model": True}
        }, "Helsinki-NLP/opus-mt-ja-en")

        debug_print("response: ", response)
        # NOTE(review): assumes a success response list; an API error dict
        # would raise KeyError here — TODO confirm desired handling.
        translated_sentences.append(response[0]["translation_text"])

    # Join the translated sentences
    return ' '.join(translated_sentences)
 
79
 
80
 
81
def translate_openai(input_text):
    """Ask GPT-3.5-turbo to translate *input_text* into Japanese and return the text."""
    prompt = "Translate the following text into Japanese language: " + input_text

    # temperature=0 so repeated calls return (near-)identical translations
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=[{"role": "user", "content": prompt}],
    )

    translation = response.choices[0].message.content
    debug_print("GPT translation:", translation)
    return translation
99
 
100
 
101
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')


def predict(message, history):
    """Chat callback: replay (user, bot) *history* to the LLM plus the new *message*.

    Returns the model's reply text.
    """
    messages = []
    for user_turn, bot_turn in history:
        # Turns are forwarded as-is; non-string content would need str() first.
        messages.extend((HumanMessage(content=user_turn),
                         AIMessage(content=bot_turn)))
    messages.append(HumanMessage(content=message))
    return llm(messages).content
 
16
  #from langchain.chat_models import ChatOpenAI
17
  from langchain_openai import ChatOpenAI
18
  from langchain.schema import AIMessage, HumanMessage
19
+ import pandas as pd
20
 
21
  # Load environment variables from .env file
22
  load_dotenv()
 
25
  HF_TOKEN = os.getenv('HUGGING_FACE_TOKEN')
26
 
27
  # openai setup
28
+ # client = OpenAI(
29
+ # api_key=os.getenv('OPENAI_API_KEY')
30
+ # )
31
 
32
  # hugging face setup
33
  #model_name = "mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf"
 
48
  sentences = [sent.text for sent in doc.sents]
49
  return sentences
50
 
 
 
 
 
51
 
52
# Default location of the Anki-exported Japanese/English sentence pairs.
file_path = 'anki_japanese_english_pairs.csv'


def load_csv(file_path):
    """Read the sentence-pair CSV at *file_path* into a pandas DataFrame."""
    return pd.read_csv(file_path)
59
 
60
def get_sentence_pair(df):
    """Pick one random row of *df* and return (japanese_sentence, english_sentence).

    Assumes column 0 holds the Japanese text and column 1 the English text,
    matching the CSV header; both values are coerced to str.
    """
    # sample(1) yields a one-row DataFrame; take it as a Series.
    row = df.sample(1).iloc[0]

    japanese_sentence = str(row.iloc[0])
    english_sentence = str(row.iloc[1])

    debug_print("### Japanese sentence:", japanese_sentence)
    debug_print("### English sentence:", english_sentence)

    return japanese_sentence, english_sentence
76
 
 
77
 
78
# Choose one random Japanese/English pair at import time; predict() embeds
# this same pair in its setup prompt for every chat session.
japanese_sentence, english_sentence = get_sentence_pair(load_csv(file_path))
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
 
81
llm = ChatOpenAI(temperature=0.7, model='gpt-3.5-turbo')


def predict(message, history):
    """Chat callback for the translation-tutor bot.

    Seeds the conversation with a tutoring setup prompt built from the
    module-level random sentence pair, replays the (user, bot) *history*,
    appends the new user *message*, and returns the LLM's reply text.
    """
    # Tutoring instructions; {japanese_sentence}/{english_sentence} are the
    # module-level pair chosen at import time, so every call reuses the
    # same sentence pair.
    initial_setup = f'''

    Japanese students are learning to translate Japanese text to English text. They will be given a Japanese sentence to translate, and will provide an English translation attempt.
    Based on the feedback you provide, they will revise their translation. This process will continue until their translation is accurate.

    Encourage the student by specifying the strengths of their writing.
    DO NOT PROVIDE THE CORRECT ENGLISH TRANSLATION. Let the student work it out.
    The student's translation need not match the provided English translation exactly, but it should be accurate to the Japanese text.
    Provide your feedback as a list.

    Execute the following tasks step by step:
    1. Ask the student to translate the following sentence from Japanese to English: {japanese_sentence}. Here is the English translation for reference: {english_sentence}
    2. Suggest only mechanical corrections (i.e., spelling, grammar, and punctuation) for the student. Ask for another translation attempt.
    '''
    # Seed the history with the setup prompt. It is sent as an AIMessage —
    # the previous comment incorrectly said SystemMessage. SystemMessage
    # (from langchain.schema) would be the conventional role for setup
    # instructions; TODO switch once it is imported.
    history_langchain_format = [AIMessage(content=initial_setup)]

    for human, ai in history:
        # Turns are forwarded unchanged; non-string content would need str().
        history_langchain_format.append(HumanMessage(content=human))
        history_langchain_format.append(AIMessage(content=ai))

    history_langchain_format.append(HumanMessage(content=message))
    gpt_response = llm(history_langchain_format)
    return gpt_response.content