tomer-shimshi
committed on
Commit
•
42b6a47
1
Parent(s):
aa81271
Upload finale_project_Rav_talk.py
Browse files
finale_project_Rav_talk.py
CHANGED
@@ -42,12 +42,6 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
42 |
tokenizer.pad_token = tokenizer.eos_token
|
43 |
tokenizer.padding_side = "right"
|
44 |
|
45 |
-
####################
|
46 |
-
### Load Dataset ###
|
47 |
-
####################
|
48 |
-
train_dataset_name = "cleaned_Rebe_Q_and_A_dataset_just_rebe_questions_english_no_hebrew.csv"
|
49 |
-
test_dataset = load_dataset("csv", data_files=train_dataset_name,split='train')#[-20%:]')
|
50 |
-
|
51 |
##############################
|
52 |
### Set Saving Arguments ###
|
53 |
##############################
|
@@ -72,11 +66,9 @@ def formatting_prompts_func(examples):
|
|
72 |
texts.append(text)
|
73 |
return { "text" : texts, }
|
74 |
|
75 |
-
# Replace this with the actual output from your LLM application
|
76 |
-
#for i in range(len(test_dataset)):
|
77 |
question = input('Please enter a question for the Rav \n Enter empty string to quit \n')
|
78 |
while len(question)>1:
|
79 |
-
|
80 |
|
81 |
pipe = pipeline(
|
82 |
task="text-generation",
|
|
|
42 |
tokenizer.pad_token = tokenizer.eos_token
|
43 |
tokenizer.padding_side = "right"
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
##############################
|
46 |
### Set Saving Arguments ###
|
47 |
##############################
|
|
|
66 |
texts.append(text)
|
67 |
return { "text" : texts, }
|
68 |
|
|
|
|
|
69 |
question = input('Please enter a question for the Rav \n Enter empty string to quit \n')
|
70 |
while len(question)>1:
|
71 |
+
|
72 |
|
73 |
pipe = pipeline(
|
74 |
task="text-generation",
|