paavansundar committed on
Commit
c14b7e6
1 Parent(s): e236999

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -10,13 +10,19 @@ __checkpoint = "gpt2"
10
  __tokenizer = GPT2Tokenizer.from_pretrained(__checkpoint)
11
  __model = GPT2LMHeadModel.from_pretrained(__checkpoint)
12
  __model_output_path = "/paavansundar/Models"
 
 
 
 
 
13
  # Create a Data collator object
14
  data_collator = DataCollatorForLanguageModeling(tokenizer=__tokenizer, mlm=False, return_tensors="pt")
15
  def queryGPT(question):
16
  return generate_response(__model, __tokenizer, question)
17
 
18
  def generate_response(model,tokenizer, prompt, max_length=200):
19
-
 
20
  input_ids = tokenizer.encode(prompt, return_tensors="pt") # 'pt' for returning pytorch tensor
21
 
22
  # Create the attention mask and pad token id
 
10
  __tokenizer = GPT2Tokenizer.from_pretrained(__checkpoint)
11
  __model = GPT2LMHeadModel.from_pretrained(__checkpoint)
12
  __model_output_path = "/paavansundar/Models"
13
+
14
+ #prepare data
15
+ def prepareData():
16
+ df=pd.read_csv("paavansundar/Datasets/MedQuAD.csv")
17
+
18
  # Create a Data collator object
19
  data_collator = DataCollatorForLanguageModeling(tokenizer=__tokenizer, mlm=False, return_tensors="pt")
20
  def queryGPT(question):
21
  return generate_response(__model, __tokenizer, question)
22
 
23
  def generate_response(model,tokenizer, prompt, max_length=200):
24
+
25
+ prepareData()
26
  input_ids = tokenizer.encode(prompt, return_tensors="pt") # 'pt' for returning pytorch tensor
27
 
28
  # Create the attention mask and pad token id