# Flask chatbot app: answers short-let room questions from a JSON dataset,
# falling back to GPT-2 text generation for unknown questions.
from flask import Flask, render_template, request
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json

# Load GPT-2 once at startup; reused by every request.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Forward slashes work on every OS; the original backslash form relied on
# invalid escape sequences (\s, \d) that raise SyntaxWarning on Python 3.12+.
json_file_path = 'interview-workspace/src/data/short_let_room.json'

app = Flask(__name__)
def generate_gpt2_response(question):
    """Generate a free-form GPT-2 continuation of *question*.

    Encodes the question, samples up to 100 new tokens with beam search,
    and returns the decoded text (prompt included).
    """
    encoded = tokenizer.encode(question, return_tensors='pt')
    generation_options = {
        'max_length': len(encoded[0]) + 100,
        'num_beams': 5,
        'no_repeat_ngram_size': 2,
        'top_k': 10,
        'top_p': 1,
        'temperature': 0.9,
        'pad_token_id': model.config.eos_token_id,
    }
    output_ids = model.generate(encoded, **generation_options)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
def find_question_and_answer(json_file, question, input_ids):
    """Look up *question* in the JSON dataset; fall back to GPT-2.

    Performs a case-insensitive exact match against the dataset's
    "questions" entries. On a hit, returns a dict joining the matching
    entry in "responses" (keys keep their original trailing spaces, which
    the template relies on). On a miss, returns a GPT-2 generated string.

    Args:
        json_file: Path to the JSON dataset file.
        question: The user's question text.
        input_ids: Unused; kept for backward compatibility with existing
            callers (the fallback re-encodes the question itself).

    Returns:
        dict on a dataset hit, str (generated text) otherwise.
    """
    # Don't shadow the path argument with the file handle.
    with open(json_file, "r") as fh:
        data = json.load(fh)

    quest = question.lower()
    all_questions = data.get("questions", [])
    all_responses = data.get("responses", [])

    for dataset_question in all_questions:
        if 'question' in dataset_question and dataset_question['question'].lower() == quest:
            for dataset_response in all_responses:
                if dataset_question['response_id'] == dataset_response['id']:
                    return {
                        "Chat Bot ": dataset_response["response_message"],
                        "Apartment ": dataset_response["response_message1"],
                        "Address ": dataset_response["shortlet_Address"],
                        "Price ": dataset_response["shortlet_Price"],
                        "URL ": dataset_response.get("shortlet_url"),
                    }

    # Unknown question: delegate to the shared generator instead of
    # duplicating the whole model.generate() call here.
    return generate_gpt2_response(question)
# NOTE(review): the route decorator appears to have been lost; without it the
# view is never registered and the app serves nothing. '/' is the conventional
# landing page for the form in index.html — confirm against the template.
@app.route('/')
def index():
    """Render the chatbot landing page."""
    return render_template('index.html')
# NOTE(review): route decorator appears missing; '/ask' with POST is assumed
# from the request.form usage — confirm the form action in index.html.
@app.route('/ask', methods=['POST'])
def ask():
    """Handle a submitted question and re-render the page with the answer.

    Empty/missing input yields an empty string response. Otherwise the
    question is looked up in the dataset (dict result) or answered by
    GPT-2 (string result); response_type tells the template which it got.
    """
    # .get avoids a 400 BadRequestKeyError when the field is absent; the
    # empty-input branch below already covers that case.
    user_input = request.form.get('user_input', '')
    if not user_input:
        response = ""
    else:
        input_ids = tokenizer.encode(user_input, return_tensors='pt')
        response = find_question_and_answer(json_file_path, user_input, input_ids)
    response_type = "mapping" if isinstance(response, dict) else "string"
    return render_template('index.html', user_input=user_input,
                           response=response, response_type=response_type)
# Run the development server directly (python app.py).
# NOTE(review): debug=True enables the interactive debugger and must not be
# used in production — deploy behind a WSGI server instead.
if __name__ == '__main__':
    app.run(debug=True)