greatakela committed on
Commit 5a30cea
1 Parent(s): 7587bb0

Update app.py

Files changed (1)
  1. app.py +39 -45
app.py CHANGED
@@ -1,47 +1,41 @@
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import gradio as gr
  import torch
-
-
- title = "LocalAI ChatBot"
- description = "A State-of-the-Art Large-scale Pretrained Response generation model (DialoGPT)"
- examples = [["How are you?"]]
-
-
- tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")
- model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
-
-
- def predict(input, history=[]):
-     # tokenize the new input sentence
-     new_user_input_ids = tokenizer.encode(
-         input + tokenizer.eos_token, return_tensors="pt"
-     )
-
-     # append the new user input tokens to the chat history
-     bot_input_ids = torch.cat([torch.LongTensor(history), new_user_input_ids], dim=-1)
-
-     # generate a response
-     history = model.generate(
-         bot_input_ids, max_length=4000, pad_token_id=tokenizer.eos_token_id
-     ).tolist()
-
-     # convert the tokens to text, and then split the responses into lines
-     response = tokenizer.decode(history[0]).split("<|endoftext|>")
-     # print('decoded_response-->>'+str(response))
-     response = [
-         (response[i], response[i + 1]) for i in range(0, len(response) - 1, 2)
-     ]  # convert to tuples of list
-     # print('response-->>'+str(response))
-     return response, history
-
-
- gr.Interface(
-     fn=predict,
-     title=title,
-     description=description,
-     examples=examples,
-     inputs=["text", "state"],
-     outputs=["chatbot", "state"],
-     theme="coding-alt/soft",
- ).launch()
 
 
  import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
  import torch
+ from sklearn.metrics.pairwise import cosine_similarity
+ import numpy as np
+
+ # Load a pre-trained model and tokenizer from Hugging Face
+ model_name = "sentence-transformers/all-MiniLM-L6-v2"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
+
+ # Sample dataset of questions and answers
+ dataset = [
+     ("What is the capital of France?", "Paris is the capital of France."),
+     ("Who is the creator of Python?", "Guido van Rossum created Python."),
+     ("What is the tallest mountain in the world?", "Mount Everest is the tallest mountain in the world."),
+ ]
+
+ # Function to find the most relevant answer
+ def find_most_relevant_answer(question):
+     question_embedding = model(**tokenizer(question, return_tensors="pt", padding=True, truncation=True))[0].mean(dim=1).detach().numpy()
+
+     highest_similarity = -1
+     most_relevant_answer = ""
+
+     for q, a in dataset:
+         answer_embedding = model(**tokenizer(q, return_tensors="pt", padding=True, truncation=True))[0].mean(dim=1).detach().numpy()
+         similarity = cosine_similarity([question_embedding], [answer_embedding])[0][0]
+
+         if similarity > highest_similarity:
+             highest_similarity = similarity
+             most_relevant_answer = a
+
+     return most_relevant_answer
+
+ # Set up Gradio interface
+ def chat_with_bot(question):
+     return find_most_relevant_answer(question)
+
+ iface = gr.Interface(fn=chat_with_bot, inputs="text", outputs="text", title="Simple QA Chatbot")
+ iface.launch()
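
Note on the added code: loading sentence-transformers/all-MiniLM-L6-v2 through AutoModelForSequenceClassification attaches a randomly initialized classification head, so model(...)[0] returns logits rather than sentence representations, and the cosine similarities computed from them carry little signal; wrapping the pooled arrays in extra lists also changes their shape. The following is a minimal sketch of the same question-matching idea, assuming AutoModel with mean pooling over last_hidden_state is the intended way to obtain embeddings; the embed() helper and the precomputed question_embeddings list are illustrative additions, not part of the commit.

# Sketch only (not part of the commit): retrieval by cosine similarity over
# mean-pooled hidden states from AutoModel instead of classification logits.
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity

model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

dataset = [
    ("What is the capital of France?", "Paris is the capital of France."),
    ("Who is the creator of Python?", "Guido van Rossum created Python."),
    ("What is the tallest mountain in the world?", "Mount Everest is the tallest mountain in the world."),
]

def embed(text):
    # Mean-pool the last hidden state into a single (1, hidden_dim) vector.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).numpy()

# Embed the stored questions once, rather than re-encoding them on every request.
question_embeddings = [(embed(q), a) for q, a in dataset]

def find_most_relevant_answer(question):
    query = embed(question)
    best_score, best_answer = -1.0, ""
    for emb, answer in question_embeddings:
        score = cosine_similarity(query, emb)[0][0]  # both are 2D (1, hidden_dim) arrays
        if score > best_score:
            best_score, best_answer = score, answer
    return best_answer

iface = gr.Interface(fn=find_most_relevant_answer, inputs="text", outputs="text", title="Simple QA Chatbot")
iface.launch()

If adding the dependency is acceptable, the sentence-transformers library's SentenceTransformer("all-MiniLM-L6-v2").encode(...) would produce comparable embeddings with less code.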