File size: 8,977 Bytes
ac370a7
1d026b1
 
 
ac370a7
 
1d026b1
ac370a7
1d026b1
 
 
 
 
 
 
c88a876
 
1d026b1
ac370a7
1d026b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac370a7
1d026b1
 
 
ac370a7
 
 
 
084c343
ac370a7
 
1d026b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac370a7
c88a876
 
 
 
 
 
 
 
084c343
 
 
 
 
 
db9198a
1d026b1
 
 
 
 
ac370a7
 
 
1d026b1
c88a876
 
 
1d026b1
 
ac370a7
9ce25b9
c88a876
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# v1.1

# %% auto 0
# Public names exported by this module (order kept as generated).
__all__ = [
    's_model',
    'qa_model',
    'question_1',
    'question_2',
    'question_3',
    'question_4',
    'question_5',
    'question_6',
    'question_7',
    'question_8',
    'question_9',
    'question_10',
    'contexts',
    'data',
    'context_df',
    'a_text',
    'n_slider',
    'intf',
    'QA_similarity',
]

# %% ../drive/MyDrive/Codici/Python/Gradio_App/SemanticSearch_QA-v2.ipynb 2
import pandas as pd
import gradio as gr

from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Sentence-embedding model; QA_similarity uses it to encode the question and
# the candidate contexts before the semantic search.
s_model = SentenceTransformer('clips/mfaq')

# Question-answering pipeline; QA_similarity runs it on the best-ranked
# contexts to produce the ANSWER column.
qa_model = pipeline(
    task='question-answering',
    model='Francesco-A/bert-finetuned-squad-v1',
)

# %% ../drive/MyDrive/Codici/Python/Gradio_App/SemanticSearch_QA-v2.ipynb 3
# Sample questions. question_1, question_2 and question_4 are wired into the
# Gradio interface as clickable examples; the others are exported for reuse.
question_1: str = "What are the main features of the new XPhone 20?"
question_2: str = "What are some benefits of regular exercise?"
question_3: str = "What is the color of a rose?"
question_4: str = "How does photosynthesis work in plants?"
question_5: str = "At what temperature does water boil?"
question_6: str = "Where can I find potassium?"
question_7: str = "How does the internet function?"
question_8: str = "What are the ingredients for making a classic margarita?"
question_9: str = "How does cellular respiration work?"
question_10: str = "Is money important?"

# Candidate contexts, one passage per list element. Every element must be
# followed by a comma: adjacent string literals are silently concatenated by
# Python, which would merge two unrelated passages into a single context.
contexts = [
    "The XPhone 20 is expected to come with an improved camera system, featuring advanced image stabilization and enhanced low-light capabilities.",
    "Regular exercise has been shown to reduce the risk of chronic diseases such as heart disease, diabetes, and certain types of cancer.",
    "Roses come in various colors, including red, pink, yellow, white, and even blue (though blue roses are rare and often created through genetic modification).",
    "Photosynthesis occurs in the chloroplasts of plant cells, where chlorophyll captures sunlight and converts it into chemical energy.",
    "Water boils at different temperatures depending on factors like altitude and atmospheric pressure. At sea level, it boils at 100 degrees Celsius or 212 degrees Fahrenheit.",
    "Potassium is an essential mineral that can be found in various foods such as bananas, potatoes, spinach, and oranges.",
    "The internet functions through a complex system of data transmission protocols, routers, and servers that allow for the exchange of information globally.",
    "A classic margarita typically consists of tequila, lime juice, triple sec (or orange liqueur), and is often served with a salted rim.",
    "Cellular respiration takes place in the mitochondria of cells, where glucose and oxygen are converted into ATP (adenosine triphosphate) and carbon dioxide.",
    "Money is a medium of exchange that facilitates transactions of goods and services. Its importance lies in its role in economic systems and the ability to represent value.",
    "The XPhone 20 may feature an OLED display for vibrant colors and deep blacks, providing a high-quality visual experience.",
    "Exercise releases endorphins, which are chemicals in the brain that help improve mood and reduce feelings of stress and anxiety.",
    "Different species of roses can have variations in color, including shades of red, pink, yellow, and white.",
    "During photosynthesis, plants also release oxygen as a byproduct, which is essential for the survival of many organisms on Earth.",
    "Water boils at a lower temperature at higher altitudes due to the reduced atmospheric pressure. For example, in the mountains, it may boil below 100 degrees Celsius.",
    "Potassium is crucial for proper muscle function, nerve function, and maintaining fluid balance in the body.",
    "The internet relies on a system of interconnected networks, including wired and wireless connections, to transmit data across the globe.",
    "In addition to the core ingredients, a classic margarita can also be garnished with a wedge of lime for added flavor.",
    "Cellular respiration involves several stages, including glycolysis, the Krebs cycle, and the electron transport chain, to extract energy from glucose.",
    "Money serves as a unit of account, allowing for standardized pricing and valuation of goods and services in economies worldwide.",
    "The XPhone 20 is rumored to feature a smaller notch, providing more screen real estate for users. This allows for an immersive viewing experience.",
    "Photosynthesis is the process by which plants convert carbon dioxide, water, and sunlight into glucose and oxygen.",
    "Bananas are a great source of potassium.",
    "The theory of relativity was developed by Albert Einstein and revolutionized our understanding of space and time.",
    "The Eiffel Tower is located in Paris, France.",
    "Reports suggest that the XPhone 20 will have significant improvements in battery life compared to its predecessor. Users can expect a longer-lasting device.",
    "A penny saved is a penny earned.",
    "Water boils at 100 degrees Celsius.",
    "The Great Wall of China is one of the most impressive architectural feats in history.",
    "The capital of Japan is Tokyo.",
    "One of the anticipated features of the XPhone 20 is a faster and more powerful A16 chip. This will result in smoother and more efficient performance.",
    "Roses are red, violets are blue.",
    "Regular exercise can help improve cardiovascular health and strengthen muscles.",
    "A classic margarita is made with tequila, lime juice, and orange liqueur.",
    "Cellular respiration is the process by which cells convert glucose and oxygen into energy, carbon dioxide, and water.",
    "The internet is a global network of interconnected computers and servers that allows the sharing of information and resources.",
    "Mount Everest is the highest mountain in the world, located in the Himalayas.",
    "The sun rises in the east and sets in the west.",
    "The Mona Lisa is a famous portrait painting by Leonardo da Vinci.",
    "The Declaration of Independence was adopted by the Continental Congress on July 4, 1776.",
]

# %% ../drive/MyDrive/Codici/Python/Gradio_App/SemanticSearch_QA-v2.ipynb 4
# Function to find similar answers in a list of contexts
def QA_similarity(question, contexts, n_answers=1):
    """Rank contexts against a question and extract an answer from the best ones.

    The contexts are ranked with the module-level ``s_model`` via
    ``util.semantic_search``; the module-level ``qa_model`` is then run on the
    ``n_answers`` best-ranked contexts.

    Args:
        question: The question text, without any model-specific prefix.
        contexts: A ``pandas.DataFrame`` with a ``'Context'`` column, or
            anything ``pandas.read_csv`` accepts. For CSV input without a
            ``'Context'`` column the first column is used instead
            (NOTE(review): assumed intent of the original no-op ``rename``).
        n_answers: How many of the best-ranked contexts to answer from.
            Coerced to ``int`` so a float coming from a UI widget can be used
            as a slice bound.

    Returns:
        A ``pandas.DataFrame`` with columns ``ANSWER_IDX`` (row position of
        the context in the input), ``ANSWER``, ``CONTEXT_SCORE`` and
        ``CONTEXT``, ordered by descending context score. It is empty when
        there is no usable context. ``semantic_search`` is called with its
        default ``top_k``, so the number of rows is also capped by that
        default (presumably 10 - confirm against the installed version).
    """
    # Import the necessary module for itemgetter
    from operator import itemgetter

    result_columns = ["ANSWER_IDX", "ANSWER", "CONTEXT_SCORE", "CONTEXT"]
    n_answers = int(n_answers)

    if not isinstance(contexts, pd.DataFrame):
        contexts = pd.read_csv(contexts)
        if 'Context' not in contexts.columns:
            # DataFrame.rename returns a new frame, so the result must be kept.
            contexts = contexts.rename(columns={contexts.columns[0]: 'Context'})

    # Keep (original row position, text) pairs and skip rows that cannot be
    # embedded or answered from (missing values, non-strings, blank cells).
    usable = [
        (position, text)
        for position, text in enumerate(contexts['Context'].to_list())
        if isinstance(text, str) and text.strip()
    ]
    if not usable:
        return pd.DataFrame(columns=result_columns)

    # to use 'clips/mfaq' questions need to be prepended with <Q>, and answers with <A>.
    # The prefix is only meant for the retrieval model, so it is kept out of
    # the `question` that is later handed to the QA model.
    mfaq_question = "<Q>" + question
    mfaq_contexts = ["<A>" + text for _, text in usable]

    # Create the embeddings and perform semantic search to find similar contexts
    query_embedding = s_model.encode(mfaq_question)
    context_embeddings = s_model.encode(mfaq_contexts)
    hits = util.semantic_search(query_embedding, context_embeddings)[0]

    # Sort the hits by the 'score' key in descending order and keep the best ones
    best_hits = sorted(hits, key=itemgetter('score'), reverse=True)[:n_answers]

    # Run the QA model on each selected context and collect one row per hit
    rows = []
    for hit in best_hits:
        position, text = usable[hit['corpus_id']]
        qa_result = qa_model(question=question, context=text)
        rows.append((position, qa_result['answer'], hit['score'], text))

    # Create a DataFrame for the results
    return pd.DataFrame(rows, columns=result_columns)

# %% ../drive/MyDrive/Codici/Python/Gradio_App/SemanticSearch_QA-v2.ipynb 6
#|export

# Heading and markdown blurb passed to the Gradio interface.
title = 'SemanticSearch_QA-v2'
description = """
FAQ retrieval model: [clips/mfaq](https://huggingface.co/clips/mfaq) \n
QA model: [Francesco-A/bert-finetuned-squad-v1](https://huggingface.co/Francesco-A/bert-finetuned-squad-v1)
"""

# Built-in contexts wrapped as a one-column table ('Context'), the layout
# QA_similarity reads from.
data = dict(Context=contexts)
context_df = pd.DataFrame(data)

# Editable single-column table in which the user supplies the contexts.
a_text = gr.components.Dataframe(col_count=(1, "fixed"), headers=['Context'], interactive=True)
# Number of best-ranked contexts to answer from.
n_slider = gr.components.Slider(minimum=1, maximum=10, label="Select n answers (max= 10)", step=1)

# Inputs map positionally onto QA_similarity(question, contexts, n_answers).
intf = gr.Interface(
    fn=QA_similarity,
    inputs=["text", a_text, n_slider],
    outputs=gr.components.Dataframe(),
    examples=[
        [question_1, context_df, 3],
        [question_2, context_df, 5],
        [question_4, context_df, 10],
    ],
    title=title,
    description=description,
    # article=long_desc
)

# `debug` is an option of launch(), not of the Interface constructor, so it
# is passed here instead.
intf.launch(
    inline=True,
    debug=True,
    # share=True
)