import gradio as gr
import numpy as np
from sentence_transformers import CrossEncoder, SentenceTransformer, util
from transformers import set_seed

set_seed(42)

# Models are loaded once at import time and shared by every request.
# The DPR passage and question encoders come from the same "multiset"
# checkpoint family so that their embeddings are scored in a matching space.
passage_encoder = SentenceTransformer("facebook-dpr-ctx_encoder-multiset-base")
query_encoder = SentenceTransformer(
    "facebook-dpr-question_encoder-multiset-base"
)
ce = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-12-v2")
embedder = SentenceTransformer("all-mpnet-base-v2")


def _prefers_own_doc(scores, idx):
    """Return True when the document paired with query ``idx`` scores strictly higher.

    ``scores`` holds one entry per document (two documents). When an entry is
    itself a multi-element numpy array, element 1 of each entry is compared
    instead (the original author's note: "entailment models").
    """
    should_be_higher = scores[idx]
    should_be_lower = scores[1 - idx]  # the other of the two documents
    if isinstance(should_be_higher, np.ndarray) and len(should_be_higher) > 1:
        should_be_higher = should_be_higher[1]
        should_be_lower = should_be_lower[1]
    return bool(should_be_higher > should_be_lower)


def calc_preferred_dense(doc1, doc2, q1, q2, model_name="dpr"):
    """Check whether each query scores its own document above the other one.

    Args:
        doc1, doc2: strings containing the documents/passages.
        q1, q2: query strings; each is expected to be relevant only to the
            corresponding document (doc1 -> q1, doc2 -> q2).
        model_name: ``"dpr"`` for the DPR bi-encoder pair, ``"cross-encoder"``
            for the cross-encoder; any other value uses the generic
            bi-encoder (``embedder``).

    Returns:
        A dictionary with keys ``"q1"`` and ``"q2"`` mapping to the list of
        scores of that query against ``[doc1, doc2]``, plus ``"score"``: the
        fraction of queries whose own document scored strictly higher (P@1
        for the pair).
    """
    corpus = [doc1, doc2]
    queries = [q1, q2]

    # Pick a per-query scoring function; document-side work is done once here.
    if model_name == "dpr":
        passage_embeddings = passage_encoder.encode(corpus)

        def score_query(query):
            # DPR must be scored with dot-product, not cosine similarity.
            query_embedding = query_encoder.encode(query)
            return util.dot_score(query_embedding, passage_embeddings)[0]

    elif model_name == "cross-encoder":

        def score_query(query):
            return ce.predict([[query, doc1], [query, doc2]])

    else:  # bi-encoder that is not DPR
        corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

        def score_query(query):
            query_embedding = embedder.encode(query, convert_to_tensor=True)
            return util.dot_score(query_embedding, corpus_embeddings)[0].cpu()

    results = {}
    num_correct = 0
    for idx, query in enumerate(queries):
        scores = score_query(query)
        results[f"q{idx + 1}"] = scores.tolist()
        if _prefers_own_doc(scores, idx):
            num_correct += 1

    results["score"] = num_correct / len(queries)
    return results


# Each example row is [doc1, doc2, query1, query2, model_name].
# NOTE(review): the doubled quotes (`""Eagle""`) inside these literals are
# implicit string-literal concatenation, not escaped quotes, so the quotation
# marks do NOT appear in the runtime text. The literals are kept exactly as
# they were so the example inputs stay unchanged; switch to `\"` if the quotes
# are meant to be shown.
EXAMPLES = [
    [
        "A third single, ""Eagle"", was released in continental Europe and Down Under becoming a number one hit in Begium and a Top 10 hit in the Netherlands, Germany, Switzerland and South Africa, but barely charting Down Under. The B-side of ""Eagle"" was ""Thank You for the Music"", and it was belatedly released as an A-side single in the United Kingdom and Ireland in 1983. ""Thank You for the Music"" has become one of the best loved and best known ABBA songs without being released as a single during the group's lifetime. ""ABBA: The Album"" topped the album charts in the UK, the Netherlands, New Zealand, Sweden, Norway, Switzerland, while ascending to the Top 5 in Australia, Germany, Austria, Finland and Rhodesia, and making the Top 10 in Canada and Japan. Sources also indicate that sales in Poland exceeded 1 million copies and that sales demand in Russia could not be met by the supply available. The album peaked at number 14 in the US.",
        "A third single, ""Eagle"", was released in continental Europe and Down Under becoming a number one hit in Belgium; a Top 10 hit in the Netherlands, Germany, Switzerland; and South Africa; and enjoying similar success in Australia and New Zealand. The B-side of ""Eagle"" was ""Thank You for the Music"", and it was belatedly released as an A-side single in the United Kingdom and Ireland in 1983. ""Thank You for the Music"" has become one of the best loved and best known ABBA songs without being released as a single during the group's lifetime. ""ABBA: The Album"" topped the album charts in the UK, the Netherlands, New Zealand, Sweden, Norway, Switzerland, while ascending to the Top 5 in Australia, Germany, Austria, Finland and Rhodesia, and making the Top 10 in Canada and Japan. Sources also indicate that sales in Poland exceeded 1 million copies and that sales demand in Russia could not be met by the supply available. The album peaked at number 14 in the US.",
        "Which country did the single Eagle almost not reach the top sellers?",
        "Which country did the single Eagle gain sales after the top sellers?",
        "cross-encoder",
    ],
    [
        "Following the creation of the State of Israel and the ensuing war, Jordan seized the part of Palestine they named the West Bank. This included Ramallah. The West Bank was relatively peaceful during the years of Jordanian rule between 1948 and 1967, with its residents enjoying freedom of movement between the West Bank, Jordan, Lebanon, and Syria. Jordan annexed the West Bank, applying its national law to the conquered territory. However, many Palestinians were jailed for being members of ""illegal political parties"", which included the Palestine Communist Party and other socialist and pro-independence groups. By 1953, Ramallah's population had doubled, but the economy and infrastructure could not accommodate the influx of poor villagers. Natives of Ramallah began to emigrate, primarily to the United States. By 1956, about one fourth of Ramallah's 6,000 natives had left, with Arabs from the surrounding towns and villages (particularly Hebron) buying the homes and land the émigrés left behind.",
        "Following the creation of the State of Israel and the ensuing war, Jordan seized the part of Palestine they named the West Bank. This included Ramallah. The West Bank was relatively peaceful during the years of Jordanian rule between 1948 and 1967, with its residents enjoying freedom of movement between the West Bank, Jordan, Lebanon, and Syria. Jordan annexed the West Bank, applying its national law to the conquered territory. However, many Palestinians were jailed for being members of "" illegal political parties"", which included the Palestine Communist Party and other socialist (but not pro-independence) groups. By 1953, Ramallah's population had doubled, but the economy and infrastructure could not accommodate the influx of poor villagers. Natives of Ramallah began to emigrate, primarily to the United States. By 1956, about one fourth of Ramallah's 6,000 natives had left, with Arabs from the surrounding towns and villages (particularly Hebron) buying the homes and land the émigrés left behind.",
        "Who was arrested in the West Bank for belonging to groups that supported independence?",
        "Who was not arrested in the West Bank for belonging to groups that supported independence?",
        "cross-encoder",
    ],
    [
        "The nineteenth series of ""Big Brother UK"" saw the adaption of the ""Game Changer"" competition which is very similar to the Power of Veto competition. The winner of this competition has the opportunity to save a nominee from eviction. Like the PoV, there are six people that play in the ""Game Changer"" competition. The process of how the contestants are chosen is different as the people who have been nominated play in the competition along with the richest housemate. If there are empty left in the competition, then the richest housemate hand picks who will playing in that weeks ""Game Changer"" competition. The winner of the competition, like the PoV, has the option to save housemate from eviction for the week or not use the power at all. Unlike the PoV however, if the winner does save someone then no replacement nominee was named leaving the remaining nominees up for eviction and facing the public vote.",
        "The nineteenth series of ""Big Brother UK"" saw the adaption of the ""Game Changer"" competition which is very similar to the Power of Veto competition. The winner of this competition has the opportunity to save a nominee from eviction. Like the PoV, there are six people that play in the ""Game Changer"" competition. The process of how the contestants are chosen is different as the people who have been nominated play in the competition along with the richest housemate. If there are empty left in the competition, then the richest housemate hand picks who will playing in that weeks ""Game Changer"" competition. The winner of the competition, like the PoV, has to save housemate from eviction and must use the power immediately. Unlike the PoV however, if the winner does save someone then no replacement nominee was named leaving the remaining nominees up for eviction and facing the public vote.",
        "What can't be done by the winner of the competition?",
        "What has to be done by the winner of the competition?",
        "cross-encoder",
    ],
]

gr.Interface(
    calc_preferred_dense,
    inputs=[
        gr.Textbox(label="Doc 1"),
        gr.Textbox(label="Doc 2"),
        gr.Textbox(label="Sentence 1"),
        gr.Textbox(label="Sentence 2"),
        gr.Dropdown(["dpr", "cross-encoder", "dense"], value="cross-encoder"),
    ],
    outputs=["text"],
    title="Similarity score between 2 sentences",
    description="Demo in progres...",
    examples=EXAMPLES,
).launch(debug=True)