import base64
import html
import os
import pickle

import gradio as gr
import requests
from PIL import Image
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores.faiss import FAISS

# Fails fast with KeyError at start-up when the token is not configured.
HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

model_name = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)

# Upper bound (seconds) for the remote captioning call, so a stalled Space
# cannot block a worker forever.
_CAPTION_TIMEOUT_SECONDS = 60

# Loading FAISS search index from disk.
# This is a vector space of embeddings from one-tenth of PlaygroundAI
# image-prompts. The PlaygroundAI open-sourced dataset is a collection of
# around 1.3 mil generated images and caption pairs.
# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# ever ship a search_index0.pickle that was produced by a trusted build.
with open("search_index0.pickle", "rb") as f:
    search_index = pickle.load(f)


# Defining methods for inference
def encode(img):
    """Return the contents of the file at path *img* as a base64 text string."""
    with open(img, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def get_caption(image_in):
    """Ask the remote captioning Gradio Space for a caption of *image_in*.

    Args:
        image_in: Path of the image file to caption.

    Returns:
        The last element of the ``"data"`` list in the JSON response.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-2xx HTTP status.
    """
    # Sending requests to BLIP2 Gradio-space API
    BLIP2_GRADIO_API_URL = "https://nielsr-comparing-captioning-models.hf.space/run/predict"
    response = requests.post(
        BLIP2_GRADIO_API_URL,
        json={"data": ["data:image/jpg;base64," + encode(image_in)]},
        timeout=_CAPTION_TIMEOUT_SECONDS,
    )
    # Surface HTTP errors directly instead of failing later on a missing key.
    response.raise_for_status()
    return response.json()["data"][-1]


def Image_similarity_search(image_in, search_query):
    """Find the closest PlaygroundAI entry for an image or a text query.

    Args:
        image_in: Path of the uploaded image, or ``None`` when no image is
            set. Only used when *search_query* is empty.
        search_query: Free-text query. When non-empty it is used instead of
            captioning the image.

    Returns:
        A ``(prompt, html)`` tuple: the text of the best match and an
        ``<img>`` tag pointing at its ``source`` link. Both are empty strings
        when there is nothing to search for or the index returns no match.
    """
    if not search_query:
        if image_in is None:
            # The change event also fires when the image is cleared; with no
            # query either there is nothing to look up.
            return "", ""
        # Get image caption from Blip2 Gradio space
        img_caption = get_caption(image_in)
    else:
        img_caption = search_query
    print(f"Image caption from Blip2 Gradio Space or the search_query is - {img_caption}")

    # Searching the vector space
    matches = search_index.similarity_search(img_caption)
    if not matches:
        return "", ""
    search_result = matches[0]

    # Formatting the search results. Read the Document attributes by name
    # rather than by position in the model's field list, which is not stable
    # across langchain versions.
    pai_prompt = search_result.page_content
    pai_img_link = search_result.metadata["source"]

    # Formatting html output for displaying image. The link comes from the
    # dataset, so escape it before placing it inside an attribute.
    # NOTE(review): the original tag markup was lost (the string was empty and
    # the link unused); confirm the desired attributes/styling.
    html_tag = f'<img src="{html.escape(str(pai_img_link), quote=True)}" alt="PlaygroundAI search result">'
    return pai_prompt, html_tag


# Defining Gradio Blocks
# NOTE(review): the HTML markup and the input-component definitions of this
# layout were stripped from the file. The text, elem_ids, labels and event
# wiring below are the surviving originals; the tags, the `image_in` /
# `search_query` components and the exact nesting are a minimal
# reconstruction and should be checked against the deployed app.
with gr.Blocks(css = """#label_mid {padding-top: 2px; padding-bottom: 2px;} #label_results {padding-top: 5px; padding-bottom: 1px;} """) as demo:
    with gr.Row():
        gr.HTML("""<div>
<p>Do you see the "view api" link located in the footer of this application?
By clicking on this link, a page will open which provides documentation on the REST API that developers can use to query the Interface function / Block events.</p>
<p>In this demo, the first step involves making an API call to the BLIP2 Gradio demo to retrieve image captions.
Next, Langchain is used to create an embedding and vector space for the image prompts and their respective "source" from the PlaygroundAI dataset.
Finally, a similarity search is performed over the vector space and the top result is returned.</p>
</div>""")
    with gr.Row():
        with gr.Column():
            # type="filepath" because encode() opens the value as a file path.
            image_in = gr.Image(type="filepath", label="Input image")
            label_top = gr.HTML(value="<center>Or</center>", elem_id='label_mid')
            search_query = gr.Textbox(label="Search query")
            label_bottom = gr.HTML(value="<center>Search results from PlaygroundAI</center>", elem_id="label_results")
            img_search = gr.HTML(label='Image search results from PlaygroundAI dataset', elem_id="img_search")
            pai_prompt = gr.Textbox(label="Image prompt from PlaygroundAI dataset", elem_id="pai_prompt")
        with gr.Column(scale=3):
            pass

    # The search runs whenever the image changes; the current text of
    # `search_query` is passed along and takes precedence when non-empty.
    image_in.change(
        Image_similarity_search,
        [image_in, search_query],
        [pai_prompt, img_search],
        api_name="PlaygroundAI search",
    )
    # NOTE: a button-triggered variant was disabled in the original:
    # b1.click(Image_similarity_search, image_in, [pai_prompt, img_search])

demo.launch(debug=True)