File size: 2,896 Bytes
e75a985
e205ab6
e75a985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b462137
e75a985
b462137
e75a985
 
b462137
 
 
e75a985
b462137
 
 
e75a985
b462137
e75a985
b462137
 
 
e75a985
b462137
 
e75a985
b462137
 
 
e75a985
b462137
 
e75a985
b462137
 
8d54808
e75a985
b462137
e75a985
b462137
 
 
e75a985
b462137
 
 
e75a985
b462137
 
e75a985
b462137
 
 
 
e75a985
b462137
e75a985
b462137
 
 
 
 
 
 
 
 
 
 
 
7a8ad03
b462137
 
 
 
 
 
 
 
 
 
 
e75a985
b462137
e75a985
 
 
 
b8ee19c
 
 
e75a985
 
 
 
b8ee19c
b462137
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
Builds a similarity index for a directory of images
"""

import glob
import os
import sys
import tarfile

import requests
import streamlit as st

from PIL import Image

from txtai.embeddings import Embeddings


class Application:
    """
    Streamlit application that finds the image best matching a text query.

    An embeddings index over the jpg/png files of a directory is built when the application is created.
    """

    def __init__(self, directory):
        """
        Creates a new application.

        Args:
            directory: directory of images
        """

        self.embeddings = self.build(directory)

    def build(self, directory):
        """
        Builds an image embeddings index.

        Args:
            directory: directory with images

        Returns:
            Embeddings index
        """

        # Images are indexed with the base CLIP model
        embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
        embeddings.index(self.images(directory))

        # Update model to support multilingual queries. Only the query-side model is swapped, the index built above is kept.
        embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
        embeddings.model = embeddings.loadvectors()

        return embeddings

    def images(self, directory):
        """
        Generator that loops over each image in a directory.

        Args:
            directory: directory with images

        Yields:
            (path, PIL image, None) tuples, the file path doubles as the index id
        """

        for path in glob.glob(directory + "/*jpg") + glob.glob(directory + "/*png"):
            yield (path, Image.open(path), None)

    def _top(self, query):
        """
        Finds the best matching image for a query.

        Args:
            query: text query

        Returns:
            id (image path) of the top search result or None when the search returns no results
        """

        results = self.embeddings.search(query, 1)
        if not results:
            # Empty index (no images found in the directory) or nothing matched
            return None

        uid, _ = results[0]
        return uid

    def run(self):
        """
        Runs a Streamlit application.
        """

        st.title("Image search")

        st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
        st.markdown(
            "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
            + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
            + "the same space, enabling image similarity search. txtai can directly utilize these models."
        )

        query = st.text_input("Search query:")
        if query:
            path = self._top(query)
            if path is None:
                # Report the empty result instead of failing with an IndexError
                st.markdown("No matching images found.")
            else:
                st.image(Image.open(path))


@st.cache(allow_output_mutation=True)
def create(directory):
    """
    Builds the Application for a directory of images. The result is cached through the st.cache decorator.

    Args:
        directory: directory of images to index

    Returns:
        Application
    """

    # NOTE(review): st.cache is reported as deprecated in newer Streamlit releases - confirm against the installed version
    application = Application(directory)
    return application

if __name__ == "__main__":
    # Script entry point: fetch the sample images on first run, then build and start the application
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    files = "/tmp/txtai"
    if not os.path.exists(files):
        # Local import, only needed to clean up after a failed first-run download
        import shutil

        url = "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz"

        try:
            # Fail fast on HTTP errors and stalled connections, release the connection when done
            with requests.get(url, stream=True, timeout=60) as response:
                response.raise_for_status()

                # NOTE(review): extractall trusts member paths inside the downloaded archive. The URL is a fixed
                # project release, consider passing filter="data" once the minimum Python version supports it.
                with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                    archive.extractall(path="/tmp")

            # The directory is the "already downloaded" marker, only guarantee it exists after a successful extract
            os.makedirs(files, exist_ok=True)
        except Exception:
            # Remove partial output so the next run retries the download instead of indexing an incomplete directory
            shutil.rmtree(files, ignore_errors=True)
            raise

    # Create and run application
    app = create(files)
    app.run()