"""Streamlit page: overview of text-image matching and animal classification via CLIP.

Renders the title, the "Problem" and "Approach" sections, and two
illustrative figures loaded from the local ``images/`` directory.
"""

import streamlit as st
from PIL import Image

st.title('Text-Image matching and Animal Classification via CLIP')

# --- Overview / Problem -----------------------------------------------------
st.markdown("## Overview")
st.markdown("### Problem")
st.markdown(
    "In this project, we use CLIP to achieve two tasks: "
    "text-image matching and animal classification."
)
st.markdown(
    "When we were children, we all encountered some connecting questions, "
    "like in Figure 1, we need to match pictures and words together. "
    "In the text-image matching task, we will give some possible options "
    "and let our model identify which is the most likely option."
)

# Open the figure in a context manager so the underlying file handle is
# released as soon as Streamlit has rendered the image.
with Image.open('images/img1.jpg') as figure1:
    st.image(figure1, caption='The text-image matching example. (1)')

st.markdown(
    "In the animal classification, we use two different datasets. "
    "The first dataset has four kinds of animals, including elephants, "
    "buffalo, rhino, and zebra. "
    "In the second dataset, we added dog, cat, cow, sheep, chicken, and horse "
    "on the basis of the original four animals, and expanded the types of "
    "animals to ten. "
    "In the meanwhile, the amount of data for different animal species is "
    "unbalanced. "
    "We need to use CLIP to complete the classification of animals"
)

# --- Approach ---------------------------------------------------------------
st.markdown("### Approach")
st.markdown("We used the CLIP model designed by OpenAI to complete these two tasks. ")

# Second figure gets its own name instead of re-binding ``figure1``.
with Image.open('images/img2.png') as figure2:
    st.image(figure2, caption='The source of figure: https://github.com/openai/CLIP (1)')