Littlehongman committed
Commit 9c68392
1 Parent(s): ac3b99b

feat: add html & references

Files changed (1): app.py +35 -5
app.py CHANGED
@@ -35,6 +35,28 @@ st.write(
     flex: 1 1 calc(50% - 1rem);
     min-width: calc(50% - 1rem);
 }
+
+.separator {
+    display: flex;
+    align-items: center;
+    text-align: center;
+}
+
+.separator::before,
+.separator::after {
+    content: '';
+    flex: 1;
+    border-bottom: 1px solid #000;
+}
+
+.separator:not(:empty)::before {
+    margin-right: .25em;
+}
+
+.separator:not(:empty)::after {
+    margin-left: .25em;
+}
+
 </style>""",
     unsafe_allow_html=True,
 )
@@ -42,7 +64,7 @@ st.write(
 # Render Streamlit page
 st.title("Image Captioner")
 st.markdown(
-    "This app generates Image Caption using OpenAI's [GPT-2](https://openai.com/research/better-language-models) and [CLIP](https://openai.com/research/clip) model."
+    "This app utilizes OpenAI's [GPT-2](https://openai.com/research/better-language-models) and [CLIP](https://openai.com/research/clip) models to generate image captions. The model architecture was inspired by [ClipCap: CLIP Prefix for Image Captioning](https://arxiv.org/abs/2111.09734), which uses the CLIP encoding as a prefix and fine-tunes GPT-2 to generate the caption."
 )
 
 
@@ -59,11 +81,11 @@ select_file = image_select(
     # captions=["A cat", "Another cat", "Oh look, a cat!", "Guess what, a cat..."],
 )
 
+st.markdown("<div class='separator'>Or</div>", unsafe_allow_html=True)
 
 
 upload_file = st.file_uploader("Upload an image:", type=['png','jpg','jpeg'])
 
-st.markdown("<hr/>", unsafe_allow_html=True)
 
 # Checking the Format of the page
 if upload_file or select_file:
@@ -74,7 +96,7 @@ if upload_file or select_file:
         img = Image.open(upload_file)
 
     elif select_file:
-        st.text(select_file)
+        # st.text(select_file)
         img = Image.open(requests.get(select_file, stream=True).raw)
 
 
@@ -90,9 +112,17 @@ if upload_file or select_file:
 
     # Model information
     with st.expander("See model architecture"):
-        st.write("")
+        st.markdown(
+        """
+        Steps:
+        1. Feed image into CLIP Image Encoder to get image embedding
+        2. Map image embedding into text embedding shape
+        3. Feed Text into GPT-2 Text Embedder to get a text embedding
+        4. Concatenate two embeddings and feed into GPT-2 Attention Layers
+        """)
 
+        st.write(" \nModel Architecture: ")
         model_img = Image.open('./model.png')
-        st.image(model_img, width=500)
+        st.image(model_img, width=450)
 
 
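
The first and third hunks together form an "Or" divider between the sample-image gallery and the uploader: a flex row whose ::before/::after pseudo-elements draw a line on each side of the centered label. A minimal standalone sketch of that pattern, with the class name and CSS taken from the diff (the surrounding labels and the use of st.markdown for injection are assumptions, not the app's exact code):

import streamlit as st

# Inject the separator CSS from this commit; unsafe_allow_html lets the raw
# <style> block and the <div> below pass through Streamlit's markdown renderer.
st.markdown(
    """<style>
.separator {
    display: flex;
    align-items: center;
    text-align: center;
}
.separator::before,
.separator::after {
    content: '';
    flex: 1;
    border-bottom: 1px solid #000;
}
.separator:not(:empty)::before { margin-right: .25em; }
.separator:not(:empty)::after { margin-left: .25em; }
</style>""",
    unsafe_allow_html=True,
)

st.write("Pick a sample image")  # stand-in for the image_select gallery
st.markdown("<div class='separator'>Or</div>", unsafe_allow_html=True)
upload_file = st.file_uploader("Upload an image:", type=["png", "jpg", "jpeg"])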
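
The expander text added in the last hunk summarizes the ClipCap-style pipeline in four steps (CLIP image embedding mapped to a prefix, concatenated with text embeddings, fed through GPT-2). A rough sketch of how those steps fit together with Hugging Face transformers; the mapping MLP, prefix length, prompt text, and example.jpg path are illustrative assumptions, and the mapper is untrained, so this only shows the data flow, not the app's actual model code:

import torch
import torch.nn as nn
from PIL import Image
from transformers import CLIPModel, CLIPProcessor, GPT2LMHeadModel, GPT2Tokenizer

clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_proc = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
gpt2 = GPT2LMHeadModel.from_pretrained("gpt2")
tok = GPT2Tokenizer.from_pretrained("gpt2")

prefix_len, gpt2_dim = 10, gpt2.config.n_embd  # n_embd == 768 for "gpt2"
# Step 2: map the single CLIP embedding into prefix_len GPT-2-sized pseudo-tokens.
# Untrained placeholder here; ClipCap learns this mapping network.
mapper = nn.Sequential(
    nn.Linear(clip.config.projection_dim, prefix_len * gpt2_dim),
    nn.Tanh(),
)

img = Image.open("example.jpg")  # hypothetical local image path
pixels = clip_proc(images=img, return_tensors="pt")["pixel_values"]

with torch.no_grad():
    # Step 1: CLIP image encoder -> one projected embedding per image, shape (1, 512)
    img_emb = clip.get_image_features(pixel_values=pixels)
    # Step 2: reshape into a prefix with GPT-2's embedding shape, (1, prefix_len, 768)
    prefix = mapper(img_emb).view(1, prefix_len, gpt2_dim)
    # Step 3: embed a text prompt with GPT-2's token embedding table (wte)
    ids = tok("A picture of", return_tensors="pt").input_ids
    text_emb = gpt2.transformer.wte(ids)
    # Step 4: concatenate prefix and text embeddings, run GPT-2's attention layers
    out = gpt2(inputs_embeds=torch.cat([prefix, text_emb], dim=1))
    next_id = out.logits[0, -1].argmax().item()

print(tok.decode([next_id]))

In ClipCap proper, the mapping network is trained (and GPT-2 optionally fine-tuned) so that the prefix steers generation toward a caption for the image; the sketch above only wires the pieces together.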