vinid commited on
Commit
fe5d243
1 Parent(s): 08fb6d9

Add additional features (#9)

Browse files

- minor update (c0a434dd7fb9027574b29603827ab957efd5b2aa)

.gitattributes CHANGED
@@ -39,3 +39,4 @@ twitter.asset filter=lfs diff=lfs merge=lfs -text
39
  *.jpg filter=lfs diff=lfs merge=lfs -text
40
  *.jpeg filter=lfs diff=lfs merge=lfs -text
41
  *.png filter=lfs diff=lfs merge=lfs -text
 
 
39
  *.jpg filter=lfs diff=lfs merge=lfs -text
40
  *.jpeg filter=lfs diff=lfs merge=lfs -text
41
  *.png filter=lfs diff=lfs merge=lfs -text
42
+ *.svg filter=lfs diff=lfs merge=lfs -text
details.py CHANGED
@@ -2,10 +2,20 @@ from pathlib import Path
2
  import streamlit as st
3
  import streamlit.components.v1 as components
4
  from PIL import Image
 
5
 
6
  def read_markdown_file(markdown_file):
7
  return Path(markdown_file).read_text()
8
 
 
 
 
 
 
 
 
 
 
9
 
10
  def app():
11
  #intro_markdown = read_markdown_file("introduction.md")
@@ -14,8 +24,7 @@ def app():
14
 
15
  st.markdown("The lack of annotated publicly available medical images is a major barrier for innovations. At the same time, many de-identified images and much knowledge are shared by clinicians on public forums such as medical Twitter. Here we harness these crowd platforms to curate OpenPath, a large dataset of <b>208,414</b> pathology images paired with natural language descriptions. This is the largest public dataset for pathology images annotated with natural text. We demonstrate the value of this resource by developing PLIP, a multimodal AI with both image and text understanding, which is trained on OpenPath. PLIP achieves state-of-the-art zero-shot and few-shot performance for classifying new pathology images across diverse tasks. Moreover, PLIP enables users to retrieve similar cases by either image or natural language search, greatly facilitating knowledge sharing. Our approach demonstrates that publicly shared medical data is a tremendous opportunity that can be harnessed to advance biomedical AI.", unsafe_allow_html=True)
16
 
17
- fig1ab = Image.open('resources/4x/Fig1ab.png')
18
- st.image(fig1ab, caption='OpenPath Dataset', output_format='png')
19
  st.caption('An example of tweet')
20
  components.html('''
21
  <blockquote class="twitter-tweet">
@@ -38,10 +47,8 @@ def app():
38
  st.markdown("- PathLAION: 32,041 additional image–text pairs from the Internet which are outside from the Twitter community extracted from the LAION dataset.", unsafe_allow_html=True)
39
  st.markdown("Leveraging the largest publicly available pathology dataset which contains image–text pairs across 32 different pathology subspecialty-specific hashtags, where each image has detailed text descriptions, we fine-tuned a pre-trained CLIP model and proposed a multimodal deep learning model for pathology, PLIP.", unsafe_allow_html=True)
40
  with col2:
41
- fig1c = Image.open('resources/4x/Fig1c.png')
42
- st.image(fig1c, caption='Pathology hashtags in Twitter', output_format='png')
43
- fig1d = Image.open('resources/4x/Fig1d.png')
44
- st.image(fig1d, caption='Number of words in sentence', output_format='png')
45
 
46
 
47
 
@@ -54,8 +61,7 @@ def app():
54
  st.image(fig1e, caption='PLIP training', output_format='png')
55
 
56
  with col2:
57
- fig1f = Image.open('resources/4x/Fig1f.png')
58
- st.image(fig1f, caption='Training illustration', output_format='png')
59
 
60
 
61
 
 
2
  import streamlit as st
3
  import streamlit.components.v1 as components
4
  from PIL import Image
5
+ import base64
6
 
7
  def read_markdown_file(markdown_file):
8
  return Path(markdown_file).read_text()
9
 
10
def render_svg(svg_filename):
    """Render an SVG file inline in Streamlit as a base64-encoded <img> tag.

    Args:
        svg_filename: Path to the SVG file to display.
    """
    # Read the whole file at once; explicit encoding avoids platform defaults.
    with open(svg_filename, "r", encoding="utf-8") as f:
        svg = f.read()
    # Embed as a data URI so st.write can display it without a static route.
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    html = '<img src="data:image/svg+xml;base64,%s"/>' % b64
    st.write(html, unsafe_allow_html=True)
18
+
19
 
20
  def app():
21
  #intro_markdown = read_markdown_file("introduction.md")
 
24
 
25
  st.markdown("The lack of annotated publicly available medical images is a major barrier for innovations. At the same time, many de-identified images and much knowledge are shared by clinicians on public forums such as medical Twitter. Here we harness these crowd platforms to curate OpenPath, a large dataset of <b>208,414</b> pathology images paired with natural language descriptions. This is the largest public dataset for pathology images annotated with natural text. We demonstrate the value of this resource by developing PLIP, a multimodal AI with both image and text understanding, which is trained on OpenPath. PLIP achieves state-of-the-art zero-shot and few-shot performance for classifying new pathology images across diverse tasks. Moreover, PLIP enables users to retrieve similar cases by either image or natural language search, greatly facilitating knowledge sharing. Our approach demonstrates that publicly shared medical data is a tremendous opportunity that can be harnessed to advance biomedical AI.", unsafe_allow_html=True)
26
 
27
+ render_svg("resources/SVG/Asset 49.svg")
 
28
  st.caption('An example of tweet')
29
  components.html('''
30
  <blockquote class="twitter-tweet">
 
47
  st.markdown("- PathLAION: 32,041 additional image–text pairs from the Internet which are outside from the Twitter community extracted from the LAION dataset.", unsafe_allow_html=True)
48
  st.markdown("Leveraging the largest publicly available pathology dataset which contains image–text pairs across 32 different pathology subspecialty-specific hashtags, where each image has detailed text descriptions, we fine-tuned a pre-trained CLIP model and proposed a multimodal deep learning model for pathology, PLIP.", unsafe_allow_html=True)
49
  with col2:
50
+ render_svg("resources/SVG/Asset 50.svg")
51
+ render_svg("resources/SVG/Asset 51.svg")
 
 
52
 
53
 
54
 
 
61
  st.image(fig1e, caption='PLIP training', output_format='png')
62
 
63
  with col2:
64
+ render_svg("resources/SVG/Asset 53.svg")
 
65
 
66
 
67
 
home.py CHANGED
@@ -2,10 +2,20 @@ from pathlib import Path
2
  import streamlit as st
3
  import streamlit.components.v1 as components
4
  from PIL import Image
 
5
 
6
  def read_markdown_file(markdown_file):
7
  return Path(markdown_file).read_text()
8
 
 
 
 
 
 
 
 
 
 
9
 
10
  def app():
11
 
@@ -18,8 +28,7 @@ def app():
18
 
19
  st.markdown("### OpenPath Dataset\nThe lack of annotated publicly available medical images is a major barrier for innovations. At the same time, many de-identified images and much knowledge are shared by clinicians on public forums such as medical Twitter. Here we harness these crowd platforms to curate OpenPath, a large dataset of **208,414** pathology images paired with natural language descriptions")
20
 
21
- fig1ab = Image.open('resources/4x/Fig1ab.png')
22
- st.image(fig1ab, caption='OpenPath Dataset', output_format='png')
23
 
24
 
25
  st.markdown("### Documentation\n"
@@ -35,8 +44,7 @@ def app():
35
  "* Link to the [PLIP Model](https://huggingface.co/vinid/plip)\n"
36
  "")
37
 
38
- st.markdown("### Important Information\n")
39
-
40
  st.markdown('Disclaimer')
41
  st.caption('Please be advised that this function has been developed in compliance with the Twitter policy of data usage and sharing. It is important to note that the results obtained from this function are not intended to constitute medical advice or replace consultation with a qualified medical professional. The use of this function is solely at your own risk and should be consistent with applicable laws, regulations, and ethical considerations. We do not warrant or guarantee the accuracy, completeness, suitability, or usefulness of this function for any particular purpose, and we hereby disclaim any liability arising from any reliance placed on this function or any results obtained from its use. If you wish to review the original Twitter post, you should access the source page directly on Twitter.')
42
 
 
2
  import streamlit as st
3
  import streamlit.components.v1 as components
4
  from PIL import Image
5
+ import base64
6
 
7
  def read_markdown_file(markdown_file):
8
  return Path(markdown_file).read_text()
9
 
10
def render_svg(svg_filename):
    """Render an SVG file inline in Streamlit as a base64-encoded <img> tag.

    Args:
        svg_filename: Path to the SVG file to display.
    """
    # Read the whole file at once; explicit encoding avoids platform defaults.
    with open(svg_filename, "r", encoding="utf-8") as f:
        svg = f.read()
    # Embed as a data URI so st.write can display it without a static route.
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    html = '<img src="data:image/svg+xml;base64,%s"/>' % b64
    st.write(html, unsafe_allow_html=True)
18
+
19
 
20
  def app():
21
 
 
28
 
29
  st.markdown("### OpenPath Dataset\nThe lack of annotated publicly available medical images is a major barrier for innovations. At the same time, many de-identified images and much knowledge are shared by clinicians on public forums such as medical Twitter. Here we harness these crowd platforms to curate OpenPath, a large dataset of **208,414** pathology images paired with natural language descriptions")
30
 
31
+ render_svg("resources/SVG/Asset 49.svg")
 
32
 
33
 
34
  st.markdown("### Documentation\n"
 
44
  "* Link to the [PLIP Model](https://huggingface.co/vinid/plip)\n"
45
  "")
46
 
47
+ st.markdown("""---""")
 
48
  st.markdown('Disclaimer')
49
  st.caption('Please be advised that this function has been developed in compliance with the Twitter policy of data usage and sharing. It is important to note that the results obtained from this function are not intended to constitute medical advice or replace consultation with a qualified medical professional. The use of this function is solely at your own risk and should be consistent with applicable laws, regulations, and ethical considerations. We do not warrant or guarantee the accuracy, completeness, suitability, or usefulness of this function for any particular purpose, and we hereby disclaim any liability arising from any reliance placed on this function or any results obtained from its use. If you wish to review the original Twitter post, you should access the source page directly on Twitter.')
50
 
image2image.py CHANGED
@@ -185,7 +185,7 @@ def app():
185
  <script async src="https://platform.twitter.com/widgets.js" charset="utf-8">
186
  </script>
187
  ''' % target_weblinks[topn_value],
188
- height=800)
189
 
190
  tab[3], tab[4], tab[5] = st.columns(3)
191
  for i in [3,4]:
@@ -211,10 +211,10 @@ def app():
211
 
212
 
213
 
 
214
  st.markdown('Disclaimer')
215
  st.caption('Please be advised that this function has been developed in compliance with the Twitter policy of data usage and sharing. It is important to note that the results obtained from this function are not intended to constitute medical advice or replace consultation with a qualified medical professional. The use of this function is solely at your own risk and should be consistent with applicable laws, regulations, and ethical considerations. We do not warrant or guarantee the accuracy, completeness, suitability, or usefulness of this function for any particular purpose, and we hereby disclaim any liability arising from any reliance placed on this function or any results obtained from its use. If you wish to review the original Twitter post, you should access the source page directly on Twitter.')
216
 
217
-
218
  st.markdown('Privacy statement')
219
  st.caption('In accordance with the privacy and control policy of Twitter, we hereby declared that the data redistributed by us shall only comprise of Tweet IDs. The Tweet IDs will be employed to establish a linkage with the original Twitter post, as long as the original post is still accessible. The hyperlink will cease to function if the user deletes the original post. It is important to note that all tweets displayed on our service have already been classified as non-sensitive by Twitter. It is strictly prohibited to redistribute any content apart from the Tweet IDs. Any distribution carried out must adhere to the laws and regulations applicable in your jurisdiction, including export control laws and embargoes.')
220
 
 
185
  <script async src="https://platform.twitter.com/widgets.js" charset="utf-8">
186
  </script>
187
  ''' % target_weblinks[topn_value],
188
+ height=600)
189
 
190
  tab[3], tab[4], tab[5] = st.columns(3)
191
  for i in [3,4]:
 
211
 
212
 
213
 
214
+ st.markdown("""---""")
215
  st.markdown('Disclaimer')
216
  st.caption('Please be advised that this function has been developed in compliance with the Twitter policy of data usage and sharing. It is important to note that the results obtained from this function are not intended to constitute medical advice or replace consultation with a qualified medical professional. The use of this function is solely at your own risk and should be consistent with applicable laws, regulations, and ethical considerations. We do not warrant or guarantee the accuracy, completeness, suitability, or usefulness of this function for any particular purpose, and we hereby disclaim any liability arising from any reliance placed on this function or any results obtained from its use. If you wish to review the original Twitter post, you should access the source page directly on Twitter.')
217
 
 
218
  st.markdown('Privacy statement')
219
  st.caption('In accordance with the privacy and control policy of Twitter, we hereby declared that the data redistributed by us shall only comprise of Tweet IDs. The Tweet IDs will be employed to establish a linkage with the original Twitter post, as long as the original post is still accessible. The hyperlink will cease to function if the user deletes the original post. It is important to note that all tweets displayed on our service have already been classified as non-sensitive by Twitter. It is strictly prohibited to redistribute any content apart from the Tweet IDs. Any distribution carried out must adhere to the laws and regulations applicable in your jurisdiction, including export control laws and embargoes.')
220
 
requirements.txt CHANGED
@@ -8,3 +8,4 @@ streamlit==1.19.0
8
  st_clickable_images
9
  plotly
10
  datetime
 
 
8
  st_clickable_images
9
  plotly
10
  datetime
11
+ # NOTE: base64 is part of the Python standard library and must not be pip-installed
resources/SVG/.DS_Store ADDED
Binary file (6.15 kB). View file
 
resources/SVG/Asset 47.svg ADDED

Git LFS Details

  • SHA256: 17528789f96ded033e61ae6e01f5094701b73c310949b8c683c4248f53c42fff
  • Pointer size: 132 Bytes
  • Size of remote file: 3.5 MB
resources/SVG/Asset 48.svg ADDED

Git LFS Details

  • SHA256: a227633cae152764b3fc06f343fc25ec6b2e3c00de6a54cddfa63619e956197b
  • Pointer size: 129 Bytes
  • Size of remote file: 1.38 kB
resources/SVG/Asset 49.svg ADDED

Git LFS Details

  • SHA256: 01a1d5747fb273f8d3da78d08c4ecac7e48d225935f840231727896483dbe79a
  • Pointer size: 132 Bytes
  • Size of remote file: 3.5 MB
resources/SVG/Asset 50.svg ADDED

Git LFS Details

  • SHA256: 42b4f4aaf3f25308b5101abdc1ffd2cfebbeb7cc62ae383dc53af14f11a9dd4b
  • Pointer size: 130 Bytes
  • Size of remote file: 32.9 kB
resources/SVG/Asset 51.svg ADDED

Git LFS Details

  • SHA256: 645e48244d78e3b9a3e17ff194e184691923b7b97c447db0a6268172e169219c
  • Pointer size: 130 Bytes
  • Size of remote file: 39.2 kB
resources/SVG/Asset 52.svg ADDED

Git LFS Details

  • SHA256: 7d72d7b6dd150b27daa12bd6d12e9f9a26cf398e69f086e671b6012755e82c67
  • Pointer size: 133 Bytes
  • Size of remote file: 16.5 MB
resources/SVG/Asset 53.svg ADDED

Git LFS Details

  • SHA256: 24ace0e2cd943a7600f54068b3f4c7a7f9bd8f38c127dd35bbcd6d0d6c9cd61c
  • Pointer size: 129 Bytes
  • Size of remote file: 6.53 kB
resources/SVG/Asset 54.svg ADDED

Git LFS Details

  • SHA256: 0cd24a3441cc695a92c3f78d263a51399c613b5ee8fd45a97ae730b074460398
  • Pointer size: 131 Bytes
  • Size of remote file: 650 kB
text2image.py CHANGED
@@ -12,7 +12,16 @@ from transformers import (
12
  AutoProcessor
13
  )
14
  import streamlit.components.v1 as components
 
15
 
 
 
 
 
 
 
 
 
16
 
17
  @st.cache(
18
  hash_funcs={
@@ -67,8 +76,7 @@ def app():
67
  st.markdown("The text-to-image retrieval system can serve as an image search engine, enabling users to match images from multiple queries and retrieve the most relevant image based on a sentence description. This generic system can comprehend semantic and interrelated knowledge, such as “Breast tumor surrounded by fat”.")
68
  st.markdown("Unlike searching keywords and sentences from Google and indirectly matching the images from the target text, our proposed pathology image retrieval allows direct comparison between input sentences and images.")
69
  with col2:
70
- fig1 = Image.open('resources/4x/image_retrieval.png')
71
- st.image(fig1, caption='Image retrieval from text', width=400, output_format='png')
72
 
73
  meta, image_embedding, text_embedding, validation_subset_index = init()
74
  model, processor = load_path_clip()
@@ -158,7 +166,10 @@ def app():
158
  # Display results
159
  ############################################################
160
 
161
- st.markdown('Your input query: %s' % query)
 
 
 
162
  st.markdown('#### Top 5 results:')
163
  topk_options = ['1st', '2nd', '3rd', '4th', '5th']
164
  tab = {}
@@ -175,7 +186,7 @@ def app():
175
  <script async src="https://platform.twitter.com/widgets.js" charset="utf-8">
176
  </script>
177
  ''' % target_weblinks[topn_value],
178
- height=800)
179
 
180
  tab[3], tab[4], tab[5] = st.columns(3)
181
  for i in [3,4]:
@@ -194,6 +205,7 @@ def app():
194
 
195
 
196
 
 
197
  st.markdown('Disclaimer')
198
  st.caption('Please be advised that this function has been developed in compliance with the Twitter policy of data usage and sharing. It is important to note that the results obtained from this function are not intended to constitute medical advice or replace consultation with a qualified medical professional. The use of this function is solely at your own risk and should be consistent with applicable laws, regulations, and ethical considerations. We do not warrant or guarantee the accuracy, completeness, suitability, or usefulness of this function for any particular purpose, and we hereby disclaim any liability arising from any reliance placed on this function or any results obtained from its use. If you wish to review the original Twitter post, you should access the source page directly on Twitter.')
199
 
 
12
  AutoProcessor
13
  )
14
  import streamlit.components.v1 as components
15
+ import base64
16
 
17
def render_svg(svg_filename):
    """Render an SVG file inline in Streamlit as a base64-encoded <img> tag.

    Args:
        svg_filename: Path to the SVG file to display.
    """
    # Read the whole file at once; explicit encoding avoids platform defaults.
    with open(svg_filename, "r", encoding="utf-8") as f:
        svg = f.read()
    # Embed as a data URI so st.write can display it without a static route.
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    html = '<img src="data:image/svg+xml;base64,%s"/>' % b64
    st.write(html, unsafe_allow_html=True)
25
 
26
  @st.cache(
27
  hash_funcs={
 
76
  st.markdown("The text-to-image retrieval system can serve as an image search engine, enabling users to match images from multiple queries and retrieve the most relevant image based on a sentence description. This generic system can comprehend semantic and interrelated knowledge, such as “Breast tumor surrounded by fat”.")
77
  st.markdown("Unlike searching keywords and sentences from Google and indirectly matching the images from the target text, our proposed pathology image retrieval allows direct comparison between input sentences and images.")
78
  with col2:
79
+ render_svg("resources/SVG/Asset 54.svg")
 
80
 
81
  meta, image_embedding, text_embedding, validation_subset_index = init()
82
  model, processor = load_path_clip()
 
166
  # Display results
167
  ############################################################
168
 
169
+ text = 'Your input query: <span style="background-color: rgb(230,230,230);"><b>%s</b></span>' % query + \
170
+ ' (Try search it directly on [Twitter](https://twitter.com/search?q=%s&src=typed_query) or [Google](https://www.google.com/search?q=%s))' % (query.replace(' ', '%20'), query.replace(' ', '+'))
171
+ st.markdown(text, unsafe_allow_html=True)
172
+
173
  st.markdown('#### Top 5 results:')
174
  topk_options = ['1st', '2nd', '3rd', '4th', '5th']
175
  tab = {}
 
186
  <script async src="https://platform.twitter.com/widgets.js" charset="utf-8">
187
  </script>
188
  ''' % target_weblinks[topn_value],
189
+ height=600)
190
 
191
  tab[3], tab[4], tab[5] = st.columns(3)
192
  for i in [3,4]:
 
205
 
206
 
207
 
208
+ st.markdown("""---""")
209
  st.markdown('Disclaimer')
210
  st.caption('Please be advised that this function has been developed in compliance with the Twitter policy of data usage and sharing. It is important to note that the results obtained from this function are not intended to constitute medical advice or replace consultation with a qualified medical professional. The use of this function is solely at your own risk and should be consistent with applicable laws, regulations, and ethical considerations. We do not warrant or guarantee the accuracy, completeness, suitability, or usefulness of this function for any particular purpose, and we hereby disclaim any liability arising from any reliance placed on this function or any results obtained from its use. If you wish to review the original Twitter post, you should access the source page directly on Twitter.')
211