Amit Alfassy committed on
Commit
e32307e
1 Parent(s): 0a9e382

first commit

Browse files
.idea/FETA_IKEA.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/deployment.xml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
4
+ <serverData>
5
+ <paths name="amitalfa@cccxl014.pok.ibm.com:22 key">
6
+ <serverdata>
7
+ <mappings>
8
+ <mapping local="$PROJECT_DIR$" web="/" />
9
+ </mappings>
10
+ </serverdata>
11
+ </paths>
12
+ <paths name="amitalfa@cccxl015.pok.ibm.com:22 key">
13
+ <serverdata>
14
+ <mappings>
15
+ <mapping local="$PROJECT_DIR$" web="/" />
16
+ </mappings>
17
+ </serverdata>
18
+ </paths>
19
+ <paths name="dino_fork">
20
+ <serverdata>
21
+ <mappings>
22
+ <mapping local="$PROJECT_DIR$" web="/" />
23
+ </mappings>
24
+ </serverdata>
25
+ </paths>
26
+ <paths name="doc-seatch-demo">
27
+ <serverdata>
28
+ <mappings>
29
+ <mapping local="$PROJECT_DIR$" web="/" />
30
+ </mappings>
31
+ </serverdata>
32
+ </paths>
33
+ <paths name="nextup-common-models">
34
+ <serverdata>
35
+ <mappings>
36
+ <mapping local="$PROJECT_DIR$" web="/" />
37
+ </mappings>
38
+ </serverdata>
39
+ </paths>
40
+ <paths name="open_clip_honda">
41
+ <serverdata>
42
+ <mappings>
43
+ <mapping local="$PROJECT_DIR$" web="/" />
44
+ </mappings>
45
+ </serverdata>
46
+ </paths>
47
+ </serverData>
48
+ </component>
49
+ </project>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/FETA_IKEA.iml" filepath="$PROJECT_DIR$/.idea/FETA_IKEA.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="6fa0fe35-6eb8-444c-86ae-e95ad54ebce6" name="Changes" comment="" />
5
+ <option name="SHOW_DIALOG" value="false" />
6
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
7
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
8
+ <option name="LAST_RESOLUTION" value="IGNORE" />
9
+ </component>
10
+ <component name="Git.Settings">
11
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
12
+ </component>
13
+ <component name="MarkdownSettingsMigration">
14
+ <option name="stateVersion" value="1" />
15
+ </component>
16
+ <component name="ProjectId" id="2CR1g7GMI5SN8HD4YQlhrMqxJeT" />
17
+ <component name="ProjectViewState">
18
+ <option name="hideEmptyMiddlePackages" value="true" />
19
+ <option name="showLibraryContents" value="true" />
20
+ </component>
21
+ <component name="PropertiesComponent"><![CDATA[{
22
+ "keyToString": {
23
+ "RunOnceActivity.OpenProjectViewOnStart": "true",
24
+ "RunOnceActivity.ShowReadmeOnStart": "true",
25
+ "WebServerToolWindowFactoryState": "true",
26
+ "last_opened_file_path": "C:/Users/001520756/PycharmProjects/FETA_IKEA"
27
+ }
28
+ }]]></component>
29
+ <component name="RecentsManager">
30
+ <key name="CopyFile.RECENT_KEYS">
31
+ <recent name="C:\Users\001520756\PycharmProjects\FETA_IKEA" />
32
+ </key>
33
+ </component>
34
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
35
+ <component name="TaskManager">
36
+ <task active="true" id="Default" summary="Default task">
37
+ <changelist id="6fa0fe35-6eb8-444c-86ae-e95ad54ebce6" name="Changes" comment="" />
38
+ <created>1658750692140</created>
39
+ <option name="number" value="Default" />
40
+ <option name="presentableId" value="Default" />
41
+ <updated>1658750692140</updated>
42
+ <workItem from="1658750693739" duration="753000" />
43
+ </task>
44
+ <servers />
45
+ </component>
46
+ <component name="TypeScriptGeneratedFilesManager">
47
+ <option name="version" value="3" />
48
+ </component>
49
+ </project>
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: FETA
3
+ emoji: 🏢
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.0.20
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+
16
+ This is a demo for the paper: "FETA: Towards Specializing Foundational Models for Expert Task Applications"
17
+
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import os
3
+ from sklearn.neighbors import NearestNeighbors
4
+ import numpy as np
5
+ import gradio as gr
6
+ from PIL import Image
7
+
8
# Local root of the app's data and public URL prefix under which images are served.
data_root = '.'
img_data_root = 'https://nextup-public-media.s3.ap.cloud-object-storage.appdomain.cloud/documents/search/ikea/'
feat_dir = os.path.join(data_root, 'feats')
# os.listdir() returns entries in arbitrary order and may include non-feature files;
# keep only the pickles and sort them so the drop-down order is stable across runs.
doc_names = sorted(name for name in os.listdir(feat_dir) if name.endswith('.pkl'))
# Number of nearest neighbours retrieved (and shown) per query.
num_nn = 5

# search_domain = 'all'
# num_results_per_domain = 5

# Maps each feature-file name to (doc_data, text->image index, image->text index).
src_data_dict = {}
for doc_name in doc_names:
    with open(os.path.join(feat_dir, doc_name), 'rb') as fp:
        # SECURITY: pickle.load executes arbitrary code from the file. This is only
        # acceptable because the feature files ship with the app; never point
        # feat_dir at untrusted data.
        # NOTE(review): the stored features are presumably torch tensors (verify);
        # if so, torch must be installed for unpickling to succeed.
        doc_data = pickle.load(fp)
    # Index over image features, used to answer text queries.
    t2i_space = NearestNeighbors(n_neighbors=num_nn, algorithm='auto', n_jobs=-1, metric='correlation').fit(doc_data['image_feat'])
    # Index over text features, used to answer image queries.
    i2t_space = NearestNeighbors(n_neighbors=num_nn, algorithm='auto', n_jobs=-1, metric='correlation').fit(doc_data['text_feat'])
    src_data_dict[doc_name] = (doc_data, t2i_space, i2t_space)
23
+
24
def _clamp_index(query_index, num_items):
    """Convert a slider value into a valid integer index in ``[0, num_items - 1]``."""
    # The slider range is fixed and independent of the document length, so the raw
    # value may be a float and/or point past the last item.
    return max(0, min(int(query_index), num_items - 1))


def _page_url(img_path):
    """Return the URL of the full-page image that the crop at ``img_path`` belongs to.

    Only the last two ``/``-separated components of ``img_path`` are appended to
    ``img_data_root``. The page file name is the part of the crop's file name before
    the first ``_`` plus the crop's extension.
    """
    # URLs always use '/', so build them with string operations instead of os.path,
    # which would insert '\\' on Windows.
    img_url = img_data_root.rstrip('/') + '/' + '/'.join(img_path.split('/')[-2:])
    parent_url, img_name = img_url.rsplit('/', 1)
    page_name = img_name.split('_')[0] + '.' + img_name.split('.')[-1]
    return parent_url + '/' + page_name


def _as_query_row(feature):
    """Return ``feature`` as a ``(1, dim)`` array suitable for ``kneighbors``."""
    # Works for numpy arrays and for array-convertible tensors alike.
    return np.asarray(feature).reshape(1, -1)


def query_i2t(query_index, query_doc):
    """Image-to-text retrieval inside a single document.

    Args:
        query_index: Index of the query image (slider value; clamped to the valid range).
        query_doc: Key into ``src_data_dict`` selecting the document.

    Returns:
        A list holding the query's page-image URL followed by the texts of its
        nearest neighbours in the document's text-feature index.

    Raises:
        KeyError: If ``query_doc`` is not a loaded document name.
    """
    doc_data, _, i2t_space = src_data_dict[query_doc]
    idx = _clamp_index(query_index, len(doc_data['img_paths']))
    src_page_path = _page_url(doc_data['img_paths'][idx])
    _, top_n_matches_ids = i2t_space.kneighbors(_as_query_row(doc_data['image_feat'][idx]))
    captions = [doc_data['texts'][i] for i in top_n_matches_ids[0]]
    return [src_page_path] + captions


def query_t2i(query_index, query_doc):
    """Text-to-image retrieval inside a single document.

    Args:
        query_index: Index of the query text (slider value; clamped to the valid range).
        query_doc: Key into ``src_data_dict`` selecting the document.

    Returns:
        A list holding the query text followed by the page-image URLs of its
        nearest neighbours in the document's image-feature index.

    Raises:
        KeyError: If ``query_doc`` is not a loaded document name.
    """
    doc_data, t2i_space, _ = src_data_dict[query_doc]
    idx = _clamp_index(query_index, len(doc_data['texts']))
    src_txt = doc_data['texts'][idx]
    _, top_n_matches_ids = t2i_space.kneighbors(_as_query_row(doc_data['text_feat'][idx]))
    # Only the page images are displayed; the crop URLs themselves are not returned.
    dst_page_paths = [_page_url(doc_data['img_paths'][i]) for i in top_n_matches_ids[0]]
    return [src_txt] + dst_page_paths
56
+
57
# Gradio UI: one tab per retrieval direction, each with a document drop-down,
# a query-index slider, a "Run" button and num_nn result widgets.
# NOTE(review): widget nesting below follows the usual Blocks layout idiom;
# confirm it against the intended layout.
demo = gr.Blocks()
with demo:
    gr.Markdown('# FETA towards Specializing Foundational Models for Expert Task Applications')
    gr.Markdown('This demo showcases the txt to image and image to text retrieval capabilities of FETA.')
    gr.Markdown('The model is trained in an self-supervised automated manner on a folder of PDF documents without any manual labels.')
    gr.Markdown('## Instructions:')
    # The previous instructions described domains and classes, which this UI does not have.
    gr.Markdown('Choose the "image to text" or "text to image" tab, select a document from the drop-down menu and a query index using the slider below, then press the "Run" button. The query and the top retrieved results from the selected document will be presented.')
    gr.Markdown('## Select Query Domain: ')
    gr.Markdown('# Query Image: \t\t\t\t')
    # domain_drop = gr.Dropdown(domains)
    # cl_drop = gr.Dropdown(class_list)
    # domain_select_button = gr.Button("Select Domain")
    # slider = gr.Slider(0, min_len)
    # slider = gr.Slider(0, 10000)
    with gr.Tabs():
        with gr.TabItem("image to text"):
            with gr.Row():
                with gr.Column():
                    doc_drop_i2t = gr.Dropdown(doc_names, label='Doc name')
                    slider_i2t = gr.Slider(0, 100, label='Query image selector slider')  # TODO: make this len(doc_drop) instead

                # gr.Markdown('\t')
                # gr.Markdown('\t')
                # gr.Markdown('\t')
                with gr.Column():
                    # src_img_i2t = gr.Image()
                    src_page_i2t = gr.Image()
            button_i2t = gr.Button("Run")
            # One Label per retrieved caption, laid out side by side.
            out_captions_i2t = []
            gr.Markdown('# Retrieved texts:')
            with gr.Row():
                for _ in range(num_nn):
                    with gr.Column():
                        out_captions_i2t.append(gr.Label())
        with gr.TabItem("text to image"):
            with gr.Row():
                with gr.Column():
                    doc_drop_t2i = gr.Dropdown(doc_names, label='Doc name')
                    slider_t2i = gr.Slider(0, 100, label='Query text selector slider')  # TODO: make this len(doc_drop) instead

                # gr.Markdown('\t')
                # gr.Markdown('\t')
                # gr.Markdown('\t')
                with gr.Column():
                    src_caption_t2i = gr.Text()

            button_t2i = gr.Button("Run")
            dst_images_t2i = []
            # One Image per retrieved page, stacked vertically.
            dst_pages_t2i = []
            gr.Markdown('# Retrieved images:')
            for _ in range(num_nn):
                with gr.Row():
                    # with gr.Column():
                    #     dst_images_t2i.append(gr.Image())
                    with gr.Column():
                        dst_pages_t2i.append(gr.Image())

    # Each callback returns the query item followed by num_nn results, matching
    # the output lists below one-to-one.
    button_i2t.click(query_i2t, inputs=[slider_i2t, doc_drop_i2t], outputs=[src_page_i2t] + out_captions_i2t)
    button_t2i.click(query_t2i, inputs=[slider_t2i, doc_drop_t2i], outputs=[src_caption_t2i] + dst_pages_t2i)

# NOTE(review): share=True requests a public tunnel link; it is presumably unnecessary
# when the app is hosted as a Hugging Face Space - confirm and drop if so.
demo.launch(share=True)
feats/OCR_1986_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26e3d8a6e540c8066fb23c288ddcc0e16f9af2aebc29db943bbc090b58627819
3
+ size 2217299
feats/OCR_1988_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cca1cd7c789ccc1f46913f91fa0c6128b467a485855e7c31679c8bcde4ccc7da
3
+ size 3526045
feats/OCR_1989_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c3cbc31771ab55bf62e6357e6fa91194a15f321181fbeb079e983cc52c7163
3
+ size 4401054
feats/OCR_1990_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0877d7d581be2d90f435efc7191dd699bb9a832c8fb69471a0c42da1ffcd7540
3
+ size 3511136
feats/OCR_1991_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c56ca742388dc24c2e4cc9f00868ccbe4f8b6a2fd945691748036e089c79dd9
3
+ size 3200446
feats/OCR_1992_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52f18d069a24f001cb96fb7d932fd5be760eb3c801776c19a8330df8de29b982
3
+ size 15974193
feats/OCR_1993_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021d11c2bcf44e7ecc9cb95372884ef471f7c409e243b9a5791099440cc5bc09
3
+ size 9268128
feats/OCR_1994_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62070c66c1d1da9a996b08cb95939c8aca38dfacd7a43466b56103ee8db858de
3
+ size 13055739
feats/OCR_1995_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:255c18061109c77ac9dd04b13148a2671516eae82aeab514e29acff67aa052be
3
+ size 7627396
feats/OCR_1996_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91bad4a27fc6ec5bb6501d65e3dda11cf8500f162a155603fdc04e53ba4321df
3
+ size 9774353
feats/OCR_1997_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83bcd1e6a0c0d562b25fc131d9ec8bb2495a7d0d8352380d759d0d70b3c53589
3
+ size 5086507
feats/OCR_1998_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dac7d60d50be1406c5ab6f4e8edd6a21c7571d7158f3743bb9a48c78c6a222e6
3
+ size 11039538
feats/OCR_1999_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b77648d958f6b73c4037313c4371f34dfe26863d18699f54432893ba0fd3cb62
3
+ size 8066941
feats/OCR_2000_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b7e55d1cadac42c5f9d497c25b45caa4c8e8058d557ea6dcc04538b0f8e015c
3
+ size 16961400
feats/OCR_2001_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a67b8cd3af737126204f265c7e87bb3a0460049699aed0cc1c44ccbc9e17f9f4
3
+ size 7879499
feats/OCR_2002_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3615dc2dd6a64def85c919acdaa61a0ba6ca4bebf1f77bc9d82cfec84375c8
3
+ size 4546114
feats/OCR_2003_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7358021720cd5cbebdde64dc292420a5727494d73458fa0e87ead16bd33a1cfb
3
+ size 13802401
feats/OCR_2004_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebb9b730ed806d45e2ffa97407f05ec5561e3913b1e83acee3cf1c83d4dc4a0f
3
+ size 15180789
feats/OCR_2008_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078b0835f1a2799c36234c29c00e49144521b24d74ca056a8d089728bbca8813
3
+ size 13416943
feats/OCR_2009_IKEA_US.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e9da5a7091dc164388e7ff714f2a6a1f1acaf1c62e1395500fdebe24460239f
3
+ size 7292966
feats/us2010.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb78e8d96c43cddef0a015bf1a90bf7f60ae6507f09d2f793ab7cf8f27505e98
3
+ size 8259988
feats/us2011.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32ae4a38a4e948c0f56f768aee79856b5c4e66a804c2627e5881dc0924de4c7c
3
+ size 6565430
feats/us2012.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02149bd8935413f71892a8217d071e9f5859d7cc652172e527112987f599e2da
3
+ size 8103386
feats/us2013.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3377fedf82ca8371c0f41513d72ebafeaf4f820803fec0d37079229f840894
3
+ size 5398149
feats/us2014.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4cba44be476229745597c7325e5e54c5c228fd0d0a34dba4f0b7b0bf0052bfd
3
+ size 7082986
feats/us2015.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7beb0563014d449e85a4c2acdca0cba37c72e97fccf622432d28ab832c5c70fc
3
+ size 10220411
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy==1.20.1
2
+ Pillow==8.2.0
3
+ scikit-learn==0.24.1
4
+
5
+