Michael-Geis committed on
Commit
93058e0
1 Parent(s): 7af8946

upgraded interface, model unchanged

Browse files
Files changed (1) hide show
  1. app.py +85 -73
app.py CHANGED
@@ -13,105 +13,117 @@ def parse_id(input_id):
13
  result = next(search.results())
14
  raw_categories = result.categories
15
  title = result.title
 
16
  subject_tags = ", ".join(
17
  sorted(
18
  [subject_dict[tag] for tag in raw_categories if tag in subject_dict.keys()]
19
  )
20
  )
21
 
22
- return (title, subject_tags)
23
 
24
 
25
- def parse_title(input_title):
26
- query_title = input_title.replace(" ", "\%20")
27
- search = arxiv.Search(
28
- query=f"ti:\%22{query_title}\%22",
29
- sort_by=arxiv.SortCriterion.Relevance,
30
- sort_order=arxiv.SortOrder.Descending,
31
- max_results=1,
32
- )
33
- result = next(search.results())
34
- raw_categories = result.categories
35
- title = result.title
36
 
37
- with open("./data/arxiv-label-dict.json", "r") as file:
38
- subject_dict = json.loads(file.read())
39
 
40
- subject_tags = ", ".join(
41
- sorted(
42
- [subject_dict[tag] for tag in raw_categories if tag in subject_dict.keys()]
43
- )
44
- )
45
 
46
- return (title, subject_tags)
47
 
48
 
49
  def outputs_from_id(input_id, threshold_probability):
50
- title, true_tags = parse_id(input_id)
51
  predicted_tags = predict_from_text(title, threshold_probability)
 
52
 
53
- return title, predicted_tags, true_tags
54
 
 
 
 
55
 
56
- def outputs_from_title(input_title, threshold_probability):
57
- title, true_tags = parse_title(input_title)
58
- predicted_tags = predict_from_text(title, threshold_probability)
59
-
60
- return title, predicted_tags, true_tags
61
 
62
 
63
  with gr.Blocks() as demo:
64
  gr.Markdown(
65
- "Predict the arXiv subject tags of a math article by its title using this demo."
 
 
 
 
 
66
  )
67
- with gr.Tab("Predict by id"):
68
- with gr.Row():
69
- id_input = gr.Textbox(label="Input ID")
70
- id_title = gr.Textbox(label="Title")
71
- id_predict = gr.Textbox(label="Predicted tags")
72
- id_true = gr.Textbox(label="True tags")
73
- id_button = gr.Button("Predict")
74
-
75
- threshold_probability = gr.Slider(minimum=0, maximum=1)
76
-
77
- gr.Examples(
78
- examples=[
79
- "1706.03762",
80
- "1709.07343",
81
- "2303.11559",
82
- "2107.05105",
83
- "1910.06441",
84
- ],
85
- inputs=id_input,
86
- )
87
-
88
- with gr.Tab("Predict by title"):
89
- with gr.Row():
90
- title_input = gr.Textbox(label="Input title")
91
- title_title = gr.Textbox(label="Title of closest match")
92
- title_predict = gr.Textbox(label="Predicted tags")
93
- title_true = gr.Textbox(label="True tags")
94
- title_button = gr.Button("Predict")
95
- gr.Examples(
96
- examples=[
97
- "Attention is all you need",
98
- "Etale cohomology of diamonds",
99
- "Stochastic Kahler geometry from random zeros to random metrics",
100
- "Scaling asymptotics for Szego kernels on Grauert tubes",
101
- "The Wave Trace and Birkhoff Billiards",
102
- ],
103
- inputs=title_input,
104
- )
 
 
 
 
 
 
 
105
 
106
  id_button.click(
107
  outputs_from_id,
108
  inputs=[id_input, threshold_probability],
109
- outputs=[id_title, id_predict, id_true],
110
- )
111
- title_button.click(
112
- outputs_from_title,
113
- inputs=[title_input, threshold_probability],
114
- outputs=[title_title, title_predict, title_true],
115
  )
 
 
 
 
 
116
 
117
- demo.launch()
 
13
  result = next(search.results())
14
  raw_categories = result.categories
15
  title = result.title
16
+ abstract = result.summary
17
  subject_tags = ", ".join(
18
  sorted(
19
  [subject_dict[tag] for tag in raw_categories if tag in subject_dict.keys()]
20
  )
21
  )
22
 
23
+ return (title, subject_tags, abstract)
24
 
25
 
26
+ # def parse_title(input_title):
27
+ # query_title = input_title.replace(" ", "\%20")
28
+ # search = arxiv.Search(
29
+ # query=f"ti:\%22{query_title}\%22",
30
+ # sort_by=arxiv.SortCriterion.Relevance,
31
+ # sort_order=arxiv.SortOrder.Descending,
32
+ # max_results=1,
33
+ # )
34
+ # result = next(search.results())
35
+ # raw_categories = result.categories
36
+ # title = result.title
37
 
38
+ # with open("./data/arxiv-label-dict.json", "r") as file:
39
+ # subject_dict = json.loads(file.read())
40
 
41
+ # subject_tags = ", ".join(
42
+ # sorted(
43
+ # [subject_dict[tag] for tag in raw_categories if tag in subject_dict.keys()]
44
+ # )
45
+ # )
46
 
47
+ # return (title, subject_tags)
48
 
49
 
50
  def outputs_from_id(input_id, threshold_probability):
51
+ title, true_tags, abstract = parse_id(input_id)
52
  predicted_tags = predict_from_text(title, threshold_probability)
53
+ return title, predicted_tags, true_tags, abstract
54
 
 
55
 
56
+ # def outputs_from_title(input_title, threshold_probability):
57
+ # title, true_tags = parse_title(input_title)
58
+ # predicted_tags = predict_from_text(title, threshold_probability)
59
 
60
+ # return title, predicted_tags, true_tags
 
 
 
 
61
 
62
 
63
  with gr.Blocks() as demo:
64
  gr.Markdown(
65
+ """# <center> arXiv Subject Classifier
66
+ This demo labels an arXiv math publication with appropriate subject categories based on its title. To get subject tag predictions for an article, input its unique
67
+ arXiv ID (the 9 digit number appearing at the end of an article's URL). You can find the ID number of an article by searching for it on <a href="arxiv.org">arxiv.org</a>.
68
+ For each possible tag, the model calculates the probability that the tag is applicable. Use the slider bar to set the minimum probability required for a tag to be predicted.
69
+ For instance, when the slider is set to 0.50, only tags with a predicted probability of relevance over 50% will be suggested.
70
+ </center>"""
71
  )
72
+ with gr.Row():
73
+ id_input = gr.Textbox(label="arXiv ID:", placeholder="XXXX.XXXXX")
74
+ id_title = gr.Textbox(label="Title of Input Article:")
75
+ id_predict = gr.Textbox(label="Predicted Subject Tags:")
76
+ id_true = gr.Textbox(label="Actual Subject Tags:")
77
+ threshold_probability = gr.Slider(
78
+ label="Minimum Confidence For Tag Prediction:", value=0.5, minimum=0, maximum=1
79
+ )
80
+ id_button = gr.Button("Get Predicted Subject Tags")
81
+ gr.Examples(
82
+ label="Try These Example Articles:",
83
+ examples=[
84
+ "1709.07343",
85
+ "2107.05105",
86
+ "1910.06441",
87
+ "2210.09246",
88
+ "2111.03188",
89
+ "1811.07007",
90
+ "2303.15347",
91
+ "2210.04580",
92
+ "1909.06032",
93
+ "2107.13138",
94
+ ],
95
+ inputs=id_input,
96
+ )
97
+ gr.Markdown("### Article Abstract:")
98
+ article_abstract = gr.HTML()
99
+
100
+ # with gr.Tab("Predict by title"):
101
+ # with gr.Row():
102
+ # title_input = gr.Textbox(label="Input title")
103
+ # title_title = gr.Textbox(label="Title of closest match")
104
+ # title_predict = gr.Textbox(label="Predicted tags")
105
+ # title_true = gr.Textbox(label="True tags")
106
+ # title_button = gr.Button("Predict")
107
+ # gr.Examples(
108
+ # examples=[
109
+ # "Attention is all you need",
110
+ # "Etale cohomology of diamonds",
111
+ # "Stochastic Kahler geometry from random zeros to random metrics",
112
+ # "Scaling asymptotics for Szego kernels on Grauert tubes",
113
+ # "The Wave Trace and Birkhoff Billiards",
114
+ # ],
115
+ # inputs=title_input,
116
+ # )
117
 
118
  id_button.click(
119
  outputs_from_id,
120
  inputs=[id_input, threshold_probability],
121
+ outputs=[id_title, id_predict, id_true, article_abstract],
 
 
 
 
 
122
  )
123
+ # title_button.click(
124
+ # outputs_from_title,
125
+ # inputs=[title_input, threshold_probability],
126
+ # outputs=[title_title, title_predict, title_true],
127
+ # )
128
 
129
+ demo.launch(inbrowser=True)