qtpi committed on
Commit
a47277e
·
verified ·
1 Parent(s): 37f61a9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +259 -0
app.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import bibtexparser
2
+ import json
3
+
4
class ConsistencyHandler:
    """Report missing and inconsistently used fields across BibTeX entries.

    For every supported entry type the handler keeps three pieces of state:

    * ``<type>ImportantFields`` - fields each entry of that type is expected
      to carry; absent ones are reported as "important" errors.
    * ``<type>ExtraFields`` - the optional fields found on the first entry of
      that type. They act as the baseline later entries are compared with.
    * a counter of how many entries of that type have been checked so far.

    Every ``check*`` method returns a pair of sets
    ``(importantErrorFields, consistencyErrorFields)``.
    """

    def __init__(self):
        # Baselines of optional fields; filled in by the first entry of each type.
        self.articleExtraFields = set()
        self.inbookExtraFields = set()
        self.techreportExtraFields = set()
        self.inproceedingsExtraFields = set()

        # Fields every entry of the given type is expected to provide.
        self.articleImportantFields = {'year', 'author', 'title', 'journal', 'volume', 'pages'}
        self.inbookImportantFields = {'author', 'year', 'title', 'booktitle', 'publisher'}
        self.techreportImportantFields = {'title', 'author', 'institution', 'year', 'number'}
        self.inproceedingsImportantFields = {'author', 'title', 'booktitle', 'year', 'publisher', 'pages'}

        # Number of entries of each type checked so far. The attribute names
        # (including the lower-case ``inproceedingscount``) are kept as they
        # were so existing readers of these attributes keep working.
        self.articleCount = 0
        self.inbookCount = 0
        self.techreportCount = 0
        self.inproceedingscount = 0

    def _check(self, entry: dict, metadata: set, important: set, extra: set, counter: str):
        """Run the shared important-field and consistency checks for one entry.

        Args:
            entry: the entry as a mapping of field name to value.
            metadata: field names ignored by both checks.
            important: field names the entry is expected to contain.
            extra: baseline set of optional fields for this entry type; it is
                updated in place when the first entry of the type is seen.
            counter: name of the attribute counting entries of this type.

        Returns:
            ``(missing_important, inconsistent)`` as two sets of field names.
        """
        fields = set(entry) - metadata
        missing_important = important - fields
        optional = fields - important

        if getattr(self, counter) == 0:
            # The first entry of a type defines which optional fields are expected.
            extra.update(optional)

        # Inconsistent fields are those present here but not in the baseline,
        # plus baseline fields this entry lacks. The second half used to be
        # computed with set.union() whose result was thrown away, so missing
        # baseline fields were never reported.
        inconsistent = (optional - extra) | (extra - optional)

        setattr(self, counter, getattr(self, counter) + 1)
        return missing_important, inconsistent

    def checkArticles(self, entry: dict):
        """Check an ``article`` entry; ID, number, ENTRYTYPE and date are ignored.

        Returns:
            ``(importantErrorFields, consistencyErrorFields)`` as sets.
        """
        return self._check(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date'},
            self.articleImportantFields,
            self.articleExtraFields,
            'articleCount',
        )

    def checkInbook(self, entry: dict):
        """Check an ``inbook`` entry; ID, number, ENTRYTYPE, date and doi are ignored.

        Returns:
            ``(importantErrorFields, consistencyErrorFields)`` as sets.
        """
        return self._check(
            entry,
            {'ID', 'number', 'ENTRYTYPE', 'date', 'doi'},
            self.inbookImportantFields,
            self.inbookExtraFields,
            'inbookCount',
        )

    def checkTechreport(self, entry: dict):
        """Check a ``techreport`` entry; ID, ENTRYTYPE and date are ignored.

        Returns:
            ``(importantErrorFields, consistencyErrorFields)`` as sets.
        """
        return self._check(
            entry,
            {'ID', 'ENTRYTYPE', 'date'},
            self.techreportImportantFields,
            self.techreportExtraFields,
            'techreportCount',
        )

    def checkInproceedings(self, entry: dict):
        """Check an ``inproceedings`` entry; ID, ENTRYTYPE, date and series are ignored.

        Returns:
            ``(importantErrorFields, consistencyErrorFields)`` as sets.
        """
        return self._check(
            entry,
            {'ID', 'ENTRYTYPE', 'date', 'series'},
            self.inproceedingsImportantFields,
            self.inproceedingsExtraFields,
            'inproceedingscount',
        )
168
+
169
def checkMisc():
    """Placeholder for validating ``misc`` entries; currently does nothing."""
    # NOTE(review): indentation is lost in this view, so it is unclear whether
    # this stub was meant to live inside ConsistencyHandler - confirm.
    return None
171
+
172
+
173
+
174
def bibtex_to_dict_list(bibtex_string):
    """Parse ``bibtex_string`` and return its entries as a list of plain dicts."""
    parsed = bibtexparser.loads(bibtex_string)

    # Copy each parsed entry into its own dict, in the order they were parsed.
    records = []
    for record in parsed.entries:
        records.append(dict(record))
    return records
180
+
181
+
182
def referenceErrorParser(bibtex_string: str):
    """Validate every entry of a BibTeX string and return the result as JSON.

    Entries of type ``article``, ``inbook``, ``techreport`` and
    ``inproceedings`` gain two keys: ``asterikError`` (important fields that
    are missing) and ``consistensyError`` (optional fields used
    inconsistently with the first entry of the same type). ``misc`` entries
    and entries of any other type are returned unchanged; for the latter a
    message is printed.

    Args:
        bibtex_string: raw BibTeX text; ``None`` or an empty string yields an
            empty list.

    Returns:
        A JSON string encoding the list of entry dicts.
    """
    entries = bibtex_to_dict_list(bibtex_string or "")

    # The original body used a name ``checker`` that is never defined in this
    # file. A fresh handler per call also keeps the consistency baseline of
    # one bibliography from leaking into the next request.
    checker = ConsistencyHandler()
    checks = {
        "article": checker.checkArticles,
        "inbook": checker.checkInbook,
        "techreport": checker.checkTechreport,
        "inproceedings": checker.checkInproceedings,
    }

    for entry in entries:
        entry_type = entry['ENTRYTYPE']

        if entry_type == "misc":
            # misc entries are not validated yet; they pass through untouched.
            continue

        check = checks.get(entry_type)
        if check is None:
            print(f"{entry_type} has to be added for processing.")
            continue

        imp, cons = check(entry)
        # Sorted so the JSON output does not depend on set iteration order.
        entry['asterikError'] = sorted(imp)
        entry['consistensyError'] = sorted(cons)

    return json.dumps(entries)
229
+
230
+
231
def create_interface():
    """Build and return the Blocks UI: input area, button, and results area."""
    # NOTE(review): ``gr`` is not imported anywhere in the visible file;
    # presumably ``import gradio as gr`` is missing at the top - confirm.
    with gr.Blocks(title="BibTex Analyser") as demo:
        gr.Markdown("# Reference Validation")

        # Each widget sits in its own row, in top-to-bottom order.
        with gr.Row():
            bibtex_box = gr.TextArea()

        with gr.Row():
            run_button = gr.Button("Get Corrections")

        with gr.Row():
            report_box = gr.TextArea(label="Analysis Results", show_label=True)

        # Clicking the button sends the input text through the parser and
        # shows the returned JSON string in the results area.
        run_button.click(
            fn=referenceErrorParser,
            inputs=bibtex_box,
            outputs=report_box,
        )

    return demo
254
+
255
if __name__ == "__main__":
    # Build the UI, then serve it without requesting a public share link.
    launch_options = {"share": False}
    interface = create_interface()
    interface.launch(**launch_options)