browndw committed on
Commit f7b5f82
1 Parent(s): a8abd7d

Update app.py

Files changed (1)
  1. app.py +67 -9
app.py CHANGED
@@ -1,13 +1,71 @@
+from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
 import gradio as gr
 
-def ner(text):
-    api = gr.Interface.load("browndw/docusco-bert", src='models')
-    spans = api(text)
-    replaced_spans = [(key, None) if value=='O' else (key, value) for (key, value) in spans]
-    return replaced_spans
-
-interface = gr.Interface(ner, inputs=gr.Textbox(label="Input", value="Autophagy maintains tumour growth through circulating arginine. Acute, \
-    whole-body deletion of the essential autophagy gene Atg7 in adult mice causes a systemic metabolic defect that manifests as starvation \
-    intolerance and gradual loss of white adipose tissue, liver glycogen and muscle mass. Cancer cells also benefit from autophagy."), outputs="highlightedtext")
-
-interface.launch()
+def ner_tagging(text):
+    model_name = "browndw/docusco-bert"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
+
+    model = AutoModelForTokenClassification.from_pretrained(model_name)
+    nlp = pipeline("ner", model=model, tokenizer=tokenizer)
+    ner_results = nlp(text.lower())
+
+    output = []
+
+    text_2 = text.split(" ")
+
+    for i in range(len(text_2)):
+        ent = ner_results[i]["entity"]
+        if ent != "O":
+            output.extend([(text_2[i], ent), (" ", None)])
+        else:
+            output.extend([(text_2[i], None), (" ", None)])
+
+    return output
+
+def get_entities(example):
+    model_name = "browndw/docusco-bert"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
+
+    model = AutoModelForTokenClassification.from_pretrained(model_name)
+    token_classifier = pipeline("token-classification", aggregation_strategy="max", model=model, tokenizer=tokenizer)
+    results = token_classifier(example.lower())
+
+    output = []
+
+    i=0
+    prev_item = None
+    next_item = None
+    while i < (len(results)):
+        item = results[i]
+        p=i-1
+        n=i+1
+
+        if p > 0:
+            prev_item = results[p]
+
+
+        if n<(len(results)):
+            next_item = results[n]
+
+
+        if (i==0):
+            if item["start"]>0:
+                output.extend([(example[0:item["start"]], None)])
+        output.extend([(example[item["start"]:item["end"]], item["entity_group"])])
+        if (next_item!=None):
+            ##check the span
+            if(item["end"]!=next_item["start"]):
+                output.extend([(example[item["end"]:next_item["start"]], None)])
+        i=i+1
+
+    if item["end"] < len(example):
+        output.extend([(example[item["end"]:len(example)], None)])
+
+    return output
+
+def greet(name):
+    return "Hello " + name + "!!"
+
+iface = gr.Interface(fn=get_entities, inputs="text", outputs=['highlight'], examples=[['Jaws is a splendidly shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it.'],
+    ['In order to understand how cyclic variations in turbulence intensities affect cycle-to-cycle variations in combustion, in-cylinder flow fields and turbulence need to be studied more closely.']], title='DocuScope Demo (BERT)', description = 'This is one of a family of models trained on DocuScope. Click on one of the examples below and SUBMIT. Be sure to CLEAR the output before tagging a new submission. You can also enter your own text.')
+iface.launch()
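
For context on the change: the aggregated token-classification pipeline (aggregation_strategy="max") returns one dict per entity span, with entity_group, score, word, start, and end keys, and get_entities walks those character offsets to interleave tagged spans with untagged gap text in the (text, label) format that Gradio's highlight output expects. Below is a minimal sketch of that mapping, with a hard-coded results list standing in for a model call; the spans_to_highlight helper and the label names are illustrative only, not part of the committed app or the DocuScope tagset.

# Sketch: map aggregated token-classification output onto Gradio highlight tuples.
# The hard-coded `results` list mimics what
# pipeline("token-classification", aggregation_strategy="max") would return;
# the labels are placeholders, not actual DocuScope categories.

example = "Jaws is a splendidly shrewd cinematic equation."

results = [
    {"entity_group": "Narrative", "start": 0, "end": 4},      # "Jaws"
    {"entity_group": "Description", "start": 10, "end": 27},  # "splendidly shrewd"
]

def spans_to_highlight(example, results):
    """Interleave labeled spans with untagged gap text, as get_entities does."""
    output = []
    cursor = 0
    for item in results:
        if item["start"] > cursor:                 # untagged text before this span
            output.append((example[cursor:item["start"]], None))
        output.append((example[item["start"]:item["end"]], item["entity_group"]))
        cursor = item["end"]
    if cursor < len(example):                      # trailing untagged text
        output.append((example[cursor:], None))
    return output

print(spans_to_highlight(example, results))
# [('Jaws', 'Narrative'), (' is a ', None),
#  ('splendidly shrewd', 'Description'), (' cinematic equation.', None)]

The committed get_entities performs the same interleaving with an index loop and explicit prev/next lookups; the cursor-based version above is only a compact restatement for reading the diff, not a proposed change.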