File size: 2,766 Bytes
f7b5f82
df38f5d
 
f7b5f82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a3bfd1
f7b5f82
7a3bfd1
f7b5f82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from functools import lru_cache

import gradio as gr
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

@lru_cache(maxsize=1)
def _load_ner_pipeline():
    """Build the un-aggregated "ner" pipeline once and reuse it on later calls."""
    model_name = "browndw/docusco-bert"
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    return pipeline("ner", model=model, tokenizer=tokenizer)


def ner_tagging(text):
    """Tag each space-separated word of *text* with a token-level label.

    The text is lower-cased before it is passed to the model; the words in
    the returned list are taken from the original, un-lowered *text*.

    Args:
        text: The string to tag. Words are obtained with ``text.split(" ")``.

    Returns:
        A list of ``(segment, label)`` tuples. Every word is followed by a
        ``(" ", None)`` separator (including the last word). ``label`` is
        the entity string reported by the pipeline, or ``None`` when no
        tagged sub-token starts inside the word or the tag is ``"O"``.
        An empty *text* yields an empty list.
    """
    if not text:
        return []

    ner_results = _load_ner_pipeline()(text.lower())

    # The pipeline reports one entry per *tagged sub-token*, not one per word,
    # so results cannot be matched to words by list position. Match them by
    # character offset instead: remember the label of the first tagged
    # sub-token starting at each position.
    # NOTE(review): relies on the pipeline filling in "start" (fast tokenizers
    # do); when it is missing or None every word is left unlabelled.
    # NOTE(review): assumes text.lower() keeps the string length unchanged so
    # offsets in the lowered text line up with the original text.
    label_at = {}
    for result in ner_results:
        start = result.get("start")
        entity = result["entity"]
        if start is not None and entity != "O":
            label_at.setdefault(start, entity)

    output = []
    offset = 0
    for word in text.split(" "):
        word_end = offset + len(word)
        label = None
        # A word takes the label of the first tagged sub-token that begins
        # inside its character span.
        for position in range(offset, word_end):
            if position in label_at:
                label = label_at[position]
                break
        output.extend([(word, label), (" ", None)])
        offset = word_end + 1  # skip the single space that split() consumed

    return output

@lru_cache(maxsize=1)
def _load_entity_classifier():
    """Build the aggregated token-classification pipeline once and reuse it."""
    model_name = "browndw/docusco-bert"
    tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    return pipeline(
        "token-classification",
        aggregation_strategy="max",
        model=model,
        tokenizer=tokenizer,
    )


def get_entities(example):
    """Split *example* into labelled and unlabelled spans for highlighting.

    The text is lower-cased before classification; the returned segments are
    sliced out of the original *example* using the ``start``/``end``
    character offsets of each classified span.

    Args:
        example: The text to analyse.

    Returns:
        A list of ``(segment, label)`` tuples covering *example* from left to
        right. ``label`` is the span's ``entity_group`` for classified spans
        and ``None`` for the text before, between and after them. An empty
        *example* yields an empty list; text with no classified span yields
        a single ``(example, None)`` tuple.
    """
    if not example:
        return []

    # NOTE(review): assumes example.lower() keeps the string length unchanged
    # so the offsets reported for the lowered text index the original text.
    results = _load_entity_classifier()(example.lower())

    output = []
    cursor = 0  # index in `example` up to which text has already been emitted
    for item in results:
        start, end = item["start"], item["end"]
        if start > cursor:
            # Unlabelled text before the first span or between two spans.
            output.append((example[cursor:start], None))
        output.append((example[start:end], item["entity_group"]))
        cursor = max(cursor, end)

    if cursor < len(example):
        # Trailing unlabelled text (or the whole text when nothing was tagged).
        output.append((example[cursor:], None))

    return output

def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for the given *name* string."""
    greeting = "Hello " + name
    return greeting + "!!"

# Demo UI: a text box in, highlighted spans out, with get_entities doing the
# tagging. The two example rows are offered as one-click inputs.
iface = gr.Interface(
    fn=get_entities,
    inputs="text",
    outputs=['highlight'],
    examples=[
        ['Jaws is a splendidly shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it.'],
        ['In order to understand how cyclic variations in turbulence intensities affect cycle-to-cycle variations in combustion, in-cylinder flow fields and turbulence need to be studied more closely.'],
    ],
    title='DocuScope Demo (BERT)',
    description='This is one of a family of models trained on DocuScope. Click on one of the examples below and SUBMIT. Be sure to CLEAR the output before tagging a new submission. You can also enter your own text.',
)
# Launch the app as soon as this module is executed.
iface.launch()