Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- README.md +5 -6
- app.py +474 -0
- requirements.txt +6 -0
README.md
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 5.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Gravitee PII
|
| 3 |
+
emoji: 💻
|
| 4 |
+
colorFrom: pink
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.31.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: apache-2.0
|
| 11 |
---
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,474 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ruff: noqa: E501, INP001, FBT001
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Dict, List, Tuple
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
+
import torch
|
| 9 |
+
from optimum.onnxruntime import ORTModelForTokenClassification
|
| 10 |
+
from transformers import AutoTokenizer
|
| 11 |
+
|
| 12 |
+
# Hugging Face model
|
| 13 |
+
MODEL_NAME = "gravitee-io/bert-small-pii-detection"
|
| 14 |
+
|
| 15 |
+
def load_model() -> Tuple[ORTModelForTokenClassification, AutoTokenizer]:
    """Load the ONNX token-classification model and tokenizer for ``MODEL_NAME``.

    The quantized export (``model.quant.onnx``) is tried first. If loading it
    fails for any ordinary reason, the regular export (``model.onnx``) is
    loaded instead.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        ValueError: If the tokenizer or the fallback model cannot be loaded.
            The underlying exception is chained as ``__cause__``.
    """
    import os

    # Read the access token once; it is None when the variable is unset.
    token = os.getenv("HUGGINGFACE_TOKEN")

    try:
        # Load tokenizer from Hugging Face
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=token)

        # Try to load quantized model first, fallback to regular model
        try:
            model = ORTModelForTokenClassification.from_pretrained(
                MODEL_NAME,
                file_name="model.quant.onnx",
                token=token,
            )
        except Exception as quant_error:
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still
            # propagate while any loading failure triggers the fallback.
            print(f"Quantized model unavailable ({quant_error}); falling back to model.onnx")
            model = ORTModelForTokenClassification.from_pretrained(
                MODEL_NAME,
                file_name="model.onnx",
                token=token,
            )

        return model, tokenizer
    except Exception as e:
        # Keep the original traceback attached for debugging.
        raise ValueError(f"Could not load model {MODEL_NAME}: {e}") from e
|
| 43 |
+
|
| 44 |
+
def convert_predictions_to_spans(predictions: List[int], offset_mapping: List[Tuple[int, int]], id2label: Dict[int, str], text: str) -> List[Dict]:
    """Convert token-level predictions to entity spans using BIO tagging.

    Args:
        predictions: One predicted label id per token.
        offset_mapping: ``(start, end)`` character offsets per token, aligned
            with ``predictions``. Tokens with offsets ``(0, 0)`` are treated
            as special tokens and skipped.
        id2label: Mapping from label id to a BIO tag such as ``"B-PERSON"``,
            ``"I-PERSON"`` or ``"O"``.
        text: The text the offsets refer to.

    Returns:
        A list of dicts with keys ``start``, ``end``, ``label`` (lower-cased
        entity type without the ``B-``/``I-`` prefix) and ``text`` (the
        covered slice of ``text``), in order of appearance.

    Decoding is lenient: an ``I-`` tag that does not continue the entity in
    progress (different type, or no entity open) closes that entity and starts
    a new one, so a stray tag can never stretch an entity over tokens of
    another type.
    """
    spans = []
    current_entity = None

    for pred, (start, end) in zip(predictions, offset_mapping):
        if start == end == 0:  # Skip special tokens
            continue

        label = id2label[pred]

        continues_current = (
            label.startswith("I-")
            and current_entity is not None
            and label[2:].lower() == current_entity["label"]
        )
        if continues_current:
            # Extend the open entity up to the end of this token.
            current_entity["end"] = end
            current_entity["text"] = text[current_entity["start"]:end]
            continue

        # Every other tag ends whatever entity was in progress.
        if current_entity:
            spans.append(current_entity)
            current_entity = None

        # "B-" always opens an entity; a non-continuing "I-" is treated the
        # same way instead of being silently dropped.
        if label.startswith(("B-", "I-")):
            current_entity = {
                "start": start,
                "end": end,
                "label": label[2:].lower(),
                "text": text[start:end],
            }

    # Don't forget the last entity
    if current_entity:
        spans.append(current_entity)

    return spans
|
| 80 |
+
|
| 81 |
+
# Load the model once at import time; get_model_info() hands out these objects.
print("Loading model from Hugging Face...")
_model, _tokenizer = load_model()
print(f"Model {MODEL_NAME} loaded successfully!")


def get_model_info():
    """Return the ``(model, tokenizer)`` pair loaded at module import."""
    loaded = (_model, _tokenizer)
    return loaded
|
| 89 |
+
|
| 90 |
+
def predict_entities(text: str, threshold: float) -> Dict:
    """Predict PII entities in ``text`` with the loaded ONNX model.

    Args:
        text: Input text. The tokenizer is called with ``truncation=True`` and
            ``max_length=512``, so tokens beyond that limit are not analysed.
        threshold: Minimum softmax probability for a token's predicted label
            to be kept; tokens scoring below it are treated as "O".

    Returns:
        A dict with ``"text"`` and ``"entities"`` (a list of dicts with keys
        ``entity``, ``word``, ``start``, ``end``, ``score``). If anything
        fails, ``"entities"`` is empty and an ``"error"`` key carries the
        exception message.
    """
    try:
        model, tokenizer = get_model_info()

        # Tokenize input text
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True,
                           return_offsets_mapping=True, max_length=512)

        # The offsets are not a model input, so remove them before inference.
        offset_mapping = inputs.pop("offset_mapping")[0].tolist()

        # Run inference
        with torch.no_grad():
            outputs = model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # One reduction yields both the best score and its class id.
            best_scores, best_ids = probabilities.max(dim=-1)

        id2label = model.config.id2label
        # Look up the id of the "O" tag instead of assuming it is 0;
        # fall back to 0 if the label map has no "O" entry.
        outside_id = next((idx for idx, name in id2label.items() if name == "O"), 0)

        # Filter by threshold: low-confidence tokens become "O".
        filtered_predictions = [
            pred if score >= threshold else outside_id
            for pred, score in zip(best_ids[0].tolist(), best_scores[0].tolist())
        ]

        # Convert to spans
        spans = convert_predictions_to_spans(filtered_predictions, offset_mapping, id2label, text)

        # Convert to gradio format
        entities = [
            {
                "entity": span["label"],
                "word": span["text"],
                "start": span["start"],
                "end": span["end"],
                "score": 1.0,  # We already filtered by threshold
            }
            for span in spans
        ]

        return {
            "text": text,
            "entities": entities,
        }

    except Exception as e:
        # Callers detect failure through the "error" key.
        return {
            "text": text,
            "entities": [],
            "error": str(e),
        }
|
| 145 |
+
|
| 146 |
+
def format_text(text: str, format_type: str) -> str:
    """Re-layout ``text`` for readability according to ``format_type``.

    Formatting is best-effort: if the text cannot be parsed as the requested
    type, it is returned unchanged.

    Args:
        text: The raw input text.
        format_type: One of ``"None"``, ``"JSON"``, ``"XML"``, ``"HTML"`` or
            ``"SQL"``. Any other value returns ``text`` unchanged.

    Returns:
        The formatted text, or ``text`` itself when no formatting applies.
    """
    if format_type == "None":
        return text
    elif format_type == "JSON":
        import json

        try:
            parsed = json.loads(text)
            # ensure_ascii=False keeps non-ASCII characters (e.g. accented
            # names) readable instead of turning them into \uXXXX escapes.
            return json.dumps(parsed, indent=2, ensure_ascii=False)
        except (TypeError, ValueError, RecursionError):
            return text
    elif format_type == "XML":
        try:
            import xml.etree.ElementTree as ET
            from xml.dom import minidom

            # Remove b' prefix if present
            clean_text = text
            if text.startswith("b'") and text.endswith("'"):
                clean_text = text[2:-1]

            # NOTE(review): the stdlib XML parsers are not hardened against
            # maliciously constructed documents; consider a hardened parser
            # if this ever handles untrusted bulk input.
            root = ET.fromstring(clean_text)
            rough_string = ET.tostring(root, 'unicode')
            reparsed = minidom.parseString(rough_string)
            return reparsed.toprettyxml(indent=" ")
        except Exception:
            # Best-effort: any parser failure falls back to the raw text.
            return text
    elif format_type == "HTML":
        try:
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(text, 'html.parser')
            return soup.prettify()
        except Exception:
            # Fallback (e.g. bs4 not installed): simple HTML formatting
            formatted = text.replace('><', '>\n<')
            formatted = formatted.replace('<tr>', '\n <tr>')
            formatted = formatted.replace('<td>', '\n <td>')
            formatted = formatted.replace('<th>', '\n <th>')
            return formatted
    elif format_type == "SQL":
        import re

        # Simple SQL formatting: start a new line before each clause keyword.
        # Matching is case-insensitive and the statement is NOT upper-cased,
        # so identifiers and literal values keep their original spelling.
        clause_breaks = (
            ("FROM", "\n"),
            ("WHERE", "\n"),
            ("AND", "\n "),
            ("OR", "\n "),
            ("ORDER BY", "\n"),
            ("GROUP BY", "\n"),
            ("HAVING", "\n"),
            ("LIMIT", "\n"),
        )
        formatted = text
        for keyword, lead in clause_breaks:
            formatted = re.sub(
                rf" ({keyword}) ",
                lambda match, lead=lead: f"{lead}{match.group(1)} ",
                formatted,
                flags=re.IGNORECASE,
            )
        return formatted
    else:
        return text
|
| 201 |
+
|
| 202 |
+
def ner(text: str, threshold: float, data_type: str = None, format_input: bool = False) -> List[Tuple[str, str]]:
    """Run PII detection and build the segments shown in the Gradio output.

    Args:
        text: Raw input text.
        threshold: Confidence threshold passed to ``predict_entities``.
        data_type: Data type of ``text`` (e.g. ``"JSON"``, ``"SQL"``). May be
            ``None``; ``"Natural Text"`` is never formatted.
        format_input: When true and ``data_type`` names a formattable type,
            the text is pretty-printed with ``format_text`` before analysis.

    Returns:
        A list of ``(segment, label)`` tuples covering the analysed text in
        order. ``label`` is the upper-cased entity type, or ``None`` for text
        outside any entity. If prediction fails, the error is printed and the
        whole text is returned as one unlabelled segment.
    """
    # Format text if requested; the same text is both analysed and displayed,
    # so entity offsets stay aligned with what the user sees.
    should_format = format_input and data_type and data_type != "Natural Text"
    display_text = format_text(text, data_type) if should_format else text
    result = predict_entities(display_text, threshold)

    if "error" in result:
        # Surface the failure in the logs rather than discarding it; otherwise
        # an error is indistinguishable from "no PII found".
        print(f"Entity prediction failed: {result['error']}")
        return [(display_text, None)]

    # Convert to highlighted text format
    highlighted = []
    last_end = 0

    for entity in sorted(result["entities"], key=lambda x: x["start"]):
        # Add text before entity
        if entity["start"] > last_end:
            highlighted.append((display_text[last_end:entity["start"]], None))

        # Add entity
        highlighted.append((entity["word"], entity["entity"].upper()))
        last_end = entity["end"]

    # Add remaining text
    if last_end < len(display_text):
        highlighted.append((display_text[last_end:], None))

    return highlighted
|
| 234 |
+
|
| 235 |
+
# Example texts - longer, more complex samples starting with Mixed PII
|
| 236 |
+
examples = [
|
| 237 |
+
# Natural Text examples (longer, more comprehensive)
|
| 238 |
+
[
|
| 239 |
+
"Dr. Sarah Martinez, age 34, works as a Senior Data Scientist at TechCorp International. Her employee ID is TC-DS-5591 and she joined the company on 2019-03-15. Sarah lives at 1247 Oak Avenue, Apartment 5B, Portland, Oregon 97205. Her work phone is 503-555-0147 and personal email is sarah.martinez@personalmail.com. For banking, she uses account TCBK89012345678901 at First National Bank. Her driver's license number is OR-DL-M8829134 and her social security number is 123-45-6789. She recently traveled to London using passport US-P-543216789 and her frequent flyer number with Delta Airlines is DL987654321.",
|
| 240 |
+
0.35,
|
| 241 |
+
"Natural Text"
|
| 242 |
+
],
|
| 243 |
+
[
|
| 244 |
+
"The customer database contains the following entries: Michael Chen (DOB: 1985-07-22, age 38) residing at 789 Pine Street, Suite 200, San Francisco, CA 94102. His contact details include phone 415-555-0298 and email michael.chen@businessmail.org. Financial information: Chase Bank account CH-5567889012345678, credit card 4532-1234-5678-9012 (exp: 08/2027, CVV: 451). Professional details: Software Engineer at InnovateTech LLC, employee ID IT-SE-7793, salary $125,000. Government IDs include SSN 987-65-4321, California driver's license CA-DL-B1234567, and passport number US-578912345. His device MAC address is aa:bb:cc:dd:ee:ff and IMEI 358240051111110.",
|
| 245 |
+
0.35,
|
| 246 |
+
"Natural Text"
|
| 247 |
+
],
|
| 248 |
+
[
|
| 249 |
+
"Security incident report for Lisa Thompson (ID: LT-2023-001): On 2023-11-15 at 14:30 PST, user accessed system from IP address 192.168.1.100 using API key api_key_abc123xyz789. Employee details: Lisa Thompson, age 29, title Senior Security Analyst, department Cybersecurity, hired 2021-09-01. Home address: 456 Maple Drive, Unit 3C, Seattle, WA 98109. Contact: phone 206-555-0189, work email lisa.thompson@company.com. Banking: Wells Fargo account WF-4455667788990011, routing number 021000021. Government IDs: SSN 555-44-3333, WA driver's license WA-DL-THOMP567, passport US-890123456. Vehicle: 2020 Honda Civic, license plate WA-ABC1234, VIN 1HGBH41JXMN109186.",
|
| 250 |
+
0.35,
|
| 251 |
+
"Natural Text"
|
| 252 |
+
],
|
| 253 |
+
[
|
| 254 |
+
"Patient intake form: Dr. Robert Kim (Medical License: MD-12345-WA), age 42, practices at Seattle General Hospital, 1500 Medical Center Drive, Seattle, WA 98101. Phone: 206-555-0234, fax: 206-555-0235, email: dr.kim@seattlegeneral.org. Patient information: Jennifer Walsh, DOB 1990-12-03 (age 33), SSN 111-22-3333, address 2100 Broadway Ave, Apt 15D, Seattle, WA 98122. Insurance: Blue Cross Blue Shield, policy BC-556677889900, group 12345. Emergency contact: Mark Walsh (spouse), phone 206-555-0167. Medical history includes prescription for Medication XYZ, DEA number DR1234567. Appointment scheduled for 2024-01-20 at 10:00 AM, confirmation code CONF-789456.",
|
| 255 |
+
0.35,
|
| 256 |
+
"Natural Text"
|
| 257 |
+
],
|
| 258 |
+
# HTML samples (longer, more complex)
|
| 259 |
+
[
|
| 260 |
+
'<table border=\"1\"><tr><th>api_key</th><td>PmtrSlgEzO PmtrSlgEzO br</td></tr><tr><th>page</th><td>73595</td></tr><tr><th>max_primary_general_date</th><td>1992-09-22</td></tr><tr><th>sort</th><td>RqJu PZwhjrbcS</td></tr><tr><th>election_type_id</th><td>PFTZDOBxIl</td></tr><tr><th>election_district</th><td>XNc7rk</td></tr><tr><th>max_election_date</th><td>2007-02-15</td></tr><tr><th>sort_null_only</th><td>False</td></tr><tr><th>min_election_date</th><td>2014-06-27</td></tr><tr><th>per_page</th><td>62971536</td></tr><tr><th>min_primary_general_date</th><td>1982-03-22</td></tr><tr><th>election_state</th><td>xzJis</td></tr><tr><th>election_party</th><td>lHUet 1vtAg5J lHUet</td></tr><tr><th>min_update_date</th><td>1984-07-25</td></tr><tr><th>sort_nulls_last</th><td>False</td></tr><tr><th>max_create_date</th><td>1980-01-02</td></tr><tr><th>max_update_date</th><td>1997-11-10</td></tr><tr><th>sort_hide_null</th><td>True</td></tr><tr><th>election_year</th><td>hNf2nYGMbX</td></tr><tr><th>min_create_date</th><td>2000-11-25</td></tr></table>',
|
| 261 |
+
0.35,
|
| 262 |
+
"HTML"
|
| 263 |
+
],
|
| 264 |
+
[
|
| 265 |
+
'<table border=\"1\"><tr><th>religion</th><td>Christianity</td></tr><tr><th>api-version</th><td>dCwMNqR</td></tr><tr><th>to_contact</th><td>VirginiaTBarrett@fleckens.hu</td></tr><tr><th>spot</th><td>6765 2278 Norma Avenue Mcbee , SC 33987</td></tr><tr><th>endTime</th><td>2022-09-07 14:17:30</td></tr><tr><th>startTime</th><td>2001-09-20 20:45:43</td></tr><tr><th>facility</th><td>Apt. 074</td></tr><tr><th>vocation</th><td>Lay-out worker</td></tr><tr><th>alley</th><td>1697 2496 White Pine Lane Apt. 904</td></tr></table>',
|
| 266 |
+
0.35,
|
| 267 |
+
"HTML"
|
| 268 |
+
],
|
| 269 |
+
[
|
| 270 |
+
'<table border=\"1\"><tr><th>imei</th><td>25-894407-891989-9</td></tr><tr><th>post-code</th><td>2142</td></tr><tr><th>startTime</th><td>2001-06-20 10:16:33</td></tr><tr><th>timeGrain</th><td></td></tr><tr><th>longitude</th><td>-70.990988</td></tr><tr><th>latitude</th><td>42.32382</td></tr><tr><th>endTime</th><td>1971-08-20 19:09:13</td></tr><tr><th>api-version</th><td>u zNS zNS</td></tr><tr><th>key store password</th><td>teiy1oD5ie</td></tr><tr><th>bank account</th><td>FILW85959012098599</td></tr></table>',
|
| 271 |
+
0.35,
|
| 272 |
+
"HTML"
|
| 273 |
+
],
|
| 274 |
+
[
|
| 275 |
+
'<table border=\"1\"><tr><th>country</th><td>United States</td></tr><tr><th>address</th><td>0133 2669 Locust Street Suite 601 Fort Gaines United States</td></tr><tr><th>project</th><td></td></tr><tr><th>nation_plural</th><td>vietnameses</td></tr><tr><th>urban__area</th><td>Buena Park</td></tr><tr><th>region</th><td>California</td></tr><tr><th>street</th><td>01474 3910 Melody Lane Apt. 383</td></tr><tr><th>phone-country-code</th><td>US</td></tr><tr><th>spot</th><td>Apt. 554</td></tr></table>',
|
| 276 |
+
0.35,
|
| 277 |
+
"HTML"
|
| 278 |
+
],
|
| 279 |
+
# JSON samples (longer, more complex)
|
| 280 |
+
[
|
| 281 |
+
'{\"api_key\": \"9ewl5\", \"page\": \"82\", \"max_primary_general_date\": \"1998-02-01\", \"sort\": \"nz siw\", \"election_type_id\": \"guerv jgwbunon guerv\", \"election_district\": \"03vpuute\", \"max_election_date\": \"1980-12-30\", \"sort_null_only\": \"false\", \"min_election_date\": \"2003-03-05\", \"per_page\": \"96\", \"min_primary_general_date\": \"1991-05-29\", \"election_state\": \"f9u4gfgt pzji\", \"election_party\": \"\", \"min_update_date\": \"1998-01-26\", \"sort_nulls_last\": \"false\", \"max_create_date\": \"1970-10-19\", \"office_sought\": \"rz1thr5zp\", \"max_update_date\": \"2018-12-12\", \"sort_hide_null\": \"true\", \"election_year\": \"alrcfqpswf\", \"min_create_date\": \"2003-02-18\"}',
|
| 282 |
+
0.35,
|
| 283 |
+
"JSON"
|
| 284 |
+
],
|
| 285 |
+
[
|
| 286 |
+
'{\"sort\": \"\", \"incumbent_challenge\": \"rQ a\", \"longitude\": \"-98.705515\", \"has_raised_funds\": \"True\", \"airport\": \"New Orleans International airport\", \"office\": \"\", \"candidate_status\": \"e\", \"district\": \"\", \"sort_nulls_last\": \"True\", \"per_page\": \"344387016\", \"state\": \"Texas\", \"location\": \"-89.030682\", \"airport_icao\": \"KOKC\", \"api_key\": \"\", \"origin airport code\": \"LIS\", \"year\": \"2012\", \"sort_hide_null\": \"False\", \"cycle\": \"VAnEFSGu LDiJQtw LDiJQtw\", \"lat\": \"33.182925\", \"sort_null_only\": \"False\", \"page\": \"5661254\", \"election_year\": \"\", \"federal_funds_flag\": \"False\", \"party\": \"\", \"name\": \"OSsUo\"}',
|
| 287 |
+
0.35,
|
| 288 |
+
"JSON"
|
| 289 |
+
],
|
| 290 |
+
[
|
| 291 |
+
'{\"nationality\": \"American\", \"keyStorePass\": \"LObizj\", \":operation\": \"XSnpUioywM iOF5gN1bHM\", \"currentPassword\": \"wo3vooch8Ie\", \"nation_plural\": \"north-americans\", \"alias\": \"aoJPk aoJPk\", \"prefix\": \"Mr.\", \"prefix_male\": \"Mr.\", \"newAlias\": \"\", \"nation_woman\": \"western samoan\", \"newPassword\": \"UVpvCQ UVpvCQ\", \"keyPassword\": \"k4GWWlP@@z\", \"nation_man\": \"bahraini\", \"rePassword\": \"\", \"removeAlias\": \"o\"}',
|
| 292 |
+
0.35,
|
| 293 |
+
"JSON"
|
| 294 |
+
],
|
| 295 |
+
[
|
| 296 |
+
'{\"imei\": \"27-051998-738345-4\", \"post-code\": \"28403\", \"startTime\": \"1996-04-20 02:21:52\", \"timeGrain\": \"0f8Jl9qmZ3 cJSVXOylw\", \"longitude\": \"-77.952502\", \"latitude\": \"34.258789\", \"endTime\": \"1994-08-17 13:38:00\", \"api-version\": \"HDjWC jcOLlPG8W\", \"key store password\": \"ahZeT2ee\", \"bank account\": \"KEKY41344355014443\"}',
|
| 297 |
+
0.35,
|
| 298 |
+
"JSON"
|
| 299 |
+
],
|
| 300 |
+
# SQL samples (longer, more complex)
|
| 301 |
+
[
|
| 302 |
+
'SELECT \"endTime,startTime,age,nation_woman,national identity,arline name,airport_icao,coordinate,api-version\",\"api-version\",CASE WHEN \"endTime\" THEN \'skin\' WHEN \"startTime\"=\'1992-01-13 23:33:10\' THEN \'president\' WHEN \"age\"=\'31\' THEN \'be\' WHEN \"nation_woman\"=\'syrian\' THEN \'particular\' WHEN \"national identity\"<>\'600233955\' THEN \'trip\' WHEN \"arline name\"<>\'Shanghai Airlines\' THEN \'present\' WHEN \"airport_icao\"<>\'SBJP\' THEN \'forget\' WHEN \"coordinate\"=\'52.297060\' THEN \'car\' WHEN \"api-version\" THEN \'also\' END FROM \"not\" WHERE \"endTime\" AND \"startTime\"=\'1973-12-27 11:08:01\' AND (\"age\"=\'64\' OR \"age\"=\'answer\') AND \"nation_woman\"<>\'guyanese\' AND \"national identity\"<>\'142451774\' AND \"arline name\" AND \"airport_icao\" AND \"coordinate\"=\'46.828790\' AND (\"api-version\"=\'KOikhS KOikhS yz\' OR \"api-version\"=\'activity\') LIMIT 64',
|
| 303 |
+
0.35,
|
| 304 |
+
"SQL"
|
| 305 |
+
],
|
| 306 |
+
[
|
| 307 |
+
'SELECT \"week__day,Version,Tags,age,currency_code,TargetBucket,expiration-date,TargetSnapshotName,swift-code,KmsKeyId,Action,debit card,SourceSnapshotName\",\"SourceSnapshotName\",CASE WHEN \"week__day\"=\'Saturday\' THEN \'serious\' WHEN \"Version\"=\'2015-02-02\' OR \"Version\"=\'staff\' THEN \'country\' WHEN \"Tags\"<>\'\' THEN \'water\' WHEN \"age\" THEN \'behind\' WHEN \"currency_code\"=\'CAD\' THEN \'position\' WHEN \"TargetBucket\" THEN \'next\' WHEN \"expiration-date\"=\'11/2023\' OR \"expiration-date\"=\'technology\' THEN \'kid\' WHEN \"TargetSnapshotName\"=\'pWJ\' OR \"TargetSnapshotName\"=\'give\' THEN \'child\' WHEN \"swift-code\"=\'GWIZGBQPBUW\' THEN \'poor\' WHEN \"KmsKeyId\" THEN \'meeting\' WHEN \"Action\"=\'CopySnapshot\' THEN \'collection\' WHEN \"debit card\"<>\'30381983513092\' THEN \'paper\' WHEN \"SourceSnapshotName\"=\'\' THEN \'keep\' END FROM \"statement\" WHERE \"week__day\"=\'Tuesday\' AND \"Version\"=\'2015-02-02\' AND \"Tags\"=\'\' AND \"age\"=\'20\' AND \"currency_code\"=\'MGA\' AND \"TargetBucket\"=\'\' AND \"expiration-date\"=\'02/24\' AND \"TargetSnapshotName\"=\'\' AND \"swift-code\"=\'GNCHGBZC\' AND \"KmsKeyId\"=\'\' AND \"Action\"=\'CopySnapshot\' AND \"debit card\"=\'4534384187682\' AND \"SourceSnapshotName\"=\'\' LIMIT 36',
|
| 308 |
+
0.35,
|
| 309 |
+
"SQL"
|
| 310 |
+
],
|
| 311 |
+
[
|
| 312 |
+
'SELECT \"expiration-date,prettyPrint,alt,master-card,arline__name,key,bank city,fields,building,quotaUser,userIp,to country code,oauth_token\",\"oauth_token\",CASE WHEN \"expiration-date\"=\'3/2024\' THEN \'reduce\' WHEN \"prettyPrint\"=\'False\' OR \"prettyPrint\"=\'south\' THEN \'within\' WHEN \"alt\"<>\'json\' THEN \'thing\' WHEN \"master-card\" THEN \'strategy\' WHEN \"arline__name\"=\'Air India\' THEN \'forward\' WHEN \"key\" THEN \'artist\' WHEN \"bank city\"=\'Helena\' OR \"bank city\"=\'more\' THEN \'pay\' WHEN \"fields\"=\'\' OR \"fields\"=\'thing\' THEN \'rest\' WHEN \"building\"=\'977\' THEN \'executive\' WHEN \"quotaUser\" THEN \'safe\' WHEN \"userIp\"=\'pWJ\' THEN \'whom\' WHEN \"to country code\"<>\'US\' THEN \'not\' WHEN \"oauth_token\"=\'\' THEN \'choice\' END FROM \"wrong\" WHERE (\"expiration-date\"=\'05/23\' OR \"expiration-date\"=\'language\') AND \"prettyPrint\"=\'True\' AND \"alt\"<>\'json\' AND \"master-card\"=\'349245482859346\' AND \"arline__name\"=\'Indonesia AirAsia\' AND \"key\"=\'\' AND \"bank city\"=\'Georgetown\' AND \"fields\"=\'\' AND \"building\"=\'7241\' AND \"quotaUser\"=\'\' AND \"userIp\"=\'\' AND \"to country code\"=\'TM\' AND \"oauth_token\"=\'\' LIMIT 64',
|
| 313 |
+
0.35,
|
| 314 |
+
"SQL"
|
| 315 |
+
],
|
| 316 |
+
[
|
| 317 |
+
'SELECT `schemaName,databaseName,city,building,coordinate,state_abbreviation,driver license,international__mobile__equipment__identity`,`international__mobile__equipment__identity`,CASE WHEN `schemaName`<>\'fX04 bHQKn bHQKn\' THEN \'far\' WHEN `databaseName` THEN \'college\' WHEN `city`=\'Orlando\' OR `city`=\'probably\' THEN \'boy\' WHEN `building`<>\'2672\' THEN \'wind\' WHEN `coordinate`=\'-21.907687\' THEN \'offer\' WHEN `state_abbreviation`=\'FL\' THEN \'its\' WHEN `driver license`=\'H872538367807\' THEN \'lose\' WHEN `international__mobile__equipment__identity`=\'42-161139-363377-6\' OR `international__mobile__equipment__identity`=\'attention\' THEN \'nor\' END FROM `business` WHERE (`schemaName`=\'BfgAeXWjbC BfgAeXWjbC\' OR `schemaName`=\'across\') AND `databaseName`<>\'hw w\' AND `city`=\'West Caroline\' AND `building`<>\'44030\' AND `coordinate`=\'-21.907687\' AND `state_abbreviation`=\'IA\' AND `driver license`=\'224242065\' AND `international__mobile__equipment__identity`=\'83-695777-883364-1\' LIMIT 10',
|
| 318 |
+
0.35,
|
| 319 |
+
"SQL"
|
| 320 |
+
],
|
| 321 |
+
# XML samples (longer, more complex)
|
| 322 |
+
[
|
| 323 |
+
'b\'<?xml version=\"1.0\" encoding=\"UTF-8\" ?><root><sort type=\"str\"></sort><incumbent_challenge type=\"str\"></incumbent_challenge><longitude type=\"str\">-97.518538</longitude><has_raised_funds type=\"str\">True</has_raised_funds><airport type=\"str\">John F Kennedy International airport</airport><office type=\"str\">IDuqbH m</office><candidate_status type=\"str\">qEw3Tpc wmYqRUtTH</candidate_status><district type=\"str\">D UCd6ZAFD D</district><sort_nulls_last type=\"str\">False</sort_nulls_last><per_page type=\"str\">7720</per_page><state type=\"str\">South Dakota</state><location type=\"str\">-109.575655</location><airport_icao type=\"str\">EDDH</airport_icao><api_key type=\"str\">46nCNe0 Wj Wj</api_key><origin_airport_code type=\"str\">DEN</origin_airport_code><year type=\"str\">1996</year><sort_hide_null type=\"str\">False</sort_hide_null><cycle type=\"str\">FNxL</cycle><lat type=\"str\">43.16524</lat><sort_null_only type=\"str\">False</sort_null_only><page type=\"str\">4894426</page><election_year type=\"str\"></election_year><federal_funds_flag type=\"str\">False</federal_funds_flag><party type=\"str\"></party><name type=\"str\">aKPjF</name></root>\'',
|
| 324 |
+
0.35,
|
| 325 |
+
"XML"
|
| 326 |
+
],
|
| 327 |
+
[
|
| 328 |
+
'b\'<?xml version=\"1.0\" encoding=\"UTF-8\" ?><root><api_key type=\"str\">E hMCQl hMCQl</api_key><page type=\"str\">984478</page><max_primary_general_date type=\"str\">2008-01-29</max_primary_general_date><sort type=\"str\"></sort><election_type_id type=\"str\">L85O2N</election_type_id><election_district type=\"str\">M</election_district><max_election_date type=\"str\">2017-08-07</max_election_date><sort_null_only type=\"str\">False</sort_null_only><min_election_date type=\"str\">2007-07-01</min_election_date><per_page type=\"str\">452141118</per_page><min_primary_general_date type=\"str\">1977-07-12</min_primary_general_date><election_state type=\"str\"></election_state><election_party type=\"str\">CH4 Ceq Ceq</election_party><min_update_date type=\"str\">1980-04-11</min_update_date><sort_nulls_last type=\"str\">False</sort_nulls_last><max_create_date type=\"str\">1997-04-23</max_create_date><max_update_date type=\"str\">2020-12-25</max_update_date><sort_hide_null type=\"str\">True</sort_hide_null><election_year type=\"str\">v0rF4t8</election_year><min_create_date type=\"str\">2013-11-30</min_create_date></root>\'',
|
| 329 |
+
0.35,
|
| 330 |
+
"XML"
|
| 331 |
+
],
|
| 332 |
+
[
|
| 333 |
+
'b\'<?xml version=\"1.0\" encoding=\"UTF-8\" ?><root><nationality type=\"str\">American</nationality><last_name_male type=\"str\">Hayden</last_name_male><NextToken type=\"str\">YX8Fh4d NiOugSJPwm NiOugSJPwm</NextToken><StartDate type=\"str\">2007-04-07</StartDate><EndDate type=\"str\">1971-05-28</EndDate><family-name-female type=\"str\">Weishaar</family-name-female><PageSize type=\"str\">19750435</PageSize><prefix_male type=\"str\">Mr.</prefix_male><given__name__female type=\"str\">Dara</given__name__female><nation_man type=\"str\">bulgarian</nation_man></root>\'',
|
| 334 |
+
0.35,
|
| 335 |
+
"XML"
|
| 336 |
+
],
|
| 337 |
+
[
|
| 338 |
+
'b\'<?xml version=\"1.0\" encoding=\"UTF-8\" ?><root><imei type=\"str\">30-696164-389965-5</imei><post-code type=\"str\">33179</post-code><startTime type=\"str\">2017-02-05 13:11:21</startTime><timeGrain type=\"str\">S</timeGrain><longitude type=\"str\">-80.270951</longitude><latitude type=\"str\">25.898545</latitude><endTime type=\"str\">1990-02-04 22:51:09</endTime><api-version type=\"str\">Ad Ad wM5NWqRt</api-version><key_store_password type=\"str\">Shohr3aep</key_store_password><bank_account type=\"str\">BZEV05211288606606</bank_account></root>\'',
|
| 339 |
+
0.35,
|
| 340 |
+
"XML"
|
| 341 |
+
],
|
| 342 |
+
]
|
| 343 |
+
|
| 344 |
+
with gr.Blocks(title="Gravitee BERT PII") as demo:
|
| 345 |
+
gr.Markdown(
|
| 346 |
+
f"""
|
| 347 |
+
# Gravitee BERT PII (Personally Identifiable Information extraction)
|
| 348 |
+
|
| 349 |
+
This application uses the **{MODEL_NAME}** model for Named Entity Recognition (NER) to detect personally identifiable information.
|
| 350 |
+
The model uses token classification with BIO tagging to identify predefined entity types including names, addresses,
|
| 351 |
+
financial information, and more.
|
| 352 |
+
"""
|
| 353 |
+
)
|
| 354 |
+
|
| 355 |
+
with gr.Accordion("Available Entity Types", open=False):
|
| 356 |
+
gr.Markdown(
|
| 357 |
+
"""
|
| 358 |
+
The BERT models can detect the following entity types:
|
| 359 |
+
|
| 360 |
+
**Personal Information:**
|
| 361 |
+
- PERSON (names)
|
| 362 |
+
- AGE
|
| 363 |
+
- PHONE_NUMBER
|
| 364 |
+
- EMAIL_ADDRESS
|
| 365 |
+
|
| 366 |
+
**Location & Address:**
|
| 367 |
+
- LOCATION
|
| 368 |
+
- COORDINATE
|
| 369 |
+
|
| 370 |
+
**Financial:**
|
| 371 |
+
- CREDIT_CARD
|
| 372 |
+
- IBAN_CODE
|
| 373 |
+
- FINANCIAL
|
| 374 |
+
- US_BANK_NUMBER
|
| 375 |
+
|
| 376 |
+
**Government IDs:**
|
| 377 |
+
- US_SSN (Social Security Number)
|
| 378 |
+
- US_DRIVER_LICENSE
|
| 379 |
+
- US_PASSPORT
|
| 380 |
+
- US_ITIN
|
| 381 |
+
- US_LICENSE_PLATE
|
| 382 |
+
- NRP (Nationality, religious or political group)
|
| 383 |
+
|
| 384 |
+
**Technical:**
|
| 385 |
+
- IP_ADDRESS
|
| 386 |
+
- MAC_ADDRESS
|
| 387 |
+
- URL
|
| 388 |
+
- IMEI
|
| 389 |
+
- PASSWORD
|
| 390 |
+
|
| 391 |
+
**Other:**
|
| 392 |
+
- DATE_TIME
|
| 393 |
+
- ORGANIZATION
|
| 394 |
+
- TITLE
|
| 395 |
+
"""
|
| 396 |
+
)
|
| 397 |
+
|
| 398 |
+
with gr.Accordion("How to run this model locally", open=False):
|
| 399 |
+
gr.Markdown(
|
| 400 |
+
"""
|
| 401 |
+
## Installation
|
| 402 |
+
To use this model, install the required dependencies:
|
| 403 |
+
```
|
| 404 |
+
pip install transformers optimum[onnxruntime] torch
|
| 405 |
+
```
|
| 406 |
+
|
| 407 |
+
## Usage
|
| 408 |
+
Load the model using the Optimum library for ONNX Runtime:
|
| 409 |
+
```python
|
| 410 |
+
from optimum.onnxruntime import ORTModelForTokenClassification
|
| 411 |
+
from transformers import AutoTokenizer
|
| 412 |
+
|
| 413 |
+
model_path = "gravitee-io/bert-small-vanilla-ner"
|
| 414 |
+
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
| 415 |
+
model = ORTModelForTokenClassification.from_pretrained(model_path, file_name="model.onnx")
|
| 416 |
+
|
| 417 |
+
text = "John Doe lives at 123 Main St and his email is john@example.com"
|
| 418 |
+
inputs = tokenizer(text, return_tensors="pt", return_offsets_mapping=True)
|
| 419 |
+
outputs = model(**inputs)
|
| 420 |
+
```
|
| 421 |
+
"""
|
| 422 |
+
)
|
| 423 |
+
|
| 424 |
+
input_text = gr.Textbox(
|
| 425 |
+
value=examples[0][0],
|
| 426 |
+
label="Text input",
|
| 427 |
+
placeholder="Enter your text here"
|
| 428 |
+
)
|
| 429 |
+
|
| 430 |
+
with gr.Row():
|
| 431 |
+
threshold = gr.Slider(
|
| 432 |
+
0,
|
| 433 |
+
1,
|
| 434 |
+
value=0.35,
|
| 435 |
+
step=0.01,
|
| 436 |
+
label="Confidence Threshold",
|
| 437 |
+
info="Lower the threshold to get more predictions with lower confidence.",
|
| 438 |
+
scale=2
|
| 439 |
+
)
|
| 440 |
+
|
| 441 |
+
data_type_display = gr.Textbox(
|
| 442 |
+
value=examples[0][2],
|
| 443 |
+
label="Data Type",
|
| 444 |
+
interactive=False,
|
| 445 |
+
scale=1
|
| 446 |
+
)
|
| 447 |
+
|
| 448 |
+
format_checkbox = gr.Checkbox(
|
| 449 |
+
value=False,
|
| 450 |
+
label="Format Text",
|
| 451 |
+
info="Auto-format JSON, XML, HTML, SQL with proper indentation",
|
| 452 |
+
scale=1
|
| 453 |
+
)
|
| 454 |
+
|
| 455 |
+
output = gr.HighlightedText(label="Predicted Entities")
|
| 456 |
+
submit_btn = gr.Button("Submit")
|
| 457 |
+
|
| 458 |
+
examples_component = gr.Examples(
|
| 459 |
+
examples,
|
| 460 |
+
fn=ner,
|
| 461 |
+
inputs=[input_text, threshold, data_type_display, format_checkbox],
|
| 462 |
+
outputs=output,
|
| 463 |
+
cache_examples=False,
|
| 464 |
+
)
|
| 465 |
+
|
| 466 |
+
# Event handlers
|
| 467 |
+
input_text.submit(fn=ner, inputs=[input_text, threshold, data_type_display, format_checkbox], outputs=output)
|
| 468 |
+
threshold.release(fn=ner, inputs=[input_text, threshold, data_type_display, format_checkbox], outputs=output)
|
| 469 |
+
format_checkbox.change(fn=ner, inputs=[input_text, threshold, data_type_display, format_checkbox], outputs=output)
|
| 470 |
+
submit_btn.click(fn=ner, inputs=[input_text, threshold, data_type_display, format_checkbox], outputs=output)
|
| 471 |
+
|
| 472 |
+
if __name__ == "__main__":
    # Turn on the request queue, then start the server in debug mode.
    demo.queue().launch(debug=True)
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers>=4.21.0
|
| 2 |
+
optimum[onnxruntime]>=1.12.0
|
| 3 |
+
gradio>=4.0.0
|
| 4 |
+
torch>=1.13.0
|
| 5 |
+
numpy>=1.21.0
|
| 6 |
+
beautifulsoup4>=4.9.0
|