Spaces:

kazalbrur
/

Bangla-Legal-NER

Sleeping

kazalbrur committed on Sep 8, 2024

Commit

1ab345c

•

1 Parent(s): 6ac85e1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,18 @@
 import gradio as gr
 import spaces
-from transformers import pipeline
 from typing import List, Dict, Any
 import torch
-def merge_tokens(tokens: List[Dict[str, any]]) -> List[Dict[str, any]]:
     merged_tokens = []
     for token in tokens:
         if merged_tokens and token['entity'].startswith('I-') and merged_tokens[-1]['entity'].endswith(token['entity'][2:]):
@@ -19,8 +27,8 @@ def merge_tokens(tokens: List[Dict[str, any]]) -> List[Dict[str, any]]:
 # Determine device
 device = 0 if torch.cuda.is_available() else -1
-# Initialize Model
-get_completion = pipeline("ner", model="kazalbrur/BanglaLegalNER", device=device)
 @spaces.GPU(duration=120)
 def ner(input: str) -> Dict[str, Any]:
@@ -35,7 +43,7 @@ def ner(input: str) -> Dict[str, Any]:
 title = """<h1 id="title"> Bangla Legal Entity Recognition </h1>"""
 description = """
-- The model used for Recognizing entities [BERT-BASE-NER](https://huggingface.co/kazalbrur/BanglaLegalNER).
 """
 css = '''

 import gradio as gr
 import spaces
+from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
 from typing import List, Dict, Any
 import torch
+# Define the model and tokenizer
+model_name = "kazalbrur/BanglaLegalNER"  # Ensure this model is suitable or update accordingly
+tokenizer_name = "csebuetnlp/banglat5_banglaparaphrase"
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_fast=False)
+model = AutoModelForTokenClassification.from_pretrained(model_name)
+def merge_tokens(tokens: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     merged_tokens = []
     for token in tokens:
         if merged_tokens and token['entity'].startswith('I-') and merged_tokens[-1]['entity'].endswith(token['entity'][2:]):
 # Determine device
 device = 0 if torch.cuda.is_available() else -1
+# Initialize Pipeline with the new model and tokenizer
+get_completion = pipeline("ner", model=model, tokenizer=tokenizer, device=device)
 @spaces.GPU(duration=120)
 def ner(input: str) -> Dict[str, Any]:
 title = """<h1 id="title"> Bangla Legal Entity Recognition </h1>"""
 description = """
+- The model used for Recognizing entities [Bangla Legal NER](https://huggingface.co/kazalbrur/BanglaLegalNER).
 """
 css = '''