sreejith8100 committed on
Commit
10f884a
·
verified ·
1 Parent(s): af9071d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -84
app.py CHANGED
@@ -1,84 +1,90 @@
1
- import torch
2
- import gradio as gr
3
- from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
4
- from rapidfuzz import process
5
-
6
- # ------------------ Load models once ------------------
7
- # Sentiment model
8
- sentiment_pipeline = pipeline(
9
- "sentiment-analysis",
10
- model="sreejith8100/indian_output",
11
- tokenizer="sreejith8100/indian_output",
12
- device=0 if torch.cuda.is_available() else -1
13
- )
14
-
15
- # NER model
16
- ner_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicNER", use_fast=True)
17
- ner_model = AutoModelForTokenClassification.from_pretrained("ai4bharat/IndicNER")
18
- ner_pipeline = pipeline(
19
- "ner",
20
- model=ner_model,
21
- tokenizer=ner_tokenizer,
22
- aggregation_strategy="simple",
23
- device=0 if torch.cuda.is_available() else -1
24
- )
25
-
26
- # Canonical entity list
27
- CANONICAL_ENTITIES = [
28
- "V Abdurahiman / വി അബ്ദുറഹിമാൻ",
29
- "P A Mohamed Riyas / പി എ മുഹമ്മദ് റിയാസ്",
30
- "P Rajeev / പി രാജീവ്",
31
- "Saji Cherian / സജി ചെറിയാൻ",
32
- "Roshy Augustine / റോഷി ഓഗസ്റ്റിൻ",
33
- "R Bindu / ആർ ബിന്ദു",
34
- "A K Saseendran / എ കെ സസീന്ദ്രൻ",
35
- "O R Kelu / ഒ ആർ കെലു",
36
- "J Chinchurani / ജെ ചിഞ്ചുറാണി",
37
- "K N Balagopal / കെ എൻ ബാലഗോപാൽ",
38
- "K Krishnankutty / കെ കൃഷ്ണൻകുട്ടി",
39
- "Veena George / വീണാ ജോർജ്",
40
- "Antony Raju / ആന്റണി രാജു",
41
- "K Rajan / കെ രാജൻ",
42
- "M B Rajesh / എം ബി രാജേഷ്",
43
- "Chittayam Gopakumar / ചിറ്റയം ഗോപകുമാർ",
44
- "K Radhakrishnan / കെ രാധാകൃഷ്ണൻ",
45
- "Pinarayi Vijayan / പിണറായി വിജയൻ",
46
- "V Sivankutty / വി ശിവൻകുട്ടി",
47
- "K K Shailaja / കെ കെ ശൈലജ"
48
- ]
49
-
50
- def map_entity(entity_text, known_entities=CANONICAL_ENTITIES, threshold=70):
51
- match, score, _ = process.extractOne(entity_text, known_entities)
52
- if score >= threshold:
53
- return match
54
- return None
55
-
56
-
57
- # ------------------ Prediction function ------------------
58
- def predict(sentence):
59
- # Run sentiment
60
- sent_pred = sentiment_pipeline(sentence)[0]
61
-
62
- # Run NER + map
63
- entities = ner_pipeline(sentence)
64
- mapped_entities = [map_entity(ent["word"]) for ent in entities if map_entity(ent["word"])]
65
-
66
- return {
67
- "sentence": sentence,
68
- "prediction": sent_pred["label"],
69
- "score": float(sent_pred["score"]),
70
- "mapped_entities": list(set(mapped_entities))
71
- }
72
-
73
-
74
- # ------------------ Gradio Interface ------------------
75
- demo = gr.Interface(
76
- fn=predict,
77
- inputs=gr.Textbox(label="Enter a sentence"),
78
- outputs=gr.JSON(label="Result"),
79
- title="Entity + Sentiment Analysis",
80
- description="Upload a sentence in Malayalam/English. The app detects entities and predicts sentiment."
81
- )
82
-
83
- if __name__ == "__main__":
84
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification
4
+ from rapidfuzz import process
5
+
6
+ # ------------------ Load models once ------------------
7
+ # Sentiment model
8
+ sentiment_pipeline = pipeline(
9
+ "sentiment-analysis",
10
+ model="sreejith8100/indian_output",
11
+ tokenizer="sreejith8100/indian_output",
12
+ device=0 if torch.cuda.is_available() else -1
13
+ )
14
+
15
+ # NER model
16
+ ner_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicNER", use_fast=True)
17
+ ner_model = AutoModelForTokenClassification.from_pretrained("ai4bharat/IndicNER")
18
+ ner_pipeline = pipeline(
19
+ "ner",
20
+ model=ner_model,
21
+ tokenizer=ner_tokenizer,
22
+ aggregation_strategy="simple",
23
+ device=0 if torch.cuda.is_available() else -1
24
+ )
25
+
26
+ # Canonical entity list
27
+ CANONICAL_ENTITIES = [
28
+ "V Abdurahiman / വി അബ്ദുറഹിമാൻ",
29
+ "P A Mohamed Riyas / പി എ മുഹമ്മദ് റിയാസ്",
30
+ "P Rajeev / പി രാജീവ്",
31
+ "Saji Cherian / സജി ചെറിയാൻ",
32
+ "Roshy Augustine / റോഷി ഓഗസ്റ്റിൻ",
33
+ "R Bindu / ആർ ബിന്ദു",
34
+ "A K Saseendran / എ കെ സസീന്ദ്രൻ",
35
+ "O R Kelu / ഒ ആർ കെലു",
36
+ "J Chinchurani / ജെ ചിഞ്ചുറാണി",
37
+ "K N Balagopal / കെ എൻ ബാലഗോപാൽ",
38
+ "K Krishnankutty / കെ കൃഷ്ണൻകുട്ടി",
39
+ "Veena George / വീണാ ജോർജ്",
40
+ "Antony Raju / ആന്റണി രാജു",
41
+ "K Rajan / കെ രാജൻ",
42
+ "M B Rajesh / എം ബി രാജേഷ്",
43
+ "Chittayam Gopakumar / ചിറ്റയം ഗോപകുമാർ",
44
+ "K Radhakrishnan / കെ രാധാകൃഷ്ണൻ",
45
+ "Pinarayi Vijayan / പിണറായി വിജയൻ",
46
+ "V Sivankutty / വി ശിവൻകുട്ടി",
47
+ "K K Shailaja / കെ കെ ശൈലജ"
48
+ ]
49
+
50
def map_entity(entity_text, known_entities=CANONICAL_ENTITIES, threshold=70):
    """Return the canonical entity that best fuzzy-matches ``entity_text``.

    Args:
        entity_text: Raw entity string, e.g. the ``word`` field of an NER
            pipeline result.
        known_entities: Candidate canonical names to match against.
        threshold: Minimum rapidfuzz score a candidate must reach to be
            accepted.

    Returns:
        The best-matching entry of ``known_entities``, or ``None`` when
        ``entity_text`` is empty, there are no candidates, or no candidate
        scores at least ``threshold``.
    """
    # Empty/None text cannot identify anyone; skip the fuzzy search.
    if not entity_text:
        return None

    # extractOne returns None (not a tuple) when no candidate qualifies, so
    # the result has to be checked before it is indexed. Passing the
    # threshold as score_cutoff lets rapidfuzz discard weak candidates itself.
    best = process.extractOne(entity_text, known_entities, score_cutoff=threshold)
    if best is None:
        return None
    return best[0]
55
+
56
+ # Map raw model labels to readable ones
57
+ label_map = {
58
+ "LABEL_0": "POSITIVE",
59
+ "LABEL_1": "NEGATIVE",
60
+ "LABEL_2": "NEUTRAL"
61
+ }
62
+
63
+ # ------------------ Prediction function ------------------
64
def predict(sentence):
    """Classify the sentiment of ``sentence`` and list the known entities in it.

    Args:
        sentence: Input text passed to both the sentiment and NER pipelines.

    Returns:
        A dict with the keys ``sentence`` (the input, unchanged),
        ``prediction`` (the sentiment label, translated through ``label_map``
        when the raw label is listed there), ``score`` (the classifier score
        as a float) and ``mapped_entities`` (unique canonical entity names in
        order of first appearance).
    """
    # Take the first result returned for the single input string.
    sent_pred = sentiment_pipeline(sentence)[0]
    raw_label = sent_pred["label"]
    # Fall back to the raw label so an unlisted class is still reported.
    human_label = label_map.get(raw_label, raw_label)

    # Fuzzy-match every detected span exactly once. Dict keys de-duplicate
    # while keeping first-seen order, so the output is stable across runs.
    mapped_entities = {}
    for ent in ner_pipeline(sentence):
        canonical = map_entity(ent["word"])
        if canonical:
            mapped_entities[canonical] = None

    return {
        "sentence": sentence,
        "prediction": human_label,
        "score": float(sent_pred["score"]),
        "mapped_entities": list(mapped_entities),
    }
79
+
80
+ # ------------------ Gradio Interface ------------------
81
+ demo = gr.Interface(
82
+ fn=predict,
83
+ inputs=gr.Textbox(label="Enter a sentence"),
84
+ outputs=gr.JSON(label="Result"),
85
+ title="Entity + Sentiment Analysis",
86
+ description="Upload a sentence in Malayalam/English. The app detects entities and predicts sentiment."
87
+ )
88
+
89
+ if __name__ == "__main__":
90
+ demo.launch(server_name="0.0.0.0", server_port=7860)