vickeee465 committed on
Commit
7a079bf
1 Parent(s): 2ab6001
app.py CHANGED
@@ -4,6 +4,8 @@ from interfaces.cap import demo as cap_demo
4
  from interfaces.manifesto import demo as manifesto_demo
5
  from interfaces.sentiment import demo as sentiment_demo
6
  from interfaces.emotion import demo as emotion_demo
 
 
7
 
8
  with gr.Blocks() as demo:
9
  gr.Markdown(
@@ -16,11 +18,12 @@ with gr.Blocks() as demo:
16
  """)
17
 
18
  gr.TabbedInterface(
19
- interface_list=[cap_demo, manifesto_demo, sentiment_demo, emotion_demo],
20
- tab_names=["CAP", "Manifesto", "Sentiment (3)", "Emotions (8)"],
21
  )
22
 
23
  if __name__ == "__main__":
 
24
  demo.launch()
25
 
26
  # TODO: add all languages & domains
 
4
  from interfaces.manifesto import demo as manifesto_demo
5
  from interfaces.sentiment import demo as sentiment_demo
6
  from interfaces.emotion import demo as emotion_demo
7
+ from interfaces.ner import demo as ner_demo
8
+ from interfaces.ner import download_models as download_spacy_models
9
 
10
  with gr.Blocks() as demo:
11
  gr.Markdown(
 
18
  """)
19
 
20
  gr.TabbedInterface(
21
+ interface_list=[cap_demo, manifesto_demo, sentiment_demo, emotion_demo, ner_demo],
22
+ tab_names=["CAP", "Manifesto", "Sentiment (3)", "Emotions (8)", "Named Entity Recognition"],
23
  )
24
 
25
  if __name__ == "__main__":
26
+ download_spacy_models()
27
  demo.launch()
28
 
29
  # TODO: add all languages & domains
interfaces/cap.py CHANGED
@@ -14,19 +14,19 @@ from label_dicts import CAP_NUM_DICT, CAP_LABEL_NAMES
14
  HF_TOKEN = os.environ["hf_read"]
15
 
16
  languages = [
17
- "danish",
18
- "dutch",
19
- "english",
20
- "french",
21
- "german",
22
- "hungarian",
23
- "italian",
24
- "polish",
25
- "portuguese",
26
- "spanish",
27
- "czech",
28
- "slovak",
29
- "norwegian"
30
  ]
31
 
32
  domains = {
 
14
  HF_TOKEN = os.environ["hf_read"]
15
 
16
  languages = [
17
+ "Danish",
18
+ "Dutch",
19
+ "English",
20
+ "French",
21
+ "German",
22
+ "Hungarian",
23
+ "Italian",
24
+ "Polish",
25
+ "Portuguese",
26
+ "Spanish",
27
+ "Czech",
28
+ "Slovak",
29
+ "Norwegian"
30
  ]
31
 
32
  domains = {
interfaces/emotion.py CHANGED
@@ -12,7 +12,7 @@ from label_dicts import MANIFESTO_LABEL_NAMES
12
  HF_TOKEN = os.environ["hf_read"]
13
 
14
  languages = [
15
- "czech", "english", "french", "german", "hungarian", "italian"
16
  ]
17
 
18
  def build_huggingface_path(language: str):
 
12
  HF_TOKEN = os.environ["hf_read"]
13
 
14
  languages = [
15
+ "Czech", "English", "French", "German", "Hungarian", "Italian"
16
  ]
17
 
18
  def build_huggingface_path(language: str):
interfaces/manifesto.py CHANGED
@@ -12,11 +12,11 @@ from label_dicts import MANIFESTO_LABEL_NAMES
12
  HF_TOKEN = os.environ["hf_read"]
13
 
14
  languages = [
15
- "armenian", "bulgarian", "croatian", "czech", "danish", "dutch", "english",
16
- "estonian", "finnish", "french", "georgian", "german", "greek", "hebrew",
17
- "hungarian", "icelandic", "italian", "japanese", "korean", "latvian",
18
- "lithuanian", "norwegian", "polish", "portuguese", "romanian", "russian",
19
- "serbian", "slovak", "slovenian", "spanish", "swedish", "turkish"
20
  ]
21
 
22
  def build_huggingface_path(language: str):
 
12
  HF_TOKEN = os.environ["hf_read"]
13
 
14
  languages = [
15
+ "Armenian", "Bulgarian", "Croatian", "Czech", "Danish", "Dutch", "English",
16
+ "Estonian", "Finnish", "French", "Georgian", "German", "Greek", "Hebrew",
17
+ "Hungarian", "Icelandic", "Italian", "Japanese", "Korean", "Latvian",
18
+ "Lithuanian", "Norwegian", "Polish", "Portuguese", "Romanian", "Russian",
19
+ "Serbian", "Slovak", "Slovenian", "Spanish", "Swedish", "Turkish"
20
  ]
21
 
22
  def build_huggingface_path(language: str):
interfaces/ner.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ import os
4
+ import torch
5
+ import numpy as np
6
+ from transformers import AutoModelForSequenceClassification
7
+ from transformers import AutoTokenizer
8
+ from huggingface_hub import HfApi
9
+
10
# UI language choices for the NER tab; each is mapped to a concrete spaCy
# pipeline name by build_spacy_path() below ("Multilingual" is the fallback).
languages = [
    "English", "Hungarian", "Multilingual"
]
13
+
14
def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
    """Download the spaCy pipelines used by the NER tab.

    Called once at app startup (see app.py's __main__ guard) so that
    spacy.load() finds the models locally.

    Args:
        models: iterable of pipeline names to fetch. Defaults to the three
            pipelines build_spacy_path() can return; a tuple is used instead
            of the original mutable-list default.
    """
    for model in models:
        if model.startswith("hu"):
            # Hungarian pipelines are distributed through huspacy, not spacy.cli.
            # Imported lazily: the file never imported huspacy at module level.
            import huspacy
            huspacy.download(model)
        else:
            # Fix: the original called spacy.cli.download(model_name) — an
            # undefined name (NameError) — and did so unconditionally, i.e.
            # also for "hu" models that spacy.cli cannot fetch.
            import spacy.cli
            spacy.cli.download(model)
19
+
20
def build_spacy_path(language: str) -> str:
    """Map a UI language choice to the matching spaCy pipeline name.

    Matching is case-insensitive; any language without a dedicated pipeline
    falls back to the multilingual entity model.
    """
    dedicated_pipelines = {
        "english": "en_core_web_lg",
        "hungarian": "hu_core_news_lg",
    }
    return dedicated_pipelines.get(language.lower(), "xx_ent_wiki_sm")
28
+
29
# Cache of loaded spaCy pipelines keyed by model name: spacy.load() reads the
# whole model from disk, so keep each pipeline alive across requests instead
# of reloading it on every call.
_PIPELINE_CACHE = {}

def named_entity_recognition(text, language):
    """Run spaCy NER on *text* with the pipeline selected by *language*.

    Returns:
        (entities, output_info): entities is a list of (span_text, label)
        tuples for gr.HighlightedText; output_info is an HTML snippet linking
        to the model's Hugging Face page.
    """
    model_id = build_spacy_path(language)
    pipeline = _PIPELINE_CACHE.get(model_id)
    if pipeline is None:
        # Imported lazily — the original file used spacy here without ever
        # importing it at module level, which raised NameError on first call.
        import spacy
        pipeline = _PIPELINE_CACHE[model_id] = spacy.load(model_id)
    doc = pipeline(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    # Hungarian pipelines are published under the "huspacy" Hub org, the rest under "spacy".
    model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}"
    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
    return entities, output_info
37
+
38
# Gradio UI for the NER tab: a free-text box plus a language dropdown (from
# the module-level `languages` list). Entities are rendered via
# HighlightedText; the model-credit HTML is shown through the Markdown output.
demo = gr.Interface(
    fn=named_entity_recognition,
    inputs=[gr.Textbox(lines=6, label="Input"),
            gr.Dropdown(languages, label="Language")],
    outputs=[gr.HighlightedText(label='Output'), gr.Markdown()])
interfaces/sentiment.py CHANGED
@@ -12,7 +12,7 @@ from label_dicts import MANIFESTO_LABEL_NAMES
12
  HF_TOKEN = os.environ["hf_read"]
13
 
14
  languages = [
15
- "czech", "english", "french", "german", "hungarian", "italian"
16
  ]
17
 
18
  def build_huggingface_path(language: str):
 
12
  HF_TOKEN = os.environ["hf_read"]
13
 
14
  languages = [
15
+ "Czech", "English", "French", "German", "Hungarian", "Italian"
16
  ]
17
 
18
  def build_huggingface_path(language: str):
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  pandas
2
  torch==2.2.1
3
  transformers==4.39.1
4
- sentencepiece==0.2.0
 
 
 
1
  pandas
2
  torch==2.2.1
3
  transformers==4.39.1
4
+ sentencepiece==0.2.0
5
+ spacy
6
+ huspacy