umarigan commited on
Commit
8bb7ed4
·
verified ·
1 Parent(s): a34a8fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -22,6 +22,12 @@ def read_file(file):
22
  st.error("Unsupported file type")
23
  return None
24
 
 
 
 
 
 
 
25
  st.title("Turkish NER Models Testing")
26
 
27
  model_list = [
@@ -45,8 +51,6 @@ aggregation = "simple" if model_checkpoint in ["akdeniz27/xlm-roberta-base-turki
45
  st.subheader("Select Text Input Method")
46
  input_method = st.radio("", ('Write or Paste New Text', 'Upload File'))
47
 
48
-
49
-
50
  if input_method == "Write or Paste New Text":
51
  input_text = st.text_area('Write or Paste Text Below', value="", height=128)
52
  else:
@@ -81,10 +85,13 @@ Run_Button = st.button("Run")
81
 
82
  if Run_Button and input_text:
83
  ner_pipeline = setModel(model_checkpoint, aggregation)
84
- output = ner_pipeline(input_text)
85
-
86
- output_comb = entity_comb(output)
87
 
 
 
 
 
 
 
88
  df = pd.DataFrame.from_dict(output_comb)
89
  cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
90
  df_final = df[cols_to_keep]
@@ -96,6 +103,6 @@ if Run_Button and input_text:
96
  spacy_display = {"ents": [], "text": input_text, "title": None}
97
  for entity in output_comb:
98
  spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
99
-
100
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
101
- st.write(html, unsafe_allow_html=True)
 
22
  st.error("Unsupported file type")
23
  return None
24
 
25
+ # Function to generate text chunks
26
+ def chunk_text(text, max_length=128):
27
+ words = text.split()
28
+ for i in range(0, len(words), max_length):
29
+ yield " ".join(words[i:i + max_length])
30
+
31
  st.title("Turkish NER Models Testing")
32
 
33
  model_list = [
 
51
  st.subheader("Select Text Input Method")
52
  input_method = st.radio("", ('Write or Paste New Text', 'Upload File'))
53
 
 
 
54
  if input_method == "Write or Paste New Text":
55
  input_text = st.text_area('Write or Paste Text Below', value="", height=128)
56
  else:
 
85
 
86
  if Run_Button and input_text:
87
  ner_pipeline = setModel(model_checkpoint, aggregation)
 
 
 
88
 
89
+ # Process the text in chunks
90
+ output_comb = []
91
+ for chunk in chunk_text(input_text):
92
+ output = ner_pipeline(chunk)
93
+ output_comb.extend(entity_comb(output))
94
+
95
  df = pd.DataFrame.from_dict(output_comb)
96
  cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
97
  df_final = df[cols_to_keep]
 
103
  spacy_display = {"ents": [], "text": input_text, "title": None}
104
  for entity in output_comb:
105
  spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": entity["entity_group"]})
106
+
107
  html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
108
+ st.write(html, unsafe_allow_html=True)