nickil committed
Commit b0ceaae (1 parent: 8a9b6f0)

update app
app.py CHANGED
@@ -1,7 +1,6 @@
 import gradio
 import benepar
 import spacy
-from IPython.display import display
 import nltk
 from nltk.tree import Tree
 nltk.download('stopwords')
@@ -19,9 +18,10 @@ benepar.download('benepar_en3')
 nlp = spacy.load("en_core_web_md")
 nlp.add_pipe("benepar", config={"model": "benepar_en3"})
 
-# inside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=256)
-fetch_url_inside_model = hf_hub_url(repo_id="nickil/weakly-supervised-parsing", filename="inside_model.ckpt", revision="main")
-inside_model = LightningModel.load_from_checkpoint(checkpoint_path=cached_download(fetch_url_inside_model))
+inside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=256)
+fetch_url_inside_model = hf_hub_url(repo_id="nickil/weakly-supervised-parsing", filename="inside_model.onnx", revision="main")
+# inside_model = LightningModel.load_from_checkpoint(checkpoint_path=cached_download(fetch_url_inside_model))
+inside_model.load_model(pre_trained_model_path=cached_download(fetch_url_inside_model))
 
 # outside_model = InsideOutsideStringClassifier(model_name_or_path="roberta-base", max_seq_length=64)
 # outside_model.load_model(pre_trained_model_path=TRAINED_MODEL_PATH + "outside_model.onnx")
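The change above drops the Lightning checkpoint (inside_model.ckpt restored via LightningModel.load_from_checkpoint) in favour of an ONNX export (inside_model.onnx) fed to the project's InsideOutsideStringClassifier.load_model. As a rough sketch only, assuming load_model amounts to opening an onnxruntime session on the file fetched from the Hub (the classifier's internals are not part of this diff):

# Sketch, not the project's code: approximate the new loading path with the
# legacy huggingface_hub helpers that app.py already uses plus onnxruntime.
import onnxruntime as ort
from huggingface_hub import hf_hub_url, cached_download

onnx_path = cached_download(
    hf_hub_url(repo_id="nickil/weakly-supervised-parsing", filename="inside_model.onnx", revision="main")
)
session = ort.InferenceSession(onnx_path)
print([inp.name for inp in session.get_inputs()])  # typically input_ids / attention_mask for a RoBERTa export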
weakly_supervised_parser/utils/populate_chart.py CHANGED
@@ -26,9 +26,9 @@ ptb_top_100_common = ['this', 'myself', 'shouldn', 'not', 'analysts', 'same', 'm
 # ptb_most_common_first_token = RuleBasedHeuristic(corpus=ptb.retrieve_all_sentences()).augment_using_most_frequent_starting_token(N=1)[0][0].lower()
 ptb_most_common_first_token = "the"
 
-from pytorch_lightning import Trainer
+# from pytorch_lightning import Trainer
 
-trainer = Trainer(accelerator="auto", enable_progress_bar=False, max_epochs=-1)
+# trainer = Trainer(accelerator="auto", enable_progress_bar=False, max_epochs=-1)
 
 
 class PopulateCKYChart:
@@ -54,19 +54,20 @@ class PopulateCKYChart:
 
         if predict_type == "inside":
 
-            # if data.shape[0] > chunks:
-            #     data_chunks = np.array_split(data, data.shape[0] // chunks)
-            #     for data_chunk in data_chunks:
-            #         inside_scores.extend(model.predict_proba(spans=data_chunk.rename(columns={"inside_sentence": "sentence"})[["sentence"]],
-            #                                                   scale_axis=scale_axis,
-            #                                                   predict_batch_size=predict_batch_size)[:, 1])
-            # else:
-            #     inside_scores.extend(model.predict_proba(spans=data.rename(columns={"inside_sentence": "sentence"})[["sentence"]],
-            #                                               scale_axis=scale_axis,
-            #                                               predict_batch_size=predict_batch_size)[:, 1])
-            test_dataloader = DataModule(model_name_or_path="roberta-base", train_df=None, eval_df=None,
-                                         test_df=data.rename(columns={"inside_sentence": "sentence"})[["sentence"]])
-            inside_scores.extend(trainer.predict(model, dataloaders=test_dataloader)[0])
+            if data.shape[0] > chunks:
+                data_chunks = np.array_split(data, data.shape[0] // chunks)
+                for data_chunk in data_chunks:
+                    inside_scores.extend(model.predict_proba(spans=data_chunk.rename(columns={"inside_sentence": "sentence"})[["sentence"]],
+                                                             scale_axis=scale_axis,
+                                                             predict_batch_size=predict_batch_size)[:, 1])
+            else:
+                inside_scores.extend(model.predict_proba(spans=data.rename(columns={"inside_sentence": "sentence"})[["sentence"]],
+                                                         scale_axis=scale_axis,
+                                                         predict_batch_size=predict_batch_size)[:, 1])
+
+            # test_dataloader = DataModule(model_name_or_path="roberta-base", train_df=None, eval_df=None,
+            #                              test_df=data.rename(columns={"inside_sentence": "sentence"})[["sentence"]])
+            # inside_scores.extend(trainer.predict(model, dataloaders=test_dataloader)[0])
 
         data["inside_scores"] = inside_scores
         data.loc[
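The re-enabled branch above is plain chunked scoring: split the span table so predict_proba never sees more than roughly chunks rows at a time and collect the positive-class probability, instead of routing everything through trainer.predict. A standalone sketch of that pattern, with the helper name and signature assumed for illustration (only predict_proba's keyword arguments come from the diff):

# Illustrative helper mirroring the chunked predict_proba pattern restored above.
import numpy as np
import pandas as pd

def score_spans_in_chunks(model, data: pd.DataFrame, chunks: int, scale_axis, predict_batch_size):
    # Rename once so the classifier always receives a single "sentence" column.
    spans = data.rename(columns={"inside_sentence": "sentence"})[["sentence"]]
    inside_scores = []
    if spans.shape[0] > chunks:
        # np.array_split keeps the pieces roughly equal even when the row count
        # is not a multiple of the chunk size.
        for chunk in np.array_split(spans, spans.shape[0] // chunks):
            inside_scores.extend(model.predict_proba(spans=chunk,
                                                     scale_axis=scale_axis,
                                                     predict_batch_size=predict_batch_size)[:, 1])
    else:
        inside_scores.extend(model.predict_proba(spans=spans,
                                                 scale_axis=scale_axis,
                                                 predict_batch_size=predict_batch_size)[:, 1])
    return inside_scores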