victormiller committed on
Commit
e3fd33e
1 Parent(s): 87a6313

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +10 -60
curated.py CHANGED
@@ -511,12 +511,7 @@ def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str
511
  target=target,
512
  )
513
 
514
- freelaw_examples = Div(
515
- Div(
516
- get_freelaw_data(target=gen_random_id()),
517
- style="border: 1px solid #ccc; padding: 20px;",
518
- ),
519
- )
520
 
521
  def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str = "foo"):
522
  doc_id = max(0, min(int(doc_id), 9))
@@ -540,12 +535,7 @@ def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str
540
  target=target,
541
  )
542
 
543
- se_examples = Div(
544
- Div(
545
- get_se_data(target=gen_random_id()),
546
- style="border: 1px solid #ccc; padding: 20px;",
547
- ),
548
- )
549
 
550
  def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
551
  doc_id = max(0, min(int(doc_id), 9))
@@ -568,12 +558,7 @@ def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str
568
  target=target,
569
  )
570
 
571
- phil_examples = Div(
572
- Div(
573
- get_phil_data(target=gen_random_id()),
574
- style="border: 1px solid #ccc; padding: 20px;",
575
- ),
576
- )
577
 
578
  def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
579
  doc_id = max(0, min(int(doc_id), 9))
@@ -597,12 +582,7 @@ def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo
597
  target=target,
598
  )
599
 
600
- arx_examples = Div(
601
- Div(
602
- get_arx_data(target=gen_random_id()),
603
- style="border: 1px solid #ccc; padding: 20px;",
604
- ),
605
- )
606
 
607
  def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
608
  doc_id = max(0, min(int(doc_id), 9))
@@ -625,12 +605,7 @@ def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "f
625
  target=target,
626
  )
627
 
628
- s2o_examples = Div(
629
- Div(
630
- get_S2ORC_data(target=gen_random_id()),
631
- style="border: 1px solid #ccc; padding: 20px;",
632
- ),
633
- )
634
 
635
  def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target: str = "foo"):
636
  doc_id = max(0, min(int(doc_id), 9))
@@ -653,12 +628,7 @@ def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target
653
  target=target,
654
  )
655
 
656
- s2oa_examples = Div(
657
- Div(
658
- get_S2ORCA_data(target=gen_random_id()),
659
- style="border: 1px solid #ccc; padding: 20px;",
660
- ),
661
- )
662
 
663
  def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str = "foo"):
664
  doc_id = max(0, min(int(doc_id), 9))
@@ -682,12 +652,7 @@ def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str =
682
  target=target,
683
  )
684
 
685
- pubmed_examples = Div(
686
- Div(
687
- get_pubmed_data(target=gen_random_id()),
688
- style="border: 1px solid #ccc; padding: 20px;",
689
- ),
690
- )
691
 
692
  def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
693
  doc_id = max(0, min(int(doc_id), 9))
@@ -711,12 +676,7 @@ def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "f
711
  target=target,
712
  )
713
 
714
- dmm_examples = Div(
715
- Div(
716
- get_dmm_data(target=gen_random_id()),
717
- style="border: 1px solid #ccc; padding: 20px;",
718
- ),
719
- )
720
 
721
  def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
722
  doc_id = max(0, min(int(doc_id), 9))
@@ -739,12 +699,7 @@ def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo
739
  target=target,
740
  )
741
 
742
- pg19_examples = Div(
743
- Div(
744
- get_pg19_data(target=gen_random_id()),
745
- style="border: 1px solid #ccc; padding: 20px;",
746
- ),
747
- )
748
 
749
  def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "foo"):
750
  doc_id = max(0, min(int(doc_id), 9))
@@ -767,12 +722,7 @@ def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "f
767
  target=target,
768
  )
769
 
770
- eu_examples = Div(
771
- Div(
772
- get_eu_data(target=gen_random_id()),
773
- style="border: 1px solid #ccc; padding: 20px;",
774
- ),
775
- )
776
 
777
  filtering_process = Div(
778
  Section(
 
511
  target=target,
512
  )
513
 
514
+ freelaw_examples = DV("data/curated_samples/freelaw_extract.json", 0, "Freelaw")
 
 
 
 
 
515
 
516
  def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str = "foo"):
517
  doc_id = max(0, min(int(doc_id), 9))
 
535
  target=target,
536
  )
537
 
538
+ se_examples = DV("data/curated_samples/stackexchange_extract.json", 0, "StackExchange")
 
 
 
 
 
539
 
540
  def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
541
  doc_id = max(0, min(int(doc_id), 9))
 
558
  target=target,
559
  )
560
 
561
+ phil_examples = DV("data/curated_samples/philpapers_raw.json", 0, "PhilPapers")
 
 
 
 
 
562
 
563
  def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
564
  doc_id = max(0, min(int(doc_id), 9))
 
582
  target=target,
583
  )
584
 
585
+ arx_examples = DV("data/curated_samples/arxiv_extract.json", 0, "Arxiv")
 
 
 
 
 
586
 
587
  def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
588
  doc_id = max(0, min(int(doc_id), 9))
 
605
  target=target,
606
  )
607
 
608
+ s2o_examples = DV("data/curated_samples/s2orc_raw.json", 0, "S2ORC")
 
 
 
 
 
609
 
610
  def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target: str = "foo"):
611
  doc_id = max(0, min(int(doc_id), 9))
 
628
  target=target,
629
  )
630
 
631
+ s2oa_examples = DV("data/curated_samples/s2orc_abstract_raw.json", 0, "S2ORC Abstract")
 
 
 
 
 
632
 
633
  def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str = "foo"):
634
  doc_id = max(0, min(int(doc_id), 9))
 
652
  target=target,
653
  )
654
 
655
+ pubmed_examples = DV("data/curated_samples/pubmed_extract.json", 0, "PubMed")
 
 
 
 
 
656
 
657
  def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
658
  doc_id = max(0, min(int(doc_id), 9))
 
676
  target=target,
677
  )
678
 
679
+ dmm_examples = DV("data/curated_samples/dm_maths_extract.json", 0, "DM Math")
 
 
 
 
 
680
 
681
  def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
682
  doc_id = max(0, min(int(doc_id), 9))
 
699
  target=target,
700
  )
701
 
702
+ pg19_examples = DV("data/curated_samples/pg19_raw.json", 0, "PG19")
 
 
 
 
 
703
 
704
  def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "foo"):
705
  doc_id = max(0, min(int(doc_id), 9))
 
722
  target=target,
723
  )
724
 
725
+ eu_examples = DV("data/curated_samples/europarl_raw.json", 0, "Europarl")
 
 
 
 
 
726
 
727
  filtering_process = Div(
728
  Section(