Spaces:
Running
Running
victormiller
committed on
Commit
•
e3fd33e
1
Parent(s):
87a6313
Update curated.py
Browse files - curated.py +10 -60
curated.py
CHANGED
@@ -511,12 +511,7 @@ def get_freelaw_data(data_source: str = "Freelaw", doc_id: int = 3, target: str
|
|
511 |
target=target,
|
512 |
)
|
513 |
|
514 |
-
freelaw_examples =
|
515 |
-
Div(
|
516 |
-
get_freelaw_data(target=gen_random_id()),
|
517 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
518 |
-
),
|
519 |
-
)
|
520 |
|
521 |
def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str = "foo"):
|
522 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -540,12 +535,7 @@ def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str
|
|
540 |
target=target,
|
541 |
)
|
542 |
|
543 |
-
se_examples =
|
544 |
-
Div(
|
545 |
-
get_se_data(target=gen_random_id()),
|
546 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
547 |
-
),
|
548 |
-
)
|
549 |
|
550 |
def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
|
551 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -568,12 +558,7 @@ def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str
|
|
568 |
target=target,
|
569 |
)
|
570 |
|
571 |
-
phil_examples =
|
572 |
-
Div(
|
573 |
-
get_phil_data(target=gen_random_id()),
|
574 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
575 |
-
),
|
576 |
-
)
|
577 |
|
578 |
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
|
579 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -597,12 +582,7 @@ def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo
|
|
597 |
target=target,
|
598 |
)
|
599 |
|
600 |
-
arx_examples =
|
601 |
-
Div(
|
602 |
-
get_arx_data(target=gen_random_id()),
|
603 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
604 |
-
),
|
605 |
-
)
|
606 |
|
607 |
def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
|
608 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -625,12 +605,7 @@ def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "f
|
|
625 |
target=target,
|
626 |
)
|
627 |
|
628 |
-
s2o_examples =
|
629 |
-
Div(
|
630 |
-
get_S2ORC_data(target=gen_random_id()),
|
631 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
632 |
-
),
|
633 |
-
)
|
634 |
|
635 |
def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target: str = "foo"):
|
636 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -653,12 +628,7 @@ def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target
|
|
653 |
target=target,
|
654 |
)
|
655 |
|
656 |
-
s2oa_examples =
|
657 |
-
Div(
|
658 |
-
get_S2ORCA_data(target=gen_random_id()),
|
659 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
660 |
-
),
|
661 |
-
)
|
662 |
|
663 |
def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str = "foo"):
|
664 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -682,12 +652,7 @@ def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str =
|
|
682 |
target=target,
|
683 |
)
|
684 |
|
685 |
-
pubmed_examples =
|
686 |
-
Div(
|
687 |
-
get_pubmed_data(target=gen_random_id()),
|
688 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
689 |
-
),
|
690 |
-
)
|
691 |
|
692 |
def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
|
693 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -711,12 +676,7 @@ def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "f
|
|
711 |
target=target,
|
712 |
)
|
713 |
|
714 |
-
dmm_examples =
|
715 |
-
Div(
|
716 |
-
get_dmm_data(target=gen_random_id()),
|
717 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
718 |
-
),
|
719 |
-
)
|
720 |
|
721 |
def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
|
722 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -739,12 +699,7 @@ def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo
|
|
739 |
target=target,
|
740 |
)
|
741 |
|
742 |
-
pg19_examples =
|
743 |
-
Div(
|
744 |
-
get_pg19_data(target=gen_random_id()),
|
745 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
746 |
-
),
|
747 |
-
)
|
748 |
|
749 |
def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "foo"):
|
750 |
doc_id = max(0, min(int(doc_id), 9))
|
@@ -767,12 +722,7 @@ def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "f
|
|
767 |
target=target,
|
768 |
)
|
769 |
|
770 |
-
eu_examples =
|
771 |
-
Div(
|
772 |
-
get_eu_data(target=gen_random_id()),
|
773 |
-
style="border: 1px solid #ccc; padding: 20px;",
|
774 |
-
),
|
775 |
-
)
|
776 |
|
777 |
filtering_process = Div(
|
778 |
Section(
|
|
|
511 |
target=target,
|
512 |
)
|
513 |
|
514 |
+
freelaw_examples = DV("data/curated_samples/freelaw_extract.json", 0, "Freelaw")
|
|
|
|
|
|
|
|
|
|
|
515 |
|
516 |
def get_se_data(data_source: str = "StackExchange", doc_id: int = 3, target: str = "foo"):
|
517 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
535 |
target=target,
|
536 |
)
|
537 |
|
538 |
+
se_examples = DV("data/curated_samples/stackexchange_extract.json", 0, "StackExchange")
|
|
|
|
|
|
|
|
|
|
|
539 |
|
540 |
def get_phil_data(data_source: str = "PhilPapers", doc_id: int = 3, target: str = "foo"):
|
541 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
558 |
target=target,
|
559 |
)
|
560 |
|
561 |
+
phil_examples = DV("data/curated_samples/philpapers_raw.json", 0, "PhilPapers")
|
|
|
|
|
|
|
|
|
|
|
562 |
|
563 |
def get_arx_data(data_source: str = "Arxiv", doc_id: int = 3, target: str = "foo"):
|
564 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
582 |
target=target,
|
583 |
)
|
584 |
|
585 |
+
arx_examples = DV("data/curated_samples/arxiv_extract.json", 0, "Arxiv")
|
|
|
|
|
|
|
|
|
|
|
586 |
|
587 |
def get_S2ORC_data(data_source: str = "S2ORC", doc_id: int = 3, target: str = "foo"):
|
588 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
605 |
target=target,
|
606 |
)
|
607 |
|
608 |
+
s2o_examples = DV("data/curated_samples/s2orc_raw.json", 0, "S2ORC")
|
|
|
|
|
|
|
|
|
|
|
609 |
|
610 |
def get_S2ORCA_data(data_source: str = "S2ORC Abstract", doc_id: int = 3, target: str = "foo"):
|
611 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
628 |
target=target,
|
629 |
)
|
630 |
|
631 |
+
s2oa_examples = DV("data/curated_samples/s2orc_abstract_raw.json", 0, "S2ORC Abstract")
|
|
|
|
|
|
|
|
|
|
|
632 |
|
633 |
def get_pubmed_data(data_source: str = "Pubmed", doc_id: int = 3, target: str = "foo"):
|
634 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
652 |
target=target,
|
653 |
)
|
654 |
|
655 |
+
pubmed_examples = DV("data/curated_samples/pubmed_extract.json", 0, "PubMed")
|
|
|
|
|
|
|
|
|
|
|
656 |
|
657 |
def get_dmm_data(data_source: str = "DM Math", doc_id: int = 3, target: str = "foo"):
|
658 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
676 |
target=target,
|
677 |
)
|
678 |
|
679 |
+
dmm_examples = DV("data/curated_samples/dm_maths_extract.json", 0, "DM Math")
|
|
|
|
|
|
|
|
|
|
|
680 |
|
681 |
def get_pg19_data(data_source: str = "PG19", doc_id: int = 3, target: str = "foo"):
|
682 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
699 |
target=target,
|
700 |
)
|
701 |
|
702 |
+
pg19_examples = DV("data/curated_samples/pg19_raw.json", 0, "PG19")
|
|
|
|
|
|
|
|
|
|
|
703 |
|
704 |
def get_eu_data(data_source: str = "Europarl", doc_id: int = 3, target: str = "foo"):
|
705 |
doc_id = max(0, min(int(doc_id), 9))
|
|
|
722 |
target=target,
|
723 |
)
|
724 |
|
725 |
+
eu_examples = DV("data/curated_samples/europarl_raw.json", 0, "Europarl")
|
|
|
|
|
|
|
|
|
|
|
726 |
|
727 |
filtering_process = Div(
|
728 |
Section(
|