kmfoda committed on
Commit
0379fdb
0 Parent(s):

Initial upload

This view is limited to 50 files because the commit contains too many changes. See the raw diff.
Files changed (50)
  1. .gitattributes +35 -0
  2. .gitignore +5 -0
  3. LICENSE +201 -0
  4. README.md +12 -0
  5. align.py +364 -0
  6. app.py +323 -0
  7. components.py +567 -0
  8. examples/booksum/booksum_sf_sample.jsonl +3 -0
  9. examples/booksum/booksum_sf_sample_processed.cache/meta.yaml +3 -0
  10. examples/booksum/booksum_sf_sample_processed.cache/mgr/blocks/6074137904/data.feather +3 -0
  11. examples/booksum/booksum_sf_sample_processed.cache/mgr/blocks/6074137904/meta.yaml +3 -0
  12. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/data.dill +3 -0
  13. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml +3 -0
  14. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/state.dill +3 -0
  15. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill +3 -0
  16. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml +3 -0
  17. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill +3 -0
  18. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/data.dill +3 -0
  19. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/meta.yaml +3 -0
  20. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/state.dill +3 -0
  21. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/data.dill +3 -0
  22. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml +3 -0
  23. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/state.dill +3 -0
  24. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill +3 -0
  25. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml +3 -0
  26. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill +3 -0
  27. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/data.dill +3 -0
  28. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml +3 -0
  29. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/state.dill +3 -0
  30. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill +3 -0
  31. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml +3 -0
  32. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill +3 -0
  33. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/data.dill +3 -0
  34. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/meta.yaml +3 -0
  35. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/state.dill +3 -0
  36. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/data.dill +3 -0
  37. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml +3 -0
  38. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/state.dill +3 -0
  39. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill +3 -0
  40. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml +3 -0
  41. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill +3 -0
  42. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/data.dill +3 -0
  43. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml +3 -0
  44. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/state.dill +3 -0
  45. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill +3 -0
  46. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml +3 -0
  47. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill +3 -0
  48. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/data.dill +3 -0
  49. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/meta.yaml +3 -0
  50. examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/state.dill +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ examples/** filter=lfs diff=lfs merge=lfs -text
+ *.gif filter=lfs diff=lfs merge=lfs -text
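
(Each rule above is the line format that "git lfs track" writes into .gitattributes. As a sketch, with the *.feather pattern purely as an illustration rather than part of this commit, running

    git lfs track "*.feather"

from the repository root would append the line *.feather filter=lfs diff=lfs merge=lfs -text, routing such files through Git LFS.)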
.gitignore ADDED
@@ -0,0 +1,5 @@
+ .DS_Store
+ _workings
+ env
+ _env
+ __pycache__
LICENSE ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2021 SummVis
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Chapter Summary Summvis
+ emoji: 📈
+ colorFrom: green
+ colorTo: gray
+ sdk: streamlit
+ sdk_version: 1.10.0
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
align.py ADDED
@@ -0,0 +1,364 @@
+ import heapq
+ import itertools
+ from abc import ABC, abstractmethod
+ from collections import defaultdict
+ from operator import itemgetter
+ from typing import List, Dict, Tuple
+ from typing import Sequence
+ from abc import ABC
+
+ import numpy as np
+ import torch
+ from bert_score import BERTScorer
+ from nltk import PorterStemmer
+ from spacy.tokens import Doc, Span
+ from toolz import itertoolz
+ from transformers import AutoTokenizer
+ from transformers.tokenization_utils_base import PaddingStrategy
+
+
+ class EmbeddingModel(ABC):
+     @abstractmethod
+     def embed(
+         self,
+         sents: List[Span]
+     ):
+         pass
+
+
+ class ContextualEmbedding(EmbeddingModel):
+
+     def __init__(self, model, tokenizer_name, max_length, batch_size=32):
+         self.model = model
+         self.tokenizer = SpacyHuggingfaceTokenizer(tokenizer_name, max_length)
+         self._device = model.device
+         self.batch_size = batch_size
+
+     def embed(
+         self,
+         sents: List[Span]
+     ):
+         spacy_embs_list = []
+         for start_idx in range(0, len(sents), self.batch_size):
+             batch = sents[start_idx: start_idx + self.batch_size]
+             encoded_input, special_tokens_masks, token_alignments = self.tokenizer.batch_encode(batch)
+             encoded_input = {k: v.to(self._device) for k, v in encoded_input.items()}
+             with torch.no_grad():
+                 model_output = self.model(**encoded_input)
+             embeddings = model_output[0].cpu()
+             for embs, mask, token_alignment \
+                     in zip(embeddings, special_tokens_masks, token_alignments):
+                 mask = torch.tensor(mask)
+                 embs = embs[mask == 0]  # Filter embeddings at special token positions
+                 spacy_embs = []
+                 for hf_idxs in token_alignment:
+                     if hf_idxs is None:
+                         pooled_embs = torch.zeros_like(embs[0])
+                     else:
+                         pooled_embs = embs[hf_idxs].mean(dim=0)  # Pool embeddings that map to the same spacy token
+                     spacy_embs.append(pooled_embs.numpy())
+                 spacy_embs = np.stack(spacy_embs)
+                 spacy_embs = spacy_embs / np.linalg.norm(spacy_embs, axis=-1, keepdims=True)  # Normalize
+                 spacy_embs_list.append(spacy_embs)
+         for embs, sent in zip(spacy_embs_list, sents):
+             assert len(embs) == len(sent)
+         return spacy_embs_list
+
+
+ class StaticEmbedding(EmbeddingModel):
+
+     def embed(
+         self,
+         sents: List[Span]
+     ):
+         return [
+             np.stack([t.vector / (t.vector_norm or 1) for t in sent])
+             for sent in sents
+         ]
+
+
+ class Aligner(ABC):
+     @abstractmethod
+     def align(
+         self,
+         source: Doc,
+         targets: Sequence[Doc]
+     ) -> List[Dict]:
+         """Compute alignment from summary tokens to doc tokens
+         Args:
+             source: Source spaCy document
+             targets: Target spaCy documents
+         Returns: List of alignments, one for each target document"""
+         pass
+
+
+ class EmbeddingAligner(Aligner):
+
+     def __init__(
+         self,
+         embedding: EmbeddingModel,
+         threshold: float,
+         top_k: int,
+         baseline_val=0
+     ):
+         self.threshold = threshold
+         self.top_k = top_k
+         self.embedding = embedding
+         self.baseline_val = baseline_val
+
+     def align(
+         self,
+         source: Doc,
+         targets: Sequence[Doc]
+     ) -> List[Dict]:
+         """Compute alignment from summary tokens to doc tokens with greatest semantic similarity
+         Args:
+             source: Source spaCy document
+             targets: Target spaCy documents
+         Returns: List of alignments, one for each target document
+         """
+         if len(source) == 0:
+             return [{} for _ in targets]
+         all_sents = list(source.sents) + list(itertools.chain.from_iterable(target.sents for target in targets))
+         chunk_sizes = [_iter_len(source.sents)] + \
+                       [_iter_len(target.sents) for target in targets]
+         all_sents_token_embeddings = self.embedding.embed(all_sents)
+         chunked_sents_token_embeddings = _split(all_sents_token_embeddings, chunk_sizes)
+         source_sent_token_embeddings = chunked_sents_token_embeddings[0]
+         source_token_embeddings = np.concatenate(source_sent_token_embeddings)
+         for token_idx, token in enumerate(source):
+             if token.is_stop or token.is_punct:
+                 source_token_embeddings[token_idx] = 0
+         alignments = []
+         for i, target in enumerate(targets):
+             target_sent_token_embeddings = chunked_sents_token_embeddings[i + 1]
+             target_token_embeddings = np.concatenate(target_sent_token_embeddings)
+             for token_idx, token in enumerate(target):
+                 if token.is_stop or token.is_punct:
+                     target_token_embeddings[token_idx] = 0
+             alignment = defaultdict(list)
+             for score, target_idx, source_idx in self._emb_sim_sparse(
+                 target_token_embeddings,
+                 source_token_embeddings,
+             ):
+                 alignment[target_idx].append((source_idx, score))
+             # TODO: use argpartition to get nlargest
+             for j in list(alignment):
+                 alignment[j] = heapq.nlargest(self.top_k, alignment[j], itemgetter(1))
+             alignments.append(alignment)
+         return alignments
+
+     def _emb_sim_sparse(self, embs_1, embs_2):
+         sim = embs_1 @ embs_2.T
+         sim = (sim - self.baseline_val) / (1 - self.baseline_val)
+         keep = sim > self.threshold
+         keep_idxs_1, keep_idxs_2 = np.where(keep)
+         keep_scores = sim[keep]
+         return list(zip(keep_scores, keep_idxs_1, keep_idxs_2))
+
+
+ class BertscoreAligner(EmbeddingAligner):
+     def __init__(
+         self,
+         threshold,
+         top_k
+     ):
+         scorer = BERTScorer(lang="en", rescale_with_baseline=True)
+         model = scorer._model
+         embedding = ContextualEmbedding(model, "roberta-large", 510)
+         baseline_val = scorer.baseline_vals[2].item()
+
+         super(BertscoreAligner, self).__init__(
+             embedding, threshold, top_k, baseline_val
+         )
+
+
+ class StaticEmbeddingAligner(EmbeddingAligner):
+     def __init__(
+         self,
+         threshold,
+         top_k
+     ):
+         embedding = StaticEmbedding()
+         super(StaticEmbeddingAligner, self).__init__(
+             embedding, threshold, top_k
+         )
+
+
+ class NGramAligner(Aligner):
+
+     def __init__(self):
+         self.stemmer = PorterStemmer()
+
+     def align(
+         self,
+         source: Doc,
+         targets: List[Doc],
+     ) -> List[Dict]:
+
+         alignments = []
+         source_ngram_spans = self._get_ngram_spans(source)
+         for target in targets:
+             target_ngram_spans = self._get_ngram_spans(target)
+             alignments.append(
+                 self._align_ngrams(target_ngram_spans, source_ngram_spans)
+             )
+         return alignments
+
+     def _get_ngram_spans(
+         self,
+         doc: Doc,
+     ):
+         ngrams = []
+         for sent in doc.sents:
+             for n in range(1, len(list(sent))):
+                 tokens = [t for t in sent if not (t.is_stop or t.is_punct)]
+                 ngrams.extend(_ngrams(tokens, n))
+
+         def ngram_key(ngram):
+             return tuple(self.stemmer.stem(token.text).lower() for token in ngram)
+
+         key_to_ngrams = itertoolz.groupby(ngram_key, ngrams)
+         key_to_spans = {}
+         for k, grouped_ngrams in key_to_ngrams.items():
+             key_to_spans[k] = [
+                 (ngram[0].i, ngram[-1].i + 1)
+                 for ngram in grouped_ngrams
+             ]
+         return key_to_spans
+
+     def _align_ngrams(
+         self,
+         ngram_spans_1: Dict[Tuple[str], List[Tuple[int, int]]],
+         ngram_spans_2: Dict[Tuple[str], List[Tuple[int, int]]]
+     ) -> Dict[Tuple[int, int], List[Tuple[int, int]]]:
+         """Align ngram spans between two documents
+         Args:
+             ngram_spans_1: Map from (normalized_token1, normalized_token2, ...) n-gram tuple to a list of token spans
+                 of format (start_pos, end_pos)
+             ngram_spans_2: Same format as above, but for second text
+         Returns: map from each (start, end) span in text 1 to list of aligned (start, end) spans in text 2
+         """
+         if not ngram_spans_1 or not ngram_spans_2:
+             return {}
+         max_span_end_1 = max(span[1] for span in itertools.chain.from_iterable(ngram_spans_1.values()))
+         token_is_available_1 = [True] * max_span_end_1
+         matched_keys = list(set(ngram_spans_1.keys()) & set(ngram_spans_2.keys()))  # Matched normalized ngrams between texts
+         matched_keys.sort(key=len, reverse=True)  # Process n-grams from longest to shortest
+
+         alignment = defaultdict(list)  # Map from each matched span in text 1 to list of aligned spans in text 2
+         for key in matched_keys:
+             spans_1 = ngram_spans_1[key]
+             spans_2 = ngram_spans_2[key]
+             available_spans_1 = [span for span in spans_1 if all(token_is_available_1[slice(*span)])]
+             matched_spans_1 = []
+             if available_spans_1 and spans_2:
+                 # if ngram can be matched to available spans in both sequences
+                 for span in available_spans_1:
+                     # It's possible that these newly matched spans may be overlapping with one another, so
+                     # check that token positions are still available (only one span allowed per token in text 1):
+                     if all(token_is_available_1[slice(*span)]):
+                         matched_spans_1.append(span)
+                         token_is_available_1[slice(*span)] = [False] * (span[1] - span[0])
+             for span1 in matched_spans_1:
+                 alignment[span1] = spans_2
+
+         return alignment
+
+
+ class SpacyHuggingfaceTokenizer:
+     def __init__(
+         self,
+         model_name,
+         max_length
+     ):
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+         self.max_length = max_length
+
+     def batch_encode(
+         self,
+         sents: List[Span]
+     ):
+         token_alignments = []
+         token_ids_list = []
+
+         # Tokenize each sentence and add special tokens.
+         for sent in sents:
+             hf_tokens, token_alignment = self.tokenize(sent)
+             token_alignments.append(token_alignment)
+             token_ids = self.tokenizer.convert_tokens_to_ids(hf_tokens)
+             encoding = self.tokenizer.prepare_for_model(
+                 token_ids,
+                 add_special_tokens=True,
+                 padding=False,
+             )
+             token_ids_list.append(encoding['input_ids'])
+
+         # Add padding
+         max_length = max(map(len, token_ids_list))
+         attention_mask = []
+         input_ids = []
+         special_tokens_masks = []
+         for token_ids in token_ids_list:
+             encoding = self.tokenizer.prepare_for_model(
+                 token_ids,
+                 padding=PaddingStrategy.MAX_LENGTH,
+                 max_length=max_length,
+                 add_special_tokens=False
+             )
+             input_ids.append(encoding['input_ids'])
+             attention_mask.append(encoding['attention_mask'])
+             special_tokens_masks.append(
+                 self.tokenizer.get_special_tokens_mask(
+                     encoding['input_ids'],
+                     already_has_special_tokens=True
+                 )
+             )
+
+         encoded = {
+             'input_ids': torch.tensor(input_ids),
+             'attention_mask': torch.tensor(attention_mask)
+         }
+         return encoded, special_tokens_masks, token_alignments
+
+     def tokenize(
+         self,
+         sent
+     ):
+         """Convert spacy sentence to huggingface tokens and compute the alignment"""
+         hf_tokens = []
+         token_alignment = []
+         for i, token in enumerate(sent):
+             # "Tokenize" each word individually, so as to track the alignment between spaCy/HF tokens
+             # Prefix all tokens with a space except the first one in the sentence
+             if i == 0:
+                 token_text = token.text
+             else:
+                 token_text = ' ' + token.text
+             start_hf_idx = len(hf_tokens)
+             word_tokens = self.tokenizer.tokenize(token_text)
+             end_hf_idx = len(hf_tokens) + len(word_tokens)
+             if end_hf_idx < self.max_length:
+                 hf_tokens.extend(word_tokens)
+                 hf_idxs = list(range(start_hf_idx, end_hf_idx))
+             else:
+                 hf_idxs = None
+             token_alignment.append(hf_idxs)
+         return hf_tokens, token_alignment
+
+
+ def _split(data, sizes):
+     it = iter(data)
+     return [[next(it) for _ in range(size)] for size in sizes]
+
+
+ def _iter_len(it):
+     return sum(1 for _ in it)
+
+ # TODO: set up batching
+ # To get top K axis and value per row: https://stackoverflow.com/questions/42832711/using-np-argpartition-to-index-values-in-a-multidimensional-array
+
+
+ def _ngrams(tokens, n):
+     for i in range(len(tokens) - n + 1):
+         yield tokens[i:i + n]
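
A minimal usage sketch for the lexical aligner above (not part of this commit; it assumes align.py's dependencies such as spaCy, nltk, and toolz are installed):

    import spacy
    from align import NGramAligner

    nlp = spacy.blank("en")
    nlp.add_pipe("sentencizer")  # the aligners iterate over doc.sents
    document = nlp("The quick brown fox jumped over the lazy dog.")
    summary = nlp("A quick brown fox jumps.")
    # align() returns one dict per target, mapping each (start, end) token span
    # in the summary to the list of matching (start, end) spans in the document
    alignment = NGramAligner().align(document, [summary])[0]
    print(alignment)

Because keys are Porter-stemmed, "jumped" and "jumps" align even though the surface forms differ.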
app.py ADDED
@@ -0,0 +1,323 @@
+ import argparse
+ import json
+ import operator
+ import os
+ import re
+ from pathlib import Path
+
+ import spacy
+ import spacy.lang.en
+ import streamlit as st
+ from meerkat import DataPanel
+ from spacy.tokens import Doc
+
+ from align import NGramAligner, BertscoreAligner, StaticEmbeddingAligner
+ from components import MainView
+ from utils import clean_text
+
+ MIN_SEMANTIC_SIM_THRESHOLD = 0.1
+ MAX_SEMANTIC_SIM_TOP_K = 10
+
+ Doc.set_extension("name", default=None, force=True)
+ Doc.set_extension("column", default=None, force=True)
+
+
+ class Instance:
+     def __init__(self, id_, document, reference, preds, data=None):
+         self.id = id_
+         self.document = document
+         self.reference = reference
+         self.preds = preds
+         self.data = data
+
+
+ @st.cache(allow_output_mutation=True)
+ def load_from_index(filename, index):
+     with open(filename) as f:
+         for i, line in enumerate(f):
+             if i == index:
+                 return json.loads(line.strip())
+
+
+ def _nlp_key(x: spacy.Language):
+     return str(x.path)
+
+
+ @st.cache(allow_output_mutation=True, hash_funcs={spacy.lang.en.English: _nlp_key})
+ def load_dataset(path: str, nlp: spacy.Language):
+     if path.endswith('.jsonl'):
+         return DataPanel.from_jsonl(path)
+     try:
+         return DataPanel.read(path, nlp=nlp)
+     except NotADirectoryError:
+         return DataPanel.from_jsonl(path)
+
+
+ @st.cache(allow_output_mutation=True)
+ def get_nlp():
+     try:
+         nlp = spacy.load("en_core_web_lg")
+     except OSError:
+         nlp = spacy.load("en_core_web_sm")
+         is_lg = False
+     else:
+         is_lg = True
+     nlp.add_pipe('sentencizer', before="parser")
+     return nlp, is_lg
+
+
+ def retrieve(dataset, index, filename=None):
+     if index >= len(dataset):
+         st.error(f"Index {index} exceeds dataset length.")
+
+     eval_dataset = None
+     if filename:
+         # TODO: Handle this through dedicated fields
+         if "cnn_dailymail" in filename:
+             eval_dataset = "cnndm"
+         elif "xsum" in filename:
+             eval_dataset = "xsum"
+
+     data = dataset[index]
+     id_ = data.get('id', '')
+
+     try:
+         document = data['spacy:document']
+     except KeyError:
+         if not is_lg:
+             st.error("The 'en_core_web_lg' model is required unless loading from a cached file. "
+                      "To install: 'python -m spacy download en_core_web_lg'")
+         try:
+             text = data['document']
+         except KeyError:
+             text = data['article']
+         if not text:
+             st.error("Document is blank")
+             return
+         document = nlp(text if args.no_clean else clean_text(text))
+     document._.name = "Document"
+     document._.column = "document"
+
+     try:
+         reference = data['spacy:summary:reference']
+
+     except KeyError:
+         if not is_lg:
+             st.error("The 'en_core_web_lg' model is required unless loading from a cached file. "
+                      "To install: 'python -m spacy download en_core_web_lg'")
+         try:
+             text = data['summary'] if 'summary' in data else data['summary:reference']
+         except KeyError:
+             text = data.get('highlights')
+         if text:
+             reference = nlp(text if args.no_clean else clean_text(text))
+         else:
+             reference = None
+     if reference is not None:
+         reference._.name = "Reference"
+         reference._.column = "summary:reference"
+
+     model_names = set()
+     for k in data:
+         m = re.match('(preprocessed_)?summary:(?P<model>.*)', k)
+         if m:
+             model_name = m.group('model')
+             if model_name != 'reference':
+                 model_names.add(model_name)
+
+     preds = []
+     for model_name in model_names:
+         try:
+             pred = data[f"spacy:summary:{model_name}"]
+         except KeyError:
+             if not is_lg:
+                 st.error("The 'en_core_web_lg' model is required unless loading from a cached file. "
+                          "To install: 'python -m spacy download en_core_web_lg'")
+             text = data[f"summary:{model_name}"]
+             pred = nlp(text if args.no_clean else clean_text(text))
+
+         parts = model_name.split("-")
+         primary_sort = 0
+         if len(parts) == 2:
+             model, train_dataset = parts
+             if train_dataset == eval_dataset:
+                 formatted_model_name = model.upper()
+             else:
+                 formatted_model_name = f"{model.upper()} ({train_dataset.upper()}-trained)"
+                 if train_dataset in ["xsum", "cnndm"]:
+                     primary_sort = 1
+                 else:
+                     primary_sort = 2
+         else:
+             formatted_model_name = model_name.upper()
+         pred._.name = formatted_model_name
+         pred._.column = f"summary:{model_name}"
+         preds.append(
+             ((primary_sort, formatted_model_name), pred)
+         )
+
+     preds = [pred for _, pred in sorted(preds)]
+
+     return Instance(
+         id_=id_,
+         document=document,
+         reference=reference,
+         preds=preds,
+         data=data,
+     )
+
+
+ def filter_alignment(alignment, threshold, top_k):
+     filtered_alignment = {}
+     for k, v in alignment.items():
+         filtered_matches = [(match_idx, score) for match_idx, score in v if score >= threshold]
+         if filtered_matches:
+             filtered_alignment[k] = sorted(filtered_matches, key=operator.itemgetter(1), reverse=True)[:top_k]
+     return filtered_alignment
+
+
+ def select_comparison(example):
+     all_summaries = []
+
+     if example.reference:
+         all_summaries.append(example.reference)
+     if example.preds:
+         all_summaries.extend(example.preds)
+
+     from_documents = [example.document]
+     if example.reference:
+         from_documents.append(example.reference)
+     document_names = [document._.name for document in from_documents]
+     select_document_name = sidebar_placeholder_from.selectbox(
+         label="Comparison FROM:",
+         options=document_names
+     )
+     document_index = document_names.index(select_document_name)
+     selected_document = from_documents[document_index]
+
+     remaining_summaries = [summary for summary in all_summaries if
+                            summary._.name != selected_document._.name]
+     remaining_summary_names = [summary._.name for summary in remaining_summaries]
+
+     selected_summary_names = sidebar_placeholder_to.multiselect(
+         'Comparison TO:',
+         remaining_summary_names,
+         remaining_summary_names
+     )
+     selected_summaries = []
+     for summary_name in selected_summary_names:
+         summary_index = remaining_summary_names.index(summary_name)
+         selected_summaries.append(remaining_summaries[summary_index])
+     return selected_document, selected_summaries
+
+
+ def show_main(example):
+     # Get user input
+
+     semantic_sim_type = st.sidebar.radio(
+         "Semantic similarity type:",
+         ["Contextual embedding", "Static embedding"]
+     )
+     semantic_sim_threshold = st.sidebar.slider(
+         "Semantic similarity threshold:",
+         min_value=MIN_SEMANTIC_SIM_THRESHOLD,
+         max_value=1.0,
+         step=0.1,
+         value=0.2,
+     )
+     semantic_sim_top_k = st.sidebar.slider(
+         "Semantic similarity top-k:",
+         min_value=1,
+         max_value=MAX_SEMANTIC_SIM_TOP_K,
+         step=1,
+         value=10,
+     )
+
+     document, summaries = select_comparison(example)
+     layout = st.sidebar.radio("Layout:", ["Vertical", "Horizontal"]).lower()
+     scroll = True
+     gray_out_stopwords = st.sidebar.checkbox(label="Gray out stopwords", value=True)
+
+     # Gather data
+     try:
+         lexical_alignments = [
+             example.data[f'{NGramAligner.__name__}:spacy:{document._.column}:spacy:{summary._.column}']
+             for summary in summaries
+         ]
+     except KeyError:
+         lexical_alignments = NGramAligner().align(document, summaries)
+
+     if semantic_sim_type == "Static embedding":
+         try:
+             semantic_alignments = [
+                 example.data[f'{StaticEmbeddingAligner.__name__}:spacy:{document._.column}:spacy:{summary._.column}']
+                 for summary in summaries
+             ]
+         except KeyError:
+             semantic_alignments = StaticEmbeddingAligner(
+                 semantic_sim_threshold,
+                 semantic_sim_top_k).align(
+                 document,
+                 summaries
+             )
+     else:
+         try:
+             semantic_alignments = [
+                 example.data[f'{BertscoreAligner.__name__}:spacy:{document._.column}:spacy:{summary._.column}']
+                 for summary in summaries
+             ]
+         except KeyError:
+             semantic_alignments = BertscoreAligner(semantic_sim_threshold,
+                                                    semantic_sim_top_k).align(document,
+                                                                              summaries)
+
+     MainView(
+         document,
+         summaries,
+         semantic_alignments,
+         lexical_alignments,
+         layout,
+         scroll,
+         gray_out_stopwords,
+     ).show(height=720)
+
+
+ if __name__ == "__main__":
+
+     st.set_page_config(layout="wide")
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--path', type=str, default='data')
+     parser.add_argument('--no_clean', action='store_true', default=False,
+                         help="Do not clean text (remove extraneous spaces, newlines).")
+     args = parser.parse_args()
+
+     nlp, is_lg = get_nlp()
+
+     path = Path(args.path)
+     path_dir = path.parent
+     all_files = set(map(os.path.basename, path_dir.glob('*')))
+     files = sorted([
+         fname for fname in all_files if not (fname.endswith(".py") or fname.startswith("."))
+     ])
+     if path.is_file():
+         try:
+             file_index = files.index(path.name)
+         except ValueError:
+             raise FileNotFoundError(f"File not found: {path.name}")
+     else:
+         file_index = 0
+     col1, col2 = st.columns((3, 1))
+     filename = col1.selectbox(label="File:", options=files, index=file_index)
+     dataset = load_dataset(str(path_dir / filename), nlp=nlp)
+
+     dataset_size = len(dataset)
+     query = col2.number_input(f"Index (Size: {dataset_size}):", value=0, min_value=0, max_value=dataset_size - 1)
+
+     sidebar_placeholder_from = st.sidebar.empty()
+     sidebar_placeholder_to = st.sidebar.empty()
+
+     if query is not None:
+         example = retrieve(dataset, query, filename)
+         if example:
+             show_main(example)
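
To try the app locally, one option is the following sketch (example.jsonl is a stand-in for an actual data file; the bare "--" passes the flags through Streamlit to the script's argparse):

    streamlit run app.py -- --path example.jsonl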
components.py ADDED
@@ -0,0 +1,567 @@
1
+ from collections import defaultdict
2
+ from itertools import count
3
+ from operator import itemgetter
4
+ from pathlib import Path
5
+ from typing import Dict, Optional
6
+ from typing import List, Tuple, Union
7
+
8
+ import htbuilder
9
+ import streamlit as st
10
+ from htbuilder import span, div, script, style, link, styles, HtmlElement, br
11
+ from htbuilder.units import px
12
+ from spacy.tokens import Doc
13
+
14
+ palette = [
15
+ "#66c2a5",
16
+ "#fc8d62",
17
+ "#8da0cb",
18
+ "#e78ac3",
19
+ "#a6d854",
20
+ "#ffd92f",
21
+ "#e5c494",
22
+ "#b3b3b3",
23
+ ]
24
+ inactive_color = "#BBB"
25
+
26
+
27
+ def local_stylesheet(path):
28
+ with open(path) as f:
29
+ css = f.read()
30
+ return style()(
31
+ css
32
+ )
33
+
34
+
35
+ def remote_stylesheet(url):
36
+ return link(
37
+ href=url
38
+ )
39
+
40
+
41
+ def local_script(path):
42
+ with open(path) as f:
43
+ code = f.read()
44
+ return script()(
45
+ code
46
+ )
47
+
48
+
49
+ def remote_script(url):
50
+ return script(
51
+ src=url
52
+ )
53
+
54
+
55
+ def get_color(sent_idx):
56
+ return palette[sent_idx % len(palette)]
57
+
58
+
59
+ def hex_to_rgb(hex):
60
+ hex = hex.replace("#", '')
61
+ return tuple(int(hex[i:i + 2], 16) for i in (0, 2, 4))
62
+
63
+
64
+ def color_with_opacity(hex_color, opacity):
65
+ rgb = hex_to_rgb(hex_color)
66
+ return f"rgba({rgb[0]},{rgb[1]},{rgb[2]},{opacity:.2f})"
67
+
68
+
69
+ class Component:
70
+
71
+ def show(self, width=None, height=None, scrolling=True, **kwargs):
72
+ out = div(style=styles(
73
+ **kwargs
74
+ ))(self.html())
75
+ html = str(out)
76
+ st.components.v1.html(html, width=width, height=height, scrolling=scrolling)
77
+
78
+ def html(self):
79
+ raise NotImplemented
80
+
81
+
82
+ class MainView(Component):
83
+
84
+ def __init__(
85
+ self,
86
+ document: Doc,
87
+ summaries: List[Doc],
88
+ semantic_alignments: Optional[List[Dict]],
89
+ lexical_alignments: Optional[List[Dict]],
90
+ layout: str,
91
+ scroll: bool,
92
+ gray_out_stopwords: bool
93
+ ):
94
+ self.document = document
95
+ self.summaries = summaries
96
+ self.semantic_alignments = semantic_alignments
97
+ self.lexical_alignments = lexical_alignments
98
+ self.layout = layout
99
+ self.scroll = scroll
100
+ self.gray_out_stopwords = gray_out_stopwords
101
+
102
+ def html(self):
103
+
104
+ # Add document elements
105
+ if self.document._.name == 'Document':
106
+ document_name = 'Source Document'
107
+ else:
108
+ document_name = self.document._.name + ' summary'
109
+ doc_header = div(
110
+ id_="document-header"
111
+ )(
112
+ document_name
113
+ )
114
+ doc_elements = []
115
+
116
+ # Add document content, which comprises multiple elements, one for each summary. Only the elment corresponding to
117
+ # selected summary will be visible.
118
+
119
+ mu = MultiUnderline()
120
+
121
+ for summary_idx, summary in enumerate(self.summaries):
122
+ token_idx_to_sent_idx = {}
123
+ for sent_idx, sent in enumerate(summary.sents):
124
+ for token in sent:
125
+ token_idx_to_sent_idx[token.i] = sent_idx
126
+ is_selected_summary = (summary_idx == 0) # By default, first summary is selected
127
+
128
+ if self.semantic_alignments is not None:
129
+ doc_token_idx_to_matches = defaultdict(list)
130
+ semantic_alignment = self.semantic_alignments[summary_idx]
131
+ for summary_token_idx, matches in semantic_alignment.items():
132
+ for doc_token_idx, sim in matches:
133
+ doc_token_idx_to_matches[doc_token_idx].append((summary_token_idx, sim))
134
+ else:
135
+ doc_token_idx_to_matches = {}
136
+
137
+ token_elements = []
138
+ for doc_token_idx, doc_token in enumerate(self.document):
139
+ if doc_token.is_stop or doc_token.is_punct:
140
+ classes = ["stopword"]
141
+ if self.gray_out_stopwords:
142
+ classes.append("grayed-out")
143
+ el = span(
144
+ _class=" ".join(classes)
145
+ )(
146
+ doc_token.text
147
+ )
148
+
149
+ else:
150
+ matches = doc_token_idx_to_matches.get(doc_token_idx)
151
+ if matches:
152
+ summary_token_idx, sim = max(matches, key=itemgetter(1))
153
+ sent_idx = token_idx_to_sent_idx[summary_token_idx]
154
+ color_primary = get_color(sent_idx)
155
+ highlight_color_primary = color_with_opacity(color_primary, sim)
156
+ props = {
157
+ 'data-highlight-id': str(doc_token_idx),
158
+ 'data-primary-color': highlight_color_primary
159
+ }
160
+ match_classes = []
161
+ for summary_token_idx, sim in matches:
162
+ sent_idx = token_idx_to_sent_idx[summary_token_idx]
163
+ match_classes.append(f"summary-highlight-{summary_idx}-{summary_token_idx}")
164
+ color = color_with_opacity(get_color(sent_idx), sim)
165
+ props[f"data-color-{summary_idx}-{summary_token_idx}"] = color
166
+ props["data-match-classes"] = " ".join(match_classes)
167
+ el = self._highlight(
168
+ doc_token.text,
169
+ highlight_color_primary,
170
+ color_primary,
171
+ match_classes + ["annotation-hidden"],
172
+ **props
173
+ )
174
+ else:
175
+ el = doc_token.text
176
+ token_elements.append(el)
177
+
178
+ spans = []
179
+ if self.lexical_alignments is not None:
180
+ lexical_alignment = self.lexical_alignments[summary_idx]
181
+ for summary_span, doc_spans in lexical_alignment.items():
182
+ summary_span_start, summary_span_end = summary_span
183
+ span_id = f"{summary_idx}-{summary_span_start}-{summary_span_end}"
184
+ sent_idx = token_idx_to_sent_idx[summary_span_start]
185
+ for doc_span_start, doc_span_end in doc_spans:
186
+ spans.append((
187
+ doc_span_start,
188
+ doc_span_end,
189
+ sent_idx,
190
+ get_color(sent_idx),
191
+ span_id
192
+ ))
193
+ token_elements = mu.markup(token_elements, spans)
194
+
195
+ classes = ["main-doc", "bordered"]
196
+ if self.scroll:
197
+ classes.append("scroll")
198
+
199
+ main_doc = div(
200
+ _class=" ".join(classes)
201
+ )(
202
+ token_elements
203
+ ),
204
+
205
+ classes = ["doc"]
206
+ if is_selected_summary:
207
+ classes.append("display")
208
+ else:
209
+ classes.append("nodisplay")
210
+ doc_elements.append(
211
+ div(
212
+ **{
213
+ "class": " ".join(classes),
214
+ "data-index": summary_idx
215
+ }
216
+ )(
217
+ main_doc,
218
+ div(_class="proxy-doc"),
219
+ div(_class="proxy-scroll")
220
+ )
221
+ )
222
+
223
+ summary_title = "Summary"
224
+ summary_header = div(
225
+ id_="summary-header"
226
+ )(
227
+ summary_title,
228
+ div(id="summary-header-gap"),
229
+ )
230
+
231
+ summary_items = []
232
+ for summary_idx, summary in enumerate(self.summaries):
233
+ token_idx_to_sent_idx = {}
234
+ for sent_idx, sent in enumerate(summary.sents):
235
+ for token in sent:
236
+ token_idx_to_sent_idx[token.i] = sent_idx
237
+
238
+ spans = []
239
+ matches_ngram = [False] * len(list(summary))
240
+ if self.lexical_alignments is not None:
241
+ lexical_alignment = self.lexical_alignments[summary_idx]
242
+ for summary_span in lexical_alignment.keys():
243
+ start, end = summary_span
244
+ matches_ngram[slice(start, end)] = [True] * (end - start)
245
+ span_id = f"{summary_idx}-{start}-{end}"
246
+ sent_idx = token_idx_to_sent_idx[start]
247
+ spans.append((
248
+ start,
249
+ end,
250
+ sent_idx,
251
+ get_color(sent_idx),
252
+ span_id
253
+ ))
254
+
255
+ if self.semantic_alignments is not None:
256
+ semantic_alignment = self.semantic_alignments[summary_idx]
257
+ else:
258
+ semantic_alignment = {}
259
+ token_elements = []
260
+ for token_idx, token in enumerate(summary):
261
+ if token.is_stop or token.is_punct:
262
+ classes = ["stopword"]
263
+ if self.gray_out_stopwords:
264
+ classes.append("grayed-out")
265
+ el = span(
266
+ _class=" ".join(classes)
267
+ )(
268
+ token.text
269
+ )
270
+ else:
271
+ classes = []
272
+ if token.ent_iob_ in ('I', 'B'):
273
+ classes.append("entity")
274
+ if matches_ngram[token_idx]:
275
+ classes.append("matches-ngram")
276
+ matches = semantic_alignment.get(token_idx)
277
+ if matches:
278
+ top_match = max(matches, key=itemgetter(1))
279
+ top_sim = max(top_match[1], 0)
280
+ top_doc_token_idx = top_match[0]
281
+ props = {
282
+ "data-highlight-id": f"{summary_idx}-{token_idx}",
283
+ "data-top-doc-highlight-id": str(top_doc_token_idx),
284
+ "data-top-doc-sim": f"{top_sim:.2f}",
285
+ }
286
+ classes.extend([
287
+ "annotation-hidden",
288
+ f"summary-highlight-{summary_idx}-{token_idx}"
289
+ ])
290
+ sent_idx = token_idx_to_sent_idx[token_idx]
291
+ el = self._highlight(
292
+ token.text,
293
+ color_with_opacity(get_color(sent_idx), top_sim),
294
+ color_with_opacity(get_color(sent_idx), 1),
295
+ classes,
296
+ **props
297
+ )
298
+ else:
299
+ if classes:
300
+ el = span(_class=" ".join(classes))(token.text)
301
+ else:
302
+ el = token.text
303
+ token_elements.append(el)
304
+
305
+ token_elements = mu.markup(token_elements, spans)
306
+
307
+ classes = ["summary-item"]
308
+ if summary_idx == 0: # Default is for first summary to be selected
309
+ classes.append("selected")
310
+
311
+ summary_items.append(
312
+ div(
313
+ **{"class": ' '.join(classes), "data-index": summary_idx}
314
+ )(
315
+ div(_class="name")(summary._.name),
316
+ div(_class="content")(token_elements)
317
+ )
318
+ )
+         classes = ["summary-list", "bordered"]
+         if self.scroll:
+             classes.append("scroll")
+         if self.lexical_alignments is not None:
+             classes.append("has-lexical-alignment")
+         if self.semantic_alignments is not None:
+             classes.append("has-semantic-alignment")
+         summary_list = div(
+             _class=" ".join(classes)
+         )(
+             summary_items
+         )
+
+         annotation_key = """
+             <ul class="annotation-key">
+                 <li class="annotation-key-label">Annotations:</li>
+                 <li id="option-lexical" class="option selected">
+                     <span class="annotation-key-ngram">N-Gram overlap</span>
+                 </li>
+                 <li id="option-semantic" class="option selected">
+                     <span class="annotation-key-semantic">Semantic overlap</span>
+                 </li>
+                 <li id="option-novel" class="option selected">
+                     <span class="annotation-key-novel">Novel words</span>
+                 </li>
+                 <li id="option-entity" class="option selected">
+                     <span class="annotation-key-entity">Novel entities</span>
+                 </li>
+             </ul>
+         """
+
+         body = div(
+             annotation_key,
+             div(
+                 _class=f"vis-container {self.layout}-layout"
+             )(
+                 div(
+                     _class="doc-container"
+                 )(
+                     doc_header,
+                     *doc_elements
+                 ),
+                 div(
+                     _class="summary-container"
+                 )(
+                     summary_header,
+                     summary_list
+                 )
+             ),
+         )
+         return [
+             """<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-+0n0xVW2eSR5OomGNYDnhzAbDsOXxcvSN1TPprVMTNDbiYZCxYbOOl7+AMvyTG2x" crossorigin="anonymous">""",
+             local_stylesheet(Path(__file__).parent / "resources" / "summvis.css"),
+             """<link rel="preconnect" href="https://fonts.gstatic.com">
+             <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500&display=swap" rel="stylesheet">""",
+             body,
+             """<script
+                 src="https://code.jquery.com/jquery-3.5.1.min.js"
+                 integrity="sha256-9/aliU8dGd2tb6OSsuzixeV4y/faTqgFtohetphbbj0="
+                 crossorigin="anonymous"></script>
+             <script src="https://cdn.jsdelivr.net/npm/bootstrap@4.6.0/dist/js/bootstrap.bundle.min.js"
+                 integrity="sha384-Piv4xVNRyMGpqkS2by6br4gNJ7DXjqk09RmUpJ8jgGtD7zP9yug3goQfGII0yAns"
+                 crossorigin="anonymous"></script>""",
+             local_script(Path(__file__).parent / "resources" / "jquery.color-2.1.2.min.js"),
+             local_script(Path(__file__).parent / "resources" / "summvis.js"),
+             """<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.1/dist/js/bootstrap.bundle.min.js" integrity="sha384-gtEjrD/SeCtmISkJkNUaaKMoLD0//ElJ19smozuHV6z3Iehds+3Ulb9Bn9Plx0x4" crossorigin="anonymous"></script>"""
+         ]
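The returned list mixes raw HTML strings with htbuilder elements; a caller is expected to stringify each fragment and concatenate them into a page. A sketch of such a caller, assuming a hypothetical `view` instance of this component and `view.html()` as a stand-in name for the method above:

fragments = view.html()  # hypothetical accessor returning the list above
page = "\n".join(str(fragment) for fragment in fragments)
with open("summvis_view.html", "w") as f:
    f.write(page)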
+
+     def _highlight(
+         self,
+         token: Union[str, HtmlElement],
+         background_color,
+         dotted_underline_color,
+         classes: List[str],
+         **props
+     ):
+         return span(
+             _class=" ".join(classes + ["highlight"]),
+             style=styles(
+                 background_color=background_color,
+                 border_bottom=f"4px dotted {dotted_underline_color}",
+             ),
+             **props
+         )(token)
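`get_color` and `color_with_opacity` are defined (or imported) in the portion of components.py above this excerpt. For readers of this hunk in isolation, a plausible minimal stand-in, assuming a fixed per-sentence rgb palette (the names and colors here are illustrative; the real definitions may differ):

PALETTE = ["rgb(88, 171, 209)", "rgb(236, 122, 8)", "rgb(102, 166, 30)"]  # illustrative colors

def get_color(sent_idx):
    # One color per sentence, cycling through the palette
    return PALETTE[sent_idx % len(PALETTE)]

def color_with_opacity(color, opacity):
    # "rgb(r, g, b)" -> "rgba(r, g, b, opacity)"
    r, g, b = color[4:-1].split(", ")
    return f"rgba({r}, {g}, {b}, {opacity:.2f})"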
+
+
+ SPACE = "&ensp;"
+
+
+ class MultiUnderline:
+     def __init__(
+         self,
+         underline_thickness=3,
+         underline_spacing=1
+     ):
+         self.underline_thickness = underline_thickness
+         self.underline_spacing = underline_spacing
+
+     def markup(
+         self,
+         tokens: List[Union[str, HtmlElement]],
+         spans: List[Tuple[int, int, int, str, str]]
+     ):
+         """Style text with multiple layers of colored underlines.
+
+         Args:
+             tokens: list of tokens, each either a string or an html element
+             spans: list of (start_pos, end_pos, rank, color, id) tuples, where:
+                 start_pos: start position of the underline span (inclusive)
+                 end_pos: end position of the underline span (exclusive)
+                 rank: rank for the stacking order of underlines, all else being equal
+                 color: color of the underline
+                 id: id of the underline (encoded as a class label in the resulting html element)
+
+         Returns:
+             List of HTML elements
+         """
+
+         # Map from span start position to the spans beginning there
+         start_to_spans = defaultdict(list)
+         for span in spans:
+             start = span[0]
+             start_to_spans[start].append(span)
+
+         # Map from each underline slot position to its list of active spans
+         slot_to_spans = {}
+
+         # Collection of html elements
+         elements = []
+
+         first_token_in_line = True
+         for pos, token in enumerate(tokens):
+             # Remove spans that are no longer active (i.e. span end <= pos)
+             slot_to_spans = defaultdict(
+                 list,
+                 {
+                     slot: [span for span in spans if span[1] > pos]  # span[1] is the span's end position
+                     for slot, spans in slot_to_spans.items() if spans
+                 }
+             )
+
+             # Add underlines to the space between tokens for any continuing spans
+             if first_token_in_line:
+                 first_token_in_line = False
+             else:
+                 elements.append(self._get_underline_element(SPACE, slot_to_spans))
+
+             # Find a slot for any spans that start at this position
+             new_spans = start_to_spans.pop(pos, None)
+             if new_spans:
+                 # Sort by span length (longest first), then by rank
+                 new_spans.sort(key=lambda span: (-(span[1] - span[0]), span[2]))
+                 for new_span in new_spans:
+                     # Find an existing slot or add a new one
+                     for slot, spans in sorted(slot_to_spans.items(), key=itemgetter(0)):  # Sort by slot index
+                         if spans:
+                             # The first span in a slot strictly contains all other spans in that slot
+                             containing_span = spans[0]
+                             containing_start, containing_end = containing_span[0:2]
+                             containing_color = containing_span[3]
+                             start, end = new_span[0:2]
+                             color = new_span[3]
+                             # If the new span (1) is strictly contained in this span, or (2) exactly
+                             # matches this span and has the same color, add it to this slot
+                             if end <= containing_end and (
+                                 (start > containing_start or end < containing_end) or
+                                 (start == containing_start and end == containing_end and color == containing_color)
+                             ):
+                                 spans.append(new_span)
+                                 break
+                     else:
+                         # No existing slot fits; take the first free slot index
+                         for slot_index in count():
+                             spans = slot_to_spans[slot_index]
+                             if not spans:  # If the slot is free, take it
+                                 spans.append(new_span)
+                                 break
+             if token in ("\n", "\r", "\r\n"):
+                 elements.append(br())
+                 first_token_in_line = True
+             else:
+                 # Add underlines to the token for all active spans
+                 elements.append(self._get_underline_element(token, slot_to_spans))
+         return elements
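One convention worth noting: `end_pos` is exclusive, since a span stays active only while `span[1] > pos`. A quick illustration (this assumes the class above; the token and span values are arbitrary):

from htbuilder import div

mu = MultiUnderline()
elements = mu.markup(["a", "b", "c"], [(0, 2, 0, "blue", "b1")])
# "a" and "b" (positions 0 and 1) are underlined in blue, as is the space
# between them; "c" at position 2 gets no visible underline because the
# span has already expired there.
print(str(div(elements)))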
+
+     def _get_underline_element(self, token, slot_to_spans):
+         if not slot_to_spans:
+             return token
+         max_slot_index = max(slot_to_spans.keys())
+         element = token
+         for slot_index in range(max_slot_index + 1):
+             spans = slot_to_spans[slot_index]
+             if not spans:
+                 color = "rgba(0, 0, 0, 0)"  # Transparent placeholder underline (opacity 0)
+                 props = {}
+             else:
+                 containing_span = spans[0]
+                 color = containing_span[3]
+                 classes = ["underline"]
+                 if token != SPACE:
+                     classes.append("token-underline")
+                 classes.extend([f"span-{span[4]}" for span in spans])  # Encode span ids in the class names
+                 props = {
+                     "class": " ".join(classes),
+                     "data-primary-color": color
+                 }
+             if slot_index == 0:
+                 padding_bottom = 0
+             else:
+                 padding_bottom = self.underline_spacing
+             display = "inline-block"
+             element = htbuilder.span(
+                 style=styles(
+                     display=display,
+                     border_bottom=f"{self.underline_thickness}px solid",
+                     border_color=color,
+                     padding_bottom=px(padding_bottom),
+                 ),
+                 **props
+             )(element)
+
+         # Return the outermost nested span
+         return element
+
+
+ if __name__ == "__main__":
+     from htbuilder import div
+
+     # Test: mix plain-string tokens with a pre-styled html element
+     tokens = [
+         "The",
+         htbuilder.span(style=styles(color="red"))("quick"),
+         "brown",
+         "fox",
+         "jumps"
+     ]
+     spans = [
+         (0, 2, 0, "green", "green1"),
+         (1, 3, 0, "orange", "orange1"),
+         (3, 4, 0, "red", "red1"),
+         (2, 4, 0, "blue", "blue1"),
+         (1, 5, 0, "orange", "orange1"),
+     ]
+
+     mu = MultiUnderline()
+     html = str(div(mu.markup(tokens, spans)))
+     print(html)
examples/booksum/booksum_sf_sample.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:480c9b9d1e401075aa1fc17d9409f899fc01462202b8688da1cdef8a7d7fef94
+ size 102823

examples/booksum/booksum_sf_sample_processed.cache/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5f78e53c09ab076c9b88a065d732be8e1f99d480293c505d86421dd59f20903e
+ size 2139

examples/booksum/booksum_sf_sample_processed.cache/mgr/blocks/6074137904/data.feather ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc9991b16ce0d69587ae842d082a8cc6dca7c16bf52b55f53e5d894b6ff12773
+ size 139442

examples/booksum/booksum_sf_sample_processed.cache/mgr/blocks/6074137904/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8a0664bbc8c1074184efb08415a61f149a4b2385b57df87bef71ec5e511aa48a
+ size 63

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:adf75a80c7218afdcd8690c9bcf50d96b312d4b9222dbca0f03e014f265bc48f
+ size 223683

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f2e5968ac162a2a3c0c875cef5e5df1991598147c99b5899ee291181aecfc88d
+ size 186772

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9fa245b2f5f44ae0d3a4c6338c124e681a397d85e77bf9a852bbd039ab9c6e2c
+ size 230480

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:document:spacy:summary:reference/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:12017f50df0c5694c1e3d60dc5848c23e54471f661a1f61898fc939f10abbdec
+ size 111010

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f7be389d66dc4c379035647c6d901c33f83cfbe7a532798ef046c80d07a136ed
+ size 100990

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/BertscoreAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5489f6e5d6f02ddc656f3ac10a920312cb45a37e0342a1f105b7631a278f31e3
+ size 16355

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f0f99fac854e2681b08a8a941b4f069f994b9c5842364bde9b9a77b52eec85ba
+ size 16791

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68c30a9596aa97a27a12fe057a9e5ab10e0c5f503a0d42589d08c789be3f7fee
+ size 15608

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:document:spacy:summary:reference/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bb100474825bb20478a36f7dcea6b3f718ac5ff864540290644c560a00c8090a
+ size 4634

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd6faabe10cb2532d6f6258091ba47d503c51798bc9664c6b873d1b52ae879b4
+ size 4915

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/NGramAligner:spacy:summary:reference:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51cc7fde06cea550f5f10749e1c7d534fe23ba6b635785e0c5af2595d6ba47e8
+ size 320091

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf7ca905febdfef1cd2f702125c2daa131af6fe6ff360bfc7121c6b595e44349
+ size 294430

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:long_t5_tglobal_xl_booksum_sfsum/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/data.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f0cfe2f81a5e4fbc5e3bde2a436c4171507caa3a19ee2379458e27a9a033fbb
+ size 341548

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/meta.yaml ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a834ab33bb0405b065b9ca6ae9a57dbdef46609cd3079d68de2a4c45dda60a1
+ size 70

examples/booksum/booksum_sf_sample_processed.cache/mgr/columns/StaticEmbeddingAligner:spacy:document:spacy:summary:reference/state.dill ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d4540a4caae3db3fd5922acb905088aa5ee4ae29ec3e06c8e2b397716c7106f
+ size 126