jvamvas committed on
Commit
57db812
1 Parent(s): c923c2b

Add description

Browse files
Files changed (4) hide show
  1. app.py +5 -0
  2. description.md +29 -0
  3. preamble.md +3 -0
  4. requirements.txt +1 -1
app.py CHANGED
@@ -59,7 +59,10 @@ def generate_diff(text_a: str, text_b: str, method: str):
59
  my_pipeline = None
60
  tokenizer = Whitespace()
61
 
 
62
  with gr.Blocks() as demo:
 
 
63
  with gr.Row():
64
  text_a = gr.Textbox(label="Text A", value="Chinese shares close higher Friday.", lines=2)
65
  text_b = gr.Textbox(label="Text B", value="Les actions chinoises clôturent en baisse mercredi.", lines=2)
@@ -77,6 +80,8 @@ with gr.Blocks() as demo:
77
  inputs=[text_a, text_b, method],
78
  outputs=[output_a, output_b],
79
  )
 
 
80
 
81
 
82
  if my_pipeline is None:
 
59
  my_pipeline = None
60
  tokenizer = Whitespace()
61
 
62
+
63
  with gr.Blocks() as demo:
64
+ preamble = (Path(__file__).parent / "preamble.md").read_text()
65
+ gr.Markdown(preamble)
66
  with gr.Row():
67
  text_a = gr.Textbox(label="Text A", value="Chinese shares close higher Friday.", lines=2)
68
  text_b = gr.Textbox(label="Text B", value="Les actions chinoises clôturent en baisse mercredi.", lines=2)
 
80
  inputs=[text_a, text_b, method],
81
  outputs=[output_a, output_b],
82
  )
83
+ description = (Path(__file__).parent / "description.md").read_text()
84
+ gr.Markdown(description)
85
 
86
 
87
  if my_pipeline is None:
description.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Small print
2
+
3
+ <p style="background-color: #fff9f9; border: 1px solid #ff0000; padding: 10px;">
4
+ Warning: This demo is highly experimental and not ready for production use.
5
+ </p>
6
+
7
+ This demo is a proof of concept for visualizing the semantic differences between two text documents.
8
+ The input documents may or may not be written in the same language.
9
+
10
+ In our paper ([arXiv:2305.13303](https://arxiv.org/abs/2305.13303)), we evaluate three simple, unsupervised approaches based on BERT-like encoder models.
11
+ This demo implements the approaches `DiffAlign` and `DiffDel` using the model [ZurichNLP/unsup-simcse-xlm-roberta-base](https://huggingface.co/ZurichNLP/unsup-simcse-xlm-roberta-base). See the [XLM-R model](https://huggingface.co/xlm-roberta-base) for a list of supported languages.
12
+
13
+ The third approach, `DiffMask`, was not included in the demo because it is very slow.
14
+
15
+ More resources:
16
+ - Paper: https://arxiv.org/abs/2305.13303
17
+ - Code: https://github.com/ZurichNLP/recognizing-semantic-differences
18
+
19
+ ## Citation
20
+ ```bibtex
21
+ @article{vamvas-sennrich-2023-rsd,
22
+ title={Towards Unsupervised Recognition of Semantic Differences in Related Documents},
23
+ author={Jannis Vamvas and Rico Sennrich},
24
+ year={2023},
25
+ eprint={2305.13303},
26
+ archivePrefix={arXiv},
27
+ primaryClass={cs.CL}
28
+ }
29
+ ```
preamble.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Diff tool for natural language text
2
+
3
+ Demo for the EMNLP 2023 paper ["Towards Unsupervised Recognition of Semantic Differences in Related Documents"](https://arxiv.org/abs/2305.13303).
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  transformers==4.34.0
2
  --find-links https://download.pytorch.org/whl/cpu
3
- torch
 
1
  transformers==4.34.0
2
  --find-links https://download.pytorch.org/whl/cpu
3
+ torch==2.0.1