Spaces:

gabrielanicole
/

MNV-beam_search

Sleeping

App Files Files Community

Gabriela Nicole Gonzalez Saez committed on Feb 15

Commit

9e85aff

•

1 Parent(s): b114ef2

app

Browse files

Files changed (2) hide show

app.py +209 -0
plotsjs.js +264 -0

app.py CHANGED Viewed

	@@ -0,0 +1,209 @@

+import gradio as gr
+import inseq
+import captum
+import torch
+import os
+# import nltk
+import argparse
+import random
+import numpy as np
+from argparse import Namespace
+from tqdm.notebook import tqdm
+from torch.utils.data import DataLoader
+from functools import partial
+from transformers import AutoTokenizer, MarianTokenizer, AutoModel, AutoModelForSeq2SeqLM, MarianMTModel
+model_es = "Helsinki-NLP/opus-mt-en-es"
+model_fr = "Helsinki-NLP/opus-mt-en-fr"
+model_zh = "Helsinki-NLP/opus-mt-en-zh"
+tokenizer_es = AutoTokenizer.from_pretrained(model_es)
+tokenizer_fr = AutoTokenizer.from_pretrained(model_fr)
+tokenizer_zh = AutoTokenizer.from_pretrained(model_zh)
+model_tr_es = MarianMTModel.from_pretrained(model_es)
+model_tr_fr = MarianMTModel.from_pretrained(model_fr)
+model_tr_zh = MarianMTModel.from_pretrained(model_zh)
+model_es = inseq.load_model("Helsinki-NLP/opus-mt-en-es", "input_x_gradient")
+model_fr = inseq.load_model("Helsinki-NLP/opus-mt-en-fr", "input_x_gradient")
+model_zh = inseq.load_model("Helsinki-NLP/opus-mt-en-zh", "input_x_gradient")
+dict_models = {
+	'en-es': model_es,
+	'en-fr': model_fr,
+	'en-zh': model_zh,
+}
+dict_models_tr = {
+	'en-es': model_tr_es,
+	'en-fr': model_tr_fr,
+	'en-zh': model_tr_zh,
+}
+dict_tokenizer_tr = {
+	'en-es': tokenizer_es,
+	'en-fr': tokenizer_fr,
+	'en-zh': tokenizer_zh,
+}
+saliency_examples = [
+	"Peace of Mind: Protection for consumers.",
+	"The sustainable development goals report: towards a rescue plan for people and planet",
+	"We will leave no stone unturned to hold those responsible to account.",
+	"The clock is now ticking on our work to finalise the remaining key legislative proposals presented by this Commission to ensure that citizens and businesses can reap the benefits of our policy actions.",
+	"Pumpkins, squash and gourds, fresh or chilled, excluding courgettes",
+	"The labour market participation of mothers with infants has even deteriorated over the past two decades, often impacting their career and incomes for years.",
+]
+contrastive_examples = [
+["Peace of Mind: Protection for consumers.",
+"Paz mental: protección de los consumidores",
+"Paz de la mente: protección de los consumidores"],
+["the slaughterer has finished his work.",
+"l'abatteur a terminé son travail.",
+"l'abatteuse a terminé son travail."],
+['A fundamental shift is needed - in commitment, solidarity, financing and action - to put the world on a better path.',
+ '需要在承诺、团结、筹资和行动方面进行根本转变,使世界走上更美好的道路。',
+ '我们需要从根本上转变承诺、团结、资助和行动，使世界走上更美好的道路。',]
+	]
+def split_token_from_sequences(sequences, model) -> dict :
+	n_sentences = len(sequences)
+	gen_sequences_texts = []
+	for bs in range(n_sentences):
+		#### decoder per token.
+		gen_sequences_texts.append(dict_tokenizer_tr[model].decode(sequences[:, 1:][bs],  skip_special_tokens=True).split(' '))
+	print(gen_sequences_texts)
+	score = 0
+	#raw dict is bos
+	text = 'bos'
+	new_id = text +'--1'
+	dict_parent = [{'id': new_id, 'parentId': None , 'text': text, 'name': 'bos', 'prob':score }]
+	id_dict_pos = {}
+	step_i = 0
+	cont = True
+	words_by_step = [] #[['bos' for i in range(n_sentences)]]
+	while cont:
+		# append to dict_parent for all beams of step_i
+		cont = False
+		step_words = []
+		for beam in range(n_sentences):
+			app_text = ''
+			if step_i < len(gen_sequences_texts[beam]):
+				app_text = gen_sequences_texts[beam][step_i]
+				cont = True
+			step_words.append(app_text)
+		words_by_step.append(step_words)
+		print(words_by_step)
+		for i_bs, step_w in enumerate(step_words):
+			if step_w != '':
+				#new id if the same word is not in another beam (?) [beam[i] was a token id]
+				#parent id = previous word and previous step.
+				# new_parent_id = "-".join([str(beam[i]) for i in range(step_i)])
+				new_id = "-".join([str(words_by_step[i][i_bs])+ '-' + str(i) for i in range(step_i+1)])
+				parent_id = "-".join([words_by_step[i][i_bs] + '-' + str(i) for i in range(step_i) ])
+				# new_id = step_w +'-' + str(step_i)
+				# parent_id = words_by_step[step_i-1][i_bs] + '-' + str(step_i -1)
+				if step_i == 0 :
+					parent_id =  'bos--1'
+				## if the dict already exists remove it, if it is not a root...
+				## root?? then next is ''
+				next_word_flag = len(gen_sequences_texts[i_bs][step_i]) > step_i
+				if next_word_flag:
+					if not (new_id in id_dict_pos):
+						dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
+						id_dict_pos[new_id] = len(dict_parent) - 1
+				else:
+					dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
+					id_dict_pos[new_id] = len(dict_parent) - 1
+		step_i += 1
+	return dict_parent
+import gradio as gr
+# Static markup handed to gr.HTML below: loads require.js from cdnjs and
+# declares the placeholder elements #demo, #viz, #demo2 and #d3_beam_search.
+html = """
+<html>
+<script async src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
+  <body>
+    <p id="demo"></p>
+    <p id="viz"></p>
+    <p id="demo2"></p>
+    <div id="d3_beam_search"></div>
+  </body>
+</html>
+"""
+def sentence_maker(w1, model, var2={}):
+  #translate and get internal values
+  # src_text = saliency_examples[0]
+  inputs = dict_tokenizer_tr[model](w1, return_tensors="pt")
+  num_ret_seq = 4
+  translated  = dict_models_tr[model].generate(**inputs,
+                  num_beams=4,
+                  num_return_sequences=num_ret_seq,
+                  return_dict_in_generate=True,
+                  output_attentions =True,
+                  output_hidden_states = True,
+                  output_scores=True,)
+  beam_dict = split_token_from_sequences(translated.sequences,model )
+  tgt_text = dict_tokenizer_tr[model].decode(translated.sequences[0], skip_special_tokens=True)
+  return [tgt_text,beam_dict]
+def sentence_maker2(w1,j2):
+  #  json_value = {'one':1}
+  #  return f"{w1['two']} in sentence22..."
+   print(w1,j2)
+   return "in sentence22..."
+with gr.Blocks(js="plotsjs.js") as demo:
+	gr.Markdown(
+	"""
+	# MAKE NMT Workshop \t `BeamSearch`
+	""")
+	in_text = gr.Textbox(label="source text")
+	out_text  = gr.Textbox(label="target text")
+	out_text2  = gr.Textbox(visible=False)
+	var2 = gr.JSON(visible=False)
+	radio_c = gr.Radio(choices=['en-zh', 'en-es', 'en-fr'], value="en-zh", label= '', container=False)
+	btn = gr.Button("Translate")
+	input_mic = gr.HTML(html)
+	btn.click(sentence_maker, [in_text, radio_c], [out_text,var2], js="(in_text,radio_c) => testFn_out(in_text,radio_c)") #should return an output comp.
+	out_text.change(sentence_maker2, [out_text, var2], out_text2, js="(out_text,var2) => testFn_out_json(var2)") #
+	# run script function on load,
+	# demo.load(None,None,None,js="plotsjs.js")
+if __name__ == "__main__":
+    demo.launch()

plotsjs.js ADDED Viewed

	@@ -0,0 +1,264 @@

+async () => {
+   // Expose testFn() on globalThis so that your HTML onclick handlers can access it
+    globalThis.testFn = () => {
+      document.getElementById('demo').innerHTML = "Hello?"
+    };
+    const d3 = await import("https://cdn.jsdelivr.net/npm/d3@7/+esm");
+    globalThis.d3 = d3;
+    globalThis.d3Fn = () => {
+		d3.select('#viz').append('svg')
+				.append('rect')
+				.attr('width', 50)
+				.attr('height', 50)
+				.attr('fill', 'black')
+				.on('mouseover', function(){d3.select(this).attr('fill', 'red')})
+				.on('mouseout', function(){d3.select(this).attr('fill', 'black')});
+    };
+	globalThis.testFn_out = (val,radio_c) => {
+		// document.getElementById('demo').innerHTML = val
+		console.log(val);
+		// globalThis.d3Fn();
+		return([val,radio_c]);
+	  };
+	//   Is this function well commented???
+	//   globalThis.testFn_out_json = (val) => {
+	// 	document.getElementById('demo2').innerHTML = JSON.stringify(val);
+	// 	console.log(val);
+	// 	globalThis.d3Fn();
+	// 	return(['string', {}])
+	// 	// return(JSON.stringify(val), JSON.stringify(val) );
+	//   };
+	  globalThis.testFn_out_json = (data) => {
+		const idMapping = data.reduce((acc, el, i) => {
+		acc[el.id] = i;
+		return acc;
+		}, {});
+		let root;
+		data.forEach(el => {
+		// Handle the root element
+		if (el.parentId === null) {
+			root = el;
+			return;
+		}
+		// Use our mapping to locate the parent element in our data array
+		const parentEl = data[idMapping[el.parentId]];
+		// Add our current el to its parent's `children` array
+		parentEl.children = [...(parentEl.children || []), el];
+		});
+		// console.log(Tree(root));
+		// document.getElementById('d3_beam_search').innerHTML = Tree(root)
+		d3.select('#d3_beam_search').html("");
+		d3.select('#d3_beam_search').append(function(){return  Tree(root);});
+		// $('#d3_beam_search').html(Tree(root)) ;
+		return(['string', {}])
+	}
+// Copyright 2021 Observable, Inc.
+// Released under the ISC license.
+// https://observablehq.com/@d3/tree
+function Tree(data, { // data is either tabular (array of objects) or hierarchy (nested objects)
+	path, // as an alternative to id and parentId, returns an array identifier, imputing internal nodes
+	id = Array.isArray(data) ? d => d.id : null, // if tabular data, given a d in data, returns a unique identifier (string)
+	parentId = Array.isArray(data) ? d => d.parentId : null, // if tabular data, given a node d, returns its parent’s identifier
+	children, // if hierarchical data, given a d in data, returns its children
+	tree = d3.tree, // layout algorithm (typically d3.tree or d3.cluster)
+	sort, // how to sort nodes prior to layout (e.g., (a, b) => d3.descending(a.height, b.height))
+	label =  d => d.name, // given a node d, returns the display name
+	title = d => d.name, // given a node d, returns its hover text
+	link , // given a node d, its link (if any)
+	linkTarget = "_blank", // the target attribute for links (if any)
+	width = 800, // outer width, in pixels
+	height, // outer height, in pixels
+	r = 3, // radius of nodes
+	padding = 1, // horizontal padding for first and last column
+	fill = "#999", // fill for nodes
+	fillOpacity, // fill opacity for nodes
+	stroke = "#555", // stroke for links
+	strokeWidth = 2, // stroke width for links
+	strokeOpacity = 0.4, // stroke opacity for links
+	strokeLinejoin, // stroke line join for links
+	strokeLinecap, // stroke line cap for links
+	halo = "#fff", // color of label halo
+	haloWidth = 3, // padding around the labels
+	curve = d3.curveBumpX, // curve for the link
+  } = {}) {
+	// If id and parentId options are specified, or the path option, use d3.stratify
+	// to convert tabular data to a hierarchy; otherwise we assume that the data is
+	// specified as an object {children} with nested objects (a.k.a. the “flare.json”
+	// format), and use d3.hierarchy.
+	const root = path != null ? d3.stratify().path(path)(data)
+		: id != null || parentId != null ? d3.stratify().id(id).parentId(parentId)(data)
+		: d3.hierarchy(data, children);
+	// Sort the nodes.
+	if (sort != null) root.sort(sort);
+	// Compute labels and titles.
+	const descendants = root.descendants();
+	const L = label == null ? null : descendants.map(d => label(d.data, d));
+	// Compute the layout.
+	const descWidth = 10;
+	// console.log('descendants', descendants);
+	const realWidth = descWidth * descendants.length
+	const totalWidth = (realWidth > width) ? realWidth : width;
+	const dx = 25;
+	const dy = totalWidth / (root.height + padding);
+	tree().nodeSize([dx, dy])(root);
+	// Center the tree.
+	let x0 = Infinity;
+	let x1 = -x0;
+	root.each(d => {
+	  if (d.x > x1) x1 = d.x;
+	  if (d.x < x0) x0 = d.x;
+	});
+	// Compute the default height.
+	if (height === undefined) height = x1 - x0 + dx * 2;
+	// Use the required curve
+	if (typeof curve !== "function") throw new Error(`Unsupported curve`);
+	const parent = d3.create("div");
+	const body = parent.append("div")
+	.style("overflow-x", "scroll")
+	.style("-webkit-overflow-scrolling", "touch");
+	const svg = body.append("svg")
+		.attr("viewBox", [-dy * padding / 2, x0 - dx, totalWidth, height])
+		.attr("width", totalWidth)
+		.attr("height", height)
+		.attr("style", "max-width: 100%; height: auto; height: intrinsic;")
+		.attr("font-family", "sans-serif")
+		.attr("font-size", 12);
+	svg.append("g")
+		.attr("fill", "none")
+		.attr("stroke", stroke)
+		.attr("stroke-opacity", strokeOpacity)
+		.attr("stroke-linecap", strokeLinecap)
+		.attr("stroke-linejoin", strokeLinejoin)
+		.attr("stroke-width", strokeWidth)
+	  .selectAll("path")
+		.data(root.links())
+		.join("path")
+		// .attr("stroke", d => d.prob > 0.5 ? 'red' : 'blue'  )
+		// .attr("fill", "red")
+		  .attr("d", d3.link(curve)
+			  .x(d => d.y)
+			  .y(d => d.x));
+	const node = svg.append("g")
+	  .selectAll("a")
+	  .data(root.descendants())
+	  .join("a")
+		.attr("xlink:href", link == null ? null : d => link(d.data, d))
+		.attr("target", link == null ? null : linkTarget)
+		.attr("transform", d => `translate(${d.y},${d.x})`);
+	node.append("circle")
+		.attr("fill", d => d.children ? stroke : fill)
+		.attr("r", r);
+	title = d => (d.name + ( d.prob));
+	if (title != null) node.append("title")
+		.text(d => title(d.data, d));
+	if (L) node.append("text")
+		.attr("dy", "0.32em")
+		.attr("x", d => d.children ? -6 : 6)
+		.attr("text-anchor", d => d.children ? "end" : "start")
+		.attr("paint-order", "stroke")
+		.attr("stroke", 'white')
+		.attr("fill", d => d.data.prob == 1 ? ('red') : ('black')  )
+		.attr("stroke-width", haloWidth)
+		.text((d, i) => L[i]);
+		body.node().scrollBy(totalWidth, 0);
+	return svg.node();
+  }
+}
+// define('viz', ['d3'], function (d3) {
+//     function draw(container) {
+//         d3.select(container).append("svg").append('rect').attr('id', 'viz_rect').attr('width', 50).attr('height', 50);
+//     }
+//     return draw;
+// });
+// console.log("HERE!")
+// element.append('Loaded 😄 ');
+// variable2='hello';
+// draw('.gradio-container')
+// function transform_beamsearch(data){
+// 	const idMapping = data.reduce((acc, el, i) => {
+// 	acc[el.id] = i;
+// 	return acc;
+// 	}, {});
+// 	let root;
+// 	data.forEach(el => {
+// 	// Handle the root element
+// 	if (el.parentId === null) {
+// 		root = el;
+// 		return;
+// 	}
+// 	// Use our mapping to locate the parent element in our data array
+// 	const parentEl = data[idMapping[el.parentId]];
+// 	// Add our current el to its parent's `children` array
+// 	parentEl.children = [...(parentEl.children || []), el];
+// 	});
+// 	// console.log(Tree(root, { label: d => d.name,}));
+// 	console.log(root);
+// 	// $('#d3_beam_search').html(Tree(root)) ;
+// 	return root;
+// }
+// var gradioContainer = document.querySelector('.gradio-container');
+// gradioContainer.insertBefore(container, gradioContainer.firstChild);