diff --git "a/ctfidf_config.json" "b/ctfidf_config.json" new file mode 100644--- /dev/null +++ "b/ctfidf_config.json" @@ -0,0 +1,63226 @@ +{ + "ctfidf_model": { + "bm25_weighting": false, + "reduce_frequent_words": false + }, + "vectorizer_model": { + "params": { + "analyzer": "word", + "binary": false, + "decode_error": "strict", + "encoding": "utf-8", + "input": "content", + "lowercase": true, + "max_df": 1.0, + "max_features": null, + "min_df": 5, + "ngram_range": [ + 1, + 3 + ], + "stop_words": "english", + "strip_accents": null, + "token_pattern": "(?u)\\b\\w\\w+\\b", + "vocabulary": null + }, + "vocab": { + "intelligent": 26541, + "input": 26252, + "methods": 32722, + "im": 24527, + "essential": 18320, + "making": 31643, + "text": 56419, + "entries": 18156, + "east": 16554, + "asian": 4513, + "scripts": 48951, + "application": 3159, + "languages": 28588, + "fully": 21711, + "explored": 19755, + "paper": 39247, + "discusses": 15488, + "tools": 57374, + "contribute": 10926, + "development": 14665, + "computer": 9885, + "processing": 42847, + "propose": 43280, + "design": 14260, + "philosophy": 40817, + "regards": 45797, + "service": 50092, + "platform": 40948, + "treats": 58738, + "study": 53316, + "cross": 11806, + "disciplinary": 15373, + "subject": 53550, + "perspectives": 40778, + "software": 51634, + "engineering": 17765, + "human": 24086, + "interaction": 26595, + "hci": 23493, + "natural": 36408, + "language": 27947, + "nlp": 37459, + "discuss": 15457, + "indicate": 25523, + "number": 37980, + "possible": 41313, + "future": 21863, + "research": 46977, + "directions": 15288, + "includes": 25229, + "reflection": 45779, + "role": 48301, + "networks": 36826, + "english": 17772, + "acquisition": 1447, + "collection": 8978, + "practical": 41455, + "criteria": 11772, + "annotate": 2869, + "free": 21637, + "speech": 52250, + "corpora": 11170, + "children": 8293, + "utterances": 61143, + "theoretical": 57017, + "level": 30054, + "main": 31421, + "claim": 8380, + "syntactic": 54291, + "interpreted": 26738, + "outcome": 38765, + "use": 59813, + "machinery": 31395, + "intrinsic": 26767, + "features": 20514, + "accessible": 835, + "directly": 15303, + "known": 27653, + "network": 36691, + "properties": 43257, + "global": 22820, + "patterns": 39964, + "view": 61595, + "power": 41423, + "organization": 38684, + "underlying": 59261, + "grammar": 23061, + "taking": 54786, + "look": 31063, + "issues": 27083, + "examines": 18873, + "build": 7384, + "net": 36688, + "projection": 43138, + "relations": 46014, + "recall": 45238, + "opposed": 38515, + "adult": 1877, + "grammars": 23066, + "early": 16510, + "child": 8288, + "defined": 13781, + "concept": 9920, + "structure": 53087, + "overcome": 39056, + "difficulty": 15197, + "develop": 14569, + "set": 50098, + "systematic": 54389, + "assuming": 4637, + "constituency": 10346, + "hierarchy": 23702, + "based": 5553, + "lexico": 30402, + "thematic": 57004, + "end": 17610, + "obtain": 38158, + "annotation": 2933, + "enables": 17435, + "perform": 40063, + "statistics": 52776, + "size": 51376, + "structures": 53180, + "ii": 24503, + "standard": 52460, + "measures": 32074, + "complexity": 9674, + "provide": 44001, + "detailed": 14407, + "example": 18875, + "general": 22042, + "model": 33480, + "lexical": 30351, + "information": 25747, + "conforms": 10147, + "abstract": 755, + "reflects": 45782, + "typical": 59130, + "dictionary": 14803, + "entry": 18167, + "mapped": 31799, + "formed": 21370, + "xml": 63034, + "document": 15765, + "transformation": 58442, + "used": 60078, + "implement": 24631, + "semantics": 49398, + "enable": 17419, + "extraction": 20042, + "manipulation": 31711, + "format": 21365, + "hypothesized": 24353, + "approach": 3385, + "form": 21312, + "solution": 51650, + "framework": 21445, + "understanding": 59320, + "reasoning": 45181, + "intelligence": 26534, + "combines": 9090, + "discourse": 15384, + "powerful": 41432, + "representation": 46487, + "formalism": 21355, + "capable": 7614, + "exploiting": 19672, + "ontological": 38396, + "advanced": 1885, + "solve": 51675, + "following": 21261, + "problems": 42692, + "compromising": 9823, + "practicality": 41481, + "factors": 20304, + "restriction": 47425, + "nature": 36476, + "question": 44684, + "response": 47391, + "limitation": 30534, + "scale": 48550, + "domains": 16231, + "real": 45097, + "life": 30436, + "describes": 14217, + "experiments": 19341, + "learning": 29497, + "dutch": 16476, + "rules": 48389, + "using": 60544, + "inductive": 25609, + "logic": 30978, + "programming": 43082, + "machine": 31297, + "discipline": 15374, + "logical": 30982, + "operators": 38497, + "different": 14832, + "ways": 61839, + "approaching": 3958, + "problem": 42495, + "experimented": 19336, + "compared": 9378, + "related": 45886, + "work": 62551, + "task": 54865, + "results": 47479, + "direct": 15250, + "correspondence": 11542, + "quality": 44486, + "background": 5490, + "knowledge": 27386, + "constructed": 10405, + "theory": 57035, + "demonstrating": 14050, + "ability": 591, + "ilp": 24526, + "good": 22924, + "advantage": 1937, + "prior": 42392, + "domain": 15992, + "available": 5258, + "outlined": 38775, + "range": 44906, + "deep": 13681, + "make": 31538, + "morphological": 35838, + "resources": 47291, + "word": 62108, + "similarity": 51081, + "bootstrap": 7265, + "seed": 49042, + "lexicon": 30407, + "deployed": 14170, + "items": 27115, + "precision": 41612, + "shown": 50692, + "strengths": 52975, + "weaknesses": 61870, + "classes": 8415, + "particular": 39830, + "focus": 21142, + "relative": 46088, + "accessibility": 834, + "resource": 47209, + "types": 59076, + "predicted": 41664, + "associated": 4620, + "applications": 3183, + "examine": 18860, + "evolutionary": 18834, + "naming": 36378, + "game": 21949, + "communicating": 9244, + "agents": 2061, + "equipped": 18196, + "selected": 49115, + "coupling": 11634, + "biological": 7168, + "linguistic": 30744, + "ingredients": 26198, + "transition": 58536, + "small": 51461, + "change": 8167, + "control": 10960, + "parameter": 39664, + "poorly": 41148, + "group": 23270, + "linguistically": 30813, + "transforms": 58535, + "perfectly": 40062, + "large": 28829, + "abilities": 589, + "kept": 27286, + "fixed": 21073, + "appears": 3146, + "continuous": 10840, + "genetic": 22636, + "proceeds": 42752, + "effect": 16609, + "initially": 26230, + "learn": 29342, + "creates": 11735, + "niche": 37439, + "pressure": 42136, + "increase": 25402, + "suggests": 53845, + "cultural": 11935, + "processes": 42844, + "intensive": 26558, + "took": 57353, + "place": 40926, + "performance": 40164, + "species": 52039, + "experienced": 19227, + "triggered": 58794, + "rapid": 44985, + "expansion": 19188, + "civilization": 8377, + "zipf": 63192, + "law": 29172, + "states": 52719, + "words": 62358, + "ranked": 44951, + "order": 38584, + "decreasing": 13672, + "frequency": 21668, + "texts": 56855, + "inversely": 26931, + "proportional": 43276, + "rank": 44947, + "robust": 48236, + "experimental": 19257, + "observation": 38121, + "date": 13492, + "satisfactory": 48523, + "explanation": 19603, + "suggest": 53812, + "arise": 4183, + "evolution": 18833, + "dominated": 16310, + "meanings": 32032, + "competition": 9533, + "synonyms": 54287, + "retrieval": 47937, + "term": 56231, + "relevance": 46189, + "taken": 54772, + "mean": 31986, + "formal": 21343, + "conformity": 10146, + "given": 22721, + "user": 60403, + "query": 44660, + "rule": 48377, + "documents": 15852, + "submitted": 53582, + "certain": 7933, + "perceived": 40047, + "selection": 49131, + "formally": 21363, + "solving": 51700, + "supplemented": 54108, + "procedure": 42741, + "relevant": 46198, + "introduce": 26775, + "quantitative": 44614, + "measure": 32044, + "single": 51282, + "exists": 19180, + "determination": 14552, + "shall": 50437, + "consider": 10206, + "simplest": 51232, + "opinion": 38498, + "proposed": 43709, + "does": 15932, + "restrictions": 47426, + "applied": 3262, + "data": 12100, + "mining": 33309, + "allows": 2450, + "exploration": 19678, + "sequences": 50020, + "phenomena": 40809, + "usually": 61033, + "tends": 56213, + "isolated": 27050, + "relation": 45963, + "offers": 38298, + "invaluable": 26915, + "analyses": 2588, + "sentences": 49675, + "dialogues": 14797, + "report": 46423, + "attempt": 4677, + "inspecting": 26397, + "verbs": 61520, + "french": 21657, + "accounts": 885, + "road": 48210, + "analysis": 2609, + "comes": 9130, + "original": 38703, + "unsupervised": 59678, + "training": 57920, + "allowing": 2443, + "discovery": 15414, + "sequential": 50035, + "analyzer": 2837, + "appearing": 3145, + "provided": 44157, + "classification": 8426, + "links": 30837, + "successive": 53752, + "distinct": 15588, + "clusters": 8750, + "segmentation": 49078, + "interpretation": 26733, + "applying": 3357, + "statistical": 52733, + "independent": 25493, + "semantic": 49230, + "annotations": 2982, + "despite": 14354, + "importance": 24676, + "summarizing": 53911, + "evolving": 18841, + "events": 18790, + "received": 45253, + "attention": 4706, + "researchers": 47148, + "field": 20749, + "multi": 35936, + "previous": 42236, + "et": 18392, + "al": 2228, + "2007": 244, + "presented": 42057, + "methodology": 32715, + "automatic": 5066, + "summarization": 53875, + "emitted": 17278, + "multiple": 36164, + "sources": 51826, + "event": 18778, + "heart": 23525, + "lies": 30434, + "identification": 24383, + "similarities": 51078, + "differences": 14820, + "various": 61297, + "axes": 5481, + "synchronic": 54276, + "diachronic": 14733, + "achieved": 1213, + "introduction": 26903, + "notion": 37733, + "connect": 10172, + "messages": 32323, + "resulting": 47460, + "graph": 23093, + "grid": 23247, + "creation": 11747, + "completes": 9609, + "planning": 40944, + "phase": 40804, + "nlg": 37444, + "architecture": 4019, + "case": 7787, + "contained": 10478, + "exceeding": 18947, + "required": 46897, + "compression": 9812, + "rate": 45012, + "initial": 26213, + "thoughts": 57075, + "probabilistic": 42453, + "content": 10511, + "stage": 52426, + "tries": 58792, + "alleviate": 2398, + "present": 41839, + "automated": 5034, + "method": 32352, + "origin": 38702, + "non": 37632, + "native": 36399, + "speakers": 52003, + "identified": 24403, + "listener": 30844, + "detection": 14453, + "nationality": 36397, + "existence": 19019, + "phoneme": 40820, + "allow": 2433, + "new": 37123, + "discriminative": 15442, + "phonemes": 40822, + "database": 12782, + "construct": 10381, + "classifier": 8589, + "phone": 40819, + "significant": 50845, + "result": 47431, + "developed": 14623, + "correct": 11464, + "96": 569, + "error": 18210, + "reduction": 45716, + "tested": 56393, + "techniques": 56052, + "2001": 237, + "great": 23198, + "linguist": 30743, + "pioneer": 40887, + "article": 4446, + "written": 62994, + "memory": 32239, + "characteristics": 8233, + "behaviour": 6401, + "parallel": 39641, + "romance": 48331, + "portuguese": 41224, + "examples": 18885, + "port": 41214, + "fr": 21426, + "meaning": 31999, + "definition": 13792, + "class": 8390, + "complement": 9583, + "object": 38080, + "nominal": 37631, + "distribution": 15632, + "limited": 30560, + "nouns": 37744, + "head": 23494, + "restricted": 47421, + "noun": 37739, + "verbal": 61512, + "idioms": 24482, + "la": 27686, + "excluded": 18973, + "constructions": 10431, + "reductions": 45725, + "complex": 9615, + "com": 9003, + "tune": 58853, + "instrument": 26487, + "industry": 25620, + "comprises": 9818, + "components": 9715, + "implementing": 24650, + "fundamental": 21774, + "operations": 38494, + "lexicons": 30415, + "exploitation": 19670, + "management": 31690, + "structured": 53151, + "formats": 21367, + "compact": 9279, + "readable": 45070, + "binary": 7142, + "necessary": 36528, + "included": 25227, + "combining": 9107, + "approaches": 3750, + "manually": 31754, + "originating": 38750, + "substantial": 53615, + "coverage": 11650, + "distributed": 15621, + "lr": 31232, + "license": 30428, + "speaking": 52009, + "achieving": 1388, + "proficiency": 43066, + "highly": 23877, + "process": 42753, + "requires": 46910, + "kinds": 27369, + "skills": 51415, + "like": 30460, + "connection": 10180, + "communicative": 9255, + "goals": 22907, + "intentions": 26572, + "usual": 61032, + "starting": 52569, + "point": 41044, + "help": 23547, + "learner": 29494, + "acquire": 1440, + "enhanced": 17929, + "electronic": 16966, + "version": 61550, + "age": 2045, + "old": 38326, + "pattern": 39958, + "henceforth": 23613, + "regarded": 45791, + "partially": 39807, + "lack": 27869, + "grounding": 23264, + "context": 10578, + "shortcomings": 50578, + "believe": 6409, + "regard": 45790, + "basic": 6326, + "course": 11635, + "needs": 36607, + "improvement": 24975, + "unlike": 59591, + "books": 7248, + "computers": 9900, + "open": 38409, + "media": 32157, + "dynamic": 16481, + "changes": 8176, + "users": 60457, + "performances": 40636, + "preferences": 41791, + "account": 876, + "building": 7435, + "amounts": 2543, + "changing": 8184, + "procedures": 42747, + "extracting": 20028, + "verb": 61508, + "raw": 45033, + "polish": 41102, + "novel": 37747, + "technique": 56022, + "em": 16999, + "algorithm": 2258, + "performs": 40696, + "disambiguation": 15358, + "valence": 61169, + "frame": 21437, + "forests": 21304, + "obtained": 38201, + "parser": 39757, + "post": 41344, + "second": 48994, + "idea": 24366, + "concerns": 9961, + "filtering": 20810, + "incorrect": 25397, + "frames": 21444, + "detected": 14443, + "parsed": 39756, + "motivated": 35863, + "similar": 51027, + "arguments": 4178, + "tend": 56197, + "phenomenon": 40814, + "described": 14211, + "terms": 56262, + "newly": 37368, + "introduced": 26879, + "occurrence": 38272, + "matrices": 31938, + "split": 52346, + "steps": 52840, + "list": 30838, + "valid": 61171, + "determined": 14559, + "according": 852, + "combined": 9076, + "computed": 9882, + "best": 6743, + "extracted": 20005, + "reaches": 45056, + "score": 48782, + "45": 410, + "39": 383, + "principles": 42390, + "realization": 45158, + "compilation": 9576, + "concordance": 9980, + "paths": 39951, + "forms": 21374, + "presentation": 42055, + "lexicographic": 30406, + "online": 38352, + "surveys": 54223, + "public": 44302, + "mood": 35831, + "costly": 11598, + "impractical": 24803, + "web": 61875, + "material": 31925, + "indicative": 25546, + "blogs": 7228, + "emails": 17002, + "queries": 44648, + "inexpensive": 25630, + "extensive": 19851, + "assess": 4575, + "fluctuations": 21123, + "concerned": 9959, + "extension": 19847, + "profile": 43069, + "questionnaire": 44764, + "indicators": 25549, + "10": 27, + "2006": 243, + "org": 38676, + "send": 49477, + "delivered": 13834, + "later": 29149, + "long": 31002, + "medium": 32216, + "confusion": 10160, + "current": 11958, + "prediction": 41688, + "systems": 54417, + "gram": 23049, + "models": 34647, + "lm": 30903, + "estimate": 18370, + "probability": 42472, + "phrase": 40834, + "past": 39928, + "years": 63049, + "attempts": 4696, + "enrich": 17960, + "want": 61767, + "explore": 19684, + "predictive": 41773, + "powers": 41450, + "latent": 29117, + "lsa": 31235, + "reliable": 46249, + "distance": 15540, + "dependencies": 14104, + "evaluate": 18433, + "integrate": 26504, + "cache": 7520, + "partial": 39806, + "reranking": 46971, + "interpolation": 26707, + "improvements": 25042, + "baseline": 6145, + "simple": 51132, + "investigate": 26937, + "grapheme": 23180, + "ukrainian": 59188, + "alphabet": 2486, + "recognition": 45489, + "involvement": 27016, + "description": 14240, + "actors": 1486, + "influences": 25731, + "story": 52879, + "line": 30645, + "higher": 23810, + "se": 48956, + "represents": 46816, + "flow": 21118, + "expressions": 19806, + "read": 45066, + "lost": 31112, + "respect": 47343, + "actor": 1484, + "exactly": 18855, + "behaves": 6389, + "major": 31499, + "concern": 9958, + "beings": 6404, + "try": 58844, + "store": 52872, + "short": 50549, + "associating": 4628, + "diverse": 15691, + "aspects": 4536, + "incidents": 25220, + "virtual": 61622, + "collocations": 8996, + "associative": 4631, + "completion": 9611, + "acting": 1454, + "discovered": 15410, + "managed": 31689, + "separated": 49880, + "blocks": 7223, + "broken": 7367, + "refer": 45732, + "mind": 33274, + "maps": 31810, + "priority": 42436, + "functions": 21768, + "represent": 46466, + "actual": 1488, + "temporal": 56178, + "situation": 51368, + "inside": 26382, + "map": 31793, + "reconstruct": 45576, + "recent": 45276, + "computational": 9831, + "construction": 10424, + "exposure": 19788, + "built": 7481, + "simulate": 51255, + "effects": 16823, + "values": 61212, + "paragraph": 39635, + "viewed": 61604, + "association": 4629, + "turns": 58994, + "strongly": 53067, + "increases": 25433, + "decreases": 13671, + "slightly": 51434, + "high": 23705, + "occurrences": 38279, + "probably": 42484, + "introduces": 26890, + "bias": 7019, + "cases": 7804, + "scientific": 48753, + "newspapers": 37431, + "marked": 31832, + "attitudes": 4870, + "opinions": 38505, + "author": 4999, + "persons": 40771, + "objective": 38084, + "subjective": 53561, + "statements": 52716, + "attitude": 4869, + "accomplished": 848, + "modality": 33474, + "german": 22661, + "expressed": 19796, + "special": 52012, + "modalities": 33467, + "induces": 25603, + "proven": 43991, + "sentence": 49513, + "adverbs": 1962, + "modal": 33451, + "punctuation": 44387, + "marks": 31851, + "combination": 9031, + "instruments": 26489, + "express": 19791, + "finding": 20896, + "pre": 41497, + "step": 52795, + "arbitrary": 4012, + "output": 38964, + "consists": 10319, + "representing": 46810, + "conception": 9929, + "toolkit": 57370, + "designed": 14306, + "maintenance": 31498, + "collaborative": 8932, + "generative": 22583, + "ease": 16522, + "portability": 41215, + "spreading": 52380, + "tool": 57355, + "source": 51740, + "products": 43057, + "eventually": 18801, + "showed": 50663, + "filters": 20814, + "adequate": 1832, + "anaphoric": 2849, + "reference": 45734, + "modifier": 35733, + "compounds": 9757, + "modular": 35741, + "generating": 22362, + "definitions": 13794, + "specific": 52040, + "uses": 60489, + "java": 27150, + "prolog": 43149, + "custom": 12052, + "functional": 21763, + "notation": 37719, + "implemented": 24646, + "generator": 22614, + "technological": 56155, + "parts": 39903, + "brought": 7368, + "consistent": 10272, + "goal": 22875, + "corpus": 11261, + "reproducing": 46831, + "exposed": 19785, + "presenting": 42064, + "literature": 30856, + "preliminary": 41799, + "million": 33249, + "space": 51847, + "tests": 56416, + "norms": 37713, + "vocabulary": 61700, + "test": 56329, + "judgments": 27243, + "tasks": 55483, + "composed": 9732, + "corresponding": 11544, + "ages": 2066, + "stratified": 52957, + "intended": 26549, + "basis": 6337, + "developmental": 14710, + "studies": 53241, + "finally": 20834, + "aims": 2170, + "tracing": 57491, + "implementation": 24637, + "comprehension": 9762, + "derived": 14199, + "integration": 26527, + "1998": 206, + "way": 61788, + "categories": 7841, + "learned": 29450, + "experience": 19225, + "shaped": 50449, + "symbol": 54266, + "larger": 29067, + "smaller": 51514, + "grounded": 23259, + "algorithms": 2318, + "compute": 9878, + "sets": 50280, + "measuring": 32083, + "contexts": 10749, + "provides": 44178, + "unifying": 59491, + "categorized": 7857, + "methodologies": 32714, + "appear": 3137, + "quite": 44825, + "surface": 54149, + "fact": 20286, + "closely": 8702, + "include": 25222, + "headed": 23501, + "versus": 61561, + "measured": 32066, + "micro": 33217, + "macro": 31401, + "views": 61609, + "thread": 57082, + "decisions": 13572, + "share": 50454, + "common": 9163, + "key": 27292, + "means": 32038, + "translation": 58571, + "russian": 48413, + "dictionaries": 14802, + "calculate": 7522, + "weights": 61937, + "statistic": 52732, + "consideration": 10243, + "realized": 45161, + "program": 43076, + "integrated": 26512, + "monitoring": 35786, + "bilingual": 7103, + "publications": 44331, + "containing": 10481, + "30": 354, + "thousand": 57076, + "created": 11721, + "ancient": 2854, + "investigated": 26997, + "avenues": 5395, + "investigation": 27005, + "profound": 43073, + "humanitarian": 24265, + "going": 22909, + "war": 61771, + "dog": 15986, + "hearing": 23524, + "era": 18206, + "voices": 61725, + "telling": 56168, + "thinking": 57049, + "argue": 4161, + "conscious": 10194, + "able": 665, + "feel": 20724, + "lower": 31205, + "concepts": 9931, + "keeps": 27281, + "inspiring": 26420, + "inquiry": 26373, + "insights": 26386, + "body": 7238, + "cognition": 8888, + "decipherment": 13557, + "modern": 35700, + "surrounding": 54195, + "areas": 4151, + "successful": 53732, + "borrowed": 7270, + "paired": 39161, + "currently": 12030, + "extract": 19967, + "unknown": 59557, + "multilingual": 36062, + "abbreviated": 583, + "versions": 61559, + "translations": 58706, + "spanish": 51936, + "creating": 11738, + "authoring": 5003, + "publishing": 44378, + "chain": 7956, + "deals": 13522, + "descriptive": 14255, + "mark": 31831, + "emphasize": 17310, + "mistakes": 33373, + "necessity": 36540, + "describing": 14239, + "considered": 10245, + "important": 24697, + "assessment": 4589, + "concludes": 9973, + "setting": 50314, + "guidelines": 23351, + "activity": 1483, + "elements": 16977, + "modelling": 34640, + "creole": 11759, + "dialect": 14745, + "inspiration": 26399, + "1999": 207, + "manual": 31729, + "earlier": 16505, + "works": 62872, + "generation": 22406, + "1997": 205, + "unification": 59464, + "tree": 58740, + "adjoining": 1847, + "tag": 54724, + "prototype": 43972, + "supporting": 54136, + "pr": 41454, + "communication": 9245, + "mod": 33450, + "cr": 11675, + "en": 17412, + "descriptions": 14250, + "du": 16456, + "es": 18255, + "cat": 7827, + "base": 5536, + "er": 18205, + "ce": 7903, + "core": 11143, + "dialects": 14749, + "rely": 46275, + "sub": 53514, + "fs": 21701, + "attributes": 4905, + "attribute": 4899, + "instantiated": 26441, + "subset": 53606, + "equivalent": 18201, + "hybrid": 24308, + "principle": 42384, + "west": 61946, + "area": 4136, + "inferred": 25709, + "gathered": 22002, + "inferences": 25704, + "syntactical": 54336, + "syntagmatic": 54344, + "bigram": 7098, + "paradigmatic": 39631, + "pair": 39146, + "clustered": 8734, + "sublanguage": 53570, + "vocabularies": 61699, + "possibilities": 41310, + "markov": 31840, + "spectral": 52244, + "matrix": 31939, + "distributions": 15674, + "yields": 63113, + "soft": 51619, + "clustering": 8735, + "heterogeneous": 23616, + "visually": 61686, + "segmented": 49091, + "homogeneous": 23995, + "segments": 49095, + "specifically": 52178, + "useful": 60355, + "happened": 23434, + "mixed": 33400, + "distinguishing": 15609, + "hard": 23439, + "especially": 18262, + "toolbox": 57369, + "resolve": 47198, + "accurately": 1091, + "distinction": 15597, + "replace": 46401, + "membership": 32230, + "assignment": 4605, + "presents": 42068, + "bypass": 7506, + "bottleneck": 7276, + "furthermore": 21801, + "relies": 46264, + "convenient": 10995, + "readily": 45076, + "clearly": 8656, + "display": 15519, + "potential": 41379, + "brings": 7338, + "appealing": 3136, + "define": 13774, + "altogether": 2515, + "trying": 58847, + "establish": 18340, + "objects": 38116, + "efforts": 16933, + "lead": 29254, + "emergence": 17264, + "efficient": 16857, + "exhibits": 19008, + "degree": 13813, + "synonymy": 54288, + "reduces": 45687, + "efficiency": 16837, + "dynamical": 16494, + "indefinite": 25490, + "time": 57112, + "hand": 23382, + "reduce": 45647, + "feature": 20472, + "limit": 30532, + "negligible": 36652, + "observed": 38142, + "noise": 37594, + "communicated": 9242, + "examined": 18872, + "evenly": 18776, + "streams": 52966, + "leads": 29303, + "generally": 22161, + "redundancy": 45727, + "ambiguity": 2525, + "resolution": 47194, + "pronouns": 43233, + "putting": 44433, + "simulation": 51263, + "artificial": 4487, + "challenge": 7965, + "gate": 21991, + "wide": 61955, + "assembling": 4570, + "incoming": 25328, + "strategy": 52923, + "having": 23484, + "adjacent": 1840, + "enriched": 17963, + "pronoun": 43230, + "engine": 17761, + "bases": 6322, + "klein": 27379, + "understood": 59422, + "textual": 56951, + "inspired": 26404, + "argumentative": 4177, + "prototypical": 43974, + "considering": 10255, + "gain": 21903, + "realize": 45160, + "understand": 59288, + "summaries": 53868, + "intellectual": 26532, + "ownership": 39129, + "detectable": 14442, + "far": 20394, + "collocation": 8995, + "classical": 8420, + "fuzzy": 21902, + "grade": 22998, + "easy": 16556, + "existing": 19021, + "mutual": 36341, + "utilized": 61107, + "inference": 25638, + "function": 21751, + "easily": 16531, + "seen": 49059, + "demonstrated": 14001, + "utility": 61079, + "pairs": 39166, + "working": 62868, + "novels": 37959, + "constituting": 10361, + "project": 43131, + "gutenberg": 23363, + "www": 63024, + "advantages": 1947, + "overcoming": 39076, + "drawbacks": 16403, + "better": 6842, + "databases": 12785, + "relationships": 46076, + "psycholinguistics": 44290, + "studied": 53219, + "disentangle": 15499, + "rich": 48091, + "topology": 57471, + "produce": 42973, + "production": 43046, + "ai": 2111, + "cognitive": 8889, + "scientists": 48774, + "argued": 4167, + "analogy": 2583, + "influential": 25733, + "modeling": 34557, + "mapping": 31800, + "smt": 51543, + "requirement": 46908, + "coded": 8871, + "representations": 46611, + "ideas": 24378, + "relational": 46002, + "remove": 46374, + "builds": 7478, + "analogical": 2579, + "mappings": 31808, + "lists": 30847, + "automatically": 5137, + "discover": 15405, + "analogies": 2581, + "metaphors": 32349, + "achieves": 1283, + "compare": 9326, + "variety": 61260, + "alternative": 2496, + "reach": 45043, + "obey": 38076, + "established": 18351, + "parallels": 39663, + "physical": 40860, + "laying": 29239, + "groundwork": 23269, + "perspective": 40773, + "scaling": 48647, + "regularities": 45833, + "dynamics": 16500, + "suggesting": 53835, + "possibility": 41311, + "findings": 20904, + "principal": 42379, + "frequent": 21682, + "discussion": 15490, + "groups": 23278, + "disparate": 15513, + "levels": 30235, + "formality": 21357, + "distances": 15551, + "deviations": 14721, + "poisson": 41085, + "characterized": 8249, + "exponential": 19781, + "extent": 19921, + "deviation": 14720, + "depends": 14160, + "type": 59047, + "behavior": 6390, + "determines": 14561, + "usage": 59799, + "conclusions": 9977, + "significance": 50842, + "recurrence": 45605, + "times": 57246, + "empirical": 17317, + "anticipated": 3120, + "uniquely": 59519, + "precise": 41609, + "lens": 30043, + "thought": 57073, + "implications": 24654, + "overt": 39106, + "manifestations": 31702, + "collective": 8991, + "lingual": 30690, + "browse": 7374, + "semantically": 49379, + "mathematical": 31932, + "called": 7539, + "atlas": 4647, + "senses": 49492, + "create": 11690, + "graphs": 23186, + "projected": 43136, + "projections": 43143, + "constitute": 10359, + "denote": 14071, + "sense": 49481, + "trends": 58781, + "fed": 20703, + "produced": 43016, + "link": 30826, + "trend": 58777, + "stored": 52874, + "index": 25511, + "instances": 26432, + "linked": 30830, + "correspond": 11540, + "makes": 31613, + "thanks": 56998, + "world": 62925, + "achieve": 1105, + "webpages": 61904, + "google": 22951, + "search": 48961, + "associate": 4619, + "page": 39138, + "count": 11610, + "webpage": 61903, + "conditional": 9991, + "probabilities": 42470, + "correlate": 11500, + "inequality": 25627, + "confidence": 10110, + "reliability": 46248, + "hypothesis": 24344, + "testing": 56401, + "decide": 13552, + "probable": 42483, + "sequence": 49905, + "individually": 25589, + "low": 31131, + "25": 332, + "track": 57492, + "occur": 38268, + "independently": 25508, + "sufficient": 53798, + "generality": 22106, + "supervised": 53959, + "tackle": 54696, + "unrelated": 59632, + "tailoring": 54770, + "series": 50058, + "induction": 25605, + "predicate": 41627, + "argument": 4170, + "inducing": 25604, + "conceptual": 9948, + "framenet": 21443, + "previously": 42327, + "proposing": 43945, + "essentially": 18339, + "stages": 52448, + "representative": 46794, + "apply": 3319, + "identify": 24410, + "predicates": 41632, + "roles": 48326, + "jargon": 27149, + "evaluates": 18555, + "variations": 61250, + "support": 54114, + "multiword": 36332, + "mwes": 36355, + "clear": 8650, + "cut": 12066, + "difference": 14818, + "situations": 51371, + "involved": 27015, + "finite": 21056, + "state": 52572, + "frequently": 21684, + "consequences": 10200, + "recognising": 45488, + "linguistics": 30818, + "survey": 54201, + "trees": 58766, + "structuring": 53198, + "active": 1470, + "voice": 61721, + "passive": 39927, + "transitive": 58547, + "grammatical": 23067, + "serves": 50089, + "patient": 39955, + "receiving": 45271, + "action": 1455, + "purpose": 44400, + "generated": 22265, + "boolean": 7249, + "constituents": 10358, + "subgroups": 53549, + "phrases": 40848, + "permutation": 40732, + "isomorphism": 27055, + "property": 43274, + "resulted": 47458, + "developers": 14645, + "evolve": 18836, + "transformations": 58446, + "technical": 56017, + "storage": 52871, + "access": 818, + "environment": 18170, + "traditionally": 57558, + "vital": 61690, + "style": 53480, + "specifications": 52235, + "fit": 21067, + "efficiently": 16911, + "retrieved": 47981, + "naturally": 36472, + "oriented": 38695, + "redundant": 45728, + "indexing": 25514, + "demonstrate": 13858, + "fast": 20419, + "iso": 27048, + "deal": 13512, + "psychology": 44293, + "science": 48745, + "frequencies": 21667, + "chapter": 8190, + "tap": 54800, + "internet": 26696, + "typed": 59075, + "local": 30928, + "aiming": 2167, + "personal": 40754, + "objectives": 38110, + "globally": 22850, + "effectively": 16719, + "largest": 29092, + "return": 47997, + "aggregate": 2071, + "estimates": 18378, + "introducing": 26897, + "normalized": 37708, + "doesn": 15985, + "searches": 48990, + "anymore": 3125, + "opt": 38520, + "neutral": 37121, + "determine": 14553, + "settings": 50357, + "publicly": 44332, + "offer": 38290, + "mechanisms": 32150, + "canonical": 7589, + "amazon": 2520, + "reviews": 48047, + "come": 9127, + "26": 341, + "32": 367, + "people": 40026, + "review": 48026, + "helpful": 23595, + "evaluation": 18571, + "including": 25237, + "market": 31836, + "political": 41107, + "campaigns": 7565, + "fundamentally": 21795, + "asking": 4523, + "did": 14809, + "think": 57048, + "analyzing": 2840, + "book": 7245, + "dataset": 12786, + "helpfulness": 23599, + "just": 27248, + "subtle": 53675, + "relates": 45961, + "evaluations": 18750, + "product": 43041, + "plagiarism": 40932, + "distinguish": 15602, + "predictions": 41754, + "competing": 9528, + "theories": 57034, + "social": 51553, + "unexpected": 59438, + "populations": 41213, + "countries": 11627, + "typology": 59171, + "universal": 59537, + "implication": 24653, + "facts": 20316, + "extant": 19817, + "adjectives": 1845, + "typically": 59135, + "sample": 48446, + "assisting": 4616, + "deserve": 14258, + "careful": 7754, + "hierarchical": 23653, + "cope": 11127, + "sampling": 48496, + "noisy": 37612, + "channel": 8187, + "derives": 14207, + "overall": 39032, + "drop": 16440, + "generate": 22173, + "coherent": 8913, + "length": 30023, + "outperforms": 38864, + "operates": 38487, + "simplifying": 51244, + "sequentially": 50054, + "plays": 40988, + "entity": 18093, + "tracking": 57496, + "identifying": 24453, + "mentions": 32302, + "entities": 18036, + "extending": 19840, + "named": 36368, + "coreference": 11158, + "names": 36377, + "definite": 13791, + "ne": 36502, + "tagging": 54734, + "solutions": 51666, + "separate": 49873, + "mention": 32295, + "aspect": 4526, + "doing": 15990, + "contrast": 10873, + "simultaneously": 51269, + "joint": 27164, + "effectiveness": 16765, + "japanese": 27144, + "originally": 38742, + "variants": 61234, + "candidates": 7583, + "log": 30971, + "likelihood": 30516, + "suitable": 53854, + "compound": 9754, + "phrasal": 40833, + "units": 59528, + "compositionality": 9751, + "polysemy": 41121, + "validate": 61172, + "candidate": 7568, + "collect": 8937, + "pages": 39140, + "adapted": 1550, + "compositional": 9741, + "polysemous": 41119, + "shows": 50761, + "hurdles": 24302, + "dravidian": 16397, + "indian": 25516, + "explores": 19771, + "chinese": 8296, + "join": 27160, + "coin": 8924, + "literally": 30853, + "sounds": 51739, + "combinatory": 9058, + "sound": 51734, + "pronunciation": 43234, + "letter": 30050, + "remains": 46325, + "interesting": 26647, + "entirely": 18031, + "dimension": 15223, + "add": 1587, + "traditional": 57509, + "hindi": 23934, + "teaching": 56000, + "reported": 46450, + "accuracy": 892, + "60": 456, + "80": 520, + "depending": 14154, + "concentrate": 9915, + "difficulties": 15195, + "assumption": 4638, + "referring": 45758, + "expression": 19804, + "restructuring": 47429, + "linkage": 30829, + "explicitly": 19629, + "mentioned": 32298, + "discursive": 15456, + "perceptual": 40059, + "referents": 45756, + "note": 37721, + "mechanism": 32095, + "handle": 23405, + "treated": 58733, + "fresh": 21688, + "widely": 61988, + "recognized": 45553, + "proliferation": 43148, + "schemes": 48733, + "runs": 48408, + "counter": 11613, + "need": 36541, + "standards": 52548, + "increasingly": 25468, + "mandatory": 31699, + "answer": 3029, + "comprised": 9817, + "morpho": 35835, + "annotator": 3009, + "overview": 39108, + "applicability": 3152, + "comparative": 9317, + "rdf": 45042, + "outline": 38774, + "committee": 9160, + "international": 26694, + "tc": 55983, + "37": 379, + "sc": 48543, + "primary": 42368, + "solicit": 51648, + "participation": 39826, + "members": 32229, + "community": 9258, + "entering": 18014, + "composing": 9736, + "message": 32320, + "services": 50094, + "instance": 26423, + "abbreviations": 585, + "writing": 62988, + "saving": 48537, + "fashion": 20411, + "collected": 8953, + "processed": 42842, + "sms": 51542, + "strategies": 52890, + "challenging": 8081, + "consonant": 10340, + "skeleton": 51408, + "phonetic": 40825, + "rough": 48356, + "handwritten": 23431, + "care": 7752, + "proposes": 43929, + "dependency": 14116, + "parsing": 39769, + "expresses": 19802, + "interactions": 26615, + "polarity": 41088, + "composition": 9739, + "led": 29987, + "saturation": 48532, + "polarities": 41087, + "lexicalized": 30397, + "translated": 58553, + "consequence": 10199, + "refinement": 45767, + "sheds": 50531, + "light": 30448, + "conflicting": 10141, + "recursion": 45634, + "recursive": 45635, + "exist": 19014, + "byproduct": 7510, + "insight": 26383, + "universals": 59552, + "constructs": 10433, + "multimodal": 36141, + "interfaces": 26663, + "graphics": 23185, + "gestures": 22683, + "facial": 20259, + "promise": 43154, + "effective": 16625, + "possibly": 41343, + "imprecise": 24804, + "ambiguous": 2529, + "coordinated": 11124, + "cohesive": 8922, + "presentations": 42056, + "adapting": 1563, + "interface": 26659, + "prevention": 42233, + "interactive": 26626, + "audio": 4924, + "video": 61577, + "constraints": 10371, + "generic": 22625, + "approached": 3748, + "takes": 54776, + "workshop": 62919, + "informal": 25739, + "http": 24047, + "proofs": 43237, + "interact": 26591, + "follow": 21253, + "intersection": 26745, + "proof": 43235, + "theoretic": 57010, + "infinite": 25715, + "wsd": 63019, + "selects": 49167, + "proximity": 44263, + "variant": 61232, + "heuristic": 23626, + "analyzes": 2839, + "glosses": 22854, + "matched": 31902, + "obtains": 38240, + "58": 448, + "35": 373, + "respectively": 47357, + "recommends": 45574, + "deeper": 13756, + "experiment": 19230, + "comparable": 9287, + "individual": 25562, + "aligned": 2354, + "respective": 47355, + "counterparts": 11623, + "carried": 7768, + "alignment": 2362, + "employed": 17396, + "74": 497, + "alignments": 2390, + "expected": 19197, + "contrary": 10871, + "intuition": 26906, + "rise": 48151, + "consistently": 10288, + "close": 8683, + "discriminate": 15437, + "rendering": 46383, + "unsuitable": 59677, + "prove": 43979, + "productive": 43055, + "central": 7916, + "drive": 16415, + "facilitated": 20279, + "referencing": 45752, + "sanskrit": 48515, + "involves": 27017, + "handling": 23423, + "integral": 26501, + "firstly": 21064, + "necessitates": 36538, + "conjunctions": 10166, + "points": 41064, + "letters": 30051, + "transform": 58439, + "accepted": 815, + "authority": 5006, + "approximately": 3981, + "precisely": 41611, + "comprehensively": 9804, + "pertaining": 40784, + "schema": 48722, + "beginnings": 6383, + "comprehensive": 9779, + "computationally": 9870, + "lean": 29337, + "neural": 36927, + "ann": 2867, + "scanned": 48653, + "character": 8194, + "segment": 49071, + "ongoing": 38347, + "aid": 2126, + "characters": 8252, + "assamese": 4566, + "north": 37714, + "eastern": 16555, + "india": 25515, + "static": 52722, + "recorded": 45588, + "individuals": 25591, + "segmenting": 49093, + "entire": 18019, + "trained": 57666, + "recognizes": 45557, + "boundary": 7282, + "assumed": 4635, + "performed": 40656, + "match": 31894, + "confirmed": 10135, + "recently": 45400, + "spaces": 51908, + "availability": 5244, + "programs": 43089, + "analyze": 2804, + "contain": 10459, + "enhance": 17908, + "readability": 45069, + "journal": 27226, + "titles": 57271, + "china": 8295, + "papers": 39608, + "citations": 8366, + "visualized": 61684, + "factor": 20298, + "analyzed": 2834, + "ellipsis": 16993, + "formula": 21380, + "validity": 61199, + "newspaper": 37429, + "fragments": 21436, + "converted": 11075, + "mono": 35787, + "sentential": 49810, + "discourses": 15404, + "require": 46839, + "primitive": 42376, + "ones": 38334, + "discovering": 15412, + "reproduction": 46832, + "mutation": 36339, + "happens": 23436, + "living": 30896, + "family": 20388, + "relatedness": 45957, + "1950s": 194, + "percentage": 40051, + "historical": 23955, + "weak": 61847, + "judgment": 27242, + "avoids": 5438, + "subjectivity": 53568, + "replicated": 46415, + "quick": 44818, + "comparison": 9488, + "indo": 25592, + "european": 18427, + "families": 20387, + "position": 41259, + "carry": 7774, + "roots": 48344, + "explorer": 19770, + "geographical": 22648, + "division": 15750, + "shared": 50464, + "levenshtein": 30252, + "averages": 5425, + "stability": 52410, + "replacement": 46407, + "speed": 52319, + "tackled": 54715, + "technology": 56158, + "society": 51610, + "codes": 8873, + "differ": 14813, + "varying": 61425, + "sides": 50824, + "reflexive": 45783, + "facilitate": 20261, + "stem": 52792, + "cell": 7908, + "relate": 45885, + "sociological": 51618, + "quest": 44683, + "activities": 1482, + "reconstructed": 45579, + "contains": 10488, + "debated": 13528, + "beginning": 6381, + "apparently": 3134, + "accurate": 1074, + "optimal": 38525, + "studying": 53479, + "reconstruction": 45581, + "completed": 9604, + "ordinary": 38675, + "spoken": 52350, + "signed": 50841, + "humans": 24270, + "distinguished": 15607, + "enabling": 17453, + "check": 8271, + "aim": 2132, + "finds": 20918, + "errors": 18232, + "parse": 39753, + "encoding": 17562, + "paraphrasing": 39745, + "recognize": 45549, + "longer": 31049, + "convey": 11082, + "entailment": 17998, + "reads": 45095, + "element": 16972, + "likely": 30524, + "infer": 25635, + "true": 58819, + "bidirectional": 7064, + "answering": 3063, + "summarize": 53904, + "turn": 58986, + "pointing": 41063, + "prominent": 43151, + "articles": 4462, + "resurgence": 47918, + "spatio": 51989, + "motivates": 35876, + "looking": 31069, + "parameters": 39686, + "convergence": 11024, + "invariance": 26916, + "vowels": 61745, + "samples": 48463, + "american": 2535, + "98": 572, + "06": 23, + "percent": 40050, + "90": 551, + "13": 121, + "vowel": 61744, + "protein": 43968, + "biomedical": 7171, + "accelerate": 799, + "informatics": 25746, + "advance": 1879, + "art": 4207, + "neglected": 36648, + "characteristic": 8231, + "impact": 24587, + "simplification": 51236, + "showing": 50675, + "poses": 41243, + "parsers": 39763, + "seeks": 49055, + "abstracts": 778, + "improve": 24820, + "pipeline": 40890, + "evaluated": 18519, + "annotated": 2875, + "23": 322, + "simplified": 51238, + "record": 45587, + "17th": 177, + "century": 7931, + "analyse": 2585, + "morphologically": 35846, + "syntactically": 54338, + "contemporary": 10509, + "transcribed": 58333, + "transcription": 58339, + "transducers": 58344, + "si": 50816, + "equivalents": 18204, + "ac": 786, + "comment": 9139, + "transcriptions": 58340, + "omission": 38330, + "let": 30048, + "writers": 62986, + "wrongly": 63017, + "plural": 41030, + "symbols": 54271, + "visual": 61647, + "isolation": 27053, + "morpheme": 35833, + "16th": 171, + "19th": 209, + "standardised": 52543, + "problematic": 42691, + "complaint": 9581, + "websites": 61906, + "mis": 33339, + "travel": 58725, + "published": 44366, + "noticed": 37731, + "accompany": 842, + "highlight": 23859, + "lexemes": 30349, + "trace": 57487, + "gov": 22962, + "arabic": 3992, + "formation": 21366, + "person": 40749, + "reason": 45167, + "ameliorate": 2532, + "integrates": 26518, + "transliteration": 58715, + "typological": 59164, + "conferences": 10109, + "module": 35751, + "syntax": 54345, + "glimpse": 22818, + "appropriate": 3962, + "critique": 11804, + "topic": 57388, + "nps": 37967, + "heuristics": 23633, + "classify": 8630, + "coding": 8884, + "orthogonal": 38752, + "diversity": 15730, + "scheme": 48726, + "selective": 49162, + "suppress": 54146, + "signals": 50833, + "transmitted": 58718, + "transmit": 58717, + "receiver": 45268, + "adaptive": 1572, + "squared": 52400, + "null": 37979, + "combat": 9029, + "interference": 26665, + "illustrate": 24514, + "steering": 52791, + "arrival": 4203, + "decoders": 13622, + "preserved": 42118, + "decoding": 13624, + "incorporating": 25377, + "hidden": 23637, + "hmms": 23977, + "successfully": 53740, + "spite": 52345, + "hmm": 23975, + "independence": 25491, + "obviously": 38263, + "violated": 61615, + "mismatch": 33350, + "modify": 35736, + "dependence": 14101, + "fair": 20355, + "say": 48540, + "2010": 248, + "consensus": 10198, + "choice": 8331, + "commercially": 9157, + "aimed": 2163, + "departure": 14096, + "diagnostic": 14739, + "resampling": 46973, + "rarely": 45001, + "manipulations": 31712, + "demonstrates": 14026, + "responsible": 47408, + "numbers": 38057, + "rates": 45021, + "crucial": 11893, + "improving": 25170, + "little": 30868, + "profoundly": 43075, + "limits": 30637, + "instructions": 26483, + "explain": 19593, + "actions": 1461, + "vector": 61449, + "address": 1740, + "organize": 38687, + "vsm": 61749, + "broad": 7348, + "yielding": 63108, + "category": 7860, + "breadth": 7309, + "familiar": 20385, + "pointers": 41062, + "poetry": 41041, + "know": 27384, + "govern": 22963, + "prosody": 43958, + "numerous": 38064, + "converts": 11080, + "prose": 43955, + "accordance": 851, + "ensure": 17988, + "conjunction": 10165, + "verse": 61549, + "handled": 23421, + "considerably": 10237, + "reducing": 45701, + "target": 54801, + "gives": 22802, + "suggestions": 53842, + "impossible": 24802, + "allowed": 2442, + "component": 9700, + "interacts": 26637, + "ambiguities": 2524, + "addition": 1598, + "unique": 59507, + "addressed": 1804, + "impaired": 24616, + "dictate": 14800, + "ner": 36675, + "immediate": 24581, + "getting": 22685, + "right": 48136, + "extended": 19835, + "assume": 4632, + "systematically": 54407, + "spatial": 51984, + "markers": 31835, + "reveal": 48007, + "characterizing": 8251, + "accounting": 884, + "highlighted": 23870, + "commonsense": 9232, + "geometry": 22659, + "pragmatic": 41494, + "formalized": 21361, + "suited": 53864, + "prepositions": 41821, + "illustrating": 24522, + "inferential": 25706, + "adequacy": 1830, + "psycholinguistic": 44287, + "mainly": 31466, + "distinguishes": 15608, + "underlies": 59259, + "adults": 1878, + "fulfilling": 21709, + "ontologies": 38397, + "coherence": 8903, + "talk": 54790, + "variability": 61220, + "bringing": 7337, + "capacities": 7634, + "detecting": 14444, + "relied": 46259, + "elementary": 16974, + "edus": 16607, + "linear": 30648, + "nested": 36686, + "unfortunately": 59451, + "strong": 52998, + "enforces": 17754, + "round": 48358, + "effort": 16923, + "bank": 5525, + "validated": 61185, + "47": 412, + "445": 409, + "encouraging": 17604, + "73": 494, + "calculus": 7528, + "foundation": 21415, + "categorial": 7838, + "concatenation": 9911, + "discontinuous": 15380, + "displacement": 15518, + "generalization": 22112, + "preserves": 42119, + "elimination": 16992, + "contribution": 10940, + "scene": 48713, + "guess": 23324, + "shape": 50448, + "correctly": 11488, + "modeled": 34554, + "avoid": 5430, + "72": 493, + "manifested": 31703, + "compiled": 9578, + "peculiarities": 40008, + "half": 23365, + "20th": 310, + "explanatory": 19608, + "writer": 62985, + "discussed": 15485, + "tables": 54691, + "implicit": 24658, + "table": 54686, + "undefined": 59248, + "script": 48949, + "conducted": 10072, + "digital": 15207, + "manipulated": 31708, + "processor": 42970, + "seven": 50414, + "persian": 40743, + "affects": 2024, + "additional": 1650, + "suggested": 53834, + "companion": 9284, + "complements": 9596, + "arxiv": 4505, + "ca": 7519, + "complete": 9598, + "highlighting": 23871, + "worse": 62971, + "worst": 62976, + "subsequent": 53604, + "focuses": 21234, + "pipelines": 40910, + "decades": 13540, + "publication": 44330, + "location": 30966, + "france": 21634, + "vs": 61747, + "sixth": 51373, + "iteration": 27119, + "release": 46140, + "exploit": 19651, + "massive": 31878, + "parallelism": 39658, + "computation": 9825, + "physically": 40862, + "architectures": 4101, + "variable": 61221, + "grained": 23020, + "strictly": 52986, + "deterministic": 14564, + "allocation": 2430, + "modules": 35771, + "variables": 61226, + "transparent": 58721, + "abstraction": 766, + "exhibit": 19000, + "aiding": 2130, + "verification": 61525, + "scheduling": 48721, + "computing": 9903, + "resolving": 47205, + "incremental": 25482, + "manner": 31713, + "whilst": 61952, + "considerations": 10244, + "forward": 21401, + "incorporates": 25372, + "simulator": 51267, + "compiler": 9579, + "code": 8790, + "consisting": 10314, + "bit": 7185, + "marking": 31839, + "instruction": 26478, + "routes": 48363, + "crisis": 11771, + "cue": 11931, + "associations": 4630, + "statistically": 52769, + "prevalent": 42226, + "practice": 41483, + "giving": 22812, + "weightage": 61922, + "unigram": 59493, + "detect": 14434, + "biases": 7053, + "span": 51918, + "agnostic": 2086, + "fidelity": 20748, + "cues": 11933, + "benchmark": 6422, + "relatively": 46113, + "obscure": 38118, + "capture": 7650, + "followed": 21256, + "dice": 14799, + "popular": 41154, + "pmi": 41035, + "incomplete": 25331, + "lacks": 27930, + "consistency": 10265, + "usable": 59798, + "reflect": 45772, + "influence": 25723, + "myriad": 36356, + "reproduced": 46824, + "decline": 13581, + "interplay": 26702, + "environments": 18175, + "communities": 9257, + "relationship": 46066, + "quantify": 44607, + "topics": 57443, + "controlling": 10989, + "indicated": 25537, + "success": 53695, + "scales": 48644, + "reveals": 48017, + "sizes": 51407, + "shorter": 50588, + "adapts": 1583, + "groupings": 23277, + "endogenous": 17743, + "contributors": 10959, + "force": 21285, + "driven": 16417, + "distinctive": 15599, + "identity": 24472, + "deriving": 14208, + "taxonomy": 55981, + "contributions": 10951, + "challenges": 8026, + "targeting": 54861, + "briefly": 7329, + "faced": 20250, + "limiting": 30635, + "scope": 48778, + "details": 14432, + "contributing": 10938, + "practically": 41482, + "ontology": 38398, + "applies": 3316, + "italian": 27108, + "pedagogical": 40009, + "specialized": 52031, + "rapidly": 44992, + "concerning": 9960, + "progressively": 43125, + "replaced": 46404, + "par": 39610, + "ais": 2223, + "le": 29253, + "sp": 51846, + "plus": 41033, + "cycle": 12073, + "ee": 16608, + "systemic": 54416, + "operationalized": 38493, + "measurement": 32072, + "correlations": 11532, + "summarizes": 53910, + "choices": 8340, + "analyst": 2797, + "visualization": 61677, + "unambiguous": 59203, + "located": 30963, + "altered": 2490, + "reading": 45079, + "dramatically": 16386, + "reader": 45073, + "exceptions": 18960, + "consolidate": 10336, + "weakened": 61853, + "enormous": 17957, + "manifold": 31705, + "affected": 2019, + "simply": 51246, + "ignored": 24494, + "recognised": 45487, + "african": 2039, + "indonesian": 25599, + "population": 41212, + "belongs": 6420, + "greater": 23222, + "lives": 30895, + "possess": 41307, + "skill": 51413, + "navigation": 36495, + "ethnic": 18420, + "unclear": 59234, + "reached": 45055, + "path": 39944, + "disputed": 15525, + "configuration": 10124, + "draw": 16399, + "authors": 5007, + "cite": 8367, + "2008": 245, + "2009": 246, + "consist": 10262, + "200": 233, + "covering": 11654, + "island": 27046, + "randomized": 44896, + "random": 44868, + "surprising": 54181, + "adequately": 1833, + "hypothesize": 24352, + "inferring": 25710, + "speaker": 51995, + "certainty": 7952, + "prosodic": 43956, + "signal": 50828, + "dialogue": 14763, + "centered": 7913, + "causing": 7892, + "uncertainty": 59226, + "utterance": 61132, + "improves": 25111, + "predict": 41634, + "uncertain": 59224, + "eliciting": 16982, + "contextually": 10818, + "elicit": 16979, + "ratings": 45023, + "listeners": 30845, + "internal": 26684, + "fragment": 21434, + "bulgarian": 7494, + "placed": 40928, + "matches": 31903, + "classified": 8585, + "double": 16320, + "keywords": 27353, + "debate": 13527, + "participants": 39812, + "conversations": 11056, + "converge": 11021, + "coordinate": 11123, + "dimensions": 15243, + "pitch": 40912, + "empirically": 17355, + "supported": 54135, + "exclusively": 18978, + "controlled": 10982, + "laboratory": 27864, + "twitter": 59030, + "novelty": 37961, + "140": 141, + "geared": 22030, + "conversation": 11028, + "priori": 42429, + "conversational": 11037, + "verified": 61528, + "investigating": 27004, + "stylistic": 53509, + "symmetry": 54273, + "commonly": 9215, + "status": 52779, + "overviews": 39121, + "lithuanian": 30867, + "today": 57277, + "tackles": 54717, + "issue": 27058, + "mt": 35918, + "requirements": 46909, + "regular": 45830, + "designing": 14338, + "deploying": 14174, + "run": 48400, + "windows": 62066, + "managing": 31695, + "accessing": 836, + "pause": 39981, + "perceives": 40049, + "stream": 52958, + "boundaries": 7281, + "capability": 7607, + "remember": 46369, + "recurring": 45633, + "locating": 30965, + "removed": 46377, + "complexities": 9673, + "capabilities": 7595, + "locations": 30969, + "viterbi": 61694, + "hypothetical": 24356, + "segmentations": 49090, + "block": 7220, + "incurs": 25489, + "latency": 29116, + "sending": 49479, + "appendix": 3150, + "immediately": 24582, + "adapt": 1499, + "styles": 53507, + "adjust": 1850, + "partner": 39901, + "preceding": 41606, + "striking": 52989, + "coordination": 11126, + "arisen": 4184, + "gaining": 21925, + "emphasizing": 17315, + "adaptation": 1518, + "deeply": 13762, + "embedded": 17006, + "fictional": 20746, + "dialogs": 14762, + "don": 16313, + "receive": 45252, + "benefits": 6579, + "movie": 35894, + "suggestive": 53844, + "gender": 22032, + "surprisingly": 54185, + "average": 5398, + "females": 20731, + "males": 31682, + "distributional": 15661, + "linguists": 30825, + "categorical": 7840, + "cs": 11919, + "cl": 8378, + "vectors": 61479, + "competitors": 9575, + "showcases": 50661, + "pronominal": 43229, + "anaphora": 2848, + "proposals": 43279, + "translate": 58549, + "chains": 7963, + "zero": 63150, + "hardly": 23457, + "unrestricted": 59638, + "84": 530, + "entropy": 18157, + "telugu": 56170, + "syllabic": 54263, + "somewhat": 51710, + "complicated": 9696, + "notes": 37724, + "continuation": 10826, + "covered": 11653, + "answers": 3104, + "consumers": 10439, + "consequently": 10203, + "consumer": 10438, + "targets": 54864, + "spam": 51916, + "focused": 21215, + "primarily": 42360, + "identifiable": 24381, + "deceptive": 13551, + "deliberately": 13827, + "authentic": 4997, + "integrating": 26519, + "ultimately": 59192, + "nearly": 36525, + "gold": 22910, + "additionally": 1711, + "revealing": 48016, + "wikipedia": 62041, + "explicit": 19610, + "wordnet": 62355, + "highest": 23848, + "ws": 63018, + "spearman": 52010, + "rho": 48090, + "coefficient": 8885, + "79": 514, + "75": 501, + "value": 61207, + "87": 539, + "78": 511, + "polynomial": 41118, + "svm": 54231, + "esa": 18256, + "2011": 250, + "unsuccessful": 59676, + "section": 49035, + "normalizing": 37709, + "layered": 29216, + "matching": 31908, + "normalization": 37704, + "metrics": 33132, + "connected": 10173, + "missing": 33360, + "geopolitical": 22660, + "potentially": 41412, + "organizations": 38686, + "adding": 1592, + "augmenting": 4986, + "nlm": 37457, + "correcting": 11480, + "ocr": 38284, + "pubmed": 44379, + "country": 11628, + "graphical": 23182, + "download": 16325, + "facilitates": 20280, + "tuned": 58866, + "shot": 50595, + "gun": 23362, + "produces": 43024, + "simpler": 51228, + "constituent": 10354, + "optimized": 38565, + "indexed": 25512, + "improved": 24942, + "20": 218, + "downloaded": 16327, + "https": 24051, + "renders": 46384, + "offered": 38296, + "semi": 49445, + "frameworks": 21631, + "34": 371, + "49": 414, + "html": 24046, + "moments": 35780, + "represented": 46805, + "stochastic": 52854, + "derivations": 14195, + "string": 52991, + "belonging": 6418, + "scalar": 48549, + "widespread": 62030, + "practices": 41489, + "binding": 7159, + "node": 37583, + "treebank": 58762, + "showcasing": 50662, + "topological": 57466, + "fields": 20775, + "tandem": 54796, + "usefulness": 60399, + "greatly": 23225, + "increased": 25427, + "layers": 29217, + "kind": 27365, + "threads": 57083, + "intuitive": 26909, + "concise": 9963, + "expressive": 19813, + "specification": 52234, + "formulas": 21383, + "inverse": 26926, + "lambda": 27938, + "derive": 14197, + "operator": 38496, + "combinatorial": 9056, + "directed": 15263, + "assign": 4597, + "robot": 48231, + "command": 9135, + "questions": 44766, + "querying": 44682, + "puzzles": 44435, + "translating": 58564, + "solvers": 51697, + "applicable": 3155, + "investigates": 27002, + "ewc": 18843, + "ad": 1494, + "hoc": 23978, + "utilised": 61075, + "retrieve": 47976, + "ntcir": 37973, + "promising": 43157, + "iterated": 27117, + "explains": 19602, + "embedding": 17008, + "serve": 50072, + "purposes": 44416, + "expense": 19200, + "learnability": 29447, + "dis": 15347, + "encoded": 17475, + "rhetorical": 48086, + "play": 40960, + "tion": 57263, + "motivation": 35883, + "emotionally": 17301, + "comments": 9144, + "datasets": 13138, + "examining": 18874, + "consecutive": 10195, + "posts": 41369, + "discussions": 15494, + "comparisons": 9512, + "correlation": 11517, + "emotional": 17295, + "variation": 61239, + "inter": 26574, + "rater": 45018, + "assessing": 4586, + "likert": 30526, + "agreement": 2103, + "rating": 45022, + "bottlenecks": 7279, + "scoring": 48932, + "tunisian": 58975, + "specificity": 52237, + "arises": 4185, + "referred": 45757, + "assigning": 4603, + "methodological": 32713, + "combinations": 9055, + "concentrates": 9917, + "combine": 9060, + "undertaken": 59430, + "spanning": 51951, + "essay": 18315, + "proposal": 43278, + "literary": 30854, + "enabled": 17434, + "validation": 61191, + "dynamically": 16495, + "transcribing": 58336, + "implied": 24672, + "alternating": 2494, + "conditions": 10019, + "discussing": 15489, + "projects": 43145, + "machines": 31396, + "things": 57047, + "animals": 2863, + "cause": 7883, + "causes": 7890, + "converting": 11077, + "proper": 43250, + "operate": 38486, + "comparing": 9478, + "developing": 14646, + "complementary": 9586, + "providing": 44234, + "industrial": 25615, + "stable": 52412, + "documented": 15850, + "interoperability": 26699, + "lexica": 30350, + "affective": 2022, + "verify": 61533, + "unbiased": 59221, + "positive": 41277, + "lends": 30022, + "negative": 36615, + "informativeness": 26179, + "uniformly": 59488, + "decrease": 13666, + "emotions": 17302, + "sentiment": 49814, + "orientation": 38693, + "precomputed": 41620, + "85": 534, + "02": 19, + "outperforming": 38844, + "biggest": 7096, + "deployment": 14175, + "trainable": 57662, + "planner": 40942, + "restaurant": 47412, + "template": 56173, + "supports": 54141, + "planners": 40943, + "demonstration": 14057, + "affecting": 2020, + "responses": 47403, + "reproduce": 46821, + "agglutinative": 2069, + "listed": 30842, + "schedule": 48717, + "mwe": 36354, + "crf": 11760, + "disadvantage": 15349, + "choosing": 8346, + "running": 48405, + "tried": 58791, + "generations": 22582, + "fold": 21249, + "fitness": 21068, + "64": 468, + "08": 25, + "86": 536, + "oral": 38583, + "socio": 51611, + "tags": 54756, + "file": 20793, + "metadata": 32346, + "added": 1589, + "transducer": 58343, + "cascades": 7785, + "modified": 35731, + "cascade": 7781, + "locate": 30962, + "merging": 32317, + "campaign": 7564, + "passage": 39918, + "greek": 23245, + "conversion": 11069, + "plain": 40936, + "constant": 10342, + "adverbial": 1961, + "ending": 17741, + "ly": 31293, + "2000": 236, + "1986": 197, + "1990": 198, + "fine": 20921, + "haven": 23483, + "exploited": 19671, + "deleting": 13821, + "freely": 21650, + "yes": 63086, + "interrogative": 26744, + "permits": 40730, + "places": 40930, + "operation": 38489, + "business": 7503, + "medicine": 32215, + "hashtags": 23476, + "29": 347, + "extensions": 19850, + "tagger": 54730, + "contextual": 10758, + "critical": 11775, + "expressing": 19803, + "unable": 59198, + "preprocessing": 41825, + "clarify": 8387, + "tokens": 57321, + "augment": 4938, + "largely": 29050, + "sent": 49511, + "mobile": 33447, + "phones": 40824, + "artifact": 4485, + "private": 42442, + "battery": 6346, + "paying": 39991, + "privacy": 42438, + "live": 30894, + "collects": 8993, + "submissions": 53577, + "checks": 8279, + "adds": 1827, + "releasing": 46187, + "resultant": 47457, + "sql": 52393, + "month": 35829, + "sender": 49478, + "000": 1, + "focusing": 21245, + "mandarin": 31696, + "paragraphs": 39639, + "iii": 24508, + "framing": 21633, + "agenda": 2051, + "positions": 41276, + "communicate": 9241, + "recursively": 45639, + "directional": 15275, + "normal": 37701, + "image": 24528, + "commutative": 9278, + "historically": 23963, + "autonomous": 5211, + "brain": 7297, + "room": 48337, + "intermediate": 26673, + "starts": 52571, + "peoples": 40045, + "bridge": 7318, + "gap": 21957, + "bridges": 7323, + "linking": 30833, + "worth": 62979, + "pursuing": 44421, + "achievable": 1104, + "reasonable": 45171, + "cost": 11576, + "2012": 251, + "weight": 61915, + "minimal": 33282, + "liu": 30891, + "tfidf": 56995, + "extra": 19960, + "wikinews": 62040, + "18": 178, + "hints": 23951, + "hypotheses": 24343, + "circumstances": 8362, + "deduction": 13676, + "plausible": 40958, + "laws": 29174, + "predicts": 41781, + "presence": 41836, + "dimensional": 15225, + "richer": 48128, + "goes": 22908, + "psychological": 44291, + "observations": 38126, + "obtaining": 38229, + "visualize": 61683, + "notions": 37734, + "asr": 4554, + "remarkable": 46356, + "surroundings": 54199, + "discard": 15366, + "performing": 40669, + "sensitive": 49494, + "lexically": 30398, + "pinpoint": 40885, + "rational": 45025, + "treatment": 58736, + "expectation": 19193, + "resembles": 47176, + "risk": 48160, + "decision": 13558, + "favor": 20452, + "generalized": 22150, + "left": 29997, + "corner": 11165, + "informed": 26180, + "conflicts": 10142, + "misleading": 33349, + "costs": 11606, + "minimized": 33299, + "minimizing": 33301, + "duration": 16474, + "computations": 9877, + "filler": 20799, + "decompositions": 13660, + "tensor": 56222, + "paradigm": 39621, + "maximizing": 31963, + "race": 44843, + "remote": 46372, + "preferred": 41792, + "configurations": 10125, + "subgroup": 53548, + "dominant": 16305, + "interlocutor": 26670, + "agent": 2053, + "interconnected": 26640, + "advantageous": 1946, + "yield": 63088, + "male": 31680, + "equally": 18190, + "disjoint": 15508, + "emerged": 17258, + "confirm": 10128, + "thesis": 57043, + "simulations": 51266, + "analytically": 2802, + "measurements": 32073, + "thorough": 57054, + "emergent": 17268, + "dispersion": 15517, + "conventions": 11020, + "cfg": 7954, + "pos": 41228, + "chunking": 8357, + "naive": 36360, + "bayesian": 6356, + "disambiguate": 15354, + "tagged": 54728, + "mimic": 33267, + "proxies": 44259, + "lay": 29175, + "attractive": 4896, + "poor": 41131, + "paid": 39141, + "limitations": 30542, + "conclude": 9967, + "brief": 7327, + "ranking": 44962, + "indirect": 25554, + "merits": 32319, + "exhaustive": 18996, + "outputs": 39011, + "bring": 7330, + "22": 317, + "15": 145, + "31": 364, + "77": 510, + "14": 135, + "qa": 44444, + "ir": 27033, + "engines": 17771, + "bound": 7280, + "bearing": 6372, + "ranks": 44980, + "40": 394, + "held": 23543, + "trec": 58739, + "analysed": 2587, + "enhancing": 17947, + "70": 486, + "difficult": 15153, + "blind": 7216, + "feedback": 20715, + "rf": 48083, + "unlikely": 59614, + "ranging": 44944, + "calculations": 7527, + "intricate": 26765, + "algorithmic": 2313, + "optimization": 38542, + "demanding": 13843, + "weather": 61874, + "forecasting": 21294, + "convert": 11071, + "versatile": 61547, + "pervasive": 40797, + "erroneous": 18208, + "solved": 51694, + "prone": 43225, + "spelling": 52333, + "operating": 38488, + "bad": 5498, + "editing": 16595, + "correction": 11482, + "corrections": 11486, + "selecting": 49121, + "virtue": 61627, + "microsoft": 33232, + "inclusive": 25324, + "speeches": 52318, + "parallelized": 39662, + "shed": 50523, + "spread": 52376, + "facets": 20258, + "persuasive": 40781, + "overlap": 39087, + "ground": 23249, + "claims": 8384, + "automata": 5031, + "varieties": 61259, + "situate": 51366, + "formalisms": 21356, + "cover": 11643, + "structural": 53074, + "divergences": 15687, + "transduction": 58345, + "experimenting": 19340, + "awareness": 5479, + "phrased": 40847, + "affect": 2011, + "quotes": 44841, + "situational": 51369, + "distinctiveness": 15600, + "portable": 41216, + "proved": 43984, + "extensively": 19916, + "released": 46170, + "edition": 16598, + "thesaurus": 57042, + "150": 155, + "ago": 2099, + "countless": 11626, + "students": 53216, + "50": 419, + "accepting": 816, + "tractable": 57499, + "dissertation": 15536, + "transforming": 58534, + "contrasted": 10893, + "computerized": 9899, + "benchmarks": 6509, + "turned": 58992, + "excellent": 18953, + "abundant": 780, + "optical": 38521, + "conceived": 9914, + "inaccurate": 25209, + "misspellings": 33369, + "cornerstone": 11166, + "spell": 52331, + "spellings": 52338, + "images": 24551, + "developments": 14711, + "advent": 1956, + "lot": 31113, + "textbooks": 56851, + "transformed": 58447, + "imperfect": 24628, + "occasionally": 38264, + "falsely": 20384, + "identifies": 24408, + "leading": 29287, + "suggestion": 53841, + "harnesses": 23467, + "huge": 24068, + "replacements": 46408, + "revealed": 48015, + "executed": 18982, + "platforms": 40951, + "judges": 27240, + "assigned": 4602, + "traced": 57489, + "65": 471, + "gets": 22684, + "replicate": 46414, + "esl": 18260, + "300": 359, + "digest": 15206, + "synonym": 54284, + "82": 527, + "00": 0, + "33": 369, + "labelled": 27799, + "qualitative": 44469, + "supposed": 54144, + "editors": 16601, + "requested": 46836, + "guideline": 23350, + "richness": 48132, + "grams": 23083, + "drastically": 16391, + "conclusion": 9976, + "topical": 57438, + "biographical": 7165, + "conflict": 10140, + "controversial": 10991, + "concluding": 9975, + "controversy": 10993, + "metric": 33109, + "quantifies": 44606, + "proportion": 43275, + "edit": 16590, + "penalty": 40020, + "penalties": 40019, + "coefficients": 8886, + "configurable": 10123, + "suit": 53852, + "interested": 26646, + "bridging": 7325, + "unit": 59521, + "decomposition": 13658, + "meta": 32328, + "layer": 29177, + "standalone": 52459, + "decomposed": 13654, + "growing": 23287, + "crowdsourcing": 11888, + "accommodate": 838, + "cheap": 8268, + "commercial": 9153, + "advertising": 2003, + "ecological": 16575, + "tips": 57266, + "pilot": 40883, + "checking": 8275, + "incorrectly": 25401, + "basically": 6335, + "checker": 8273, + "bigger": 7095, + "checkers": 8274, + "suffer": 53758, + "sparseness": 51974, + "acronyms": 1450, + "terminologies": 56260, + "fail": 20328, + "catch": 7836, + "hinges": 23947, + "big": 7087, + "volume": 61727, + "detector": 14542, + "detects": 14544, + "generates": 22336, + "corrector": 11498, + "outstanding": 39029, + "drastic": 16390, + "instructional": 26482, + "drawn": 16407, + "considers": 10261, + "enriches": 17965, + "dealing": 13520, + "straight": 52883, + "formulate": 21384, + "consisted": 10264, + "55": 444, + "42": 405, + "representativeness": 46803, + "conveying": 11086, + "uniform": 59485, + "associates": 4627, + "transitions": 58546, + "modification": 35728, + "induced": 25602, + "syllable": 54264, + "spectrum": 52246, + "syllables": 54265, + "400": 397, + "logarithmic": 30977, + "beta": 6841, + "piecewise": 40882, + "fitting": 21071, + "piece": 40875, + "fits": 21069, + "smallest": 51527, + "sum": 53867, + "lowest": 31229, + "criterion": 11773, + "chosen": 8350, + "16": 161, + "tie": 57102, + "bayes": 6347, + "unaligned": 59202, + "pairing": 39164, + "needing": 36606, + "carefully": 7757, + "curated": 11947, + "wider": 62026, + "biased": 7050, + "favors": 20461, + "abstain": 754, + "deemed": 13680, + "ensemble": 17969, + "parents": 39748, + "keeping": 27277, + "unlabeled": 59561, + "97": 570, + "predicting": 41673, + "relying": 46306, + "ensembles": 17984, + "counts": 11629, + "tip": 57265, + "tongue": 57352, + "valuable": 61201, + "clues": 8731, + "organisation": 38680, + "mental": 32289, + "psycho": 44285, + "ve": 61448, + "knows": 27671, + "guided": 23343, + "portion": 41220, + "morphology": 35850, + "bigrams": 7099, + "threshold": 57089, + "regarding": 45792, + "puzzle": 44434, + "unstructured": 59666, + "unannotated": 59205, + "dirichlet": 15343, + "choose": 8342, + "replacing": 46410, + "beginners": 6380, + "learners": 29495, + "networking": 36824, + "declarative": 13578, + "bangla": 5523, + "preserving": 42120, + "synchronously": 54280, + "facilitating": 20281, + "opportunity": 38513, + "adopt": 1862, + "involving": 27023, + "interests": 26658, + "pro": 42449, + "anti": 3118, + "tone": 57350, + "rhetoric": 48085, + "professional": 43060, + "debates": 13529, + "contradicts": 10870, + "assertions": 4574, + "hope": 24005, + "encourage": 17589, + "pursue": 44418, + "abbreviation": 584, + "unpaired": 59624, + "runtime": 48409, + "github": 22692, + "plugin": 41029, + "licensed": 30430, + "supplement": 54104, + "lie": 30433, + "pca": 39996, + "cca": 7900, + "computes": 9901, + "theoretically": 57027, + "efficacy": 16830, + "superior": 53929, + "remain": 46311, + "notoriously": 37736, + "weeks": 61910, + "moderately": 35697, + "sized": 51406, + "expensive": 19201, + "gradients": 23012, + "contrastive": 10895, + "estimation": 18381, + "estimating": 18379, + "unnormalized": 59620, + "penn": 40021, + "magnitude": 31414, + "fewer": 20735, + "scalability": 48545, + "labeling": 27775, + "transfer": 58349, + "label": 27688, + "gained": 21913, + "conducting": 10100, + "subspace": 53612, + "regularized": 45848, + "jointly": 27192, + "minimizes": 33300, + "penalizing": 40018, + "evaluating": 18556, + "adversarial": 1963, + "performers": 40668, + "measurable": 32043, + "adopting": 1873, + "characterize": 8248, + "successes": 53731, + "failures": 20354, + "simulating": 51262, + "extrinsic": 20169, + "belief": 6406, + "greedily": 23240, + "train": 57562, + "pretraining": 42196, + "tuning": 58897, + "maximum": 31966, + "nb": 36496, + "kernel": 27288, + "outperformed": 38835, + "separating": 49885, + "excluding": 18975, + "meaningful": 32023, + "scheduled": 48718, + "usages": 59811, + "bengali": 6591, + "91": 555, + "21": 313, + "analysing": 2608, + "specifies": 52240, + "adjective": 1843, + "enter": 18012, + "appropriateness": 3972, + "reflected": 45777, + "thousands": 57078, + "permitted": 40731, + "ibm": 24359, + "asked": 4520, + "sections": 49036, + "history": 23965, + "alpha": 2485, + "explained": 19600, + "realizing": 45163, + "dream": 16412, + "passing": 39926, + "turing": 58978, + "recipe": 45480, + "dedicated": 13673, + "corrects": 11499, + "inserting": 26377, + "finer": 21037, + "ngram": 37436, + "dated": 13494, + "centuries": 7930, + "started": 52568, + "period": 40723, + "contributed": 10934, + "separately": 49881, + "tendencies": 56210, + "contributes": 10935, + "significantly": 50931, + "desired": 14346, + "barrier": 5530, + "strings": 52995, + "easier": 16525, + "answered": 3061, + "defines": 13789, + "exploits": 19677, + "stemming": 52793, + "inflected": 25717, + "root": 48341, + "suffix": 53811, + "inflectional": 25720, + "audience": 4922, + "balanced": 5514, + "terminology": 56261, + "algorithmically": 2317, + "instead": 26445, + "summary": 53912, + "summarizer": 53909, + "inputs": 26361, + "evaluator": 18774, + "asks": 4525, + "specify": 52241, + "wishes": 62088, + "ask": 4517, + "highlights": 23874, + "red": 45640, + "implementations": 24645, + "overload": 39094, + "receives": 45269, + "day": 13500, + "obstacles": 38156, + "progress": 43090, + "justify": 27260, + "bio": 7161, + "clarity": 8389, + "workers": 62865, + "days": 13504, + "week": 61908, + "news": 37382, + "center": 7912, + "majority": 31525, + "numeric": 38059, + "impression": 24805, + "presumably": 42138, + "narrator": 36387, + "knowing": 27385, + "recommend": 45562, + "leave": 29981, + "rated": 45017, + "uncover": 59244, + "optional": 38577, + "recover": 45592, + "female": 20730, + "nuanced": 37975, + "orientations": 38694, + "multifaceted": 36050, + "gendered": 22040, + "connections": 10184, + "homophily": 23997, + "correlated": 11509, + "emerges": 17269, + "audiences": 4923, + "mainstream": 31479, + "mediated": 32195, + "driving": 16438, + "comprising": 9819, + "107": 75, + "authored": 5002, + "autoregressive": 5212, + "diffusion": 15204, + "united": 59524, + "unpredictable": 59629, + "characterization": 8246, + "demographic": 13854, + "geographic": 22647, + "predictors": 41780, + "cities": 8372, + "racial": 44845, + "demographics": 13857, + "moving": 35901, + "unified": 59465, + "reproduces": 46825, + "lines": 30686, + "companies": 9283, + "presidents": 42132, + "chronological": 8353, + "select": 49098, + "400k": 399, + "conceptualization": 9950, + "institute": 26471, + "late": 29112, + "sophisticated": 51715, + "government": 22967, + "officials": 38315, + "needed": 36600, + "strengthen": 52972, + "collaboration": 8931, + "historians": 23953, + "instantly": 26444, + "axis": 5482, + "radicals": 44851, + "radical": 44849, + "horizontal": 24023, + "middle": 33235, + "chart": 8257, + "vocal": 61720, + "slight": 51432, + "figure": 20791, + "particularly": 39876, + "switching": 54260, + "lattice": 29163, + "microblogs": 33231, + "conforming": 10145, + "package": 39135, + "classifiers": 8610, + "scenarios": 48690, + "regardless": 45795, + "seemingly": 49058, + "supplementary": 54105, + "owl": 39127, + "notations": 37720, + "engineers": 17770, + "latvian": 29168, + "compliant": 9694, + "usability": 59797, + "write": 62984, + "ahead": 2110, + "continuations": 10827, + "lookahead": 31067, + "satisfying": 48529, + "references": 45751, + "ace": 1101, + "considerable": 10227, + "ordered": 38664, + "permutations": 40734, + "27": 342, + "28": 345, + "seconds": 49033, + "universe": 59553, + "insightful": 26385, + "pp": 41451, + "62": 464, + "63": 466, + "transitivity": 58548, + "vast": 61436, + "mere": 32312, + "exponentially": 19783, + "shifts": 50546, + "volatility": 61726, + "exemplify": 18993, + "manifest": 31701, + "token": 57279, + "overlook": 39096, + "inform": 25737, + "intuitions": 26908, + "specialist": 52027, + "posing": 41257, + "creative": 11751, + "dubbed": 16466, + "readers": 45075, + "invention": 26923, + "approximate": 3975, + "retrieves": 47991, + "sufficiently": 53808, + "organized": 38688, + "start": 52565, + "exact": 18848, + "substring": 53660, + "stepwise": 52846, + "substrings": 53661, + "aligning": 2361, + "bounds": 7287, + "collections": 8988, + "refers": 45762, + "romanian": 48333, + "endings": 17742, + "enjoy": 17950, + "interpret": 26708, + "autoencoder": 5024, + "reduced": 45684, + "loss": 31081, + "employs": 17403, + "hampered": 23378, + "sr": 52403, + "faster": 20431, + "stanford": 52555, + "tokenizer": 57319, + "recognizer": 45555, + "extend": 19818, + "benefit": 6557, + "accumulation": 890, + "multitude": 36329, + "election": 16961, + "winner": 62068, + "presidential": 42130, + "president": 42129, + "100": 56, + "predictor": 41779, + "incompleteness": 25332, + "generalizations": 22135, + "initializing": 26229, + "queried": 44647, + "generalizes": 22153, + "unseen": 59642, + "desirable": 14343, + "functionalities": 21764, + "observe": 38129, + "billion": 7117, + "requiring": 46958, + "corrected": 11479, + "evidence": 18806, + "raising": 44864, + "grammaticality": 23080, + "judgements": 27239, + "simplicity": 51234, + "scaled": 48641, + "amenable": 2533, + "tensors": 56227, + "generalising": 22105, + "outperform": 38779, + "face": 20239, + "leverages": 30300, + "array": 4201, + "workload": 62871, + "server": 50087, + "cluster": 8732, + "servers": 50088, + "python": 44438, + "incorporate": 25343, + "analytical": 2800, + "planned": 40941, + "releases": 46186, + "citep": 8370, + "urgent": 59787, + "wizard": 62095, + "oz": 39131, + "react": 45062, + "failure": 20351, + "experts": 19589, + "follows": 21273, + "sees": 49069, + "submission": 53571, + "reordering": 46389, + "decided": 13553, + "accomplish": 845, + "moses": 35853, + "decoder": 13584, + "reorder": 46387, + "farsi": 20408, + "urdu": 59783, + "bleu": 7202, + "kendall": 27284, + "tau": 55977, + "hamming": 23376, + "scarce": 48656, + "feasible": 20471, + "option": 38576, + "comparability": 9286, + "cosine": 11572, + "calculated": 7523, + "categorization": 7854, + "addresses": 1807, + "monolingual": 35789, + "differently": 15147, + "constrain": 10362, + "visibility": 61631, + "nodes": 37588, + "degrees": 13815, + "informational": 26167, + "assisted": 4615, + "founded": 21422, + "raised": 44856, + "teachers": 55998, + "specialists": 52028, + "chomsky": 8341, + "properly": 43255, + "rewriting": 48079, + "justified": 27258, + "edge": 16585, + "modifications": 35729, + "termination": 56258, + "derivation": 14193, + "increasing": 25441, + "mail": 31420, + "arrange": 4198, + "maintaining": 31485, + "removing": 46379, + "stop": 52864, + "secondly": 49031, + "thirdly": 57050, + "tf": 56989, + "idf": 24475, + "minimum": 33302, + "preprocessed": 41824, + "counted": 11612, + "reuters": 48006, + "subsets": 53611, + "trade": 57501, + "money": 35783, + "grain": 23019, + "classic": 8419, + "hardware": 23459, + "scarcity": 48661, + "pivot": 40916, + "trilingual": 58799, + "tuples": 58977, + "disambiguating": 15357, + "mixing": 33414, + "favorable": 20453, + "reports": 46460, + "merge": 32314, + "morphosyntactic": 35852, + "classifying": 8634, + "ict": 24364, + "eu": 18422, + "automates": 5065, + "updating": 59769, + "innovative": 26249, + "adopts": 1875, + "crawl": 11685, + "predefined": 41623, + "sites": 51365, + "extrinsically": 20174, + "crawled": 11686, + "acquiring": 1446, + "descriptors": 14257, + "dealt": 13523, + "validating": 61190, + "iterating": 27118, + "distinctions": 15598, + "bootstrapping": 7267, + "minimize": 33297, + "sparse": 51963, + "holds": 23985, + "perception": 40055, + "robots": 48235, + "salient": 48439, + "behaviors": 6400, + "attribution": 4914, + "nonverbal": 37699, + "wiki": 62036, + "restricts": 47428, + "friendly": 21690, + "collaboratively": 8933, + "edited": 16594, + "customize": 12063, + "technologies": 56156, + "recognizers": 45556, + "unless": 59590, + "self": 49172, + "confusions": 10162, + "telephone": 56165, + "exploring": 19776, + "populated": 41211, + "stopwords": 52870, + "logically": 30991, + "scored": 48883, + "devised": 14727, + "geolocation": 22653, + "devices": 14723, + "configure": 10126, + "lan": 27941, + "guage": 23318, + "faces": 20252, + "elicitation": 16980, + "specialised": 52025, + "removal": 46373, + "exchanged": 18966, + "regions": 45806, + "digitized": 15219, + "disparity": 15515, + "consequent": 10201, + "violation": 61617, + "align": 2351, + "passages": 39922, + "chapters": 8191, + "distilled": 15581, + "targeted": 54858, + "gains": 21930, + "extractive": 20133, + "offering": 38297, + "worthy": 62981, + "onset": 38395, + "stopping": 52867, + "auditory": 4937, + "stops": 52869, + "msa": 35913, + "carrier": 7772, + "cv": 12069, + "outcomes": 38766, + "formalize": 21359, + "issued": 27082, + "intent": 26563, + "cooperation": 11121, + "procedural": 42739, + "paraphrases": 39744, + "inventory": 26925, + "labels": 27807, + "collapsing": 8936, + "submit": 53581, + "triplets": 58810, + "plans": 40946, + "timely": 57244, + "capturing": 7729, + "negation": 36612, + "formulation": 21392, + "subsequently": 53605, + "lemmas": 30017, + "accompanying": 843, + "commons": 9231, + "nc": 36497, + "sa": 48417, + "permit": 40729, + "lemma": 30016, + "initiative": 26234, + "recommendations": 45567, + "illustration": 24523, + "synergies": 54281, + "arrangement": 4199, + "animal": 2862, + "brains": 7298, + "discriminating": 15439, + "semantical": 49378, + "importantly": 24795, + "walk": 61758, + "compares": 9474, + "discrimination": 15440, + "fluently": 21135, + "preparation": 41813, + "rigid": 48147, + "commands": 9137, + "interpreting": 26741, + "respond": 47388, + "punjabi": 44390, + "fruitful": 21697, + "governance": 22964, + "records": 45591, + "legacy": 30002, + "matters": 31945, + "mother": 35854, + "110": 92, + "10th": 80, + "total": 57474, + "900": 554, + "nations": 36398, + "covers": 11662, + "disadvantages": 15350, + "ends": 17746, + "enhancements": 17943, + "personalized": 40765, + "freebase": 21648, + "removes": 46378, + "judged": 27237, + "unifies": 59484, + "labeled": 27735, + "mechanical": 32091, + "turk": 58980, + "approximation": 3984, + "indicates": 25538, + "shallow": 50438, + "ndcg": 36501, + "68": 478, + "93": 561, + "inclusion": 25323, + "beneficial": 6554, + "scores": 48886, + "politeness": 41106, + "requests": 46838, + "guide": 23328, + "polite": 41105, + "elections": 16962, + "stack": 52415, + "exchange": 18964, + "reputation": 46834, + "utilizes": 61112, + "student": 53208, + "tracked": 57494, + "curriculum": 12042, + "cooccurrence": 11118, + "pointwise": 41082, + "heavy": 23538, + "embeddings": 17074, + "competitive": 9536, + "quantitatively": 44629, + "sole": 51640, + "near": 36503, + "danish": 12095, + "swedish": 54248, + "captured": 7723, + "enhancement": 17942, + "emerging": 17270, + "gujarati": 23358, + "transliterated": 58714, + "sheer": 50533, + "forums": 21399, + "surveyed": 54221, + "picture": 40871, + "marathi": 31813, + "trigram": 58797, + "calculating": 7525, + "preserve": 42113, + "phonological": 40831, + "television": 56166, + "closed": 8694, + "captions": 7649, + "channels": 8189, + "months": 35830, + "annotating": 2930, + "providers": 44177, + "uncovered": 59245, + "worldwide": 62970, + "appropriately": 3971, + "ex": 18844, + "preprocess": 41823, + "cats": 7865, + "witnessed": 62090, + "translators": 58713, + "manager": 31692, + "felt": 20729, + "multinomial": 36160, + "triples": 58804, + "sparsity": 51978, + "learns": 29951, + "noting": 37732, + "concentration": 9919, + "fi": 20744, + "filter": 20807, + "24": 327, + "categorize": 7856, + "hold": 23979, + "unwanted": 59761, + "weighted": 61923, + "sensitivity": 49506, + "medical": 32198, + "clinical": 8667, + "synthesis": 54357, + "adjustment": 1854, + "experimentally": 19330, + "strict": 52983, + "clauses": 8640, + "concrete": 9981, + "saved": 48535, + "forming": 21373, + "encyclopedic": 17609, + "microblogging": 33230, + "region": 45803, + "visualising": 61676, + "aggregating": 2075, + "uk": 59185, + "birth": 7184, + "2013": 252, + "validates": 61188, + "feasibility": 20467, + "visualisation": 61673, + "volumes": 61731, + "tweets": 59008, + "rest": 47409, + "surrounded": 54194, + "overlapping": 39091, + "enforcing": 17755, + "influx": 25734, + "quantities": 44634, + "reuse": 48002, + "curation": 11953, + "propagated": 43240, + "originated": 38748, + "provenance": 43996, + "propagation": 43243, + "heavily": 23527, + "8000": 524, + "occurred": 38271, + "inconsistent": 25339, + "visualise": 61675, + "website": 61905, + "inflection": 25719, + "disciplines": 15375, + "latest": 29153, + "assumptions": 4639, + "multilevel": 36061, + "ratio": 45024, + "emotion": 17285, + "strength": 52970, + "wisdom": 62077, + "quickly": 44820, + "scenario": 48682, + "notably": 37718, + "discourage": 15381, + "malicious": 31683, + "reject": 45879, + "markedly": 31833, + "hot": 24028, + "cold": 8927, + "warm": 61772, + "freezing": 21656, + "contrasting": 10894, + "begin": 6378, + "intervention": 26750, + "korean": 27674, + "prepared": 41815, + "350": 374, + "retrieving": 47993, + "accomplishing": 850, + "delivers": 13836, + "parametrized": 39734, + "interpretable": 26718, + "files": 20795, + "nl": 37443, + "raises": 44857, + "circumvent": 8363, + "libraries": 30423, + "scholars": 48738, + "divide": 15743, + "genre": 22641, + "assist": 4609, + "pose": 41237, + "genres": 22642, + "gradually": 23015, + "internally": 26693, + "trains": 58323, + "library": 30424, + "fiction": 20745, + "proportions": 43277, + "narrative": 36381, + "themes": 57007, + "stone": 52862, + "analogous": 2582, + "likewise": 30529, + "house": 24040, + "synonymous": 54286, + "recognizing": 45558, + "compositions": 9753, + "dual": 16457, + "materials": 31926, + "develops": 14716, + "automate": 5032, + "refine": 45764, + "trivial": 58811, + "occurring": 38280, + "mixture": 33417, + "recurrent": 45606, + "spans": 51956, + "substantially": 53631, + "700": 488, + "hierarchically": 23699, + "organised": 38682, + "institutions": 26474, + "authorities": 5005, + "member": 32228, + "union": 59506, + "official": 38306, + "viewing": 61606, + "categorisation": 7851, + "ec": 16572, + "centre": 7924, + "tm": 57275, + "professionally": 43062, + "professionals": 43063, + "departments": 14095, + "monitor": 35784, + "eye": 20175, + "europe": 18426, + "19": 183, + "press": 42133, + "functionality": 21765, + "multilinguality": 36138, + "motor": 35887, + "perceive": 40046, + "foreign": 21296, + "accent": 807, + "proceed": 42748, + "mirror": 33336, + "neurons": 37119, + "grasping": 23195, + "neuro": 37115, + "augmented": 4975, + "update": 59765, + "revisits": 48058, + "extracts": 20142, + "reframing": 45788, + "dissemination": 15535, + "danger": 12093, + "complemented": 9594, + "marketing": 31837, + "captures": 7727, + "inspire": 26401, + "granularity": 23091, + "smoothly": 51540, + "overcomes": 39075, + "insertion": 26378, + "upper": 59772, + "unprecedented": 59627, + "visualizations": 61682, + "density": 14091, + "updates": 59768, + "daily": 12083, + "gaussian": 22010, + "multivariate": 36330, + "mel": 32226, + "gmm": 22870, + "dnn": 15756, + "acoustic": 1433, + "email": 17001, + "love": 31129, + "hate": 23479, + "suicide": 53851, + "genders": 22041, + "women": 62105, + "joy": 27234, + "sadness": 48425, + "men": 32288, + "prefer": 41787, + "fear": 20466, + "trust": 58830, + "backbone": 5484, + "rising": 48159, + "participating": 39823, + "constantly": 10343, + "enterprise": 18015, + "university": 59555, + "quantification": 44602, + "behavioral": 6399, + "movements": 35890, + "unfold": 59448, + "interlocutors": 26671, + "exchanging": 18968, + "lag": 27933, + "maximally": 31952, + "perfect": 40060, + "improper": 24819, + "calculation": 7526, + "translator": 58712, + "taggers": 54733, + "viz": 61696, + "38": 380, + "46": 411, + "worked": 62863, + "emphasis": 17308, + "meteor": 32350, + "diseases": 15498, + "genes": 22635, + "proxy": 44264, + "generalize": 22136, + "sciences": 48752, + "economic": 16577, + "morphemes": 35834, + "weakly": 61856, + "transferred": 58433, + "bitext": 7187, + "expectations": 19196, + "facilities": 20283, + "encode": 17459, + "f1": 20179, + "attaining": 4672, + "crfs": 11766, + "12k": 119, + "5k": 451, + "ontonotes": 38400, + "conll": 10167, + "03": 20, + "branch": 7299, + "narrowing": 36390, + "constrained": 10363, + "integer": 26498, + "relaxation": 46137, + "lastly": 29106, + "stress": 52979, + "encountering": 17587, + "totally": 57478, + "1st": 216, + "optimality": 38535, + "maximization": 31954, + "fake": 20370, + "regulation": 45857, + "crawling": 11689, + "curating": 11952, + "benchmarking": 6507, + "cis": 8364, + "edu": 16603, + "estimations": 18388, + "researcher": 47147, + "practitioners": 41491, + "national": 36395, + "hundreds": 24295, + "millions": 33262, + "downstream": 16330, + "popularity": 41200, + "policies": 41092, + "classifies": 8629, + "43": 406, + "56": 446, + "11": 82, + "accuracies": 891, + "subjectively": 53567, + "parameterized": 39685, + "indicating": 25541, + "cornell": 11164, + "inadequate": 25211, + "solid": 51649, + "grounds": 23267, + "away": 5480, + "aware": 5439, + "obvious": 38262, + "meaningless": 32031, + "contradictions": 10868, + "hash": 23473, + "valued": 61210, + "optimize": 38561, + "europarl": 18425, + "preexisting": 41786, + "learnt": 29979, + "timeline": 57241, + "epochs": 18186, + "evolves": 18840, + "salience": 48435, + "neglect": 36647, + "dependent": 14145, + "mined": 33276, + "rouge": 48348, + "consuming": 10441, + "simplify": 51241, + "really": 45164, + "3rd": 389, + "selectional": 49159, + "preference": 41789, + "covariance": 11640, + "discrete": 15419, + "setup": 50408, + "dissimilar": 15537, + "informative": 26168, + "employing": 17399, + "walks": 61759, + "nuances": 37976, + "dl": 15751, + "factual": 20317, + "truly": 58823, + "adopted": 1869, + "formalization": 21358, + "equivalence": 18200, + "helps": 23601, + "logics": 30992, + "wh": 61950, + "desire": 14345, + "flexible": 21107, + "smoothed": 51536, + "absolute": 736, + "kneser": 27380, + "ney": 37434, + "smoothing": 51538, + "baselines": 6225, + "perplexity": 40737, + "bag": 5500, + "favorably": 20455, + "geo": 22646, + "intrinsically": 26774, + "wrong": 63015, + "plausibility": 40957, + "mimics": 33271, + "expert": 19568, + "beliefs": 6408, + "perceptions": 40056, + "epistemic": 18184, + "cognitively": 8899, + "turkish": 58983, + "grouping": 23276, + "synsets": 54290, + "hypernyms": 24335, + "hyponyms": 24341, + "determining": 14562, + "gather": 22000, + "tens": 56217, + "extreme": 20151, + "restrictive": 47427, + "czech": 12077, + "optimised": 38539, + "differs": 15151, + "categorised": 7853, + "repository": 46463, + "leveraging": 30318, + "keyphrase": 27345, + "dramatic": 16384, + "growth": 23307, + "pros": 43953, + "cons": 10193, + "keyphrases": 27346, + "paraphrase": 39737, + "robustness": 48270, + "encourages": 17602, + "enhances": 17944, + "scattered": 48679, + "resides": 47184, + "quantity": 44638, + "final": 20816, + "extensible": 19846, + "01": 18, + "36": 377, + "94": 564, + "denotation": 14069, + "cohesion": 8921, + "fusion": 21850, + "topicality": 57441, + "cc": 7896, + "similarly": 51131, + "poem": 41038, + "safety": 48428, + "reporting": 46458, + "shaping": 50451, + "looks": 31070, + "acquired": 1443, + "transductive": 58346, + "incident": 25219, + "compressed": 9809, + "counterpart": 11622, + "spotting": 52375, + "minor": 33329, + "synchronous": 54279, + "substitution": 53658, + "distortion": 15612, + "mismatches": 33354, + "discriminatively": 15449, + "margin": 31816, + "scalable": 48547, + "extractions": 20132, + "referential": 45754, + "resolves": 47204, + "95": 566, + "purely": 44394, + "encyclopedia": 17608, + "minority": 33331, + "pairwise": 39236, + "doubt": 16323, + "devise": 14724, + "extends": 19842, + "templates": 56177, + "subtask": 53667, + "hopes": 24019, + "integrity": 26531, + "leverage": 30254, + "pruning": 44268, + "confirming": 10136, + "idiosyncratic": 24484, + "constraint": 10369, + "judge": 27236, + "suitability": 53853, + "wise": 62078, + "capacity": 7635, + "refining": 45771, + "modifying": 35738, + "para": 39620, + "parametric": 39730, + "orthographic": 38754, + "eliminates": 16988, + "rare": 44997, + "chunk": 8356, + "forest": 21300, + "keyword": 27348, + "pagerank": 39139, + "centrality": 7922, + "neighborhood": 36660, + "assumes": 4636, + "completely": 9605, + "computable": 9824, + "gaze": 22021, + "movement": 35889, + "incorporated": 25367, + "coupled": 11632, + "detrimental": 14565, + "situated": 51367, + "bi": 6998, + "opportunities": 38510, + "options": 38579, + "malay": 31676, + "rivaling": 48168, + "entails": 18009, + "buy": 7505, + "asymmetric": 4641, + "treat": 58730, + "representatives": 46804, + "nowadays": 37964, + "bible": 7062, + "conduct": 10023, + "alternatives": 2513, + "pivoting": 40922, + "cascading": 7786, + "pseudo": 44272, + "motivating": 35880, + "suffers": 53788, + "drawback": 16402, + "subtree": 53681, + "ted": 56159, + "st": 52409, + "pay": 39989, + "cheaper": 8269, + "snippets": 51548, + "prerequisite": 41831, + "blog": 7225, + "asynchronous": 4644, + "beat": 6373, + "vectorial": 61475, + "autoencoders": 5029, + "regularizer": 45849, + "cycles": 12075, + "quasi": 44645, + "cyclic": 12076, + "sought": 51733, + "huang": 24066, + "eat": 16570, + "reverse": 48021, + "preparing": 41816, + "act": 1451, + "inconsistencies": 25336, + "documentation": 15849, + "tts": 58850, + "intelligible": 26547, + "sounding": 51737, + "confusing": 10159, + "degrade": 13804, + "interview": 26752, + "death": 13525, + "occurs": 38283, + "health": 23511, + "worker": 62864, + "interviews": 26753, + "reviewed": 48043, + "site": 51364, + "normalised": 37703, + "locally": 30961, + "compelling": 9518, + "succeeds": 53694, + "virtually": 61626, + "surpass": 54161, + "possessing": 41309, + "2004": 241, + "year": 63046, + "sign": 50827, + "possesses": 41308, + "boosting": 7260, + "misses": 33359, + "als": 2487, + "signature": 50839, + "neglecting": 36650, + "emphasise": 17309, + "chat": 8259, + "talking": 54791, + "modes": 35725, + "evolved": 18838, + "tweet": 59001, + "speak": 51993, + "differentiable": 15139, + "journalists": 27231, + "mr": 35904, + "writings": 62993, + "entertaining": 18017, + "educational": 16605, + "landscape": 27945, + "spurred": 52391, + "employ": 17372, + "solely": 51641, + "citation": 8365, + "encouraged": 17601, + "imposing": 24801, + "unconstrained": 59241, + "convex": 11081, + "1995": 203, + "idiomatic": 24481, + "mistaken": 33371, + "patent": 39943, + "nascent": 36392, + "aggressive": 2084, + "stay": 52782, + "representational": 46608, + "symbolic": 54267, + "viewpoints": 61608, + "stakeholders": 52452, + "customers": 12059, + "designers": 14337, + "corpuses": 11463, + "reliably": 46255, + "establishing": 18364, + "inherent": 26199, + "dag": 12082, + "ic": 24360, + "personalization": 40763, + "imbalanced": 24565, + "multiclass": 36046, + "id": 24365, + "prompt": 43197, + "forecast": 21293, + "meet": 32221, + "tamil": 54793, + "weibo": 61912, + "billions": 7123, + "sentimental": 49868, + "emoticons": 17284, + "shifting": 50545, + "deliberate": 13826, + "api": 3130, + "500": 423, + "convolutional": 11100, + "max": 31948, + "pooling": 41124, + "handles": 23422, + "distant": 15553, + "supervision": 54075, + "strongest": 53065, + "narrow": 36388, + "balance": 5513, + "benchmarked": 6506, + "pearson": 40004, + "optimising": 38540, + "standing": 52549, + "defining": 13790, + "tailored": 54768, + "gradient": 23001, + "descent": 14209, + "blending": 7200, + "ignore": 24488, + "judgement": 27238, + "enjoys": 17953, + "2003": 239, + "han": 23381, + "unicode": 59461, + "ids": 24485, + "bloom": 7229, + "party": 39913, + "psychologists": 44292, + "exemplars": 18990, + "exemplar": 18989, + "differential": 15141, + "equations": 18194, + "portions": 41222, + "preservation": 42112, + "anomalous": 3022, + "discarded": 15367, + "merely": 32313, + "prejudice": 41798, + "marginalizing": 31828, + "lacking": 27928, + "completeness": 9608, + "protection": 43966, + "instantiation": 26442, + "counting": 11625, + "plots": 41024, + "contrasts": 10925, + "differentiate": 15143, + "activate": 1463, + "adjacency": 1838, + "arousal": 4196, + "intensity": 26556, + "harder": 23454, + "roughly": 48357, + "crowd": 11879, + "sourced": 51820, + "implicitly": 24665, + "regression": 45811, + "na": 36357, + "harvesting": 23472, + "phonetics": 40830, + "corresponds": 11564, + "optimum": 38575, + "direction": 15269, + "shortest": 50591, + "sindhi": 51281, + "glyphs": 22869, + "blend": 7198, + "glyph": 22868, + "satisfy": 48528, + "request": 46835, + "critically": 11798, + "monetary": 35782, + "tangible": 54799, + "drawing": 16405, + "operationalize": 38492, + "recipient": 45482, + "indications": 25545, + "helping": 23600, + "abundantly": 784, + "croatian": 11805, + "shuffled": 50814, + "shuffling": 50815, + "topologies": 57470, + "corroborate": 11565, + "lose": 31078, + "ordering": 38665, + "paris": 39749, + "pieces": 40878, + "dense": 14074, + "bilinear": 7102, + "rendered": 46382, + "suitably": 53862, + "thematically": 57005, + "reinforced": 45862, + "maximal": 31951, + "reinforce": 45861, + "caused": 7889, + "newsgroups": 37428, + "rigorous": 48148, + "thesauri": 57041, + "centric": 7926, + "older": 38328, + "got": 22960, + "newer": 37367, + "joining": 27162, + "iv": 27135, + "48": 413, + "randomly": 44897, + "picked": 40868, + "57": 447, + "remarkably": 46363, + "44": 407, + "attested": 4868, + "window": 62063, + "shortening": 50587, + "tutorial": 58995, + "guarantees": 23323, + "predominantly": 41784, + "theme": 57006, + "carrying": 7779, + "unigrams": 59495, + "89": 545, + "expanding": 19186, + "asset": 4595, + "vietnam": 61590, + "resourced": 47287, + "initialized": 26227, + "coming": 9133, + "headwords": 23510, + "noted": 37723, + "lookup": 31071, + "hausa": 23482, + "doc": 15759, + "hierarchies": 23701, + "swiss": 54251, + "twice": 59027, + "predetermined": 41626, + "offset": 38319, + "operational": 38490, + "origins": 38751, + "fluent": 21128, + "publish": 44364, + "trials": 58786, + "encapsulate": 17456, + "interestingly": 26656, + "implements": 24651, + "simulated": 51258, + "games": 21950, + "establishes": 18359, + "correspondences": 11543, + "commentaries": 9141, + "kernels": 27291, + "smith": 51533, + "decomposing": 13656, + "glass": 22816, + "passes": 39925, + "slower": 51452, + "refines": 45770, + "rnn": 48179, + "encoder": 17487, + "encodes": 17560, + "decodes": 13623, + "maximize": 31956, + "qualitatively": 44483, + "realistic": 45147, + "collecting": 8972, + "south": 51844, + "africa": 2038, + "home": 23992, + "imperative": 24626, + "verbalization": 61514, + "elaborate": 16951, + "subsumption": 53665, + "depend": 14097, + "singular": 51359, + "precondition": 41621, + "biomedicine": 7179, + "ke": 27276, + "exploratory": 19681, + "novices": 37963, + "behaviours": 6403, + "screen": 48947, + "recordings": 45590, + "guidance": 23326, + "arts": 4504, + "tourism": 57480, + "tailor": 54767, + "agency": 2050, + "newswire": 37432, + "snapshot": 51545, + "ag": 2043, + "guides": 23354, + "emerge": 17257, + "loose": 31076, + "prescriptive": 41835, + "fledged": 21103, + "stories": 52876, + "lab": 27687, + "majorly": 31537, + "expect": 19192, + "flexibility": 21106, + "induce": 25601, + "memorized": 32236, + "illustrated": 24520, + "host": 24025, + "2014": 253, + "financial": 20889, + "facing": 20285, + "shift": 50542, + "sociolinguistic": 51616, + "centers": 7915, + "fall": 20372, + "contact": 10458, + "norm": 37700, + "sharp": 50521, + "urban": 59782, + "popularly": 41208, + "hinglish": 23948, + "rural": 48411, + "interacting": 26594, + "asymptotic": 4643, + "maintain": 31481, + "sizeable": 51405, + "routing": 48367, + "continue": 10828, + "happiness": 23437, + "energy": 17747, + "acceleration": 806, + "ideal": 24376, + "interlinked": 26669, + "specifying": 52242, + "straightforward": 52885, + "paradigms": 39632, + "leaving": 29983, + "pointed": 41054, + "extremely": 20153, + "analyzers": 2838, + "regularity": 45834, + "encountered": 17586, + "hosted": 24026, + "expressiveness": 19815, + "tight": 57106, + "sinhala": 51361, + "automation": 5208, + "inherently": 26204, + "reasons": 45234, + "kbp": 27273, + "nist": 37442, + "analytics": 2803, + "conference": 10108, + "tac": 54695, + "begins": 6384, + "exercise": 18994, + "partitions": 39898, + "young": 63142, + "engineer": 17762, + "sphere": 52341, + "substitutes": 53656, + "ternary": 56326, + "curve": 12049, + "lying": 31294, + "editor": 16600, + "monotonic": 35820, + "communicates": 9243, + "auxiliary": 5228, + "completions": 9614, + "movies": 35899, + "tv": 58998, + "retrain": 47932, + "arrive": 4204, + "retraining": 47935, + "adaptable": 1517, + "updated": 59766, + "title": 57268, + "76": 506, + "leaves": 29982, + "annotators": 3014, + "periods": 40726, + "amt": 2578, + "disease": 15496, + "protocol": 43970, + "iterations": 27120, + "arrived": 4205, + "merged": 32315, + "voting": 61739, + "imitate": 24572, + "innovation": 26247, + "minorities": 33330, + "broadcast": 7357, + "originate": 38747, + "innovations": 26248, + "chance": 8165, + "regularization": 45835, + "producing": 43037, + "disproportionately": 15523, + "monte": 35825, + "carlo": 7765, + "regimes": 45802, + "adoption": 1874, + "moderate": 35695, + "interpreter": 26739, + "actually": 1489, + "incentivizes": 25216, + "arguably": 4160, + "concreteness": 9985, + "surpassed": 54169, + "ceiling": 7905, + "plenty": 41010, + "guiding": 23356, + "legal": 30003, + "instant": 26438, + "messaging": 32325, + "increment": 25481, + "threats": 57087, + "contractions": 10863, + "tokenization": 57316, + "additive": 1739, + "multiplicative": 36317, + "superiority": 53948, + "dates": 13495, + "currency": 11957, + "390": 384, + "variance": 61230, + "c4": 7518, + "knn": 27382, + "dimensionality": 15241, + "lemmatization": 30018, + "belong": 6416, + "conjecture": 10163, + "agree": 2100, + "concatenated": 9908, + "gated": 21993, + "degrades": 13809, + "assistant": 4613, + "displaying": 15521, + "food": 21275, + "leaning": 29338, + "referenced": 45750, + "adaptations": 1549, + "complications": 9698, + "synthesize": 54360, + "tremendously": 58776, + "contents": 10575, + "searching": 48991, + "accompanied": 841, + "owing": 39125, + "purchase": 44391, + "influenced": 25730, + "accordingly": 872, + "posting": 41366, + "deceive": 13545, + "minute": 33334, + "unintentionally": 59503, + "shorthand": 50593, + "normalisation": 37702, + "globalization": 22849, + "te": 55986, + "clause": 8637, + "attachment": 4657, + "degraded": 13807, + "achievements": 1282, + "court": 11639, + "friends": 21691, + "seek": 49048, + "weigh": 61913, + "vote": 61737, + "counterfactual": 11616, + "stand": 52458, + "markup": 31852, + "hebrew": 23540, + "analytic": 2799, + "workflows": 62867, + "van": 61213, + "nearest": 36517, + "neighbour": 36667, + "ive": 27136, + "sorting": 51721, + "involve": 27013, + "brown": 7370, + "throw": 57095, + "searched": 48989, + "lengths": 30040, + "algebra": 2255, + "weighting": 61932, + "jhu": 27156, + "summer": 53917, + "trigger": 58793, + "holder": 23982, + "expanded": 19185, + "ldc": 29252, + "vertices": 61564, + "edges": 16587, + "normally": 37711, + "minimization": 33296, + "crossing": 11875, + "paves": 39985, + "diminishing": 15247, + "returns": 48000, + "gathering": 22003, + "ea": 16502, + "greedy": 23241, + "agglomerative": 2067, + "pe": 40000, + "microblog": 33228, + "dbpedia": 13507, + "identifiers": 24407, + "repair": 46390, + "nmt": 37573, + "weakness": 61869, + "conventional": 10999, + "inability": 25205, + "oov": 38404, + "translates": 58563, + "wmt14": 62100, + "contest": 10577, + "dot": 16317, + "partitioning": 39897, + "fraction": 21427, + "wsj": 63020, + "inevitable": 25628, + "automating": 5206, + "rival": 48167, + "scratch": 48943, + "labor": 27862, + "electricity": 16965, + "ultimate": 59190, + "prospective": 43960, + "capitalization": 7640, + "simplistic": 51245, + "specificities": 52236, + "svms": 54240, + "explaining": 19601, + "accessed": 831, + "apis": 3131, + "meeting": 32222, + "word2vec": 62344, + "mikolov": 33239, + "attracted": 4873, + "notice": 37726, + "preventing": 42232, + "explanations": 19605, + "cbow": 7895, + "skip": 51417, + "sg": 50433, + "softmax": 51627, + "interpretations": 26737, + "alongside": 2484, + "basics": 6336, + "neuron": 37118, + "backpropagation": 5495, + "demo": 13845, + "devoted": 14732, + "arising": 4186, + "vehicle": 61503, + "vehicles": 61504, + "drives": 16437, + "dialog": 14750, + "upstream": 59778, + "motifs": 35855, + "portal": 41217, + "slow": 51448, + "ccg": 7901, + "truth": 58834, + "borrow": 7269, + "parses": 39767, + "jobs": 27159, + "atis": 4646, + "duplicate": 16469, + "gazetteer": 22022, + "logistic": 30993, + "constructing": 10418, + "confounding": 10149, + "conditioning": 10017, + "revisit": 48055, + "signatures": 50840, + "attributed": 4903, + "spurious": 52384, + "broader": 7362, + "watson": 61785, + "bing": 7160, + "apache": 3127, + "cnn": 8757, + "2d": 348, + "1d": 213, + "convolution": 11091, + "17": 172, + "th": 56996, + "seminal": 49471, + "sex": 50430, + "condition": 9990, + "promote": 43189, + "education": 16604, + "woman": 62104, + "weaker": 61855, + "absence": 733, + "resolved": 47203, + "washington": 61779, + "city": 8375, + "poorer": 41147, + "acceptable": 812, + "12": 103, + "eliminating": 16990, + "extractor": 20140, + "unbalanced": 59218, + "surpasses": 54170, + "learnable": 29448, + "unveil": 59757, + "contradicting": 10866, + "universally": 59550, + "vietnamese": 61591, + "ought": 38759, + "flexibly": 21112, + "optimizes": 38573, + "aka": 2224, + "optimally": 38536, + "succeeded": 53692, + "lda": 29248, + "interpretability": 26713, + "illustrates": 24521, + "contract": 10862, + "repositories": 46462, + "inflections": 25721, + "cumbersome": 11941, + "generators": 22624, + "boosted": 7258, + "bow": 7288, + "remaining": 46322, + "factorization": 20300, + "fillers": 20800, + "estimated": 18376, + "degradation": 13800, + "neighbours": 36670, + "neighbourhoods": 36669, + "pushing": 44430, + "convolutions": 11117, + "parameterization": 39682, + "advocates": 2006, + "vision": 61633, + "compresses": 9810, + "optimisation": 38537, + "hypergraph": 24331, + "compactly": 9281, + "synset": 54289, + "numerical": 38060, + "positivity": 41305, + "negativity": 36646, + "attains": 4673, + "connects": 10189, + "caption": 7647, + "coco": 8788, + "transmission": 58716, + "aided": 2129, + "tell": 56167, + "sharing": 50513, + "intra": 26756, + "reproducibility": 46826, + "sophistication": 51718, + "mix": 33399, + "ecosystem": 16582, + "indirectly": 25556, + "broadly": 7365, + "gesture": 22682, + "propagate": 43239, + "intriguing": 26766, + "paving": 39987, + "embodied": 17252, + "neighbor": 36655, + "neighbors": 36665, + "shortcoming": 50576, + "mode": 33479, + "conflate": 10138, + "neighborhoods": 36662, + "horizon": 24022, + "glove": 22855, + "progresses": 43121, + "pictures": 40872, + "attracting": 4891, + "modifiers": 35734, + "denoting": 14073, + "thing": 57046, + "treating": 58735, + "comparably": 9315, + "turkic": 58982, + "strives": 52997, + "lms": 30916, + "necessarily": 36527, + "fusing": 21849, + "uni": 59458, + "fused": 21847, + "41": 401, + "clean": 8641, + "studio": 53315, + "83": 529, + "tremendous": 58772, + "journals": 27232, + "rnns": 48208, + "gpus": 22996, + "perplexities": 40736, + "hit": 23971, + "succeed": 53691, + "enlarge": 17954, + "isolates": 27051, + "conveys": 11087, + "grafting": 23018, + "falling": 20376, + "metaphor": 32347, + "organizing": 38692, + "drifts": 16414, + "05": 22, + "drift": 16413, + "decreased": 13670, + "democracy": 13850, + "ads": 1876, + "1993": 202, + "indices": 25551, + "replicable": 46413, + "lstm": 31237, + "cells": 7909, + "accumulates": 888, + "click": 8661, + "attenuate": 4867, + "unimportant": 59500, + "specially": 52037, + "ngrams": 37437, + "treebanks": 58765, + "modelled": 34639, + "feed": 20710, + "lstms": 31289, + "semeval": 49424, + "constrains": 10368, + "akin": 2226, + "turning": 58993, + "shannon": 50446, + "deficiencies": 13769, + "optimizing": 38574, + "formulating": 21389, + "black": 7189, + "box": 7289, + "formalise": 21353, + "supplied": 54111, + "cooking": 11119, + "transcript": 58337, + "recipes": 45481, + "devlin": 14729, + "augments": 4992, + "heuristically": 23632, + "gating": 22005, + "fuse": 21845, + "stronger": 53060, + "framed": 21442, + "engage": 17756, + "breakthrough": 7315, + "grammatically": 23081, + "wang": 61764, + "competitor": 9574, + "margins": 31830, + "register": 45807, + "vanishing": 61218, + "culture": 11939, + "imposed": 24799, + "principled": 42385, + "thresholds": 57092, + "decade": 13538, + "break": 7310, + "jensen": 27152, + "divergence": 15685, + "scholarly": 48736, + "revision": 48053, + "59": 450, + "memories": 32233, + "utilizing": 61118, + "gen": 22031, + "feedforward": 20718, + "affords": 2034, + "healthcare": 23521, + "expertise": 19587, + "ran": 44866, + "650": 472, + "patients": 39957, + "chronic": 8352, + "emergency": 17267, + "department": 14094, + "visits": 61646, + "stores": 52875, + "proficient": 43068, + "grows": 23305, + "vary": 61419, + "quotient": 44842, + "finance": 20888, + "thoroughly": 57067, + "everyday": 18802, + "handful": 23403, + "dnns": 15758, + "aggregated": 2073, + "detectors": 14543, + "shedding": 50529, + "cater": 7864, + "british": 7345, + "analysts": 2798, + "infeasible": 25632, + "robustly": 48269, + "nell": 36672, + "cutting": 12067, + "gazetteers": 22023, + "served": 50086, + "song": 51711, + "examination": 18858, + "locality": 30954, + "notable": 37716, + "figures": 20792, + "biographies": 7166, + "academia": 787, + "governments": 22968, + "statement": 52715, + "antonyms": 3123, + "subtasks": 53669, + "disregarded": 15528, + "symmetric": 54272, + "connectivity": 10188, + "displayed": 15520, + "ubiquitous": 59174, + "betweenness": 6996, + "authorship": 5009, + "harvest": 23470, + "prioritize": 42432, + "maximizes": 31962, + "impacting": 24612, + "engineered": 17763, + "anchor": 2850, + "treatments": 58737, + "lets": 30049, + "affairs": 2010, + "implies": 24673, + "obstacle": 38155, + "variational": 61240, + "engaged": 17757, + "mitigating": 33394, + "persistent": 40748, + "unlabelled": 59586, + "labelling": 27805, + "emph": 17307, + "ent": 17994, + "facet": 20255, + "ignoring": 24498, + "hours": 24039, + "kb": 27270, + "hop": 24000, + "atomic": 4649, + "assemble": 4567, + "ae": 2007, + "reconstructing": 45580, + "correlational": 11531, + "advances": 1904, + "inversion": 26932, + "formulated": 21387, + "yelp": 63084, + "mass": 31877, + "fetching": 20733, + "forum": 21397, + "visualizing": 61685, + "cloud": 8719, + "hosting": 24027, + "fly": 21137, + "diversified": 15728, + "sina": 51279, + "2nd": 351, + "5th": 453, + "stacking": 52422, + "exciting": 18969, + "multiplication": 36315, + "mildly": 33244, + "subsumes": 53664, + "88": 543, + "1000": 67, + "kg": 27357, + "pdf": 39998, + "pixel": 40924, + "mu": 35935, + "fueled": 21706, + "businesses": 7504, + "manage": 31687, + "saying": 48541, + "expands": 19187, + "aggregation": 2078, + "egyptian": 16945, + "dialectal": 14747, + "hotel": 24033, + "13th": 134, + "parsimonious": 39768, + "partly": 39899, + "bots": 7274, + "organic": 38679, + "wanted": 61769, + "alert": 2252, + "advertisements": 2002, + "robotic": 48233, + "believed": 6414, + "adjusted": 1852, + "las": 29102, + "2x": 353, + "frontiers": 21693, + "advancing": 1933, + "drug": 16451, + "agreed": 2101, + "varied": 61253, + "forces": 21288, + "exposing": 19787, + "reproducible": 46828, + "scenes": 48716, + "3d": 386, + "geometric": 22654, + "robotics": 48234, + "specified": 52239, + "correlates": 11511, + "conveyed": 11084, + "algebraic": 2256, + "justifications": 27257, + "parliament": 39751, + "sessions": 50097, + "external": 19927, + "stimuli": 52852, + "parliamentary": 39752, + "impacted": 24610, + "4th": 417, + "nlpcc": 37561, + "2015": 254, + "tracks": 57498, + "cn": 8756, + "standardized": 52546, + "antonym": 3122, + "solver": 51696, + "exceed": 18944, + "closer": 8709, + "introductory": 26904, + "composable": 9730, + "essence": 18319, + "legitimate": 30011, + "subjects": 53569, + "distinguishable": 15606, + "conditioned": 10013, + "captioning": 7648, + "credit": 11758, + "cnns": 8778, + "compose": 9731, + "multitask": 36322, + "addressing": 1818, + "abnormal": 732, + "deliver": 13833, + "impacts": 24613, + "auto": 5011, + "footnote": 21278, + "plot": 41023, + "gates": 21999, + "nets": 36690, + "pipelined": 40909, + "varies": 61254, + "geographically": 22651, + "coordinates": 11125, + "attached": 4655, + "attributable": 4898, + "geography": 22652, + "compatibility": 9515, + "subtly": 53678, + "sentiments": 49869, + "projecting": 43137, + "victim": 61575, + "connotation": 10190, + "losses": 31109, + "amr": 2572, + "subgraphs": 53547, + "_1": 579, + "compatible": 9516, + "elaboration": 16956, + "supervising": 54074, + "activation": 1466, + "ended": 17736, + "concurrently": 9987, + "mimicking": 33270, + "acquires": 1445, + "politics": 41115, + "sports": 52371, + "entertainment": 18018, + "smartphones": 51532, + "obama": 38075, + "iphone": 27031, + "tied": 57103, + "holistic": 23989, + "personality": 40761, + "posted": 41355, + "yahoo": 63040, + "observable": 38120, + "aligner": 2360, + "empowers": 17409, + "abstractions": 767, + "generalizable": 22110, + "ablations": 663, + "elucidate": 16997, + "pitman": 40915, + "yor": 63138, + "particle": 39829, + "beam": 6364, + "opening": 38474, + "door": 16316, + "distilling": 15585, + "lightweight": 30456, + "distill": 15565, + "retain": 47920, + "compromise": 9820, + "lags": 27936, + "lyrics": 31295, + "anew": 2857, + "echo": 16574, + "music": 36337, + "promoting": 43195, + "intense": 26554, + "resort": 47206, + "manuscripts": 31792, + "paramount": 39735, + "artificially": 4501, + "explosion": 19778, + "videos": 61589, + "played": 40982, + "repeated": 46393, + "prevents": 42234, + "concentrating": 9918, + "adverse": 1997, + "reactions": 45064, + "spontaneous": 52369, + "radically": 44850, + "revise": 48050, + "compete": 9523, + "normalize": 37707, + "warping": 61775, + "layout": 29242, + "seamless": 48958, + "managers": 31693, + "players": 40986, + "reinforcement": 45863, + "rewards": 48074, + "worlds": 62969, + "shifted": 50544, + "bounding": 7286, + "apparent": 3133, + "sensible": 49493, + "umbrella": 59195, + "filling": 20801, + "propositional": 43949, + "societies": 51609, + "demand": 13841, + "lots": 31125, + "commonplace": 9230, + "deluge": 13839, + "acknowledged": 1429, + "traces": 57490, + "manipulate": 31707, + "twofold": 59044, + "navigate": 36493, + "interactively": 26636, + "decompose": 13652, + "provision": 44257, + "conform": 10144, + "intentionally": 26571, + "pragmatics": 41496, + "judicial": 27244, + "philosophical": 40816, + "grouped": 23275, + "saliency": 48436, + "reaction": 45063, + "messenger": 32326, + "reddit": 45641, + "controls": 10990, + "timing": 57260, + "subreddits": 53600, + "faq": 20393, + "delays": 13818, + "confused": 10158, + "motivations": 35886, + "fundamentals": 21797, + "pace": 39133, + "sector": 49037, + "impose": 24798, + "segmental": 49077, + "rescoring": 46976, + "pass": 39916, + "hinge": 23945, + "marginal": 31823, + "lattices": 29167, + "leveraged": 30299, + "mapreduce": 31809, + "inferior": 25707, + "textit": 56853, + "closeness": 8708, + "depth": 14182, + "trades": 57506, + "fronts": 21694, + "compounding": 9756, + "typing": 59161, + "mit": 33377, + "temperature": 56171, + "encodings": 17581, + "refined": 45766, + "listen": 30843, + "attend": 4701, + "transcribe": 58332, + "accepts": 817, + "spectra": 52243, + "emits": 17277, + "ctc": 11927, + "wer": 61945, + "assessed": 4583, + "item": 27114, + "render": 46381, + "residual": 47185, + "comply": 9699, + "understandable": 59319, + "sparql": 51962, + "benefited": 6577, + "advancement": 1895, + "triple": 58802, + "opposite": 38517, + "opponent": 38509, + "fluency": 21124, + "negatively": 36641, + "transcripts": 58341, + "causal": 7867, + "denoted": 14072, + "ed": 16583, + "compactness": 9282, + "pronounced": 43232, + "overlooked": 39097, + "default": 13763, + "hyperparameters": 24339, + "coarse": 8781, + "hyperparameter": 24337, + "chunks": 8358, + "wmt": 62097, + "recognise": 45486, + "laymen": 29241, + "preferring": 41793, + "overly": 39101, + "ascertain": 4511, + "anomaly": 3023, + "competent": 9527, + "elusive": 16998, + "pure": 44392, + "modulated": 35749, + "nearby": 36516, + "severe": 50424, + "overfitting": 39081, + "chose": 8349, + "testbed": 56392, + "dropout": 16444, + "emphasized": 17313, + "regularizations": 45845, + "opaque": 38408, + "svd": 54230, + "incur": 25486, + "blockwise": 7224, + "accounted": 883, + "intention": 26569, + "attain": 4666, + "contiguous": 10819, + "semitic": 49476, + "experimentation": 19334, + "pick": 40867, + "adjusting": 1853, + "posterior": 41358, + "calibration": 7534, + "intervals": 26748, + "persuasiveness": 40782, + "bipartite": 7180, + "versatility": 61548, + "funding": 21798, + "toolkits": 57373, + "crowdsourced": 11885, + "finish": 21054, + "fix": 21072, + "conveniently": 10997, + "spur": 52382, + "innovatively": 26251, + "surge": 54157, + "sociolinguistics": 51617, + "featuring": 20701, + "synergy": 54283, + "influencing": 25732, + "emission": 17274, + "evident": 18828, + "subword": 53684, + "copying": 11137, + "byte": 7512, + "fraud": 21636, + "posits": 41306, + "employees": 17398, + "generalizability": 22108, + "severely": 50426, + "justification": 27256, + "rst": 48372, + "reweighting": 48075, + "calculates": 7524, + "submodular": 53591, + "600": 459, + "verbnet": 61519, + "ukp": 59186, + "tu": 58851, + "proceedings": 42751, + "talks": 54792, + "commentary": 9142, + "instrumental": 26488, + "translationese": 58705, + "shelf": 50535, + "splitting": 52349, + "amharic": 2538, + "interoperable": 26700, + "actively": 1480, + "99": 574, + "exception": 18957, + "rationales": 45029, + "sliding": 51430, + "psychometric": 44294, + "recovery": 45597, + "unify": 59490, + "relating": 45962, + "recovered": 45594, + "nonlinear": 37695, + "sexual": 50431, + "wants": 61770, + "seeing": 49047, + "elaborated": 16952, + "pave": 39983, + "overlaps": 39093, + "offline": 38316, + "clef": 8659, + "load": 30926, + "gpu": 22994, + "decoded": 13583, + "dividing": 15749, + "histories": 23964, + "topically": 57442, + "regularly": 45853, + "assistance": 4612, + "filtered": 20809, + "adaption": 1571, + "supplies": 54112, + "opus": 38580, + "ter": 56230, + "factored": 20299, + "casing": 7820, + "understudy": 59427, + "adjustable": 1851, + "intend": 26548, + "resemble": 47175, + "orders": 38669, + "ensured": 17991, + "iwslt": 27138, + "stems": 52794, + "cleaning": 8649, + "embed": 17004, + "met": 32327, + "profit": 43072, + "unobserved": 59621, + "logs": 31000, + "taxonomies": 55980, + "hyponymy": 24342, + "specialization": 52029, + "missions": 33368, + "scl": 48776, + "divided": 15745, + "revisiting": 48057, + "siamese": 50817, + "separates": 49884, + "impede": 24621, + "solves": 51698, + "replies": 46420, + "customer": 12053, + "exchanges": 18967, + "silver": 51022, + "reply": 46421, + "ap": 3126, + "recommended": 45570, + "composes": 9735, + "denotations": 14070, + "playing": 40987, + "positional": 41270, + "referent": 45753, + "conceptually": 9954, + "linearly": 30682, + "linearity": 30678, + "pressures": 42137, + "understands": 59421, + "attempted": 4694, + "618": 463, + "showcase": 50660, + "potentials": 41421, + "literatures": 30866, + "consume": 10436, + "journey": 27233, + "taiwan": 54771, + "complementing": 9595, + "1m": 215, + "acts": 1487, + "averaging": 5426, + "subtitles": 53674, + "ranged": 44942, + "ranges": 44943, + "67": 475, + "temporally": 56195, + "71": 492, + "09": 26, + "relaxed": 46138, + "i2b2": 24357, + "tense": 56219, + "narratives": 36386, + "underrepresented": 59284, + "blstm": 7230, + "corrupted": 11568, + "prevalence": 42225, + "indication": 25544, + "differing": 15150, + "regional": 45804, + "neighboring": 36663, + "shrinking": 50812, + "rsa": 48371, + "literal": 30851, + "criticized": 11803, + "unrealistic": 59630, + "opens": 38480, + "approximating": 3983, + "enjoyed": 17951, + "understudied": 59425, + "prohibitively": 43127, + "deletion": 13822, + "dqn": 16378, + "iteratively": 27129, + "rephrasing": 46400, + "decode": 13582, + "competitively": 9570, + "msr": 35917, + "tang": 54797, + "poems": 41039, + "li": 30418, + "colors": 9000, + "white": 61953, + "color": 8998, + "colored": 8999, + "fostering": 21413, + "mismatched": 33353, + "fairly": 20360, + "pooled": 41123, + "frozen": 21695, + "marker": 31834, + "happy": 23438, + "sleep": 51427, + "serial": 50056, + "nonsensical": 37697, + "annual": 3019, + "sea": 48957, + "reviewers": 48045, + "synthetic": 54367, + "spot": 52372, + "retained": 47923, + "paraphrased": 39742, + "imitates": 24574, + "attempting": 4695, + "compensate": 9519, + "recommender": 45571, + "star": 52562, + "replication": 46418, + "gcn": 22026, + "slang": 51425, + "absent": 735, + "secondary": 49030, + "alleviates": 2421, + "superficial": 53924, + "hinder": 23925, + "vqa": 61746, + "tuple": 58976, + "concisely": 9965, + "priors": 42437, + "spawned": 51992, + "intractable": 26763, + "exceeds": 18948, + "fortunately": 21396, + "tendency": 56211, + "socially": 51606, + "complicates": 9697, + "arithmetic": 4187, + "verbatim": 61518, + "offs": 38318, + "footprint": 21283, + "budget": 7380, + "lately": 29114, + "impressive": 24807, + "gone": 22923, + "regularize": 45846, + "surveillance": 54200, + "injury": 26241, + "compensation": 9522, + "disability": 15348, + "chemical": 8280, + "harvested": 23471, + "aids": 2131, + "lecture": 29986, + "ds": 16453, + "york": 63139, + "spend": 52339, + "personally": 40766, + "fascinating": 20410, + "vastly": 61442, + "recording": 45589, + "pretrained": 42146, + "sick": 50823, + "guarantee": 23319, + "meets": 32225, + "markovian": 31850, + "accumulating": 889, + "slt": 51457, + "enriching": 17966, + "bytes": 7516, + "affinity": 2027, + "romantic": 48336, + "objectively": 38109, + "indicator": 25548, + "109": 77, + "parent": 39746, + "regularizes": 45851, + "130": 129, + "songs": 51713, + "rankings": 44979, + "charts": 8258, + "factoid": 20295, + "proposition": 43948, + "fails": 20349, + "threefold": 57088, + "summarized": 53908, + "alternatively": 2512, + "uncovers": 59247, + "insignificant": 26394, + "61": 462, + "tackling": 54719, + "multilabel": 36054, + "formalised": 21354, + "compositionally": 9752, + "toy": 57483, + "estimator": 18389, + "complementarity": 9585, + "expand": 19183, + "pi": 40866, + "interdependent": 26643, + "160": 167, + "derivative": 14196, + "affixes": 2029, + "inheritance": 26207, + "acyclic": 1491, + "correctness": 11496, + "sure": 54148, + "checked": 8272, + "statically": 52729, + "backward": 5497, + "imdb": 24569, + "simulates": 51261, + "formatted": 21368, + "reaching": 45059, + "man": 31686, + "contradiction": 10867, + "reusing": 48005, + "transferring": 58435, + "ignores": 24497, + "boost": 7250, + "doubling": 16321, + "substitute": 53654, + "icon": 24362, + "averaged": 5420, + "amplifies": 2570, + "functioning": 21767, + "69": 481, + "surpassing": 54178, + "labeler": 27773, + "slot": 51440, + "66": 473, + "overfit": 39079, + "width": 62035, + "ups": 59776, + "injects": 26240, + "belgian": 6405, + "convincing": 11089, + "peak": 40001, + "prompted": 43212, + "prompts": 43223, + "transactions": 58331, + "environmental": 18174, + "setups": 50412, + "supportive": 54140, + "opposing": 38516, + "undergraduate": 59257, + "biology": 7170, + "classrooms": 8636, + "powered": 41431, + "elastic": 16957, + "instructors": 26486, + "upload": 59770, + "unlimited": 59615, + "downloadable": 16326, + "browser": 7375, + "raters": 45020, + "teach": 55987, + "portals": 41218, + "reside": 47182, + "superposition": 53957, + "mathematically": 31936, + "arora": 4192, + "2016": 255, + "atoms": 4650, + "succinct": 53755, + "converter": 11076, + "conceptualize": 9952, + "cast": 7821, + "extrapolate": 20148, + "bandit": 5521, + "storyline": 52881, + "dd": 13510, + "gibbs": 22687, + "cited": 8368, + "dearth": 13524, + "plug": 41025, + "distributionally": 15673, + "invariant": 26917, + "converging": 11027, + "phonology": 40832, + "tricky": 58789, + "exceptional": 18958, + "tension": 56221, + "credible": 11757, + "incredible": 25479, + "viral": 61621, + "quantifying": 44613, + "nonparametric": 37696, + "incrementally": 25485, + "facebook": 20244, + "manuscript": 31791, + "hypothesizing": 24355, + "homogeneity": 23994, + "81": 526, + "incorporation": 25396, + "sparsemax": 51973, + "smooth": 51535, + "vectorized": 61477, + "rows": 48369, + "columns": 9002, + "partition": 39895, + "parallelize": 39661, + "remained": 46320, + "naturalistic": 36471, + "exams": 18943, + "posit": 41258, + "wealth": 61872, + "modifies": 35735, + "customization": 12062, + "faithfully": 20367, + "reversing": 48025, + "forgetting": 21306, + "nnlm": 37581, + "rescore": 46975, + "ptb": 44296, + "crafted": 11677, + "resembling": 47177, + "centroid": 7928, + "centroids": 7929, + "assignments": 4607, + "discriminant": 15435, + "violence": 61619, + "draws": 16411, + "connotations": 10191, + "liwc": 30897, + "dissimilarities": 15538, + "fourth": 21425, + "smarter": 51530, + "prepositional": 41819, + "coder": 8872, + "acted": 1453, + "happening": 23435, + "keyboard": 27344, + "deleted": 13820, + "completing": 9610, + "saves": 48536, + "latin": 29161, + "quotations": 44839, + "identical": 24379, + "infrequent": 26188, + "ru": 48374, + "pathways": 39954, + "pathway": 39953, + "pays": 39993, + "comparatively": 9324, + "activations": 1469, + "highway": 23923, + "judging": 27241, + "joins": 27163, + "differentially": 15142, + "penalizes": 40017, + "subcomponents": 53543, + "inspected": 26396, + "foundational": 21420, + "cascaded": 7782, + "apart": 3129, + "downside": 16328, + "vanilla": 61214, + "feeding": 20722, + "apps": 3986, + "sampler": 48462, + "academic": 789, + "yoruba": 63141, + "archives": 4132, + "unsatisfactory": 59640, + "quiz": 44835, + "participant": 39811, + "engagement": 17758, + "agencies": 2049, + "statuses": 52781, + "anns": 3018, + "maintained": 31484, + "humanities": 24266, + "matter": 31944, + "criticism": 11800, + "inaccessible": 25206, + "printed": 42391, + "largescale": 29091, + "inspection": 26398, + "batched": 6344, + "speedup": 52328, + "nli": 37448, + "adaptively": 1582, + "softly": 51626, + "wat": 61783, + "attentional": 4856, + "hypernymy": 24336, + "pluggable": 41027, + "seq2seq": 49892, + "selectively": 49164, + "repeat": 46392, + "copynet": 11140, + "utilize": 61085, + "stacked": 52420, + "crucially": 11918, + "trading": 57507, + "card": 7750, + "filtration": 20815, + "film": 20806, + "evaluators": 18775, + "loses": 31079, + "sts": 53206, + "authenticity": 4998, + "exhibited": 19006, + "approximates": 3982, + "seeking": 49054, + "experiences": 19228, + "consumption": 10456, + "dissimilarity": 15539, + "reward": 48065, + "extractors": 20141, + "propagating": 43242, + "longitudinal": 31060, + "trajectories": 58328, + "arrangements": 4200, + "multilayer": 36057, + "perceptron": 40057, + "mlp": 33442, + "preceded": 41604, + "waiting": 61756, + "famous": 20390, + "behave": 6386, + "violent": 61620, + "rewrite": 48076, + "gist": 22690, + "reusable": 48001, + "harnessing": 23468, + "guaranteeing": 23322, + "antecedent": 3115, + "rewritten": 48082, + "mutually": 36352, + "crosslingual": 11876, + "nn": 37578, + "conditionals": 10012, + "trans": 58330, + "interpolated": 26704, + "depicted": 14165, + "satisfied": 48526, + "phrasing": 40859, + "f_": 20233, + "synthesized": 54362, + "automotive": 5210, + "approximations": 3985, + "patch": 39941, + "consult": 10434, + "categorizing": 7859, + "recruitment": 45601, + "company": 9285, + "job": 27158, + "inappropriate": 25213, + "postings": 41367, + "board": 7235, + "seekers": 49053, + "resume": 47916, + "decomposes": 13655, + "indexes": 25513, + "outside": 39024, + "zone": 63195, + "radio": 44852, + "engaging": 17760, + "fixing": 21087, + "ing": 26195, + "conceptnet": 9930, + "ppdb": 41452, + "casual": 7826, + "hindered": 23926, + "cloze": 8722, + "50k": 428, + "rocstories": 48299, + "struggle": 53199, + "youtube": 63146, + "modestly": 35727, + "initialize": 26226, + "ensembling": 17986, + "umls": 59196, + "researches": 47172, + "promotes": 43194, + "requisite": 46967, + "progression": 43123, + "ubiquity": 59178, + "identifier": 24406, + "presumed": 42139, + "proactively": 42452, + "hits": 23973, + "prefixes": 41797, + "degenerated": 13797, + "longest": 31057, + "subsequence": 53602, + "uninterpretable": 59504, + "textbf": 56849, + "imposes": 24800, + "assigns": 4608, + "unidirectional": 59462, + "interdependence": 26641, + "constitutes": 10360, + "mrr": 35909, + "reliance": 46257, + "corruptions": 11571, + "sat": 48521, + "closest": 8715, + "1500": 157, + "dropped": 16446, + "dp": 16375, + "embeds": 17251, + "column": 9001, + "encoders": 17551, + "row": 48368, + "237": 326, + "essays": 18317, + "grades": 23000, + "courses": 11638, + "digitization": 15218, + "aggression": 2083, + "anonymous": 3028, + "observing": 38153, + "intensities": 26555, + "conducts": 10106, + "handcrafted": 23398, + "meetings": 32224, + "reflecting": 45778, + "team": 56001, + "smart": 51528, + "confirms": 10137, + "recovering": 45595, + "attended": 4703, + "programmers": 43081, + "cpu": 11672, + "cores": 11163, + "finnish": 21061, + "execution": 18985, + "headlines": 23507, + "divergent": 15688, + "exercises": 18995, + "predecessors": 41622, + "meaningfulness": 32030, + "repeating": 46395, + "dropping": 16447, + "batch": 6339, + "picks": 40870, + "polyglot": 41117, + "inventories": 26924, + "monolingually": 35816, + "curricula": 12041, + "characterizes": 8250, + "stimulus": 52853, + "rigorously": 48150, + "generalizing": 22158, + "premise": 41810, + "discrepancy": 15416, + "illness": 24512, + "pressing": 42134, + "actionable": 1459, + "branches": 7302, + "commonalities": 9212, + "relevancy": 46197, + "school": 48740, + "adam": 1498, + "faithfulness": 20368, + "blogging": 7227, + "venue": 61506, + "peers": 40012, + "intermediary": 26672, + "92": 558, + "granularities": 23090, + "iterative": 27121, + "contextualized": 10798, + "sudden": 53757, + "disaster": 15364, + "crises": 11770, + "52": 435, + "qualities": 44485, + "100k": 70, + "cooperative": 11122, + "strengthens": 52974, + "equip": 18195, + "reparameterization": 46391, + "tri": 58782, + "imagenet": 24549, + "verifying": 61545, + "equal": 18188, + "imply": 24674, + "crawler": 11688, + "ablation": 654, + "pushed": 44428, + "trivially": 58813, + "parallelizable": 39659, + "snli": 51550, + "infers": 25712, + "textsc": 56949, + "unbounded": 59222, + "indispensable": 25559, + "basque": 6338, + "hungarian": 24299, + "taxonomic": 55979, + "idiosyncrasies": 24483, + "splits": 52348, + "ar": 3989, + "inherits": 26210, + "2005": 242, + "policy": 41093, + "critic": 11774, + "rl": 48172, + "bootstrapped": 7266, + "sped": 52249, + "immune": 24586, + "investigations": 27007, + "protect": 43963, + "insurance": 26496, + "accountability": 882, + "protected": 43964, + "ehr": 16947, + "assembled": 4568, + "injecting": 26238, + "sampled": 48460, + "recommendation": 45564, + "vague": 61168, + "fourier": 21423, + "probing": 42490, + "dull": 16468, + "remedy": 46366, + "passed": 39924, + "desiderata": 14259, + "finetuning": 21047, + "copy": 11132, + "capitalize": 7642, + "greatest": 23224, + "governed": 22965, + "triggers": 58796, + "observes": 38152, + "parallelization": 39660, + "albeit": 2249, + "elegant": 16971, + "throughput": 57094, + "modest": 35726, + "cambridge": 7561, + "loosely": 31077, + "couple": 11630, + "ill": 24509, + "denoising": 14062, + "summarises": 53873, + "favourably": 20463, + "abstracting": 765, + "timelines": 57242, + "disagreement": 15352, + "standardizing": 52547, + "normative": 37712, + "meant": 32041, + "genuine": 22644, + "promises": 43156, + "genuinely": 22645, + "indistinguishable": 25560, + "negations": 36614, + "2022": 302, + "2017": 263, + "plethora": 41011, + "researched": 47146, + "police": 41091, + "hashtag": 23475, + "amplified": 2569, + "americans": 2537, + "argues": 4168, + "diverge": 15684, + "opposition": 38519, + "enforcement": 17753, + "confront": 10152, + "grow": 23286, + "annealing": 2868, + "epoch": 18185, + "wall": 61760, + "street": 52967, + "picking": 40869, + "expressivity": 19816, + "answerable": 3060, + "53": 438, + "staged": 52447, + "diagram": 14743, + "retaining": 47924, + "execute": 18981, + "executions": 18987, + "5000": 425, + "brand": 7304, + "advertisement": 2001, + "delivering": 13835, + "lift": 30446, + "sponsored": 52368, + "winning": 62070, + "jump": 27246, + "listening": 30846, + "nesting": 36687, + "isolate": 27049, + "memorization": 32234, + "irrelevant": 27039, + "arcs": 4133, + "caching": 7521, + "chen": 8283, + "distillation": 15566, + "eliminate": 16985, + "teacher": 55989, + "probed": 42488, + "disregarding": 15529, + "unacceptable": 59200, + "commodity": 9162, + "traffic": 57561, + "ms": 35911, + "hypernym": 24334, + "curse": 12047, + "votes": 61738, + "compensates": 9521, + "ucca": 59179, + "transportation": 58724, + "undesirable": 59433, + "innate": 26242, + "agrees": 2109, + "fuel": 21705, + "reviewing": 48046, + "separation": 49886, + "lasting": 29105, + "reformulating": 45785, + "bird": 7183, + "plurality": 41031, + "capital": 7639, + "contextualize": 10797, + "contextualization": 10796, + "naturalness": 36475, + "unreliable": 59636, + "debiasing": 13533, + "plentiful": 41009, + "kinyarwanda": 27375, + "kbs": 27274, + "io": 27028, + "subtrees": 53682, + "tiny": 57261, + "biasing": 7061, + "commerce": 9150, + "posed": 41242, + "bills": 7125, + "laden": 27932, + "accelerated": 803, + "democratic": 13851, + "prioritizing": 42435, + "opened": 38471, + "utilization": 61084, + "51": 430, + "va": 61162, + "savings": 48538, + "nmf": 37572, + "substantively": 53653, + "executable": 18979, + "falls": 20377, + "ary": 4506, + "imitation": 24576, + "tease": 56014, + "hands": 23430, + "ingredient": 26197, + "eval": 18432, + "super": 53920, + "burden": 7496, + "tier": 57104, + "phonemic": 40823, + "flat": 21095, + "premises": 41812, + "terminological": 56259, + "encompassing": 17584, + "modulation": 35750, + "undergo": 59254, + "subjected": 53560, + "quadratic": 44463, + "gaps": 21986, + "nlu": 37562, + "expansions": 19191, + "yielded": 63103, + "player": 40985, + "506": 427, + "borrowing": 7271, + "connecting": 10179, + "amazing": 2519, + "sensors": 49509, + "sensory": 49510, + "alike": 2394, + "advancements": 1896, + "convenience": 10994, + "acceptance": 814, + "attentions": 4861, + "beats": 6375, + "exploitable": 19669, + "avoided": 5436, + "utilities": 61078, + "examinations": 18859, + "restrict": 47419, + "preferable": 41788, + "earliest": 16509, + "seamlessly": 48959, + "nonetheless": 37693, + "uptake": 59780, + "initialisation": 26221, + "subspaces": 53613, + "contextualised": 10791, + "positioning": 41275, + "generalised": 22104, + "quantifiers": 44605, + "quantifier": 44604, + "branching": 7303, + "quantum": 44643, + "mover": 35891, + "bm25": 7233, + "typicality": 59134, + "gradual": 23014, + "graded": 22999, + "heritage": 23614, + "replicating": 46417, + "reused": 48003, + "angle": 2859, + "orthography": 38757, + "rights": 48146, + "generalisability": 22100, + "wikidata": 62037, + "dozens": 16374, + "unhealthy": 59457, + "temporality": 56194, + "severity": 50429, + "mild": 33243, + "nonexistent": 37694, + "hu": 24065, + "explosive": 19779, + "storing": 52877, + "incurring": 25488, + "continues": 10834, + "lesser": 30044, + "plurals": 41032, + "propositions": 43950, + "eighth": 16949, + "2019": 279, + "krippendorff": 27676, + "generalise": 22103, + "skipping": 51423, + "surprisal": 54179, + "autoencoding": 5030, + "tradeoff": 57504, + "economy": 16581, + "predominant": 41782, + "tying": 59046, + "regularizing": 45852, + "harming": 23462, + "multimedia": 36139, + "endeavor": 17733, + "routinely": 48365, + "pool": 41122, + "insensitive": 26374, + "insertions": 26380, + "deletions": 13824, + "substitutions": 53659, + "substantive": 53652, + "appraisal": 3383, + "arguing": 4169, + "classifications": 8584, + "neighbourhood": 36668, + "soon": 51714, + "drops": 16448, + "prominently": 43153, + "shapes": 50450, + "hitherto": 23972, + "embrace": 17255, + "shi": 50541, + "ci": 8359, + "foundations": 21421, + "imagery": 24550, + "appeared": 3143, + "distantly": 15560, + "hashing": 23474, + "formulae": 21381, + "familiarity": 20386, + "refrain": 45787, + "signaling": 50831, + "trait": 58326, + "straightforwardly": 52887, + "trial": 58784, + "adjusts": 1856, + "conneau": 10169, + "assertion": 4573, + "125": 117, + "disorders": 15512, + "gru": 23312, + "skewed": 51412, + "younger": 63143, + "densely": 14087, + "ties": 57105, + "discarding": 15368, + "9k": 576, + "800": 523, + "intuitively": 26912, + "interdependencies": 26642, + "participate": 39815, + "fifth": 20786, + "ordinal": 38674, + "outputting": 39023, + "concatenate": 9907, + "placing": 40931, + "viability": 61567, + "productions": 43054, + "deteriorates": 14546, + "dominance": 16304, + "creativity": 11754, + "faceted": 20256, + "wish": 62087, + "fourteen": 21424, + "collectively": 8992, + "facebookresearch": 20249, + "surrogate": 54193, + "oracle": 38581, + "reception": 45477, + "confronted": 10153, + "sharply": 50522, + "optimistic": 38541, + "ethical": 18415, + "grown": 23304, + "grading": 23013, + "reliant": 46258, + "instructor": 26485, + "unordered": 59622, + "motivate": 35857, + "viable": 61569, + "cleaner": 8648, + "initialization": 26223, + "extendable": 19834, + "tunes": 58893, + "drugs": 16452, + "legislation": 30009, + "officially": 38314, + "seeds": 49046, + "institution": 26472, + "climate": 8665, + "tradeoffs": 57505, + "verbalizations": 61515, + "developer": 14644, + "hallucinated": 23372, + "inverted": 26933, + "sem": 49229, + "tagset": 54760, + "resnet": 47193, + "ud": 59180, + "v1": 61157, + "equation": 18193, + "1k": 214, + "deciding": 13555, + "traveling": 58726, + "authoritative": 5004, + "factorized": 20302, + "documenting": 15851, + "endangered": 17730, + "alternate": 2492, + "marginalization": 31826, + "memorize": 32235, + "attentive": 4862, + "marginalized": 31827, + "abstractive": 768, + "emoji": 17282, + "avoiding": 5437, + "seventeen": 50422, + "segmenter": 49092, + "cmu": 8754, + "participated": 39817, + "bioasq": 7162, + "batches": 6345, + "gp": 22969, + "dsl": 16454, + "stance": 52454, + "questioning": 44763, + "commenting": 9143, + "breaking": 7312, + "tail": 54762, + "synthetically": 54387, + "degrading": 13812, + "restricting": 47424, + "cpus": 11673, + "simultaneous": 51268, + "delay": 13816, + "5m": 452, + "partitioned": 39896, + "respects": 47387, + "motion": 35856, + "entailed": 17996, + "outlines": 38776, + "aforementioned": 2035, + "subcategories": 53541, + "abuse": 785, + "crime": 11767, + "disasters": 15365, + "ratios": 45032, + "workflow": 62866, + "browsing": 7376, + "expose": 19784, + "continuously": 10858, + "outliers": 38773, + "outlier": 38771, + "impressively": 24818, + "unnecessary": 59619, + "strengthening": 52973, + "client": 8663, + "device": 14722, + "prevent": 42228, + "sigmoid": 50826, + "pertinent": 40786, + "deploy": 14168, + "spirit": 52344, + "proto": 43969, + "staff": 52425, + "doctor": 15762, + "sizable": 51374, + "lays": 29246, + "false": 20379, + "violating": 61616, + "pretty": 42223, + "multilingualism": 36137, + "postulate": 41377, + "collaborate": 8930, + "cooperate": 11120, + "conjunct": 10164, + "genia": 22639, + "controller": 10988, + "concretely": 9983, + "bot": 7273, + "enormously": 17959, + "monotonically": 35822, + "projective": 43144, + "min": 33272, + "kl": 27377, + "deficiency": 13770, + "divides": 15748, + "moved": 35888, + "ready": 45096, + "foster": 21409, + "delivery": 13837, + "f_1": 20235, + "vlsp": 61698, + "controllable": 10976, + "ami": 2539, + "save": 48533, + "fare": 20407, + "loop": 31073, + "bpe": 7296, + "compress": 9808, + "distil": 15562, + "ample": 2565, + "repetitive": 46398, + "synthesizing": 54366, + "conversely": 11068, + "pruned": 44266, + "abundance": 779, + "disregard": 15527, + "ported": 41219, + "vulnerable": 61753, + "restaurants": 47414, + "celebrities": 7907, + "100b": 69, + "hi": 23636, + "bn": 7234, + "verbalize": 61516, + "receptive": 45478, + "excessive": 18962, + "executing": 18984, + "programmer": 43080, + "fst": 21703, + "mitigate": 33379, + "primal": 42359, + "subwords": 53690, + "unusual": 59756, + "bits": 7188, + "predictable": 41663, + "credibility": 11756, + "biography": 7167, + "evaluative": 18773, + "speeds": 52327, + "clue": 8729, + "definitive": 13795, + "satisfaction": 48522, + "dr": 16379, + "begun": 6385, + "unexplored": 59441, + "summarisation": 53870, + "coherently": 8920, + "scraped": 48941, + "metaphorical": 32348, + "figurative": 20789, + "suboptimal": 53594, + "committing": 9161, + "gigaword": 22689, + "confined": 10127, + "md": 31983, + "el": 16950, + "boards": 7236, + "755": 505, + "mitigates": 33393, + "oversampling": 39103, + "regularizers": 45850, + "sacrificing": 48421, + "verifies": 61531, + "undirected": 59436, + "np": 37965, + "fuses": 21848, + "pivotal": 40919, + "experiencing": 19229, + "empower": 17406, + "nontrivial": 37698, + "pointer": 41055, + "consortium": 10341, + "aa": 581, + "lasso": 29104, + "l2": 27684, + "rnng": 48206, + "agreeing": 2102, + "minimally": 33294, + "nns": 37582, + "emphasizes": 17314, + "competency": 9526, + "constructive": 10432, + "boosts": 7262, + "leaf": 29332, + "lexicalization": 30396, + "structurally": 53086, + "pioneering": 40889, + "mislead": 33348, + "predication": 41633, + "tedious": 56161, + "flagging": 21090, + "7000": 489, + "kannada": 27266, + "concluded": 9972, + "foremost": 21298, + "triplet": 58807, + "phases": 40807, + "till": 57111, + "weakens": 61854, + "copied": 11128, + "connectionist": 10181, + "lengthy": 30041, + "misclassified": 33343, + "born": 7268, + "continuum": 10861, + "exclude": 18972, + "positives": 41304, + "deteriorating": 14547, + "allocate": 2428, + "noises": 37609, + "diagnosis": 14738, + "inferencing": 25705, + "observational": 38125, + "physiological": 40865, + "condensed": 9989, + "diagnoses": 14736, + "vectorization": 61476, + "quadratically": 44466, + "multitasking": 36328, + "bilstm": 7126, + "clips": 8678, + "uncorrelated": 59243, + "quantization": 44640, + "artefacts": 4445, + "fasttext": 20445, + "continuity": 10839, + "sourcing": 51843, + "incompatible": 25329, + "cer": 7932, + "pu": 44300, + "delicate": 13829, + "feelings": 20726, + "communications": 9254, + "recommending": 45573, + "emojis": 17283, + "explorations": 19680, + "faithful": 20364, + "stands": 52554, + "prominence": 43150, + "nucleus": 37977, + "localization": 30957, + "median": 32193, + "stackoverflow": 52423, + "scorer": 48884, + "constraining": 10367, + "arc": 4015, + "offensive": 38288, + "disclosure": 15377, + "excerpts": 18961, + "suspected": 54226, + "prospects": 43961, + "reciprocal": 45484, + "ensuring": 17993, + "depict": 14164, + "retains": 47928, + "told": 57346, + "distraction": 15615, + "profiling": 43071, + "flip": 21113, + "whatsapp": 61951, + "pathology": 39950, + "frontier": 21692, + "bulk": 7495, + "lowering": 31227, + "productivity": 43056, + "hyper": 24324, + "mtl": 35929, + "upto": 59781, + "db": 13505, + "speedups": 52330, + "overwhelming": 39123, + "isn": 27047, + "prune": 44265, + "aside": 4516, + "drama": 16383, + "probe": 42485, + "contingent": 10820, + "malayalam": 31677, + "london": 31001, + "54": 441, + "novice": 37962, + "sarcasm": 48517, + "categorizes": 7858, + "hurt": 24303, + "multichannel": 36045, + "tracker": 57495, + "utilise": 61074, + "multi30k": 36044, + "discriminator": 15451, + "fooling": 21277, + "adversary": 1996, + "pitfalls": 40914, + "adversarially": 1993, + "mle": 33435, + "offensiveness": 38289, + "ssl": 52407, + "slu": 51458, + "altering": 2491, + "sgd": 50434, + "chi": 8286, + "square": 52398, + "presently": 42067, + "prices": 42358, + "allocated": 2429, + "price": 42357, + "plate": 40947, + "hong": 23998, + "kong": 27672, + "collapse": 8934, + "anchors": 2853, + "scorers": 48885, + "editions": 16599, + "snapshots": 51546, + "ml": 33427, + "contradict": 10865, + "clearer": 8655, + "certainly": 7951, + "culturally": 11938, + "attending": 4704, + "changed": 8175, + "customised": 12060, + "speeding": 52326, + "losing": 31080, + "excel": 18952, + "traits": 58327, + "2016a": 262, + "incoherent": 25326, + "revolutionized": 48061, + "switches": 54259, + "serving": 50095, + "dilated": 15220, + "20x": 312, + "8x": 550, + "maybe": 31975, + "harness": 23466, + "announcements": 3017, + "cohort": 8923, + "thirty": 57052, + "300k": 361, + "truths": 58843, + "boxes": 7294, + "misclassification": 33342, + "upcoming": 59764, + "predictability": 41662, + "afterward": 2042, + "cqa": 11674, + "200k": 247, + "edits": 16602, + "fulfills": 21710, + "discriminability": 15434, + "insufficient": 26491, + "underlie": 59258, + "publishers": 44377, + "wing": 62067, + "unsurprisingly": 59750, + "screening": 48948, + "ot": 38758, + "characterisation": 8228, + "arab": 3990, + "anticipate": 3119, + "durations": 16475, + "pretrain": 42142, + "tabular": 54693, + "outbreak": 38761, + "outbreaks": 38764, + "epidemiological": 18180, + "symptoms": 54275, + "moment": 35779, + "disambiguated": 15355, + "attaching": 4656, + "spain": 51915, + "encompasses": 17583, + "displays": 15522, + "aligns": 2393, + "competence": 9525, + "misaligned": 33340, + "misalignment": 33341, + "confident": 10120, + "teams": 56006, + "revisited": 48056, + "subsystems": 53666, + "repetition": 46396, + "societal": 51607, + "undertake": 59429, + "citizens": 8374, + "identities": 24471, + "nation": 36394, + "chooses": 8345, + "embodies": 17253, + "continually": 10824, + "revisions": 48054, + "propagates": 43241, + "bond": 7243, + "duplicated": 16470, + "stated": 52714, + "winners": 62069, + "comprehend": 9758, + "auc": 4920, + "roc": 48297, + "rankers": 44961, + "interventions": 26751, + "visible": 61632, + "architectural": 4016, + "deduce": 13674, + "euclidean": 18423, + "homonyms": 23996, + "entirety": 18035, + "claimed": 8382, + "canadian": 7567, + "encounters": 17588, + "arbitrarily": 4010, + "breakthroughs": 7316, + "gotten": 22961, + "atypical": 4919, + "120": 112, + "subsampling": 53601, + "modulate": 35748, + "approximated": 3980, + "tasked": 55480, + "clarifying": 8388, + "misunderstandings": 33375, + "250": 336, + "sketch": 51409, + "inspect": 26395, + "alters": 2514, + "advice": 2004, + "discovers": 15413, + "su": 53513, + "episodes": 18182, + "rewarding": 48073, + "irrespective": 27045, + "inner": 26243, + "deficit": 13772, + "lewis": 30346, + "paced": 39134, + "tightly": 57109, + "deficient": 13771, + "deficits": 13773, + "puts": 44431, + "gans": 21956, + "golden": 22922, + "mini": 33279, + "win": 62062, + "transformer": 58448, + "cup": 11943, + "effortless": 16931, + "elicited": 16981, + "chit": 8329, + "120k": 114, + "streaming": 52962, + "suite": 53863, + "probabilistically": 42469, + "outer": 38769, + "retailers": 47919, + "quora": 44837, + "voluminous": 61733, + "establishment": 18368, + "overhead": 39085, + "thereof": 57040, + "vice": 61572, + "versa": 61546, + "october": 38285, + "545": 443, + "additions": 1738, + "synthesizer": 54364, + "musical": 36338, + "king": 27373, + "reasonably": 45176, + "tunable": 58852, + "acl": 1430, + "switchboard": 54254, + "undergone": 59256, + "immense": 24583, + "untapped": 59751, + "harmful": 23461, + "interprets": 26742, + "hardest": 23456, + "numerals": 38058, + "aged": 2048, + "accelerating": 805, + "intents": 26573, + "renewed": 46385, + "appeal": 3135, + "imagination": 24559, + "lacked": 27927, + "failed": 20346, + "moral": 35832, + "sent2vec": 49512, + "anthology": 3117, + "ps": 44271, + "journalism": 27229, + "stereotypical": 52848, + "terminal": 56256, + "forget": 21305, + "meticulously": 33108, + "correlating": 11516, + "brands": 7305, + "procrustes": 42972, + "estonian": 18391, + "satisfies": 48527, + "2018": 272, + "v2": 61159, + "2021": 296, + "calls": 7558, + "july": 27245, + "semiotic": 49473, + "typologically": 59165, + "ugc": 59183, + "brazilian": 7307, + "risks": 48165, + "princeton": 42378, + "shrink": 50811, + "explainable": 19597, + "4m": 416, + "shortage": 50575, + "infrequently": 26190, + "rerank": 46968, + "overarching": 39055, + "decomposable": 13651, + "gcns": 22028, + "positively": 41302, + "stocks": 52861, + "mentioning": 32301, + "squad": 52394, + "archive": 4131, + "snippet": 51547, + "url": 59791, + "sparked": 51961, + "recast": 45250, + "entail": 17995, + "exclusive": 18977, + "crafting": 11684, + "inception": 25217, + "phenomenal": 40813, + "proving": 44256, + "alleviating": 2426, + "duplication": 16473, + "trending": 58778, + "demands": 13844, + "argumentation": 4176, + "bilstms": 7138, + "pas": 39915, + "exhaustively": 18999, + "prepare": 41814, + "anchored": 2851, + "shares": 50512, + "ark": 4188, + "elaborately": 16953, + "rightarrow": 48145, + "manipulating": 31710, + "went": 61944, + "10k": 78, + "decentralized": 13550, + "strive": 52996, + "attained": 4671, + "genome": 22640, + "alleviated": 2420, + "usc": 59812, + "hat": 23478, + "invisible": 27010, + "rooted": 48343, + "leap": 29340, + "longstanding": 31061, + "backgrounds": 5494, + "bounded": 7285, + "interval": 26747, + "280": 346, + "attributions": 4918, + "disagree": 15351, + "typologies": 59170, + "catalyze": 7830, + "overtly": 39107, + "underperform": 59282, + "enrichment": 17967, + "gloss": 22853, + "augmentation": 4946, + "utilising": 61077, + "came": 7562, + "164": 169, + "sorts": 51722, + "encompass": 17582, + "angles": 2860, + "4x": 418, + "timestep": 57259, + "sec": 48993, + "initializations": 26225, + "task4": 55479, + "supervisory": 54102, + "japan": 27143, + "overflow": 39084, + "mitigation": 33395, + "openly": 38477, + "pertain": 40783, + "eases": 16524, + "wu": 63021, + "lid": 30432, + "smoother": 51537, + "renowned": 46386, + "externally": 19959, + "keras": 27287, + "realisation": 45144, + "underfitting": 59253, + "attends": 4705, + "empathy": 17306, + "dyadic": 16480, + "sustain": 54227, + "car": 7747, + "trick": 58787, + "bodies": 7237, + "governing": 22966, + "reformulate": 45784, + "probes": 42489, + "aggregates": 2074, + "returned": 47998, + "reformulation": 45786, + "recruited": 45599, + "discrepancies": 15415, + "favoring": 20459, + "calibrated": 7530, + "headline": 23505, + "6m": 483, + "moderation": 35698, + "16k": 170, + "char": 8192, + "exhibiting": 19007, + "originates": 38749, + "concatenating": 9910, + "moves": 35893, + "pulling": 44384, + "derivational": 14194, + "fitted": 21070, + "decouples": 13664, + "20k": 309, + "sort": 51719, + "colloquial": 8997, + "unwritten": 59763, + "switched": 54256, + "injected": 26237, + "warrants": 61778, + "synergistic": 54282, + "relax": 46136, + "rte": 48373, + "separable": 49872, + "separability": 49871, + "hurts": 24307, + "bt": 7379, + "universities": 59554, + "cds": 7902, + "organizational": 38685, + "disregards": 15530, + "entered": 18013, + "formulates": 21388, + "causality": 7880, + "infrastructure": 26186, + "infrastructures": 26187, + "quo": 44836, + "assets": 4596, + "pan": 39244, + "raise": 44854, + "lessons": 30046, + "schemas": 48724, + "padding": 39136, + "floating": 21114, + "ensures": 17992, + "workings": 62870, + "instantaneous": 26439, + "arrives": 4206, + "initiated": 26233, + "personalities": 40760, + "slots": 51445, + "grus": 23315, + "posteriori": 41365, + "gumbel": 23360, + "indonesia": 25598, + "urges": 59790, + "overloaded": 39095, + "ats": 4652, + "envision": 18178, + "apple": 3151, + "protocols": 43971, + "invented": 26922, + "e2e": 16501, + "launched": 29171, + "uniqueness": 59520, + "theart": 57002, + "afforded": 2033, + "mc": 31981, + "levantine": 30053, + "replay": 46411, + "episodic": 18183, + "catastrophic": 7832, + "organizers": 38690, + "kingdom": 27374, + "s2s": 48416, + "doc2vec": 15760, + "distribute": 15619, + "bottle": 7275, + "noticeable": 37727, + "schemata": 48725, + "corruption": 11570, + "forcing": 21290, + "hour": 24038, + "prescriptions": 41834, + "domestic": 16303, + "socioeconomic": 51615, + "transitional": 58544, + "graphic": 23181, + "proves": 43998, + "converges": 11026, + "parties": 39894, + "mp": 35903, + "esim": 18259, + "struggling": 53205, + "heads": 23509, + "transparency": 58720, + "anonymity": 3025, + "witness": 62089, + "hide": 23652, + "obfuscate": 38077, + "soundness": 51738, + "obfuscation": 38079, + "sanity": 48513, + "rg": 48084, + "unstable": 59664, + "fulfill": 21708, + "assistants": 4614, + "siri": 51362, + "chatting": 8267, + "hinders": 23930, + "hindrance": 23944, + "episode": 18181, + "interpersonal": 26701, + "coined": 8926, + "progressed": 43120, + "ear": 16504, + "manages": 31694, + "glance": 22815, + "apt": 3988, + "steady": 52786, + "wikitext": 62058, + "prize": 42447, + "spherical": 52342, + "invalid": 26914, + "unsupported": 59749, + "connectives": 10187, + "kgs": 27362, + "corrupt": 11567, + "disconnected": 15379, + "mature": 31946, + "xgboost": 63027, + "inherited": 26208, + "gauge": 22008, + "directionality": 15287, + "hypothesise": 24351, + "evidences": 18827, + "delayed": 13817, + "granular": 23089, + "mnli": 33446, + "openie": 38473, + "gn": 22871, + "500k": 426, + "cap": 7594, + "sim": 51026, + "triggering": 58795, + "disjunction": 15509, + "ridge": 48134, + "mscoco": 35915, + "disfluencies": 15504, + "perturbation": 40789, + "json": 27235, + "zenodo": 63149, + "occupation": 38265, + "mission": 33367, + "watch": 61784, + "enforce": 17751, + "algerian": 2257, + "rounds": 48361, + "reviewer": 48044, + "urls": 59795, + "duplicates": 16471, + "holes": 23988, + "owner": 39128, + "cove": 11642, + "sst": 52408, + "launch": 29170, + "vertical": 61563, + "novelties": 37960, + "ablative": 664, + "disagreements": 15353, + "contradictory": 10869, + "submitting": 53590, + "enforced": 17752, + "therapy": 57039, + "warrant": 61776, + "2k": 349, + "suffice": 53795, + "unaware": 59217, + "shots": 50659, + "cider": 8360, + "shortcut": 50583, + "emnlp": 17280, + "romanized": 48335, + "designs": 14342, + "missed": 33358, + "storytelling": 52882, + "kanji": 27265, + "ser": 50055, + "wild": 62059, + "generalisation": 22102, + "office": 38305, + "bar": 5528, + "waste": 61782, + "clinicians": 8676, + "unsafe": 59639, + "harm": 23460, + "diminish": 15246, + "librispeech": 30426, + "ranker": 44960, + "deduced": 13675, + "titan": 57267, + "enjoying": 17952, + "senior": 49480, + "encounter": 17585, + "barriers": 5531, + "combing": 9106, + "dialogic": 14761, + "decent": 13547, + "procedurally": 42740, + "licenses": 30431, + "tolerance": 57347, + "agile": 2085, + "coping": 11130, + "sorted": 51720, + "saw": 48539, + "happen": 23433, + "sectors": 49038, + "2017a": 270, + "substructures": 53662, + "appearance": 3141, + "rests": 47430, + "eating": 16571, + "hops": 24021, + "composite": 9738, + "spent": 52340, + "manuals": 31790, + "distributing": 15631, + "10m": 79, + "binarized": 7141, + "illustrations": 24524, + "anonymized": 3027, + "injection": 26239, + "nuance": 37974, + "guaranteed": 23321, + "liked": 30515, + "conditionally": 10011, + "dev": 14566, + "7x": 519, + "swapping": 54244, + "finely": 21036, + "modularity": 35746, + "pm": 41034, + "trigrams": 58798, + "routine": 48364, + "multinli": 36159, + "hyperbolic": 24328, + "interestingness": 26657, + "plan": 40940, + "inject": 26236, + "viewpoint": 61607, + "zipfian": 63194, + "curves": 12051, + "deviate": 14717, + "demonstrations": 14058, + "revised": 48051, + "opensubtitles": 38485, + "profiles": 43070, + "encapsulated": 17457, + "restoration": 47415, + "deconvolutional": 13661, + "thirteen": 57051, + "summarise": 53871, + "retrofitting": 47995, + "wikihow": 62039, + "insert": 26375, + "shorten": 50585, + "imputation": 25203, + "disfluency": 15505, + "mid": 33234, + "cleaned": 8646, + "restarts": 47411, + "preprint": 41822, + "february": 20702, + "blue": 7231, + "fc": 20465, + "pt": 44295, + "decipher": 13556, + "dblp": 13506, + "incapable": 25215, + "sums": 53918, + "dark": 12096, + "replicability": 46412, + "estate": 18369, + "translational": 58704, + "reality": 45157, + "shopping": 50548, + "tutoring": 58997, + "craft": 11676, + "android": 2856, + "optimizers": 38572, + "optimizer": 38571, + "chatbots": 8263, + "brazil": 7306, + "said": 48431, + "unavailable": 59213, + "factuality": 20324, + "readings": 45094, + "8m": 548, + "252": 338, + "accept": 808, + "acceptability": 810, + "january": 27142, + "trump": 58827, + "letting": 30052, + "catching": 7837, + "attract": 4871, + "mse": 35916, + "earth": 16519, + "unfamiliar": 59446, + "stochastically": 52858, + "realism": 45146, + "totaling": 57476, + "skim": 51416, + "struggles": 53204, + "lemmatizer": 30020, + "operationalization": 38491, + "consolidating": 10338, + "pyramid": 44437, + "360": 378, + "intelligently": 26545, + "gave": 22020, + "hospital": 24024, + "propbank": 43247, + "ubiquitously": 59177, + "diversely": 15726, + "customized": 12064, + "neuroscience": 37120, + "tells": 56169, + "wording": 62354, + "coffee": 8887, + "clip": 8677, + "assessors": 4594, + "laborious": 27865, + "assembly": 4571, + "bidirectionally": 7086, + "switzerland": 54262, + "seldom": 49097, + "60k": 461, + "backtranslation": 5496, + "susceptible": 54224, + "attacks": 4664, + "exemplified": 18992, + "coattention": 8787, + "triviaqa": 58814, + "strike": 52988, + "effortlessly": 16932, + "scan": 48652, + "systematicity": 54415, + "notorious": 37735, + "principally": 42383, + "suspect": 54225, + "srl": 52405, + "compressing": 9811, + "sacrifices": 48420, + "skilled": 51414, + "norwegian": 37715, + "broadening": 7361, + "reserved": 47179, + "pytorch": 44443, + "brittle": 7346, + "trouble": 58816, + "comprehending": 9759, + "cumulative": 11942, + "blanks": 7197, + "switch": 54253, + "ppl": 41453, + "exploding": 19650, + "repeatedly": 46394, + "appending": 3149, + "keys": 27347, + "sgns": 50435, + "timestamps": 57258, + "route": 48362, + "10x": 81, + "facto": 20293, + "serialized": 50057, + "wikisql": 62057, + "objectivity": 38115, + "reactive": 45065, + "mediation": 32196, + "elaborating": 16955, + "functionally": 21766, + "acknowledge": 1428, + "incapability": 25214, + "perturbations": 40792, + "mrc": 35905, + "ma": 31296, + "baidu": 5512, + "resilience": 47189, + "transformers": 58518, + "unstated": 59665, + "transferability": 58431, + "spatially": 51988, + "linearization": 30679, + "gqa": 22997, + "repetitions": 46397, + "marco": 31815, + "prohibitive": 43126, + "chemistry": 8282, + "inconsistency": 25337, + "lancopku": 27943, + "hr": 24043, + "resilient": 47190, + "realizations": 45159, + "idiom": 24480, + "remembering": 46370, + "medias": 32194, + "fantasy": 20392, + "chatbot": 8262, + "fluid": 21136, + "migration": 33238, + "infancy": 25631, + "benign": 6598, + "deterioration": 14548, + "push": 44423, + "azure": 5483, + "illustrative": 24525, + "promotion": 43196, + "replicates": 46416, + "maintains": 31497, + "occupies": 38266, + "pa": 39132, + "periodically": 40725, + "vis": 61629, + "peer": 40010, + "assessments": 4593, + "instagram": 26422, + "disorder": 15511, + "newcomers": 37366, + "likes": 30528, + "casts": 7825, + "oft": 38320, + "avenue": 5394, + "oil": 38324, + "gas": 21989, + "accident": 837, + "symptom": 54274, + "germanic": 22679, + "slavic": 51426, + "utmost": 61130, + "sellers": 49228, + "dan": 12092, + "ancillary": 2855, + "vaswani": 61444, + "mask": 31855, + "san": 48512, + "roman": 48328, + "tensorflow": 56226, + "pinyin": 40886, + "inputting": 26370, + "evidently": 18830, + "australian": 4996, + "evidenced": 18826, + "swaps": 54245, + "attack": 4658, + "inspirations": 26400, + "session": 50096, + "consulting": 10435, + "forensic": 21299, + "questionable": 44760, + "blends": 7201, + "concentrated": 9916, + "characterised": 8230, + "imperfections": 24629, + "monitored": 35785, + "numerically": 38063, + "gan": 21952, + "tricks": 58788, + "challenged": 8025, + "cyber": 12072, + "security": 49041, + "continual": 10821, + "accumulate": 886, + "intact": 26497, + "disentangled": 15500, + "comprise": 9816, + "accommodates": 839, + "scitail": 48775, + "approx": 3974, + "fool": 21276, + "simplifies": 51240, + "banks": 5527, + "sota": 51723, + "lifelong": 30442, + "omitted": 38332, + "literacy": 30850, + "stably": 52414, + "discerning": 15372, + "jaccard": 27140, + "recurrently": 45632, + "ulmfit": 59189, + "hindering": 23928, + "319": 366, + "coarser": 8786, + "att": 4653, + "relieves": 46271, + "inefficient": 25626, + "feeds": 20723, + "converse": 11067, + "subgraph": 53546, + "coreferences": 11161, + "mirrors": 33338, + "yago": 63039, + "discriminators": 15454, + "copies": 11129, + "characterise": 8229, + "multidimensional": 36048, + "assesses": 4585, + "regulators": 45859, + "grants": 23088, + "infersent": 25713, + "supply": 54113, + "6k": 482, + "noticeably": 37730, + "compile": 9577, + "delimiter": 13830, + "overnight": 39102, + "nvidia": 38072, + "accelerates": 804, + "embody": 17254, + "vertex": 61562, + "30k": 362, + "covariates": 11641, + "diagonal": 14742, + "multiply": 36320, + "viewers": 61605, + "consecutively": 10197, + "stating": 52730, + "ambiguously": 2531, + "massively": 31891, + "exacerbated": 18846, + "biocreative": 7164, + "selections": 49161, + "omit": 38331, + "factorize": 20301, + "reordered": 46388, + "penalized": 40016, + "transport": 58722, + "neulab": 36926, + "rc": 45040, + "isolating": 27052, + "worthwhile": 62980, + "abstracted": 764, + "permissive": 40728, + "licence": 30427, + "closes": 8713, + "lingually": 30742, + "overestimated": 39077, + "neo": 36673, + "banking": 5526, + "regulations": 45858, + "absolutely": 751, + "mistake": 33370, + "seriously": 50071, + "won": 62106, + "intensively": 26562, + "epsilon": 18187, + "trajectory": 58329, + "noteworthy": 37725, + "kappa": 27267, + "reranker": 46969, + "parity": 39750, + "quantized": 44642, + "leaderboard": 29283, + "105": 74, + "ideally": 24377, + "allen": 2396, + "secured": 49040, + "memorizing": 32238, + "catalan": 7828, + "inconclusive": 25334, + "subproblem": 53598, + "proprietary": 43951, + "128": 118, + "256": 339, + "paucity": 39980, + "aryan": 4507, + "magahi": 31413, + "differentiating": 15145, + "outlining": 38777, + "irregular": 27036, + "income": 25327, + "charge": 8256, + "depression": 14181, + "minutes": 33335, + "casting": 7824, + "hub": 24067, + "ecommerce": 16576, + "dominating": 16312, + "surveying": 54222, + "diagrams": 14744, + "understandability": 59318, + "tonal": 57349, + "backed": 5488, + "curate": 11944, + "deductive": 13677, + "recalling": 45249, + "bonus": 7244, + "balancing": 5518, + "redesign": 45646, + "plagues": 40935, + "researching": 47174, + "interdisciplinary": 26644, + "synchronized": 54278, + "personalize": 40764, + "capsule": 7643, + "blended": 7199, + "attackers": 4662, + "uploaded": 59771, + "sketches": 51410, + "helped": 23594, + "silence": 51020, + "negatives": 36645, + "prototypes": 43973, + "precedence": 41605, + "tions": 57264, + "lemmatized": 30019, + "coreferent": 11162, + "mexican": 33212, + "2018a": 278, + "prefix": 41795, + "agreements": 2108, + "2002": 238, + "suffering": 53787, + "liang": 30421, + "plugged": 41028, + "alexa": 2253, + "winograd": 62072, + "quantifiable": 44601, + "disentanglement": 15501, + "disentangles": 15502, + "clinically": 8675, + "holding": 23983, + "verifiability": 61523, + "outdated": 38768, + "underspecified": 59287, + "rotation": 48346, + "heatmap": 23526, + "exposes": 19786, + "pathological": 39949, + "counterintuitive": 11621, + "unimodal": 59498, + "revolves": 48064, + "narrating": 36380, + "bidaf": 7063, + "questioned": 44761, + "comprehensible": 9761, + "featured": 20513, + "ade": 1828, + "duplicating": 16472, + "cloning": 8682, + "blackbox": 7195, + "debug": 13536, + "7k": 517, + "drafts": 16382, + "venues": 61507, + "originality": 38741, + "mm": 33443, + "mdp": 31985, + "unexploited": 59440, + "troublesome": 58817, + "alternately": 2493, + "peculiar": 40007, + "standpoint": 52553, + "hotels": 24035, + "stylistically": 53511, + "ate": 4645, + "existent": 19020, + "multiplications": 36316, + "delete": 13819, + "writes": 62987, + "outlets": 38770, + "persist": 40747, + "stock": 52860, + "filings": 20796, + "regulatory": 45860, + "markets": 31838, + "makers": 31612, + "decoupling": 13665, + "adhere": 1836, + "yearly": 63048, + "appearances": 3142, + "america": 2534, + "programmatic": 43078, + "suffices": 53796, + "modularized": 35747, + "adaptability": 1516, + "alterations": 2489, + "inadvertently": 25212, + "nrc": 37969, + "canada": 7566, + "smm4h": 51534, + "medication": 32213, + "imbalance": 24561, + "acc": 798, + "lrls": 31233, + "hrl": 24044, + "da": 12080, + "weighing": 61914, + "textrank": 56854, + "tutorials": 58996, + "clarification": 8385, + "kim": 27364, + "estimators": 18390, + "subtypes": 53683, + "mixtures": 33424, + "ta": 54685, + "unite": 59523, + "looked": 31068, + "kurdish": 27682, + "warning": 61774, + "signs": 51019, + "bernoulli": 6599, + "multiparty": 36163, + "perturb": 40788, + "landmarks": 27944, + "consolidation": 10339, + "practitioner": 41490, + "commitment": 9159, + "alter": 2488, + "latex": 29160, + "mathematics": 31937, + "dozen": 16373, + "extrapolation": 20150, + "maximise": 31953, + "testable": 56391, + "170": 174, + "untested": 59752, + "decides": 13554, + "unnatural": 59617, + "allenai": 2397, + "obfuscated": 38078, + "asymmetry": 4642, + "vulnerabilities": 61751, + "theorem": 57008, + "delexicalized": 13825, + "webnlg": 61902, + "inadequacy": 25210, + "cultures": 11940, + "couples": 11633, + "fastest": 20444, + "afford": 2030, + "september": 49887, + "december": 13546, + "filled": 20798, + "snips": 51549, + "iot": 27029, + "1a": 211, + "6th": 484, + "12th": 120, + "duality": 16464, + "3x": 392, + "ineffective": 25623, + "armed": 4190, + "willing": 62061, + "succinctly": 53756, + "discern": 15370, + "cardinal": 7751, + "humanoid": 24269, + "coherency": 8912, + "admit": 1860, + "termed": 56255, + "densenet": 14089, + "prioritizes": 42434, + "extensibility": 19845, + "dating": 13496, + "bert": 6600, + "delta": 13838, + "follower": 21260, + "unintended": 59502, + "sdp": 48954, + "competitions": 9535, + "codalab": 8789, + "5x": 454, + "lsh": 31236, + "cuda": 11930, + "hurting": 24305, + "ticket": 57100, + "master": 31893, + "customizable": 12061, + "pharmaceutical": 40803, + "rat": 45011, + "granted": 23087, + "unfair": 59443, + "richly": 48131, + "enrolled": 17968, + "indigenous": 25552, + "humor": 24293, + "funny": 21800, + "misconceptions": 33345, + "meme": 32231, + "humorous": 24294, + "memes": 32232, + "sd": 48953, + "icelandic": 24361, + "continuing": 10836, + "bags": 5510, + "unresolved": 59637, + "uncovering": 59246, + "favour": 20462, + "extralinguistic": 20145, + "sememe": 49423, + "tangent": 54798, + "rectified": 45602, + "forced": 21287, + "uninformative": 59501, + "revising": 48052, + "validations": 61198, + "advocate": 2005, + "imaging": 24560, + "intentional": 26570, + "particularities": 39875, + "penalization": 40014, + "defending": 13767, + "impediment": 24624, + "112": 94, + "differentiation": 15146, + "src": 52404, + "hamper": 23377, + "notebook": 37722, + "proliferate": 43146, + "dilemma": 15221, + "sake": 48432, + "employment": 17402, + "informs": 26185, + "discriminates": 15438, + "interpolating": 26706, + "transferable": 58432, + "monologue": 35818, + "adaboost": 1497, + "rmse": 48178, + "financially": 20895, + "industries": 25619, + "labour": 27866, + "subsequences": 53603, + "136": 131, + "assurance": 4640, + "violations": 61618, + "wechat": 61907, + "app": 3132, + "necessitating": 36539, + "designated": 14305, + "multiscale": 36321, + "caveats": 7894, + "censorship": 7910, + "seeker": 49052, + "unsegmented": 59660, + "trip": 58800, + "novelly": 37958, + "prototyping": 43977, + "deem": 13679, + "criminal": 11769, + "ultra": 59193, + "pools": 41130, + "worsened": 62975, + "freedom": 21649, + "civil": 8376, + "leak": 29334, + "exemplary": 18991, + "stacks": 52424, + "standardize": 52545, + "standardization": 52544, + "deaths": 13526, + "diagnosed": 14735, + "budgets": 7381, + "affordable": 2031, + "invested": 26936, + "sequencing": 50034, + "funds": 21799, + "lighter": 30455, + "mitigated": 33392, + "diet": 14811, + "strategically": 52889, + "threat": 57084, + "vulnerability": 61752, + "defense": 13768, + "loading": 30927, + "l1": 27683, + "failing": 20347, + "enterprises": 18016, + "focal": 21139, + "seller": 49227, + "arose": 4195, + "ref": 45731, + "scripted": 48950, + "leakage": 29335, + "replaces": 46409, + "mechanistic": 32155, + "1990s": 199, + "info": 25735, + "anxiety": 3124, + "deteriorate": 14545, + "curiosity": 11956, + "transmitting": 58719, + "ood": 38401, + "textbook": 56850, + "breaks": 7314, + "neat": 36526, + "overheads": 39086, + "traditions": 57560, + "senteval": 49813, + "assists": 4618, + "substantiate": 53651, + "inquire": 26371, + "wins": 62076, + "folds": 21252, + "usefully": 60398, + "inherit": 26206, + "vinyals": 61611, + "alzheimer": 2516, + "unavailability": 59212, + "germeval": 22681, + "kullback": 27678, + "leibler": 30012, + "chats": 8266, + "inquiries": 26372, + "suites": 53865, + "perturbing": 40796, + "cent": 7911, + "conventionally": 11019, + "broadcasts": 7359, + "merges": 32316, + "flows": 21122, + "redundantly": 45730, + "artifacts": 4486, + "counterfactuals": 11619, + "scarcely": 48660, + "amateur": 2518, + "hypernetwork": 24333, + "unexpectedly": 59439, + "identically": 24380, + "tokenized": 57318, + "diverging": 15690, + "restore": 47416, + "wasserstein": 61780, + "supervise": 53958, + "coqa": 11141, + "8k": 547, + "knowledgeable": 27652, + "relu": 46274, + "penalize": 40015, + "meaningfully": 32029, + "elmo": 16994, + "proximal": 44260, + "crowdsource": 11884, + "underline": 59260, + "infusion": 26194, + "unchanged": 59233, + "rd": 45041, + "comprehensiveness": 9807, + "distinctly": 15601, + "hyperlink": 24332, + "warranted": 61777, + "qe": 44461, + "broaden": 7360, + "linearized": 30680, + "inserted": 26376, + "college": 8994, + "economics": 16580, + "coincide": 8925, + "traversing": 58729, + "contributor": 10958, + "multimodality": 36158, + "augmentations": 4974, + "outlook": 38778, + "realistically": 45156, + "ungrounded": 59456, + "parameterize": 39684, + "permuted": 40735, + "brevity": 7317, + "attracts": 4897, + "distress": 15618, + "scaffolds": 48544, + "anger": 2858, + "surprise": 54180, + "adjunct": 1848, + "confounds": 10151, + "discoveries": 15411, + "instability": 26421, + "optima": 38524, + "thresholding": 57091, + "consumed": 10437, + "uncommon": 59239, + "questionnaires": 44765, + "hint": 23949, + "initiate": 26232, + "remarks": 46364, + "resorted": 47207, + "badly": 5499, + "behavioural": 6402, + "sacrifice": 48419, + "differentiability": 15138, + "bed": 6376, + "recovers": 45596, + "safe": 48426, + "abstractness": 777, + "tenth": 56229, + "216": 314, + "doubly": 16322, + "rarer": 45009, + "styled": 53506, + "combating": 9030, + "dub": 16465, + "cas": 7780, + "poverty": 41422, + "distractors": 15617, + "irregularities": 27037, + "remedies": 46365, + "percentages": 40054, + "understandings": 59420, + "thai": 56997, + "jin": 27157, + "udpipe": 59182, + "youth": 63144, + "loved": 31130, + "replying": 46422, + "lime": 30531, + "rationality": 45030, + "specializing": 52036, + "pop": 41153, + "sad": 48424, + "audiovisual": 4934, + "bimodal": 7139, + "formalizing": 21362, + "fight": 20787, + "attraction": 4895, + "clms": 8680, + "clm": 8679, + "conservative": 10205, + "pursued": 44419, + "misinformation": 33346, + "trustworthiness": 58832, + "purposed": 44415, + "multihead": 36052, + "steer": 52788, + "violate": 61614, + "formedness": 21371, + "noisier": 37610, + "rarity": 45010, + "teaches": 55999, + "math": 31929, + "taught": 55978, + "decouple": 13662, + "recognizable": 45548, + "offense": 38287, + "instantiate": 26440, + "layouts": 29244, + "healthy": 23523, + "eighteen": 16948, + "unleash": 59589, + "relieve": 46270, + "driver": 16435, + "nlms": 37458, + "unanswerable": 59208, + "reasoner": 45179, + "spanned": 51950, + "resolutions": 47197, + "prefers": 41794, + "undermine": 59280, + "compounded": 9755, + "pauses": 39982, + "extremes": 20168, + "bear": 6371, + "military": 33248, + "secret": 49034, + "tradition": 57508, + "professions": 43064, + "stereotyped": 52847, + "grasp": 23194, + "glue": 22863, + "justifying": 27261, + "reconstructor": 45585, + "fisher": 21066, + "verifier": 61530, + "conciseness": 9966, + "vae": 61164, + "federal": 20707, + "fb": 20464, + "doubts": 16324, + "summarised": 53872, + "finished": 21055, + "seq": 49888, + "wait": 61755, + "zh": 63185, + "privileged": 42446, + "persona": 40753, + "tropes": 58815, + "squad2": 52397, + "grand": 23085, + "desktop": 14353, + "bandwidth": 5522, + "articulate": 4484, + "cls": 8727, + "explainability": 19596, + "defect": 13764, + "defects": 13765, + "retriever": 47988, + "192": 193, + "gb": 22024, + "universality": 59549, + "inevitably": 25629, + "ofthe": 38322, + "journalistic": 27230, + "uneven": 59437, + "complaints": 9582, + "bagging": 5509, + "lp": 31231, + "noising": 37611, + "illuminate": 24513, + "appended": 3148, + "thunlp": 57099, + "twin": 59028, + "subtraction": 53680, + "physicians": 40863, + "assistive": 4617, + "toxic": 57481, + "bahdanau": 5511, + "progressive": 43124, + "underutilized": 59432, + "quarter": 44644, + "stuck": 53207, + "oxford": 39130, + "stimulate": 52849, + "electronics": 16970, + "hungry": 24300, + "carries": 7773, + "schedules": 48720, + "nd": 36500, + "notwithstanding": 37738, + "supplementing": 54109, + "peters": 40799, + "radford": 44846, + "openbookqa": 38470, + "regimen": 45801, + "stochasticity": 52859, + "factually": 20325, + "prevailing": 42224, + "elaborates": 16954, + "ok": 38325, + "ims": 25204, + "acoustics": 1439, + "manifests": 31704, + "gauging": 22009, + "manners": 31728, + "lample": 27940, + "etal": 18413, + "disclose": 15376, + "overlapped": 39090, + "miss": 33356, + "slowly": 51454, + "justifies": 27259, + "formatting": 21369, + "fulfil": 21707, + "alibaba": 2349, + "7th": 518, + "stances": 52457, + "traversal": 58727, + "resorting": 47208, + "differentiates": 15144, + "uncertainties": 59225, + "formidable": 21372, + "perturbed": 40794, + "academy": 797, + "benefiting": 6578, + "gnns": 22874, + "revolution": 48059, + "anchoring": 2852, + "unconditional": 59240, + "compliance": 9693, + "134": 130, + "fever": 20734, + "refuted": 45789, + "paralinguistic": 39640, + "ablate": 652, + "alice": 2350, + "believes": 6415, + "historic": 23954, + "stays": 52784, + "prerequisites": 41832, + "steadily": 52785, + "lend": 30021, + "testset": 56418, + "goods": 22950, + "diagnose": 14734, + "expedite": 19199, + "6000": 460, + "anticipation": 3121, + "finetuned": 21042, + "doctors": 15764, + "emr": 17410, + "fragile": 21432, + "certified": 7953, + "accumulated": 887, + "zhang": 63187, + "smoothness": 51541, + "equivalently": 18203, + "documentary": 15848, + "backend": 5489, + "priorities": 42431, + "interleaved": 26666, + "localized": 30959, + "pg": 40802, + "leaders": 29286, + "tokenizers": 57320, + "iid": 24507, + "pushes": 44429, + "nat": 36393, + "infoboxes": 25736, + "deepen": 13754, + "interchange": 26638, + "simplex": 51233, + "career": 7753, + "biaffine": 7018, + "canonicalization": 7593, + "bilateral": 7101, + "visualisations": 61674, + "3m": 388, + "rectifying": 45604, + "distills": 15587, + "activated": 1464, + "inaccuracy": 25208, + "interrelated": 26743, + "investing": 27008, + "antecedents": 3116, + "fairness": 20362, + "seg": 49070, + "manipulates": 31709, + "symbolically": 54270, + "adjustments": 1855, + "iconic": 24363, + "prospect": 43959, + "reflections": 45780, + "tech": 56016, + "squeeze": 52402, + "initializes": 26228, + "masked": 31860, + "ethnicity": 18421, + "confronts": 10154, + "automl": 5209, + "acronym": 1449, + "nq": 37968, + "unambiguously": 59204, + "cws": 12071, + "amplify": 2571, + "subordinate": 53596, + "fan": 20391, + "interfere": 26664, + "emitting": 17279, + "interwoven": 26754, + "intends": 26553, + "uttered": 61154, + "fatal": 20450, + "shallower": 50445, + "firms": 21063, + "flores": 21117, + "nepali": 36674, + "typos": 59172, + "multiplicity": 36318, + "peaks": 40003, + "orderings": 38668, + "decoupled": 13663, + "spots": 52374, + "dca": 13509, + "tagalog": 54727, + "responsibility": 47407, + "fairseq": 20363, + "characterizations": 8247, + "economical": 16579, + "western": 61947, + "sf": 50432, + "mnb": 33445, + "attentively": 4866, + "multidomain": 36049, + "citet": 8371, + "realm": 45165, + "impeding": 24625, + "chances": 8166, + "impressions": 24806, + "07": 24, + "closing": 8716, + "inefficiency": 25625, + "bypasses": 7507, + "aroused": 4197, + "rationale": 45028, + "intervene": 26749, + "suffered": 53786, + "exam": 18857, + "affordances": 2032, + "naively": 36367, + "supplements": 54110, + "loops": 31075, + "convention": 10998, + "retention": 47929, + "heterogeneity": 23615, + "lat": 29110, + "propensity": 43249, + "specifics": 52238, + "processors": 42971, + "openai": 38468, + "deviates": 14718, + "flavors": 21099, + "tasking": 55482, + "regeneration": 45798, + "federated": 20708, + "scibert": 48744, + "inspires": 26419, + "04": 21, + "gpt": 22970, + "obsolete": 38154, + "bug": 7383, + "scraping": 48942, + "ada": 1496, + "clients": 8664, + "diagnosing": 14737, + "anonymization": 3026, + "interpreters": 26740, + "tb": 55982, + "activates": 1465, + "disseminated": 15534, + "comparator": 9325, + "inclined": 25221, + "pdp": 39999, + "124": 116, + "104": 73, + "frustratingly": 21699, + "brittleness": 7347, + "atop": 4651, + "psychiatric": 44284, + "confidences": 10119, + "enumeration": 18169, + "eliminated": 16987, + "exceptionally": 18959, + "depicts": 14167, + "natively": 36407, + "registers": 45810, + "debias": 13531, + "religion": 46272, + "experimentations": 19335, + "professor": 43065, + "intelligibility": 26546, + "condense": 9988, + "quicker": 44819, + "lu": 31292, + "2019b": 288, + "wav2vec": 61786, + "confirmation": 10134, + "sparsely": 51972, + "discernible": 15371, + "occupy": 38267, + "punctuated": 44386, + "crowdworkers": 11892, + "sqa": 52392, + "103": 72, + "30x": 363, + "disputes": 15526, + "substance": 53614, + "debiased": 13532, + "shortened": 50586, + "8th": 549, + "pb": 39995, + "ernie": 18207, + "masking": 31872, + "masks": 31876, + "autoregressively": 5227, + "distracting": 15614, + "youtu": 63145, + "encapsulating": 17458, + "iqa": 27032, + "insufficiency": 26490, + "existed": 19018, + "concurrent": 9986, + "misunderstanding": 33374, + "unfortunate": 59450, + "spectrograms": 52245, + "bigru": 7100, + "dominates": 16311, + "mrs": 35910, + "hotpotqa": 24036, + "formulations": 21394, + "marginally": 31829, + "entailing": 17997, + "relief": 46263, + "darpa": 12097, + "denoise": 14060, + "denoised": 14061, + "discards": 15369, + "relabeling": 45884, + "noised": 37608, + "centralized": 7923, + "graduate": 23017, + "catalog": 7829, + "240": 330, + "gt": 23316, + "runner": 48404, + "slices": 51428, + "sales": 48433, + "heterogenous": 23625, + "3000": 360, + "flair": 21092, + "underperforms": 59283, + "35k": 375, + "detailing": 14431, + "assert": 4572, + "minimalist": 33293, + "mg": 33213, + "partners": 39902, + "unilm": 59497, + "container": 10480, + "arm": 4189, + "transfers": 58437, + "implausible": 24630, + "academics": 796, + "untrustworthy": 59755, + "trustworthy": 58833, + "monotonicity": 35823, + "ctrl": 11929, + "fosters": 21414, + "underscore": 59286, + "merit": 32318, + "extents": 19926, + "conversions": 11070, + "clicks": 8662, + "ge": 22029, + "religious": 46273, + "unfolds": 59449, + "clever": 8660, + "conceive": 9913, + "recency": 45274, + "hugely": 24081, + "ramifications": 44865, + "conflating": 10139, + "linker": 30832, + "distorted": 15611, + "stabilize": 52411, + "maml": 31685, + "personas": 40769, + "headroom": 23508, + "swap": 54243, + "infilling": 25714, + "blank": 7196, + "xnli": 63036, + "leaps": 29341, + "xl": 63028, + "extrapolating": 20149, + "vaes": 61167, + "vacancy": 61163, + "tan": 54795, + "calling": 7557, + "appreciable": 3384, + "adversaries": 1995, + "propaganda": 43238, + "disinformation": 15507, + "mechanics": 32094, + "lf": 30416, + "144": 143, + "prescribed": 41833, + "inductively": 25614, + "lifts": 30447, + "navigating": 36494, + "generically": 22634, + "crossmodal": 11878, + "endeavors": 17734, + "speculation": 52247, + "kaggle": 27264, + "latencies": 29115, + "neurological": 37117, + "masculine": 31854, + "feminine": 20732, + "continued": 10830, + "ece": 16573, + "flaws": 21102, + "humanlike": 24268, + "wolf": 62103, + "stark": 52563, + "optionally": 38578, + "isomorphic": 27054, + "ka": 27263, + "schools": 48742, + "21st": 316, + "6x": 485, + "organisations": 38681, + "poincar": 41042, + "regressive": 45824, + "ro": 48209, + "slovene": 51446, + "736": 496, + "factorizing": 20303, + "defend": 13766, + "organizes": 38691, + "invertible": 26934, + "mathcal": 31931, + "april": 3987, + "swahili": 54242, + "synchronization": 54277, + "restores": 47417, + "diversify": 15729, + "shuffle": 50813, + "706": 491, + "9th": 577, + "magnitudes": 31419, + "pivots": 40923, + "tones": 57351, + "ideologies": 24473, + "ideology": 24474, + "attacking": 4663, + "polar": 41086, + "lifestyle": 30444, + "ungrammatical": 59454, + "shareable": 50463, + "suits": 53866, + "orthogonality": 38753, + "infection": 25634, + "distractor": 15616, + "multiview": 36331, + "encouragingly": 17607, + "dominate": 16309, + "sibling": 50822, + "ethics": 18419, + "justice": 27255, + "prioritized": 42433, + "kd": 27275, + "labelers": 27774, + "strategic": 52888, + "yang": 63043, + "shortcuts": 50584, + "consequential": 10202, + "profession": 43059, + "diversification": 15727, + "decay": 13543, + "roberta": 48213, + "mlm": 33437, + "electra": 16963, + "corrupting": 11569, + "neglects": 36651, + "finetune": 21040, + "xlnet": 63032, + "ingests": 26196, + "wmt19": 62101, + "laser": 29103, + "wordpiece": 62357, + "locates": 30964, + "plagued": 40934, + "traction": 57500, + "useless": 60402, + "ht": 24045, + "exacerbate": 18845, + "exacerbates": 18847, + "mi": 33215, + "filipino": 20797, + "tl": 57272, + "privately": 42445, + "lossless": 31110, + "forefront": 21295, + "immensely": 24584, + "lee": 29996, + "hoping": 24020, + "shades": 50436, + "das": 12098, + "memorizes": 32237, + "registered": 45808, + "burst": 7502, + "stretch": 52982, + "thompson": 57053, + "antagonistic": 3114, + "politicians": 41114, + "patches": 39942, + "bypassing": 7508, + "farther": 20409, + "transe": 58348, + "mrl": 35908, + "frustrating": 21698, + "198": 195, + "imperceptible": 24627, + "urgency": 59786, + "adapter": 1556, + "untrained": 59754, + "disentangling": 15503, + "pairings": 39165, + "unparalleled": 59626, + "deliberation": 13828, + "polarization": 41089, + "liberal": 30422, + "tagsets": 54761, + "undesired": 59434, + "sustainable": 54228, + "regime": 45800, + "quad": 44462, + "xlm": 63029, + "spanbert": 51935, + "mil": 33242, + "misuse": 33376, + "depicting": 14166, + "fingerprints": 21053, + "paddlepaddle": 39137, + "checkpoints": 8278, + "enlarging": 17955, + "confer": 10107, + "diagnostics": 14741, + "reversal": 48020, + "barely": 5529, + "intending": 26552, + "caregivers": 7764, + "empowered": 17407, + "disruptive": 15531, + "dispersed": 15516, + "draft": 16380, + "rises": 48158, + "asrs": 4565, + "synthesizes": 54365, + "fills": 20804, + "classroom": 8635, + "abductive": 587, + "monolithic": 35817, + "strides": 52987, + "hallucination": 23373, + "guessing": 23325, + "muse": 36336, + "reuses": 48004, + "rand": 44867, + "provably": 43978, + "relaxes": 46139, + "hallmark": 23370, + "victims": 61576, + "texttt": 56950, + "leader": 29281, + "stationary": 52731, + "annotates": 2929, + "entanglement": 18011, + "tailed": 54765, + "recruit": 45598, + "abc": 586, + "nyt": 38073, + "skew": 51411, + "gpt2": 22992, + "retrained": 47934, + "2020": 289, + "vectorizer": 61478, + "rogue": 48300, + "ace05": 1103, + "toxicity": 57482, + "lagging": 27935, + "wnut": 62102, + "spider": 52343, + "volunteers": 61735, + "crimes": 11768, + "dm": 15755, + "142": 142, + "impedes": 24623, + "checkpoint": 8277, + "180k": 181, + "700k": 490, + "utilises": 61076, + "attributing": 4913, + "feeling": 20725, + "sans": 48514, + "mbert": 31977, + "isotropic": 27056, + "contextualizing": 10817, + "optimizations": 38560, + "leaderboards": 29285, + "alias": 2348, + "placement": 40929, + "innocuous": 26246, + "tactics": 54723, + "hypersphere": 24340, + "visit": 61645, + "overestimation": 39078, + "subclass": 53542, + "selector": 49165, + "exceeded": 18946, + "milestone": 33247, + "unfaithful": 59444, + "presumptions": 42140, + "commonsenseqa": 9240, + "270": 343, + "favorite": 20460, + "sway": 54247, + "lexeme": 30348, + "narrowed": 36389, + "pertains": 40785, + "bin": 7140, + "unnecessarily": 59618, + "nsp": 37971, + "hans": 23432, + "hotspot": 24037, + "countering": 11620, + "calm": 7559, + "propensities": 43248, + "rephrase": 46399, + "incredibly": 25480, + "resistant": 47192, + "populate": 41210, + "stylized": 53512, + "supervisions": 54101, + "qualified": 44468, + "2019a": 287, + "speculative": 52248, + "conducive": 10022, + "incidence": 25218, + "jeopardize": 27155, + "auditing": 4936, + "excels": 18956, + "medications": 32214, + "multifarious": 36051, + "followup": 21274, + "depths": 14192, + "inflated": 25716, + "infused": 26192, + "acute": 1490, + "truncated": 58828, + "june": 27247, + "band": 5520, + "117": 96, + "hazards": 23492, + "miulab": 33398, + "carbon": 7748, + "tough": 57479, + "risen": 48157, + "empowering": 17408, + "strikingly": 52990, + "sari": 48519, + "truthfulness": 58842, + "ng": 37435, + "california": 7538, + "admits": 1861, + "technically": 56021, + "tp": 57484, + "prepending": 41818, + "scholar": 48735, + "initialised": 26222, + "regulated": 45855, + "joined": 27161, + "swift": 54250, + "flag": 21089, + "initiatives": 26235, + "degenerate": 13796, + "dashboard": 12099, + "berts": 6740, + "latch": 29111, + "linearizing": 30681, + "alternation": 2495, + "vein": 61505, + "cg": 7955, + "residents": 47183, + "reinforcing": 45878, + "totalling": 57477, + "determinantal": 14549, + "trusted": 58831, + "gc": 22025, + "holistically": 23991, + "lottery": 31126, + "subnetworks": 53593, + "subnetwork": 53592, + "signify": 51018, + "unintuitive": 59505, + "bart": 5532, + "variances": 61231, + "conceptualizations": 9951, + "harms": 23465, + "giant": 22686, + "assembles": 4569, + "mlms": 33441, + "compiling": 9580, + "grave": 23196, + "hugging": 24082, + "hire": 23952, + "narrows": 36391, + "parameterizations": 39683, + "minima": 33281, + "attainable": 4670, + "plms": 41014, + "negated": 36611, + "priming": 42375, + "verifiable": 61524, + "vat": 61447, + "pitfall": 40913, + "dive": 15683, + "forgotten": 21311, + "camembert": 7563, + "specialize": 52030, + "ablating": 653, + "temporary": 56196, + "answerer": 3062, + "surged": 54159, + "odds": 38286, + "began": 6377, + "trie": 58790, + "113": 95, + "plm": 41012, + "thu": 57096, + "keg": 27283, + "biologically": 7169, + "autocomplete": 5023, + "prime": 42374, + "191": 192, + "contracts": 10864, + "chair": 7964, + "inaccuracies": 25207, + "programmatically": 43079, + "180": 180, + "dictated": 14801, + "dangerous": 12094, + "slovenian": 51447, + "aaai": 582, + "reserve": 47178, + "scrutiny": 48952, + "tighter": 57108, + "jzbjyb": 27262, + "accesses": 833, + "spark": 51960, + "endow": 17744, + "eda": 16584, + "rivals": 48169, + "pidgin": 40873, + "evoked": 18832, + "omitting": 38333, + "scholarship": 48739, + "globe": 22852, + "commonality": 9214, + "excessively": 18963, + "corrective": 11487, + "confluence": 10143, + "groundings": 23266, + "sds": 48955, + "physics": 40864, + "mutations": 36340, + "manhattan": 31700, + "2m": 350, + "facility": 20284, + "chronologically": 8355, + "underexplored": 59251, + "dt": 16455, + "1996": 204, + "concatenates": 9909, + "von": 61736, + "geometrically": 22658, + "24k": 331, + "robbert": 48212, + "counteract": 11615, + "administrative": 1858, + "purity": 44399, + "mbart": 31976, + "promptly": 43222, + "prlms": 42448, + "albert": 2250, + "usa": 59796, + "russia": 48412, + "confuse": 10156, + "uit": 59184, + "mdd": 31984, + "substituted": 53655, + "restoring": 47418, + "imitating": 24575, + "pandemic": 39245, + "causation": 7882, + "samsung": 48511, + "adapters": 1561, + "indic": 25521, + "groundtruth": 23268, + "unary": 59211, + "sidestep": 50825, + "iwslt14": 27139, + "ct": 11926, + "buried": 7501, + "momentum": 35781, + "pretext": 42141, + "signalling": 50832, + "conquer": 10192, + "discouraging": 15383, + "undertaking": 59431, + "lit": 30848, + "diverges": 15689, + "hampers": 23380, + "spacy": 51914, + "fleiss": 21104, + "pursuit": 44422, + "arabert": 3991, + "hardness": 23458, + "phobert": 40818, + "heightened": 23542, + "lenient": 30042, + "gamma": 21951, + "fallback": 20375, + "fragmented": 21435, + "asia": 4512, + "aes": 2008, + "empathetic": 17305, + "pain": 39145, + "discriminatory": 15455, + "kit": 27376, + "routines": 48366, + "march": 31814, + "118": 97, + "esperanto": 18314, + "eos": 18179, + "investment": 27009, + "stepping": 52838, + "opinionated": 38504, + "cohen": 8901, + "stanza": 52561, + "returning": 47999, + "creators": 11755, + "aesthetic": 2009, + "calibrate": 7529, + "t5": 54676, + "winogrande": 62075, + "gathers": 22004, + "nigerian": 37440, + "exclusion": 18976, + "disparities": 15514, + "learnings": 29950, + "pretrains": 42222, + "pypi": 44436, + "lex": 30347, + "10000": 68, + "igbo": 24487, + "insufficiently": 26495, + "inserts": 26381, + "adversely": 2000, + "ft": 21704, + "brute": 7377, + "ig": 24486, + "donald": 16314, + "blocked": 7221, + "nar": 36379, + "ptlms": 44298, + "emissions": 17276, + "ptlm": 44297, + "biobert": 7163, + "covid": 11667, + "gpt3": 22993, + "tokenizations": 57317, + "overfits": 39080, + "dissect": 15532, + "hinting": 23950, + "msc": 35914, + "lagged": 27934, + "bleurt": 7215, + "facilitation": 20282, + "centering": 7914, + "18k": 182, + "c3": 7517, + "causalities": 7879, + "35x": 376, + "superglue": 53928, + "convnets": 11090, + "stressed": 52981, + "coronavirus": 11167, + "11th": 102, + "milliseconds": 33266, + "palm": 39242, + "exit": 19181, + "oblivious": 38117, + "saturated": 48531, + "convince": 11088, + "amortized": 2540, + "cvae": 12070, + "engages": 17759, + "affirmative": 2028, + "slide": 51429, + "polarized": 41090, + "anli": 2866, + "disk": 15510, + "git": 22691, + "egregious": 16944, + "impair": 24615, + "prohibits": 43130, + "ukplab": 59187, + "august": 4994, + "inuktitut": 26913, + "codebase": 8870, + "solvable": 51674, + "spotlight": 52373, + "localizing": 30960, + "equipping": 18198, + "wake": 61757, + "unfiltered": 59447, + "gum": 23359, + "fixes": 21086, + "paraphraser": 39743, + "questioner": 44762, + "rationalization": 45031, + "truncation": 58829, + "safely": 48427, + "ai4bharat": 2125, + "deploys": 14180, + "21k": 315, + "infected": 25633, + "italy": 27113, + "virus": 61628, + "germany": 22680, + "maltese": 31684, + "unanswered": 59210, + "prunes": 44267, + "regex": 45799, + "hallucinate": 23371, + "rewrites": 48078, + "atmosphere": 4648, + "unmodified": 59616, + "introspective": 26905, + "datastore": 13491, + "streamed": 52961, + "favored": 20458, + "blueprint": 7232, + "corporate": 11259, + "soap": 51552, + "burdensome": 7499, + "breakdown": 7311, + "nel": 36671, + "25k": 340, + "institutional": 26473, + "dispute": 15524, + "subfields": 53545, + "rethink": 47930, + "hallucinations": 23375, + "9x": 578, + "svo": 54241, + "mae": 31412, + "gat": 21990, + "interpolate": 26703, + "decaying": 13544, + "tor": 57473, + "indiscriminately": 25558, + "td": 55985, + "informality": 25745, + "conceptualizing": 9953, + "bertweet": 6742, + "unveils": 59759, + "regulate": 45854, + "fore": 21292, + "finetunes": 21046, + "tatoeba": 55976, + "unaffected": 59201, + "misclassify": 33344, + "provider": 44176, + "mines": 33278, + "frank": 21635, + "bbc": 6362, + "streamline": 52964, + "umt": 59197, + "voluntary": 61734, + "deberta": 13530, + "distilbert": 15563, + "cord": 11142, + "afro": 2041, + "secure": 49039, + "causally": 7881, + "unreliability": 59635, + "collapsed": 8935, + "burgeoning": 7500, + "chaining": 7962, + "comprehensibility": 9760, + "sl": 51424, + "slowing": 51453, + "arduous": 4134, + "fairer": 20358, + "seventh": 50423, + "flawed": 21101, + "pioneered": 40888, + "steep": 52787, + "vicinity": 61574, + "triage": 58783, + "poly": 41116, + "wd": 61846, + "outperformance": 38834, + "salesforce": 48434, + "backdoor": 5487, + "adept": 1829, + "buffer": 7382, + "tlm": 57273, + "labse": 27868, + "sci": 48743, + "flowing": 21121, + "tlms": 57274, + "promoted": 43193, + "shapley": 50452, + "15th": 160, + "covert": 11666, + "mad": 31411, + "subfield": 53544, + "proactive": 42451, + "672": 477, + "complicate": 9695, + "rethinking": 47931, + "lite": 30849, + "personnel": 40770, + "disturbing": 15682, + "goodness": 22949, + "consumes": 10440, + "unsolved": 59661, + "pakistan": 39241, + "bind": 7158, + "confidently": 10122, + "logit": 30998, + "sap": 48516, + "streamlined": 52965, + "ls": 31234, + "pn": 41036, + "weekly": 61909, + "neurips": 37114, + "res": 46972, + "stateof": 52717, + "entangled": 18010, + "750": 504, + "labs": 27867, + "repurpose": 46833, + "harsh": 23469, + "urge": 59785, + "caution": 7893, + "tentative": 56228, + "instructed": 26476, + "huggingface": 24084, + "vgg": 61565, + "412": 403, + "closure": 8718, + "controllability": 10975, + "controllably": 10981, + "timestamp": 57257, + "delineate": 13832, + "doi": 15987, + "incurred": 25487, + "attach": 4654, + "modulo": 35777, + "intricacies": 26764, + "lc": 29247, + "fewshot": 20743, + "emulate": 17411, + "concatenations": 9912, + "garnered": 21988, + "gnn": 22872, + "tn": 57276, + "csebuetnlp": 11923, + "logits": 30999, + "underestimate": 59249, + "bf": 6997, + "enumerating": 18168, + "kazakh": 27269, + "dailydialog": 12089, + "um": 59194, + "cheaply": 8270, + "header": 23503, + "ip": 27030, + "sparsification": 51976, + "primitives": 42377, + "med": 32156, + "protecting": 43965, + "endowing": 17745, + "likelihoods": 30523, + "zhu": 63190, + "impairments": 24619, + "obtainable": 38200, + "uzbek": 61155, + "citing": 8373, + "equips": 18199, + "mismatching": 33355, + "tending": 56212, + "capsules": 7646, + "comet": 9132, + "denser": 14090, + "terminate": 56257, + "proceeding": 42750, + "shortly": 50594, + "realizes": 45162, + "mixup": 33425, + "participatory": 39828, + "pulled": 44383, + "localize": 30958, + "interpolates": 26705, + "decisive": 13577, + "prover": 43997, + "swarnahub": 54246, + "unlikelihood": 59612, + "layman": 29240, + "forth": 21395, + "coh": 8900, + "moe": 35778, + "superficially": 53926, + "inefficiencies": 25624, + "height": 23541, + "instantiations": 26443, + "1b": 212, + "backbones": 5486, + "informing": 26184, + "3k": 387, + "erroneously": 18209, + "absorb": 752, + "substituting": 53657, + "air": 2222, + "flops": 21116, + "reflective": 45781, + "138": 132, + "4k": 415, + "162": 168, + "drafting": 16381, + "bertscore": 6741, + "traverse": 58728, + "poetics": 41040, + "layerwise": 29238, + "upsampling": 59777, + "smartphone": 51531, + "sbert": 48542, + "mt5": 35927, + "101": 71, + "balances": 5517, + "cm": 8752, + "storm": 52878, + "eschewing": 18258, + "deviating": 14719, + "discretized": 15433, + "medial": 32192, + "876": 542, + "instruct": 26475, + "correlative": 11539, + "interleaving": 26668, + "performant": 40655, + "reconstructs": 45586, + "yale": 63041, + "degeneration": 13798, + "iu": 27134, + "beating": 6374, + "cam": 7560, + "chemicals": 8281, + "vi": 61566, + "oftentimes": 38321, + "sister": 51363, + "impactful": 24611, + "anisotropy": 2865, + "drivers": 16436, + "nguyen": 37438, + "oracles": 38582, + "childhood": 8292, + "intertwined": 26746, + "podcast": 41037, + "uda": 59181, + "circular": 8361, + "108": 76, + "moderator": 35699, + "tqa": 57485, + "001": 16, + "175": 175, + "trainings": 58322, + "humanity": 24267, + "realise": 45145, + "fn": 21138, + "quote": 44840, + "weaken": 61852, + "optimise": 38538, + "mediaeval": 32191, + "celebrated": 7906, + "tesseract": 56328, + "remainder": 46319, + "implying": 24675, + "refinements": 45769, + "incoherence": 25325, + "arena": 4159, + "auroc": 4995, + "869": 538, + "verdict": 61522, + "rid": 48133, + "infusing": 26193, + "proliferated": 43147, + "clock": 8681, + "manageable": 31688, + "impart": 24620, + "fid": 20747, + "xsum": 63037, + "iterate": 27116, + "384": 381, + "elicits": 16983, + "sensor": 49508, + "contend": 10510, + "delve": 13840, + "practiced": 41488, + "780": 513, + "inviting": 27012, + "rejecting": 45881, + "geometrical": 22657, + "typographic": 59163, + "quantified": 44603, + "terrorist": 56327, + "salt": 48444, + "task2": 55478, + "uncased": 59223, + "corroborates": 11566, + "subreddit": 53599, + "glean": 22817, + "popularization": 41207, + "pandemics": 39246, + "holdout": 23984, + "docker": 15761, + "119": 98, + "approved": 3973, + "lfs": 30417, + "contrastively": 10924, + "longformer": 31058, + "14th": 144, + "reminiscent": 46371, + "vl": 61697, + "pulls": 44385, + "elderly": 16960, + "dae": 12081, + "aggregator": 2082, + "retrievers": 47990, + "eacl": 16503, + "uniformity": 59487, + "ensembled": 17983, + "customizing": 12065, + "extraordinary": 20147, + "necessitate": 36536, + "40k": 400, + "subtract": 53679, + "laughter": 29169, + "council": 11609, + "indicbert": 25550, + "muril": 36335, + "5281": 436, + "footprints": 21284, + "deployments": 14179, + "unweighted": 59762, + "disconnect": 15378, + "accomplishes": 849, + "admissible": 1859, + "synthesise": 54359, + "stood": 52863, + "lily": 30530, + "adjuncts": 1849, + "summarising": 53874, + "glm": 22819, + "lowers": 31228, + "ball": 5519, + "paradox": 39634, + "audit": 4935, + "mislabeled": 33347, + "mock": 33449, + "sarcastic": 48518, + "mha": 33214, + "1980": 196, + "galician": 21948, + "isotropy": 27057, + "dubious": 16467, + "pl": 40925, + "categorise": 7852, + "unveiling": 59758, + "esp": 18261, + "curie": 11955, + "mcl": 31982, + "homes": 23993, + "discretization": 15432, + "leanings": 29339, + "transitioning": 58545, + "unrealistically": 59631, + "pie": 40874, + "competitiveness": 9573, + "anomalies": 3021, + "reconcile": 45575, + "workshops": 62924, + "immigration": 24585, + "responds": 47390, + "1991": 200, + "decently": 13549, + "amplification": 2568, + "directives": 15302, + "selectors": 49166, + "criticisms": 11801, + "bottlenecked": 7278, + "topv2": 57472, + "1200": 113, + "lossy": 31111, + "7b": 516, + "prepend": 41817, + "prompting": 43213, + "overlooking": 39099, + "endeavour": 17735, + "devising": 14728, + "infuse": 26191, + "debugging": 13537, + "adhering": 1837, + "278": 344, + "804": 525, + "surging": 54160, + "fighting": 20788, + "v4": 61161, + "2017b": 271, + "stimulated": 52851, + "cmcl": 8753, + "traceability": 57488, + "degradations": 13803, + "pegasus": 40013, + "superb": 53923, + "meantime": 32042, + "randomness": 44905, + "deployable": 14169, + "checklist": 8276, + "organisers": 38683, + "handcraft": 23397, + "hypothesizes": 24354, + "medically": 32212, + "resp": 47342, + "dietary": 14812, + "correspondingly": 11563, + "demanded": 13842, + "multiplied": 36319, + "evoke": 18831, + "blindly": 7219, + "widening": 62025, + "invest": 26935, + "quotation": 44838, + "oldest": 38329, + "randomization": 44895, + "rnngs": 48207, + "undergoing": 59255, + "dire": 15249, + "tolerant": 57348, + "compromised": 9821, + "incongruent": 25335, + "anisotropic": 2864, + "declare": 13579, + "intimate": 26755, + "threatening": 57086, + "wow": 62982, + "rucaibox": 48375, + "320": 368, + "claiming": 8383, + "mentally": 32294, + "lifetime": 30445, + "sixteen": 51372, + "inhibit": 26211, + "necessitated": 36537, + "risky": 48166, + "sam": 48445, + "dpps": 16376, + "gray": 23197, + "extraneous": 20146, + "verbalized": 61517, + "simplifications": 51237, + "laypeople": 29245, + "deadline": 13511, + "llms": 30899, + "telegram": 56164, + "formulaic": 21382, + "consolidated": 10337, + "reversed": 48024, + "generalisable": 22101, + "recombining": 45561, + "undermining": 59281, + "redundancies": 45726, + "acm": 1432, + "impeded": 24622, + "bp": 7295, + "successively": 53753, + "confusable": 10155, + "forgo": 21310, + "blocking": 7222, + "eligible": 16984, + "mtop": 35934, + "3b": 385, + "distrust": 15680, + "resumes": 47917, + "struggled": 53203, + "bigbird": 7094, + "spatiotemporal": 51991, + "fined": 21034, + "dpr": 16377, + "11k": 101, + "mediators": 32197, + "inconvenience": 25342, + "multiplex": 36314, + "pet": 40798, + "focussing": 21248, + "evidential": 18829, + "eventual": 18800, + "tydiqa": 59045, + "ns": 37970, + "incompetent": 25330, + "tt": 58849, + "caught": 7866, + "disturbance": 15681, + "ptms": 44299, + "haitian": 23364, + "rewriter": 48077, + "sustained": 54229, + "slows": 51455, + "stakes": 52453, + "twist": 59029, + "centred": 7925, + "rectify": 45603, + "competed": 9524, + "bench": 6421, + "copious": 11131, + "inflexible": 25722, + "distract": 15613, + "neutralize": 37122, + "converged": 11023, + "invite": 27011, + "daunting": 13497, + "append": 3147, + "llm": 30898, + "768": 509, + "122": 115, + "quechua": 44646, + "flatter": 21098, + "precious": 41608, + "wave": 61787, + "faulty": 20451, + "textcnn": 56852, + "sufficiency": 53797, + "347": 372, + "scans": 48655, + "metropolis": 33210, + "hastings": 23477, + "frustration": 21700, + "byt5": 7511, + "unavoidable": 59216, + "sparsified": 51977, + "4000": 398, + "locked": 30970, + "vii": 61610, + "rejected": 45880, + "damage": 12091, + "distort": 15610, + "interacted": 26593, + "thumb": 57098, + "impairment": 24618, + "pull": 44382, + "disambiguates": 15356, + "succeeding": 53693, + "burdens": 7498, + "flaw": 21100, + "uncontrollable": 59242, + "zs": 63196, + "scoping": 48781, + "fl": 21088, + "harmonize": 23463, + "unverifiable": 59760, + "poincare": 41043, + "419": 404, + "csl": 11924, + "recipients": 45483, + "democratizing": 13853, + "jd": 27151, + "resistance": 47191, + "subpopulations": 53597, + "rejection": 45882, + "152": 158, + "silhouette": 51021, + "urgently": 59789, + "hk": 23974, + "headings": 23504, + "jacobs": 27141, + "suppression": 54147, + "superfluous": 53927, + "disseminate": 15533, + "reasoners": 45180, + "artistic": 4503, + "zip": 63191, + "corporations": 11260, + "troubling": 58818, + "emd": 17256, + "540b": 442, + "530b": 439, + "davinci": 13498, + "175b": 176, + "pcl": 39997, + "mistakenly": 33372, + "ts": 58848, + "accommodating": 840, + "household": 24042, + "rose": 48345, + "vit": 61689, + "administered": 1857, + "digitisation": 15217, + "requesting": 46837, + "calibrator": 7537, + "mirroring": 33337, + "codex": 8883, + "unspecified": 59663, + "criticize": 11802, + "exiting": 19182, + "openness": 38479, + "instructgpt": 26477, + "regressions": 45823, + "tr": 57486, + "threaten": 57085, + "outbound": 38760, + "untouched": 59753, + "underestimated": 59250, + "internalize": 26692, + "obscuring": 38119, + "lowered": 31226, + "regressor": 45828, + "tcm": 55984, + "responded": 47389, + "003": 17, + "escalating": 18257, + "hf": 23635, + "scopes": 48780, + "borrows": 7272, + "cure": 11954, + "fsl": 21702, + "downsides": 16329, + "lieu": 30435, + "catastrophically": 7835, + "task1": 55477, + "recruiting": 45600, + "discourages": 15382, + "compromises": 9822, + "overlooks": 39100, + "pivoted": 40921, + "absorption": 753, + "reshaped": 47180, + "clusterings": 8749, + "korea": 27673, + "democratize": 13852, + "ruling": 48399, + "xxl": 63038, + "mmlu": 33444, + "t0": 54674, + "flan": 21093, + "avoidance": 5435, + "reasoned": 45178, + "rr": 48370, + "11b": 99, + "mauve": 31947, + "rescaling": 46974, + "19k": 208, + "dilution": 15222, + "slm": 51439, + "rote": 48347, + "fragility": 21433, + "subsume": 53663, + "commit": 9158, + "alliance": 2427, + "protective": 43967, + "spreads": 52381, + "ob": 38074, + "averagely": 5424, + "inheriting": 26209, + "minds": 33275, + "sun": 53919, + "rag": 44853, + "overwhelmed": 39122, + "ss": 52406, + "stricter": 52985, + "wildly": 62060, + "50x": 429, + "polishing": 41104, + "eyes": 20178, + "embarrassingly": 17003, + "monotonous": 35824, + "220": 320, + "asa": 4510, + "537": 440, + "regulating": 45856, + "sm": 51460, + "wrap": 62983, + "staying": 52783, + "indiscriminate": 25557, + "behaved": 6388, + "calibrating": 7533, + "1992": 201, + "semiparametric": 49474, + "dining": 15248, + "realtime": 45166, + "allegedly": 2395, + "2023": 307, + "postprocessing": 41368, + "iaa": 24358, + "timeliness": 57243, + "110m": 93, + "quadruple": 44467, + "chatgpt": 8264, + "obstruct": 38157, + "rude": 48376, + "deeplearnxmu": 13761, + "calibrates": 7532, + "rap": 44984, + "v100": 61158, + "38k": 382, + "ua": 59173, + "scattering": 48681, + "handcrafting": 23402, + "13k": 133, + "textually": 56988, + "quantize": 44641, + "thriving": 57093, + "dig": 15205, + "pubmedqa": 44381, + "leaking": 29336, + "executes": 18983, + "thoughtful": 57074, + "regressors": 45829, + "loyalty": 31230, + "unpredictability": 59628, + "obviate": 38261, + "css": 11925, + "unfaithfulness": 59445, + "warming": 61773, + "confounders": 10148, + "revolutionize": 48060, + "excludes": 18974, + "supposedly": 54145, + "progressing": 43122, + "rerankers": 46970, + "settle": 50407, + "provoke": 44258, + "beneath": 6553, + "legitimacy": 30010, + "irrelevance": 27038, + "oversight": 39105, + "landscapes": 27946, + "harmonized": 23464, + "peaked": 40002, + "fa": 20238, + "841": 533, + "relabeled": 45883, + "dam": 12090, + "commensurate": 9138, + "potent": 41378, + "worsen": 62974, + "retrospectively": 47996, + "theft": 57003, + "225": 321, + "intractability": 26762, + "interleaves": 26667, + "t1": 54675, + "delimiters": 13831, + "emergencies": 17266, + "abstractly": 776, + "making text": 31671, + "fully explored": 21729, + "explored paper": 19760, + "paper discusses": 39344, + "contribute development": 10929, + "languages propose": 28757, + "software engineering": 51638, + "human computer": 24124, + "computer interaction": 9890, + "natural language": 36412, + "language processing": 28393, + "processing nlp": 42897, + "number possible": 38027, + "possible future": 41325, + "future research": 21883, + "research directions": 47021, + "directions paper": 15298, + "english language": 17831, + "language acquisition": 27951, + "speech corpora": 52254, + "use syntactic": 60036, + "network properties": 36789, + "use global": 59901, + "global view": 22847, + "practical issues": 41466, + "issues paper": 27096, + "paper examines": 39354, + "syntactic relations": 54316, + "child language": 8291, + "grammar based": 23062, + "corpora annotation": 11176, + "perform standard": 40145, + "provide detailed": 44049, + "general model": 22069, + "model lexical": 34060, + "lexical information": 30366, + "hierarchy information": 23704, + "language used": 28568, + "model enable": 33815, + "practical approach": 41460, + "language understanding": 28544, + "understanding reasoning": 59389, + "powerful representation": 41444, + "information reasoning": 26042, + "reasoning approach": 45184, + "real life": 45104, + "language text": 28527, + "text paper": 56687, + "paper describes": 39319, + "describes experiments": 14224, + "experiments learning": 19456, + "rules using": 48398, + "machine learning": 31306, + "different ways": 15128, + "approaching problem": 3961, + "related work": 45955, + "work task": 62838, + "task results": 55346, + "background knowledge": 5492, + "prior domain": 42400, + "domain knowledge": 16093, + "knowledge available": 27402, + "available research": 5359, + "methods make": 32939, + "make use": 31606, + "morphological syntactic": 35845, + "language resources": 28475, + "resources model": 47317, + "model word": 34538, + "word similarity": 62310, + "different methods": 14989, + "learning lexical": 29709, + "lexical items": 30367, + "strengths weaknesses": 52977, + "weaknesses different": 61871, + "different word": 15131, + "particular focus": 39846, + "focus paper": 21188, + "different language": 14963, + "language resource": 28474, + "learning ability": 29499, + "change model": 8171, + "learning abilities": 29498, + "agents learn": 2064, + "learn language": 29387, + "ability model": 625, + "took place": 57354, + "zipf law": 63193, + "words language": 62443, + "frequency word": 21680, + "word semantics": 62296, + "task information": 55137, + "information retrieval": 26061, + "document given": 15797, + "user information": 60421, + "user query": 60441, + "proposed approach": 43720, + "approach does": 3494, + "data mining": 12486, + "tends focus": 56214, + "structure sentences": 53136, + "report results": 46445, + "approach unsupervised": 3729, + "unsupervised training": 59744, + "sequential data": 50036, + "sentences provided": 49775, + "text segmentation": 56756, + "statistical analysis": 52735, + "semantic annotations": 49236, + "despite importance": 14368, + "attention researchers": 4823, + "researchers field": 47157, + "multi document": 35952, + "et al": 18393, + "al 2007": 2231, + "automatic summarization": 5126, + "multiple sources": 36285, + "similarities differences": 51079, + "paper provide": 39554, + "provide initial": 44091, + "probabilistic model": 42465, + "model applied": 33568, + "alleviate problem": 2416, + "problem paper": 42619, + "paper present": 39443, + "present automated": 41851, + "automated method": 5051, + "non native": 37664, + "native speakers": 36406, + "new method": 37250, + "method based": 32398, + "based extraction": 5724, + "native english": 36400, + "english speech": 17883, + "used construct": 60126, + "result work": 47456, + "work developed": 62634, + "achieved significant": 1268, + "error reduction": 18230, + "reduction compared": 45718, + "romance languages": 48332, + "languages french": 28677, + "research development": 47018, + "written text": 63012, + "text processing": 56712, + "data structured": 12699, + "statistical approaches": 52737, + "based approaches": 5572, + "manually constructed": 31768, + "french english": 21660, + "highly complex": 23886, + "complex process": 9649, + "process requires": 42826, + "various kinds": 61348, + "like learning": 30481, + "learning words": 29945, + "communicative goals": 9256, + "starting point": 52570, + "propose enhanced": 43372, + "basic linguistic": 6331, + "new language": 37232, + "improvement achieved": 24983, + "needs paper": 36610, + "raw texts": 45038, + "polish language": 41103, + "novel technique": 37936, + "algorithm performs": 2292, + "obtained applying": 38202, + "post processing": 41351, + "processing text": 42957, + "text second": 56754, + "motivated observation": 35870, + "newly introduced": 37378, + "frame based": 21438, + "work available": 62583, + "available online": 5335, + "large scale": 28958, + "content analysis": 10512, + "corpora used": 11254, + "used assess": 60094, + "work presented": 62761, + "analysis public": 2735, + "web service": 61896, + "allows users": 2483, + "results indicate": 47675, + "long term": 31038, + "current word": 12027, + "word prediction": 62267, + "prediction systems": 41740, + "systems make": 54558, + "gram language": 23053, + "language models": 28224, + "models lm": 35197, + "estimate probability": 18373, + "word phrase": 62263, + "past years": 39940, + "models syntactic": 35572, + "syntactic semantic": 54322, + "semantic information": 49284, + "latent semantic": 29133, + "semantic analysis": 49233, + "analysis lsa": 2692, + "method shown": 32650, + "shown provide": 50745, + "provide reliable": 44121, + "information long": 25958, + "long distance": 31007, + "semantic dependencies": 49267, + "dependencies words": 14114, + "words context": 62387, + "context present": 10691, + "present evaluate": 41904, + "evaluate methods": 18470, + "methods integrate": 32905, + "based information": 5783, + "standard language": 52496, + "language model": 28151, + "model semantic": 34350, + "different forms": 14938, + "methods significant": 33037, + "significant improvements": 50881, + "improvements compared": 25058, + "model investigate": 34021, + "words expressions": 62416, + "understanding text": 59410, + "major concern": 31506, + "human beings": 24116, + "given input": 22748, + "short term": 50567, + "term memory": 56244, + "following approach": 21262, + "computational model": 9849, + "model construction": 33708, + "word meaning": 62245, + "word semantic": 62295, + "semantic similarities": 49345, + "semantic similarity": 49346, + "similarity words": 51130, + "high order": 23756, + "frequency occurrence": 21676, + "objective subjective": 38104, + "languages like": 28712, + "like english": 30469, + "english french": 17807, + "french german": 21661, + "punctuation marks": 44388, + "scientific texts": 48772, + "text output": 56682, + "modalities paper": 33470, + "use generative": 59899, + "open source": 38447, + "generating sentences": 22395, + "underlying linguistic": 59270, + "linguistic structures": 30797, + "using domain": 60667, + "domain specific": 16165, + "specific languages": 52102, + "languages based": 28606, + "implemented using": 24649, + "processing domain": 42868, + "languages used": 28815, + "used tool": 60330, + "goal paper": 22894, + "present model": 41947, + "memory based": 32242, + "based corpus": 5652, + "french corpus": 21658, + "corpus million": 11381, + "million words": 33261, + "semantic space": 49352, + "compared human": 9413, + "human data": 24131, + "tasks second": 55867, + "models semantic": 35476, + "implementation model": 24641, + "model text": 34456, + "text comprehension": 56503, + "based models": 5865, + "dictionary definitions": 14805, + "certain words": 7950, + "problem introduce": 42586, + "introduce concept": 26788, + "larger vocabulary": 29090, + "provide simple": 44130, + "measuring similarity": 32089, + "fundamental problem": 21786, + "problem natural": 42613, + "article provides": 4460, + "unifying framework": 59492, + "short context": 50551, + "proposed solution": 43897, + "quite different": 44828, + "different surface": 15089, + "closely related": 8704, + "second order": 49013, + "information used": 26142, + "used represent": 60290, + "words common": 62381, + "parallel corpora": 39642, + "algorithm based": 2264, + "based use": 6121, + "key words": 27343, + "words text": 62530, + "text documents": 56541, + "rules used": 48397, + "bilingual corpora": 7104, + "analysis allows": 2610, + "assess quality": 4582, + "avenues future": 5396, + "using statistical": 60963, + "statistical techniques": 52765, + "various techniques": 61404, + "using techniques": 60982, + "information theory": 26123, + "similar words": 51077, + "unknown words": 59560, + "multilingual parallel": 36108, + "parallel texts": 39655, + "english spanish": 17878, + "texts paper": 56908, + "paper deals": 39313, + "xml based": 63035, + "considered important": 10248, + "translation quality": 58664, + "quality assessment": 44492, + "used corpus": 60131, + "studies automatic": 53248, + "automatic translation": 5133, + "assessment based": 4590, + "paper concludes": 39296, + "text generation": 56593, + "categories based": 7843, + "based syntactic": 6076, + "semantic properties": 49322, + "applications use": 3255, + "task learning": 55168, + "second language": 49009, + "en en": 17414, + "article describes": 4448, + "common syntactic": 9203, + "does rely": 15966, + "linguistic structure": 30796, + "specific language": 52099, + "model hybrid": 33967, + "propose theoretical": 43671, + "theoretical framework": 57021, + "given corpus": 22731, + "statistical information": 52743, + "words vocabulary": 62546, + "based statistical": 6058, + "data possible": 12548, + "possible build": 41317, + "words share": 62511, + "share common": 50456, + "words tend": 62527, + "semantic classes": 49247, + "markov chain": 31841, + "transition matrix": 58540, + "probability distributions": 42474, + "distributions words": 15679, + "words clusters": 62380, + "method yields": 32711, + "method specifically": 32665, + "related languages": 45915, + "prior knowledge": 42403, + "different languages": 14967, + "hard task": 23451, + "task especially": 55052, + "unsupervised classification": 59685, + "furthermore approach": 21804, + "approach relies": 3673, + "corpus extensive": 11340, + "extensive experiments": 19877, + "readily available": 45078, + "corpus examine": 11334, + "language allows": 27959, + "allows efficient": 2462, + "long time": 31045, + "present natural": 41953, + "natural languages": 36459, + "evenly distributed": 18777, + "linguistic phenomenon": 30782, + "using artificial": 60565, + "applications like": 3217, + "automatic text": 5128, + "text summarization": 56797, + "work present": 62752, + "present framework": 41918, + "step automatic": 52800, + "subject verb": 53558, + "verb object": 61510, + "pronoun resolution": 43231, + "textual documents": 56961, + "automatic processing": 5116, + "automatic recognition": 5119, + "pre defined": 41499, + "necessary pre": 36532, + "automatic generation": 5093, + "important tasks": 24780, + "tasks natural": 55759, + "processing information": 42876, + "retrieval machine": 47950, + "machine translation": 31345, + "statistical methods": 52753, + "methods used": 33094, + "extraction methods": 20081, + "provides easy": 44193, + "logic rules": 30981, + "existing methods": 19092, + "mutual information": 36344, + "word pairs": 62261, + "data based": 12185, + "project gutenberg": 43134, + "proposed method": 43803, + "provides better": 44184, + "better result": 6955, + "methods known": 32913, + "semantic relationships": 49329, + "pairs words": 39234, + "words used": 62540, + "feature based": 20476, + "complex networks": 9642, + "networks propose": 36897, + "propose algorithm": 43288, + "algorithm uses": 2310, + "semantic network": 49303, + "network produce": 36788, + "produce new": 42994, + "new set": 37313, + "relationships words": 46087, + "words similar": 62514, + "work computational": 62605, + "computational modeling": 9850, + "complex hand": 9626, + "hand coded": 23384, + "representations introduce": 46694, + "words using": 62541, + "using large": 60756, + "large corpus": 28863, + "raw text": 45037, + "text automatically": 56450, + "automatically discover": 5163, + "discover semantic": 15408, + "semantic relations": 49327, + "relations words": 46064, + "words evaluate": 62411, + "achieves human": 1338, + "human level": 24196, + "level performance": 30174, + "alternative approaches": 2499, + "approaches able": 3751, + "able reach": 717, + "word frequency": 62209, + "power law": 41426, + "complex systems": 9667, + "human communication": 24123, + "recent research": 45340, + "frequent words": 21683, + "language different": 28028, + "different levels": 14977, + "semantic type": 49368, + "generative model": 22597, + "model behavior": 33611, + "word usage": 62329, + "patterns words": 39979, + "use words": 60076, + "article propose": 4456, + "propose automatic": 43306, + "build multi": 7414, + "multi lingual": 35984, + "lexico semantic": 30403, + "semantic resources": 49336, + "textual information": 56969, + "information contained": 25790, + "texts different": 56873, + "languages method": 28728, + "method uses": 32697, + "mathematical model": 31934, + "model called": 33641, + "represent different": 46468, + "given word": 22801, + "word model": 62247, + "model fed": 33885, + "words extracted": 62417, + "extracted corpus": 20007, + "words meanings": 62456, + "used build": 60110, + "using corpora": 60631, + "corpora different": 11192, + "languages resources": 28773, + "languages makes": 28725, + "makes possible": 31630, + "information languages": 25942, + "meaning words": 32021, + "world wide": 62967, + "wide web": 61987, + "probability word": 42482, + "conditional probabilities": 10001, + "word word": 62343, + "words present": 62483, + "confidence measure": 10114, + "translation problem": 58660, + "sequence words": 50019, + "original word": 38739, + "word level": 62225, + "level confidence": 30080, + "translation based": 58582, + "lexical features": 30364, + "features language": 20609, + "model evaluate": 33840, + "using combination": 60611, + "measures based": 32075, + "classification error": 8461, + "error rate": 18226, + "represent word": 46485, + "keeps track": 27282, + "contexts words": 10757, + "standard supervised": 52531, + "supervised machine": 54013, + "learning algorithm": 29511, + "semantic tasks": 49363, + "tasks good": 55660, + "good results": 22944, + "results task": 47877, + "task specific": 55387, + "series experiments": 50063, + "predicate argument": 41628, + "argument structures": 4175, + "structures used": 53197, + "previously used": 42356, + "used context": 60127, + "context multi": 10677, + "document summarization": 15837, + "stages stage": 52451, + "vocabulary words": 61719, + "later used": 29152, + "second stage": 49021, + "approaches order": 3887, + "order identify": 38625, + "semantic roles": 49340, + "semantics paper": 49410, + "paper presents": 39468, + "report experiments": 46436, + "special type": 52024, + "multiword expressions": 36333, + "expressions mwes": 19809, + "clear cut": 8652, + "extracted large": 20015, + "set examples": 50150, + "results data": 47567, + "data extraction": 12356, + "finite state": 21057, + "techniques results": 56133, + "methods extracting": 32860, + "models context": 34859, + "context word": 10746, + "phrase structure": 40846, + "present paper": 41985, + "sentences corpus": 49698, + "corpus generated": 11349, + "software developers": 51636, + "data structures": 12700, + "relational database": 46004, + "lexicon based": 30408, + "based nlp": 5912, + "model efficiently": 33803, + "compression techniques": 9815, + "model demonstrate": 33744, + "demonstrate possible": 13955, + "great deal": 23202, + "cognitive psychology": 8897, + "computer science": 9891, + "using word": 61027, + "context text": 10732, + "text corpora": 56515, + "measures word": 32082, + "similarity word": 51129, + "word association": 62114, + "general way": 22096, + "search engine": 48970, + "large range": 28951, + "search queries": 48979, + "queries paper": 44657, + "words phrases": 62479, + "express opinions": 19794, + "settings including": 50377, + "fundamentally different": 21796, + "develop framework": 14589, + "evaluation using": 18746, + "scale collection": 48558, + "reviews dataset": 48048, + "subtle ways": 53677, + "approach develop": 3486, + "develop novel": 14607, + "novel methods": 37869, + "methods advantage": 32742, + "evaluation provide": 18688, + "social psychology": 51601, + "different countries": 14881, + "analysis linguistic": 2690, + "linguistic typology": 30804, + "verbs adjectives": 61521, + "analysis small": 2760, + "small sample": 51495, + "propose computational": 43326, + "process model": 42805, + "model able": 33487, + "able discover": 689, + "able cope": 683, + "noisy channel": 37613, + "channel model": 8188, + "syntactic structure": 54328, + "structure sentence": 53135, + "discourse structure": 15399, + "structure text": 53141, + "text given": 56610, + "hierarchical model": 23678, + "syntactic discourse": 54302, + "generate coherent": 22183, + "arbitrary length": 4013, + "outperforms baseline": 38867, + "sentence based": 49519, + "based compression": 5631, + "sentences text": 49794, + "text results": 56747, + "results support": 47871, + "support claim": 54115, + "discourse knowledge": 15391, + "knowledge plays": 27569, + "plays important": 40999, + "important role": 24767, + "entity detection": 18101, + "detection tracking": 14537, + "task identifying": 55122, + "real world": 45121, + "named entity": 36371, + "coreference resolution": 11160, + "resolution task": 47195, + "task considering": 54971, + "mention detection": 32297, + "limited using": 30631, + "using local": 60775, + "local features": 30937, + "task simultaneously": 55379, + "able learn": 704, + "complex non": 9645, + "non local": 37661, + "develop new": 14606, + "model explore": 33860, + "explore utility": 19751, + "demonstrating effectiveness": 14051, + "task paper": 55261, + "paper propose": 39489, + "pattern based": 39959, + "based term": 6088, + "term extraction": 56235, + "extraction approach": 20048, + "originally developed": 38744, + "using morphological": 60815, + "candidates based": 7584, + "log likelihood": 30972, + "present method": 41943, + "method automatic": 32392, + "lexical units": 30394, + "bilingual lexicon": 7110, + "based linguistic": 5816, + "linguistic properties": 30785, + "different aspects": 14844, + "candidate translations": 7581, + "collect new": 8948, + "corpus web": 11457, + "web pages": 61890, + "non compositional": 37643, + "technique based": 56028, + "reach high": 45047, + "high precision": 23763, + "paper reviews": 39569, + "indian languages": 25519, + "languages paper": 28743, + "paper explores": 39369, + "solve problems": 51688, + "chinese text": 8323, + "new word": 37361, + "form words": 21342, + "entirely new": 18033, + "add new": 1588, + "new way": 37359, + "traditional approach": 57510, + "paper using": 39604, + "using rule": 60913, + "rule based": 48378, + "based algorithm": 5561, + "accuracy 60": 899, + "model reference": 34293, + "reference resolution": 45743, + "overcome difficulties": 39062, + "previous approaches": 42238, + "approaches based": 3773, + "referring expression": 45759, + "entities model": 18066, + "model accounts": 33498, + "explicitly mentioned": 19641, + "set potential": 50220, + "important feature": 24726, + "approach provides": 3659, + "fresh perspective": 21689, + "widely recognized": 61999, + "annotation schemes": 2969, + "need use": 36599, + "use language": 59922, + "linguistic annotation": 30748, + "model variety": 34526, + "variety different": 61267, + "different annotation": 14836, + "morpho syntactic": 35836, + "provide overview": 44110, + "framework demonstrate": 21488, + "demonstrate applicability": 13863, + "comparative evaluation": 9320, + "data model": 12488, + "model linguistic": 34067, + "linguistic annotations": 30749, + "use work": 60077, + "research community": 47002, + "alternative method": 2504, + "language features": 28069, + "significant number": 50900, + "used various": 60347, + "various strategies": 61398, + "tackle challenging": 54702, + "results produced": 47778, + "obtained using": 38228, + "using specific": 60957, + "article proposes": 4458, + "proposes method": 43933, + "method extract": 32503, + "dependency structures": 14139, + "interactions words": 26624, + "level words": 30233, + "dependency relations": 14136, + "relations extracted": 46032, + "parsing process": 39793, + "dependency tree": 14142, + "new light": 37239, + "dependency parsing": 14131, + "make available": 31541, + "human languages": 24194, + "mathematical reasoning": 31935, + "use speech": 60024, + "facial expressions": 20260, + "input output": 26308, + "provide new": 44104, + "new possibilities": 37284, + "information effective": 25824, + "effective efficient": 16648, + "models user": 35653, + "domain task": 16201, + "media text": 32183, + "text audio": 56444, + "audio video": 4932, + "representation framework": 46522, + "takes account": 54777, + "meaning representation": 32012, + "http www": 24050, + "processes paper": 42846, + "english words": 17906, + "words lexical": 62448, + "tasks relies": 55849, + "unsupervised approaches": 59682, + "syntactic patterns": 54313, + "precision recall": 41616, + "words task": 62526, + "task second": 55349, + "task provided": 55308, + "provided paper": 44170, + "based bilingual": 5610, + "comparable corpora": 9293, + "noun phrases": 37742, + "language evaluation": 28055, + "evaluation experiment": 18617, + "alignment algorithm": 2363, + "method low": 32567, + "important domain": 24719, + "languages approach": 28602, + "approach able": 3387, + "words words": 62549, + "work presents": 62762, + "new approach": 37131, + "terms computational": 56278, + "new computational": 37152, + "model based": 33598, + "grammar model": 23064, + "model simple": 34385, + "artificial neural": 4497, + "neural network": 36993, + "widely used": 62007, + "artificial intelligence": 4490, + "text sentences": 56762, + "sentences words": 49808, + "major problems": 31520, + "context work": 10748, + "work explores": 62661, + "important language": 24740, + "performance difference": 40290, + "based dynamic": 5693, + "segmentation algorithm": 49079, + "projection based": 43139, + "based static": 6057, + "involves training": 27021, + "text using": 56836, + "using static": 60962, + "segmentation method": 49083, + "compared best": 9390, + "chinese texts": 8324, + "studied using": 53239, + "chinese japanese": 8309, + "using semantic": 60919, + "chinese characters": 8300, + "contain information": 10463, + "information meaning": 25969, + "study analyze": 53324, + "words occur": 62470, + "word occurrence": 62252, + "research based": 46992, + "based approach": 5570, + "world text": 62964, + "source text": 51808, + "investigate new": 26972, + "given text": 22794, + "text languages": 56644, + "evolve time": 18837, + "using similarity": 60942, + "method used": 32694, + "used modern": 60242, + "recently proposed": 45454, + "method avoids": 32397, + "studies use": 53305, + "require specific": 46889, + "specific linguistic": 52105, + "linguistic knowledge": 30775, + "method allows": 32379, + "large number": 28922, + "number languages": 38015, + "languages applied": 28600, + "applied method": 3282, + "indo european": 25596, + "similar previous": 51059, + "previous studies": 42286, + "new information": 37222, + "various languages": 61352, + "method measure": 32573, + "measure degree": 32048, + "proposed new": 43869, + "levenshtein distance": 30253, + "distance words": 15550, + "words meaning": 62455, + "words corresponding": 62392, + "corresponding different": 11548, + "different meanings": 14984, + "meanings words": 32037, + "associated words": 4626, + "problem tackled": 42672, + "methodology based": 32718, + "words considered": 62386, + "meaning different": 32001, + "different domains": 14905, + "science technology": 48751, + "meaning sentences": 32016, + "sentences sentences": 49785, + "words differ": 62397, + "change time": 8173, + "focus recent": 21193, + "study explores": 53376, + "new avenues": 37140, + "study word": 53476, + "analysis context": 2638, + "pairs languages": 39200, + "languages family": 28670, + "distances words": 15552, + "words associated": 62368, + "large vocabulary": 29046, + "useful information": 60370, + "length input": 30028, + "paper tackle": 39590, + "tackle problem": 54708, + "language language": 28130, + "language spoken": 28501, + "spoken written": 52367, + "general purpose": 22085, + "formal languages": 21349, + "programming languages": 43087, + "languages languages": 28706, + "used study": 60314, + "information processing": 26024, + "processing using": 42965, + "using natural": 60828, + "called natural": 7550, + "input sentence": 26329, + "aim produce": 2157, + "grammatical structures": 23079, + "structures sentences": 53195, + "grammar rules": 23065, + "grammatical errors": 23070, + "parse tree": 39754, + "present main": 41939, + "main issues": 31445, + "machine readable": 31338, + "text encoding": 56554, + "phrases sentences": 40853, + "language expressions": 28065, + "convey information": 11083, + "information textual": 26119, + "textual entailment": 56962, + "pairs natural": 39203, + "wide range": 61964, + "range natural": 44924, + "processing applications": 42852, + "applications including": 3210, + "including question": 25290, + "question answering": 44689, + "answering summarization": 3097, + "summarization text": 53903, + "generation machine": 22487, + "summarize key": 53906, + "key ideas": 27316, + "spatio temporal": 51990, + "speech recognition": 52282, + "present investigation": 41932, + "paper approach": 39270, + "approach developed": 3487, + "developed based": 14625, + "aims identify": 2197, + "natural speech": 36466, + "speech samples": 52292, + "recognition accuracy": 45490, + "accuracy 98": 930, + "help accelerate": 23548, + "advance state": 1883, + "state art": 52574, + "extraction problem": 20096, + "current natural": 11989, + "processing systems": 42944, + "biomedical literature": 7175, + "literature paper": 30858, + "paper report": 39565, + "performance state": 40575, + "substantial improvement": 53621, + "method applied": 32383, + "significant impact": 50870, + "poses challenge": 41244, + "challenge natural": 7998, + "parsers typically": 39765, + "typically trained": 59158, + "trained large": 57764, + "scale corpora": 48559, + "non technical": 37685, + "text propose": 56715, + "propose text": 43667, + "text simplification": 56772, + "reduce complexity": 45652, + "order improve": 38626, + "improve performance": 24885, + "syntactic parsers": 54311, + "syntactic parsing": 54312, + "text mining": 56660, + "improvement performance": 25013, + "processing steps": 42943, + "evaluated method": 18536, + "method using": 32698, + "using corpus": 60632, + "sentences annotated": 49680, + "empirical results": 17337, + "results improvement": 47671, + "original sentences": 38728, + "linguistic differences": 30764, + "modern english": 35706, + "19th century": 210, + "internet users": 26698, + "various models": 61366, + "linguistic variations": 30811, + "named entities": 36370, + "problem different": 42539, + "domains natural": 16277, + "context propose": 10695, + "propose method": 43451, + "method integrates": 32547, + "based local": 5817, + "paper focus": 39377, + "model presented": 34228, + "finally method": 20868, + "method results": 32640, + "results evaluation": 47620, + "example sentences": 18881, + "automatic classification": 5073, + "received attention": 45254, + "attention nlp": 4801, + "based heuristics": 5768, + "semantic categories": 49242, + "able classify": 680, + "annotated corpus": 2880, + "space time": 51901, + "high data": 23724, + "paper developed": 39339, + "mean squared": 31995, + "squared error": 52401, + "presented approach": 42058, + "approach compared": 3455, + "systems performance": 54588, + "performance improvement": 40383, + "simulation results": 51265, + "hidden markov": 23641, + "markov models": 31849, + "successfully applied": 53741, + "applied automatic": 3264, + "automatic speech": 5124, + "speech data": 52256, + "data fact": 12358, + "alternative models": 2507, + "models better": 34781, + "better able": 6843, + "able account": 666, + "systems paper": 54579, + "present preliminary": 41987, + "understanding speech": 59403, + "hmm based": 23976, + "based speech": 6052, + "analysis uses": 2789, + "rarely used": 45008, + "used field": 60189, + "result obtained": 47444, + "real data": 45101, + "data statistical": 12694, + "recognition errors": 45503, + "demonstrate using": 13996, + "using simulation": 60944, + "data resulting": 12614, + "recognition error": 45502, + "error rates": 18229, + "taken results": 54775, + "results suggest": 47865, + "better understanding": 6987, + "data crucial": 12264, + "crucial step": 11911, + "human language": 24191, + "limits ability": 30638, + "process text": 42833, + "vector space": 61465, + "space models": 51877, + "paper surveys": 39588, + "semantic processing": 49321, + "broad classes": 7351, + "document word": 15845, + "word context": 62131, + "broad range": 7353, + "range applications": 44907, + "source project": 51790, + "new perspective": 37281, + "novel method": 37863, + "method reducing": 32637, + "resolve ambiguities": 47199, + "work provides": 62796, + "solution problem": 51658, + "problem addressed": 42499, + "provides simple": 44227, + "simple effective": 51150, + "recognition classification": 45497, + "classification named": 8504, + "important component": 24711, + "component natural": 9709, + "nlp applications": 37463, + "classification usually": 8581, + "taking account": 54787, + "classification paper": 8511, + "paper use": 39601, + "syntactic context": 54296, + "context large": 10666, + "ner task": 36683, + "semantics language": 49406, + "language provides": 28453, + "provides means": 44212, + "space propose": 51886, + "linguistic analysis": 30747, + "analysis use": 2787, + "use tools": 60050, + "semantic content": 49259, + "spatial relations": 51986, + "formal representations": 21352, + "special attention": 52013, + "representations previous": 46741, + "reported paper": 46452, + "focus language": 21173, + "experimental studies": 19328, + "studies propose": 53288, + "static dynamic": 52723, + "cross linguistic": 11861, + "cognitive processing": 8896, + "formal framework": 21346, + "research shows": 47120, + "shows language": 50787, + "language specific": 28495, + "specific properties": 52133, + "space results": 51895, + "linguistic variability": 30808, + "question models": 44737, + "models general": 35051, + "automatically detecting": 5159, + "preliminary step": 41807, + "discourse parsing": 15394, + "previous research": 42270, + "elementary discourse": 16975, + "discourse units": 15403, + "linear sequence": 30669, + "units paper": 59535, + "present simple": 42014, + "simple approach": 51135, + "able produce": 715, + "approach builds": 3439, + "standard multi": 52508, + "multi class": 35945, + "class classification": 8395, + "classification techniques": 8572, + "techniques combined": 56069, + "combined simple": 9085, + "global coherence": 22822, + "developed evaluated": 14630, + "annotations provided": 3000, + "ongoing effort": 38349, + "effort create": 16924, + "cross validated": 11872, + "performance results": 40539, + "score 73": 48808, + "categorial grammar": 7839, + "paper introduce": 39400, + "contribution paper": 10943, + "semantic model": 49300, + "model using": 34514, + "using soft": 60953, + "soft constraints": 51621, + "allow users": 2441, + "build semantic": 7425, + "semantic models": 49301, + "descriptions using": 14254, + "descriptions generated": 14252, + "account context": 877, + "using modern": 60809, + "20th century": 311, + "text corpus": 56516, + "directly used": 15341, + "used natural": 60247, + "implicit information": 24660, + "information paper": 26000, + "based global": 5755, + "experiment conducted": 19234, + "generate new": 22223, + "character recognition": 8221, + "transform text": 58441, + "text document": 56540, + "languages arabic": 28603, + "unique features": 59514, + "accuracy word": 1070, + "word recognition": 62276, + "solution using": 51665, + "network solve": 36805, + "solve problem": 51684, + "nlp challenge": 37470, + "conducted experiments": 10083, + "approach best": 3432, + "best results": 6815, + "results showing": 47839, + "analysis particular": 2713, + "work focuses": 62671, + "classical nlp": 8425, + "nlp pipelines": 37510, + "identification tasks": 24399, + "tasks various": 55959, + "articles published": 4477, + "programming language": 43086, + "formal model": 21350, + "high level": 23745, + "level abstraction": 30055, + "state transition": 52713, + "model inspired": 34007, + "word associations": 62115, + "statistically significant": 52770, + "documents containing": 15866, + "distinguish different": 15603, + "different classes": 14862, + "data perform": 12541, + "perform extensive": 40103, + "experiments benchmark": 19362, + "benchmark data": 6440, + "data sets": 12646, + "study performance": 53430, + "performance various": 40621, + "introduced measure": 26884, + "performs poorly": 40711, + "french language": 21664, + "language linguistic": 28140, + "lexicon features": 30410, + "various natural": 61368, + "patterns word": 39978, + "word use": 62330, + "human activities": 24089, + "intrinsic properties": 26772, + "relationship word": 46074, + "characteristic features": 8232, + "used develop": 60147, + "develop method": 14596, + "method quantify": 32631, + "important aspects": 24702, + "size word": 51403, + "range topics": 44941, + "aspects word": 4553, + "word frequencies": 62208, + "important word": 24792, + "shorter time": 50590, + "new concepts": 37154, + "novel words": 37957, + "words results": 62501, + "word statistics": 62315, + "provide novel": 44106, + "novel information": 37841, + "applied solve": 3294, + "problems proposed": 42725, + "method offers": 32594, + "using context": 60620, + "context driven": 10618, + "sentences experimental": 49715, + "experimental results": 19268, + "results obtained": 47747, + "algorithm presented": 2293, + "research paper": 47086, + "paper address": 39250, + "data management": 12481, + "currently available": 12031, + "major challenges": 31504, + "challenges present": 8069, + "present approach": 41847, + "research challenge": 46998, + "semantic based": 49241, + "based search": 6008, + "language technology": 28523, + "capable understanding": 7631, + "understanding semantic": 59397, + "language based": 27974, + "use semantic": 60006, + "semantic web": 49376, + "paper briefly": 39282, + "french italian": 21663, + "multi word": 36042, + "domain language": 16098, + "rapidly evolving": 44994, + "study properties": 53441, + "information related": 26049, + "set documents": 50138, + "latent variables": 29146, + "techniques use": 56147, + "original english": 38713, + "english text": 17890, + "language annotations": 27962, + "based set": 6030, + "general rules": 22089, + "existing text": 19158, + "text annotated": 56432, + "significant increase": 50891, + "increase size": 25424, + "annotation rules": 2966, + "set rules": 50243, + "knowledge annotated": 27395, + "language closely": 27990, + "try answer": 58845, + "data collected": 12216, + "paper studies": 39577, + "linguistic constraints": 30758, + "built using": 7493, + "texts written": 56948, + "written language": 63002, + "language words": 28582, + "network built": 36713, + "text natural": 56672, + "natural order": 36461, + "small world": 51513, + "scale free": 48573, + "high degree": 23725, + "surprising result": 54183, + "underlying data": 59265, + "network address": 36694, + "address problem": 1783, + "information speech": 26102, + "speech signal": 52293, + "speech based": 52253, + "based dialogue": 5679, + "dialogue systems": 14788, + "systems using": 54663, + "using phrase": 60858, + "phrase level": 40841, + "prosodic features": 43957, + "utterance level": 61137, + "features improves": 20601, + "improves model": 25135, + "model level": 34056, + "addition models": 1627, + "models used": 35651, + "used predict": 60266, + "varying levels": 61433, + "allows compare": 2454, + "based feature": 5727, + "feature sets": 20505, + "article presents": 4455, + "main features": 31438, + "features new": 20630, + "web based": 61879, + "classification scheme": 8539, + "new generation": 37214, + "dimensions including": 15244, + "small scale": 51496, + "studies address": 53243, + "real time": 45114, + "wide variety": 61979, + "relation types": 45999, + "probabilistic framework": 42458, + "framework model": 21563, + "conversational dataset": 11044, + "dataset specifically": 13097, + "task time": 55438, + "linguistic style": 30799, + "scale real": 48620, + "world setting": 62958, + "setting furthermore": 50325, + "explore potential": 19727, + "network features": 36743, + "current state": 12011, + "english machine": 17840, + "actions taken": 1462, + "translation process": 58661, + "word sense": 62297, + "sense disambiguation": 49483, + "technique using": 56051, + "using google": 60709, + "presents design": 42080, + "design development": 14273, + "spoken words": 52366, + "sequences paper": 50023, + "algorithm designed": 2267, + "word boundaries": 62120, + "speech processing": 52281, + "processing requires": 42932, + "evaluate set": 18504, + "adjacent words": 1842, + "technique improves": 56034, + "improves accuracy": 25113, + "words output": 62474, + "source code": 51743, + "language styles": 28512, + "function words": 21762, + "way achieve": 61789, + "language generation": 28083, + "generation process": 22529, + "study question": 53450, + "words large": 62445, + "corpus report": 11420, + "compositional meaning": 9744, + "sentences using": 49803, + "using data": 60642, + "based unsupervised": 6120, + "unsupervised learning": 59705, + "evaluation based": 18581, + "based word": 6134, + "disambiguation task": 15361, + "task developed": 55017, + "sentences similar": 49787, + "sentences model": 49754, + "model matches": 34094, + "syntactic complexity": 54293, + "model paper": 34173, + "paper evaluates": 39352, + "different tasks": 15094, + "translation mt": 58637, + "approach named": 3608, + "use different": 59867, + "different kinds": 14959, + "translation english": 58605, + "target language": 54823, + "spanish english": 51941, + "english languages": 17835, + "approach easily": 3499, + "easily extended": 16540, + "extended languages": 19836, + "topics covered": 57445, + "multiple answers": 36166, + "review research": 48039, + "recent work": 45366, + "work focused": 62670, + "focused primarily": 21228, + "work study": 62830, + "computational linguistics": 9845, + "compare approaches": 9330, + "gold standard": 22915, + "dataset based": 12822, + "feature analysis": 20473, + "learned models": 29468, + "models additionally": 34694, + "new semantic": 37310, + "semantic relatedness": 49325, + "wikipedia based": 62044, + "explicit semantic": 19624, + "svm classifier": 54233, + "classifier trained": 8605, + "wikipedia data": 62047, + "data various": 12770, + "word sentence": 62301, + "level propose": 30183, + "process involves": 42797, + "multi layered": 35982, + "clustering based": 8739, + "sequence alignment": 49906, + "learning based": 29529, + "based finding": 5730, + "canonical form": 7592, + "analyzing large": 2843, + "social networks": 51597, + "improving performance": 25189, + "correcting errors": 11481, + "user interface": 60429, + "available download": 5284, + "source tool": 51812, + "use novel": 59966, + "novel model": 37872, + "model sentence": 34353, + "discourse analysis": 15385, + "analysis information": 2683, + "information extraction": 25860, + "based shot": 6032, + "approach produces": 3654, + "original sentence": 38727, + "scientific literature": 48764, + "test corpus": 56339, + "main contributions": 31432, + "contributions work": 10957, + "distributional semantics": 15670, + "concept extraction": 9922, + "extraction proposed": 20099, + "proposed work": 43927, + "research areas": 46984, + "semi supervised": 49455, + "learning systems": 29902, + "proposed semi": 43892, + "supervised approaches": 53962, + "approaches used": 3949, + "used different": 60150, + "paper consider": 39302, + "consider problem": 10217, + "random variable": 44891, + "context free": 10641, + "free grammar": 21642, + "space second": 51896, + "widely studied": 62005, + "order paper": 38645, + "presents new": 42093, + "previously developed": 42332, + "special cases": 52015, + "cases paper": 7811, + "paper introduces": 39406, + "annotation framework": 2951, + "best practices": 6803, + "additional features": 1669, + "features support": 20678, + "syntactic phenomena": 54314, + "phenomena including": 40810, + "case study": 7798, + "annotated corpora": 2878, + "greatly increased": 23234, + "simple way": 51225, + "query languages": 44672, + "languages proposed": 28760, + "multiple layers": 36240, + "easy learn": 16565, + "query language": 44671, + "language particular": 28375, + "framework based": 21461, + "multiple levels": 36241, + "document language": 15801, + "language simple": 28488, + "simple intuitive": 51182, + "expressive power": 19814, + "commonly used": 9222, + "used tasks": 60324, + "tasks require": 55854, + "compare language": 9344, + "translate natural": 58551, + "language sentences": 28481, + "knowledge representation": 27591, + "representation language": 46534, + "language uses": 28569, + "lambda calculus": 27939, + "using input": 60737, + "semantic representation": 49332, + "representation words": 46606, + "languages including": 28694, + "order logic": 38631, + "answer set": 3055, + "uses syntactic": 60539, + "construct semantic": 10402, + "semantic meaning": 49298, + "parser used": 39762, + "used addition": 60081, + "learn semantic": 29418, + "use existing": 59883, + "statistical learning": 52747, + "learning approach": 29519, + "assign weights": 4601, + "multiple meanings": 36245, + "improved results": 24963, + "results standard": 47853, + "standard corpora": 52477, + "corpora natural": 11225, + "language interfaces": 28121, + "database queries": 12784, + "understand natural": 59306, + "text answer": 56434, + "answer questions": 3051, + "questions given": 44790, + "given natural": 22762, + "language achieve": 27950, + "able process": 714, + "process natural": 42808, + "able capture": 679, + "knowledge text": 27629, + "formal language": 21348, + "words sentence": 62507, + "approach uses": 3733, + "developed methods": 14633, + "methods learn": 32923, + "training sentence": 58246, + "sentence meaning": 49600, + "pairs evaluate": 39185, + "methods compare": 32792, + "compare existing": 9341, + "capable automatically": 7616, + "translating english": 58565, + "provide solutions": 44132, + "approach using": 3734, + "using probabilistic": 60874, + "able distinguish": 690, + "meanings word": 32036, + "parameters learned": 39706, + "using ontology": 60847, + "large set": 29009, + "paper investigates": 39414, + "ad hoc": 1495, + "retrieval task": 47971, + "experiments open": 19484, + "data proposed": 12572, + "proposed technique": 43911, + "data collection": 12218, + "experiments demonstrated": 19409, + "demonstrated promising": 14014, + "promising results": 43179, + "language like": 28138, + "rhetorical relations": 48087, + "play different": 40967, + "different roles": 15055, + "discourse relations": 15397, + "paper gives": 39387, + "data high": 12401, + "level correlation": 30092, + "emotional content": 17296, + "writing style": 62990, + "study conducted": 53345, + "inter rater": 26583, + "likert scale": 30527, + "results different": 47592, + "distributions different": 15675, + "different sentence": 15063, + "analysis identify": 2675, + "main objective": 31448, + "design automatic": 14263, + "scoring mechanism": 48935, + "sentence level": 49577, + "study important": 53388, + "purpose paper": 44408, + "presents method": 42090, + "based lexical": 5813, + "lexical semantic": 30383, + "method takes": 32678, + "linguistic processing": 30784, + "processing tools": 42960, + "ontology based": 38399, + "concepts semantic": 9943, + "semantic annotation": 49235, + "linguistic resources": 30791, + "resources paper": 47323, + "paper details": 39337, + "process building": 42762, + "used annotation": 60089, + "speech understanding": 52314, + "domain paper": 16128, + "lexical ambiguity": 30352, + "word different": 62138, + "specific task": 52153, + "task commonly": 54958, + "commonly referred": 9221, + "disambiguation wsd": 15363, + "sense words": 49491, + "source word": 51819, + "methods based": 32764, + "based main": 5829, + "research area": 46983, + "knowledge based": 27411, + "based method": 5846, + "corpus based": 11286, + "hypothesis word": 24350, + "requires knowledge": 46934, + "knowledge sources": 27614, + "order solve": 38652, + "sources different": 51828, + "approach combines": 3452, + "combines various": 9104, + "various sources": 61393, + "sources knowledge": 51832, + "sources information": 51831, + "information order": 25996, + "order achieve": 38588, + "achieve good": 1145, + "finally paper": 20873, + "presents comprehensive": 42077, + "comprehensive study": 9800, + "evaluation methods": 18642, + "literary texts": 30855, + "english previous": 17860, + "provide examples": 44065, + "linguistic aspects": 30750, + "previous work": 42302, + "work using": 62855, + "results previous": 47776, + "approach problem": 3652, + "proposed knowledge": 43796, + "knowledge domain": 27448, + "social behavior": 51554, + "english sentences": 17873, + "location time": 30968, + "cause effect": 7884, + "knowledge base": 27404, + "method automatically": 32393, + "language machine": 28144, + "role natural": 48316, + "language applications": 27964, + "applications information": 3211, + "proper nouns": 43254, + "technical terms": 56020, + "phoneme based": 40821, + "model proposed": 34249, + "little research": 30884, + "framework multiple": 21568, + "models operate": 35279, + "comparison models": 9499, + "models framework": 35042, + "framework using": 21622, + "modeling models": 34600, + "way improve": 61808, + "improve machine": 24869, + "models effective": 34946, + "effective models": 16676, + "explore possibility": 19725, + "tools used": 57387, + "used produce": 60276, + "word length": 62224, + "information content": 25791, + "english german": 17812, + "german spanish": 22676, + "usage words": 59810, + "frequently used": 21687, + "negative words": 36640, + "frequency information": 21673, + "communication social": 9253, + "presents novel": 42096, + "novel algorithm": 37751, + "sentiment word": 49866, + "chinese language": 8310, + "language proposed": 28450, + "proposed algorithm": 43715, + "algorithm applied": 2262, + "sentiment classification": 49833, + "using proposed": 60879, + "experiment shows": 19253, + "shows proposed": 50798, + "algorithm achieves": 2260, + "outperforming existing": 38851, + "biggest challenges": 7097, + "challenges development": 8040, + "spoken dialogue": 52354, + "spoken language": 52359, + "generation module": 22500, + "dialogue context": 14768, + "promising approach": 43160, + "generation uses": 22576, + "knowledge automatically": 27401, + "application domain": 3164, + "individual user": 25585, + "complex information": 9629, + "template based": 56174, + "based generator": 5752, + "tuned domain": 58872, + "domain method": 16110, + "method easily": 32470, + "generally perform": 22169, + "perform better": 40072, + "better models": 6921, + "models trained": 35602, + "trained tested": 57892, + "content selection": 10555, + "knowledge results": 27600, + "results provide": 47788, + "sentence structure": 49651, + "finally evaluate": 20854, + "contribution different": 10941, + "different feature": 14932, + "gram features": 23052, + "features features": 20584, + "features based": 20530, + "higher level": 23830, + "level linguistic": 30151, + "linguistic representations": 30789, + "representations paper": 46732, + "vector based": 61450, + "based representations": 5995, + "representations meaning": 46716, + "approaches problem": 3901, + "approach present": 3648, + "present different": 41893, + "framework use": 21619, + "words represented": 62498, + "meaning sentence": 32015, + "sentence represented": 49634, + "logical form": 30983, + "form paper": 21332, + "indian language": 25518, + "applications natural": 3222, + "like machine": 30482, + "translation speech": 58680, + "speech tagging": 52300, + "retrieval question": 47964, + "feature selection": 20502, + "important factor": 24723, + "using conditional": 60618, + "conditional random": 10003, + "random field": 44870, + "field crf": 20755, + "genetic algorithm": 22637, + "fold cross": 21250, + "cross validation": 11873, + "function model": 21757, + "model demonstrated": 33745, + "crf based": 11762, + "application paper": 3174, + "presents preliminary": 42100, + "socio linguistic": 51613, + "allow easy": 2435, + "personal information": 40757, + "information speakers": 26099, + "annotation tasks": 2974, + "tasks used": 55953, + "kind information": 27367, + "paper evaluate": 39351, + "evaluate various": 18516, + "new version": 37358, + "corpus used": 11454, + "used evaluation": 60173, + "evaluation campaign": 18585, + "features make": 20621, + "plain text": 40938, + "text format": 56584, + "nlp paper": 37507, + "presents work": 42111, + "work relies": 62804, + "fine grained": 20925, + "linguistic information": 30772, + "information provided": 26035, + "existing resources": 19138, + "various features": 61342, + "different types": 15108, + "freely available": 21652, + "text information": 56628, + "information presented": 26019, + "internal representation": 26688, + "representation natural": 46561, + "subject object": 53554, + "answer generation": 3035, + "generation based": 22425, + "persons organizations": 40772, + "proposed algorithms": 43716, + "information systems": 26111, + "twitter messages": 59038, + "using dataset": 60644, + "furthermore present": 21832, + "using machine": 60782, + "interactive web": 26635, + "web application": 61877, + "tagging using": 54755, + "transformation based": 58443, + "based learning": 5811, + "statistics based": 52777, + "developed using": 14640, + "critical achieving": 11776, + "achieving good": 1406, + "results method": 47714, + "lexical relations": 30378, + "preprocessing step": 41828, + "grammatical relations": 23075, + "relations sentences": 46056, + "sentences use": 49801, + "use context": 59851, + "parse trees": 39755, + "structure language": 53112, + "existing studies": 19150, + "studies used": 53306, + "comparative studies": 9322, + "studies using": 53307, + "using raw": 60894, + "problem use": 42680, + "collect corpus": 8939, + "particular attention": 39833, + "privacy issues": 42440, + "corpus statistics": 11438, + "60 000": 457, + "mandarin chinese": 31697, + "step step": 52828, + "generate semantic": 22239, + "visualization techniques": 61680, + "communication information": 9250, + "information network": 25990, + "social network": 51593, + "network analysis": 36695, + "space words": 51906, + "words related": 62494, + "theory based": 57036, + "based binary": 5612, + "introduced model": 26885, + "fully automated": 21713, + "automated processing": 5056, + "processing language": 42881, + "processing natural": 42894, + "human brain": 24119, + "bridge gap": 7319, + "knowledge given": 27494, + "given fact": 22743, + "aim build": 2141, + "given knowledge": 22751, + "knowledge state": 27618, + "state given": 52698, + "present examples": 41907, + "translation paper": 58655, + "present extension": 41913, + "analysis method": 2695, + "wikipedia page": 62051, + "evaluate method": 18469, + "method text": 32683, + "text classification": 56465, + "increases precision": 25439, + "finally provide": 20879, + "direct comparison": 15255, + "used knowledge": 60220, + "reasoning based": 45185, + "computer systems": 9893, + "sentiment analysis": 49815, + "positive negative": 41284, + "document paper": 15817, + "set human": 50165, + "human emotions": 24140, + "work model": 62725, + "model contains": 33710, + "resulting model": 47468, + "model compare": 33672, + "obtaining significant": 38237, + "improvements baseline": 25050, + "positive sentiment": 41297, + "domains paper": 16281, + "recognition asr": 45493, + "arabic language": 4000, + "work limited": 62711, + "present work": 42054, + "languages present": 28755, + "lexical analysis": 30353, + "tokens input": 57327, + "sequence tokens": 50012, + "context sensitive": 10711, + "systems approach": 54432, + "high risk": 23797, + "space representations": 51893, + "tensor product": 56225, + "representation space": 46582, + "space language": 51870, + "language evolution": 28056, + "experiments conducted": 19381, + "models different": 34914, + "different social": 15071, + "social communication": 51558, + "systems like": 54549, + "like natural": 30490, + "groups paper": 23281, + "paper claim": 39289, + "complex tasks": 9669, + "success task": 53725, + "task oriented": 55253, + "oriented dialogue": 38697, + "results confirm": 47556, + "based grammatical": 5759, + "pre processing": 41509, + "processing step": 42942, + "sentences task": 49792, + "information use": 26141, + "naive bayesian": 36366, + "tagged corpus": 54729, + "sentences experiments": 49717, + "experiments analysis": 19349, + "achieves good": 1327, + "good result": 22943, + "simple sentences": 51209, + "sentences complex": 49693, + "complex sentences": 9660, + "mimic human": 33268, + "semantic distance": 49270, + "wordnet based": 62356, + "based measures": 5840, + "measures human": 32078, + "human judgment": 24183, + "received little": 45260, + "little attention": 30870, + "resource poor": 47260, + "poor languages": 41138, + "attention paid": 4803, + "strengths limitations": 52976, + "based distributional": 5687, + "raw data": 45035, + "use knowledge": 59918, + "knowledge rich": 27602, + "new measures": 37248, + "exhaustive comparison": 18997, + "significant research": 50918, + "research problems": 47098, + "lead better": 29256, + "gives overview": 22808, + "outputs work": 39022, + "work attempt": 62577, + "multiple new": 36255, + "new features": 37203, + "pos tagging": 41233, + "answering natural": 3085, + "language questions": 28458, + "problem solve": 42660, + "answering qa": 3088, + "qa systems": 44458, + "systems perform": 54586, + "perform information": 40115, + "retrieval ir": 47948, + "overall performance": 39045, + "performance example": 40329, + "documents retrieved": 15910, + "questions paper": 44797, + "text retrieval": 56748, + "used evaluate": 60169, + "evaluate performance": 18481, + "query expansion": 44666, + "method data": 32449, + "data driven": 12298, + "words help": 62429, + "difficult questions": 15184, + "used improve": 60208, + "methods simple": 33043, + "correctly predicted": 11495, + "possible explanation": 41324, + "solve complex": 51677, + "ranging simple": 44945, + "image processing": 24542, + "human spoken": 24242, + "convert text": 11073, + "text processed": 56711, + "world applications": 62927, + "spelling errors": 52335, + "text especially": 56559, + "vocabulary size": 61712, + "input speech": 26342, + "low quality": 31167, + "quality paper": 44560, + "paper proposes": 39544, + "post editing": 41346, + "asr error": 4555, + "error correction": 18218, + "correction method": 11483, + "errors generated": 18239, + "asr systems": 4561, + "systems proposed": 54606, + "error detection": 18219, + "detection algorithm": 14457, + "generation algorithm": 22417, + "algorithm selecting": 2301, + "selecting best": 49123, + "best candidate": 6753, + "dataset contains": 12867, + "world data": 62932, + "data word": 12775, + "word sequences": 62307, + "extracted web": 20026, + "having large": 23489, + "vocabulary experiments": 61702, + "different speakers": 15075, + "asr errors": 4556, + "research improve": 47053, + "paper aims": 39262, + "shed light": 50524, + "especially social": 18301, + "networks provide": 36901, + "new insights": 37225, + "initial experiments": 26214, + "experiments machine": 19460, + "learning framework": 29651, + "framework various": 21626, + "various aspects": 61303, + "content features": 10523, + "paper review": 39568, + "translation systems": 58685, + "cross language": 11827, + "understanding ways": 59417, + "information achieves": 25753, + "research question": 47107, + "way information": 61812, + "end develop": 17629, + "analysis framework": 2669, + "framework build": 21464, + "build corpus": 7392, + "information able": 25748, + "able control": 682, + "significant differences": 50863, + "use common": 59844, + "word choices": 62127, + "easy apply": 16558, + "apply new": 3343, + "new contexts": 37157, + "construct large": 10390, + "lexical database": 30361, + "lexical semantics": 30385, + "used extensively": 60182, + "propose alternative": 43290, + "used computational": 60120, + "text experiments": 56566, + "experiments performed": 19487, + "performed using": 40667, + "using known": 60747, + "known benchmarks": 27655, + "benchmarks results": 6541, + "results compared": 47545, + "compared systems": 9464, + "systems use": 54661, + "measuring semantic": 32088, + "difficult evaluate": 15165, + "information represented": 26055, + "optical character": 38522, + "paper based": 39279, + "source document": 51763, + "method detecting": 32459, + "real word": 45120, + "approach use": 3730, + "data set": 12642, + "vocabulary word": 61718, + "reliable source": 46253, + "dictionary based": 14804, + "exploit information": 19655, + "information extracted": 25858, + "set experiments": 50153, + "written different": 62996, + "future developments": 21869, + "output text": 39004, + "context based": 10594, + "database containing": 12783, + "terms word": 56323, + "suggest possible": 53827, + "process experiments": 42780, + "experiments carried": 19368, + "significant improvement": 50873, + "measures semantic": 32080, + "similarity using": 51128, + "compare results": 9364, + "based similarity": 6036, + "similarity measures": 51104, + "human judges": 24182, + "nlp systems": 37528, + "respectively paper": 47375, + "lexical knowledge": 30368, + "qualitative quantitative": 44478, + "present statistical": 42024, + "analysis english": 2657, + "english texts": 17891, + "texts wikipedia": 56945, + "address issue": 1762, + "language complexity": 27999, + "simple english": 51166, + "english wikipedia": 17904, + "language limited": 28139, + "limited vocabulary": 30632, + "detailed analysis": 14412, + "speech tags": 52304, + "simple complex": 51143, + "shorter sentences": 50589, + "language varieties": 28575, + "complexity language": 9679, + "finally investigate": 20865, + "propose new": 43494, + "evaluation metric": 18643, + "metric called": 33112, + "edit distance": 16592, + "inter annotator": 26575, + "annotator agreement": 3010, + "improvement state": 25026, + "art propose": 4361, + "propose using": 43696, + "evaluate automatic": 18441, + "terms human": 56294, + "human performance": 24214, + "world natural": 62949, + "language world": 28585, + "web data": 61885, + "data paper": 12529, + "introduce new": 26831, + "new type": 37351, + "uses semantic": 60533, + "lexical unit": 30393, + "overview existing": 39112, + "introduce semantic": 26857, + "use present": 59978, + "architecture approach": 4026, + "human readable": 24226, + "recent years": 45381, + "years growing": 63061, + "research nlp": 47082, + "nlp tasks": 37531, + "tasks particular": 55793, + "evaluation systems": 18733, + "present new": 41958, + "methodology allows": 32716, + "automated analysis": 5035, + "low cost": 31137, + "central idea": 7919, + "setting paper": 50338, + "scientific research": 48769, + "research results": 47117, + "confirm effectiveness": 10129, + "effectiveness approach": 16766, + "spell checker": 52332, + "words perform": 62478, + "based regular": 5986, + "suffer data": 53761, + "data sparseness": 12681, + "sparseness problem": 51975, + "words including": 62435, + "proper names": 43253, + "specific terms": 52157, + "errors text": 18251, + "proposes new": 43937, + "new context": 37156, + "spelling correction": 52334, + "digital text": 15215, + "documents approach": 15856, + "set consists": 50125, + "gram word": 23060, + "generator based": 22615, + "based character": 5614, + "character gram": 8203, + "gram model": 23057, + "model generates": 33933, + "conducted set": 10095, + "set text": 50264, + "documents different": 15870, + "study proposed": 53445, + "lower computational": 31208, + "computational cost": 9839, + "aim paper": 2155, + "text knowledge": 56636, + "domain context": 16031, + "context knowledge": 10663, + "limited data": 30577, + "set domain": 50140, + "linguistic data": 30763, + "corpus corpus": 11310, + "corpus collection": 11295, + "collection text": 8986, + "used test": 60327, + "test data": 56340, + "set evaluate": 50146, + "evaluate nlp": 18479, + "systems available": 54438, + "available corpus": 5273, + "corpus domain": 11328, + "representative corpus": 46795, + "corpus evaluation": 11333, + "major components": 31505, + "knowledge model": 27552, + "model evaluation": 33844, + "lexical resources": 30381, + "straight forward": 52884, + "identification extraction": 24388, + "framework semantic": 21595, + "analysis corpus": 2639, + "based context": 5641, + "free grammars": 21643, + "recently explored": 45425, + "use simple": 60017, + "simple modification": 51196, + "important aspect": 24701, + "ranked list": 44957, + "number characters": 37987, + "current study": 12015, + "data best": 12188, + "observed data": 38143, + "data empirically": 12318, + "naive bayes": 36363, + "address task": 1803, + "task assigning": 54918, + "parsing task": 39799, + "information propose": 26029, + "topic model": 57414, + "documents languages": 15891, + "languages multilingual": 28735, + "latent topics": 29142, + "multilingual corpora": 36072, + "related documents": 45899, + "provides new": 44215, + "new framework": 37210, + "topic models": 57418, + "using topic": 60992, + "corpora introduce": 11210, + "present ensemble": 41903, + "ensemble method": 17976, + "method capable": 32410, + "text tokens": 56814, + "tokens use": 57341, + "task solve": 55383, + "related high": 45910, + "high quality": 23768, + "selection task": 49154, + "task finding": 55084, + "large collection": 28855, + "text present": 56704, + "parsing accuracy": 39770, + "accuracy 97": 929, + "based ensemble": 5704, + "directly predicting": 15332, + "efficiency method": 16845, + "method demonstrates": 32455, + "confidence predictions": 10116, + "provides valuable": 44233, + "mental lexicon": 32291, + "number syllables": 38041, + "relation words": 46001, + "words paper": 62475, + "tool based": 57359, + "psycho linguistic": 44286, + "target word": 54856, + "nature language": 36480, + "existing natural": 19114, + "processing methods": 42890, + "methods limited": 32929, + "limited scope": 30612, + "understanding aims": 59322, + "understanding language": 59357, + "focused understanding": 21232, + "language using": 28570, + "languages english": 28651, + "texts second": 56922, + "methods analyze": 32748, + "given sentences": 22781, + "sentences based": 49684, + "based sentence": 6020, + "sentence patterns": 49618, + "words methods": 62458, + "able understand": 730, + "learn new": 29403, + "new words": 37362, + "words addition": 62361, + "future work": 21899, + "propose general": 43396, + "general method": 22068, + "contrary previous": 10872, + "presented method": 42060, + "method does": 32466, + "highly structured": 23919, + "datasets obtained": 13352, + "obtained human": 38213, + "human annotation": 24101, + "annotation effort": 2945, + "unannotated corpus": 59206, + "document collection": 15775, + "input method": 26297, + "input corpus": 26262, + "examples include": 18911, + "latent dirichlet": 29120, + "dirichlet allocation": 15344, + "similarity measure": 51102, + "measure word": 32065, + "method generate": 32515, + "types including": 59092, + "related word": 45953, + "automatically generating": 5179, + "represent semantic": 46479, + "data extracted": 12354, + "language texts": 28528, + "novel approach": 37757, + "using method": 60801, + "semantic lexical": 49295, + "analysis text": 2777, + "represented using": 46808, + "universal language": 59541, + "language translation": 28538, + "translation method": 58628, + "important understanding": 24789, + "public opinion": 44325, + "paper adopt": 39258, + "problems involving": 42704, + "goal propose": 22898, + "prior work": 42418, + "text text": 56807, + "text written": 56848, + "facilitate study": 20277, + "analyses suggest": 2607, + "occur frequently": 38269, + "popular text": 41193, + "work data": 62617, + "dynamically generated": 16497, + "regular expressions": 45832, + "larger corpus": 29071, + "yields improved": 63124, + "improved performance": 24956, + "performance previous": 40496, + "requires training": 46955, + "training data": 57968, + "data allows": 12131, + "available https": 5307, + "https github": 24058, + "github com": 22693, + "architecture text": 4090, + "unlabeled data": 59564, + "data used": 12759, + "used learn": 60226, + "learn representations": 29415, + "representations used": 46782, + "features supervised": 20677, + "example text": 18882, + "text applications": 56436, + "high dimensional": 23727, + "dimensional space": 15235, + "size vocabulary": 51402, + "learn low": 29394, + "low rank": 31170, + "left right": 29998, + "right contexts": 48139, + "step procedure": 52823, + "sample complexity": 48447, + "single step": 51339, + "efficacy approach": 16831, + "representations learned": 46705, + "tasks pos": 55800, + "superior performance": 53935, + "performance neural": 40453, + "neural probabilistic": 37085, + "probabilistic language": 42463, + "gram models": 23058, + "long training": 31046, + "training times": 58301, + "datasets training": 13461, + "computationally expensive": 9874, + "propose fast": 43386, + "simple algorithm": 51133, + "algorithm training": 2307, + "noise contrastive": 37595, + "contrastive estimation": 10896, + "penn treebank": 40024, + "reduces training": 45699, + "order magnitude": 38632, + "quality resulting": 44575, + "resulting models": 47469, + "importance sampling": 24688, + "far fewer": 20399, + "approach training": 3725, + "training neural": 58188, + "neural language": 36961, + "word corpus": 62134, + "word vocabulary": 62342, + "obtaining state": 38238, + "art results": 4369, + "microsoft research": 33233, + "sentence completion": 49529, + "challenge dataset": 7974, + "dataset multilingual": 13002, + "multilingual text": 36126, + "classification problems": 8521, + "languages share": 28782, + "labeling cost": 27780, + "cost training": 11596, + "training classification": 57950, + "classification model": 8494, + "model individual": 33998, + "individual language": 25570, + "language important": 28101, + "language classification": 27989, + "paper develop": 39338, + "multi view": 36039, + "view learning": 61598, + "learning method": 29726, + "method cross": 32447, + "classification method": 8492, + "method built": 32408, + "parallel documents": 39651, + "empirical study": 17350, + "study large": 53404, + "classification tasks": 8569, + "tasks shows": 55887, + "method consistently": 32436, + "consistently outperforms": 10301, + "methods domain": 32829, + "domain adaptation": 15995, + "adaptation methods": 1527, + "methods multi": 32949, + "learning methods": 29730, + "fully annotated": 21712, + "paper discuss": 39343, + "framework evaluating": 21509, + "nlp models": 37501, + "models particular": 35307, + "approach help": 3552, + "error analysis": 18213, + "intrinsic extrinsic": 26770, + "extrinsic evaluations": 20172, + "evaluations new": 18764, + "novel learning": 37855, + "probabilistic generative": 42459, + "model composed": 33685, + "composed multiple": 9734, + "layer layer": 29185, + "fine tuning": 20977, + "tuning step": 58961, + "various state": 61396, + "art supervised": 4416, + "supervised learning": 53993, + "learning algorithms": 29512, + "support vector": 54131, + "vector machine": 61453, + "machine svm": 31342, + "maximum entropy": 31968, + "entropy model": 18165, + "bayes classifier": 6348, + "principal component": 42380, + "component analysis": 9701, + "words given": 62427, + "surrounding context": 54196, + "context words": 10747, + "words knowledge": 62441, + "conducted experiment": 10082, + "study presents": 53438, + "approach based": 3427, + "based clustering": 5622, + "study uses": 53472, + "uses combination": 60497, + "search space": 48984, + "space work": 51907, + "work automatic": 62581, + "work based": 62585, + "score 82": 48817, + "provides powerful": 44220, + "usually used": 61073, + "sentence structures": 49652, + "relations sentence": 46055, + "recent advance": 45277, + "questions asked": 44774, + "similar different": 51037, + "turing test": 58979, + "new level": 37238, + "language approach": 27965, + "method inspired": 32545, + "algorithm allows": 2261, + "average length": 5411, + "russian english": 48414, + "average word": 5419, + "growing rapidly": 23301, + "content words": 10573, + "words contribute": 62388, + "length word": 30039, + "desired language": 14348, + "using computer": 60616, + "people use": 40041, + "language english": 28050, + "resolve issue": 47201, + "input text": 26345, + "significant success": 50927, + "available natural": 5330, + "language parsing": 28374, + "focus problem": 21191, + "analyze different": 2811, + "biomedical domain": 7173, + "corpora method": 11222, + "level translation": 30227, + "large variety": 29044, + "words generate": 62424, + "overall quality": 39047, + "french translation": 21666, + "process reducing": 42824, + "word form": 62205, + "language highly": 28097, + "aims provide": 2211, + "practical applications": 41458, + "applications nlp": 3225, + "recent literature": 45318, + "analyze challenges": 2807, + "survey current": 54204, + "current future": 11978, + "science research": 48750, + "general paper": 22079, + "possibility using": 41312, + "using linear": 60771, + "random fields": 44872, + "fields crf": 20778, + "corpus present": 11406, + "approach detecting": 3484, + "makes use": 31642, + "allows identify": 2469, + "identify words": 24452, + "specific word": 52174, + "based methods": 5847, + "methods automatic": 32761, + "grammatical structure": 23078, + "number sentences": 38036, + "example user": 18883, + "important sentences": 24771, + "highlight important": 23863, + "sentence containing": 49532, + "topic sentence": 57429, + "information overload": 25999, + "meta data": 32329, + "multiple dimensions": 36199, + "help better": 23553, + "better understand": 6986, + "paper build": 39284, + "build models": 7413, + "introducing new": 26901, + "corpora consisting": 11184, + "consisting million": 10316, + "evaluate models": 18474, + "models prediction": 35343, + "prediction tasks": 41744, + "tasks use": 55952, + "use model": 59950, + "second use": 49028, + "sentences best": 49685, + "datasets consider": 13191, + "model recover": 34287, + "evaluation model": 18656, + "art approaches": 4214, + "scale datasets": 48564, + "world datasets": 62935, + "datasets introduce": 13303, + "introduce model": 26824, + "sentiment words": 49867, + "words automatically": 62370, + "automatically learn": 5187, + "aspect specific": 4535, + "specific sentiment": 52146, + "present study": 42028, + "study relationship": 53452, + "networks using": 36922, + "using novel": 60845, + "novel corpus": 37793, + "twitter users": 59042, + "users prior": 60474, + "use linguistic": 59935, + "level language": 30144, + "based work": 6142, + "language does": 28035, + "does match": 15958, + "classifier model": 8600, + "significantly fewer": 50958, + "gender language": 22037, + "computational methods": 9848, + "methods social": 33045, + "offers new": 38303, + "analysis dataset": 2642, + "dataset comprising": 12856, + "using latent": 60762, + "latent vector": 29147, + "autoregressive model": 5221, + "model aggregate": 33549, + "thousands words": 57081, + "identify high": 24424, + "united states": 59527, + "states model": 52721, + "model robust": 34330, + "results analysis": 47499, + "plays central": 40989, + "central role": 7921, + "single unified": 51354, + "american english": 2536, + "english paper": 17853, + "paper demonstrate": 39316, + "discuss results": 15481, + "analysis named": 2701, + "entity recognition": 18126, + "present findings": 41916, + "analyses paper": 2600, + "method extracting": 32504, + "allows generation": 2467, + "translation pairs": 58654, + "words source": 62517, + "ranking methods": 44972, + "based translation": 6112, + "based features": 5728, + "features used": 20689, + "used select": 60295, + "select best": 49100, + "translation obtain": 58650, + "obtain average": 38160, + "average precision": 5415, + "language pairs": 28366, + "pairs english": 39183, + "historical documents": 23959, + "modern chinese": 35702, + "research challenges": 46999, + "issues using": 27106, + "lexical syntactic": 30389, + "semantic pragmatic": 49319, + "language information": 28115, + "computer scientists": 9892, + "prove useful": 43983, + "contexts using": 10755, + "particularly useful": 39893, + "semantic concept": 49253, + "work shown": 62822, + "model effective": 33797, + "effective semantic": 16693, + "transition based": 58537, + "based dependency": 5676, + "dependency parsers": 14129, + "different representations": 15051, + "use classifier": 59841, + "experiments multilingual": 19472, + "earlier work": 16508, + "low resource": 31172, + "support hypothesis": 54120, + "vector machines": 61455, + "parsing performance": 39791, + "size training": 51399, + "training set": 58250, + "english based": 17776, + "based recently": 5981, + "recently emerged": 45421, + "proposed improve": 43794, + "popular approach": 41155, + "approach solve": 3698, + "need able": 36542, + "look ahead": 31064, + "way existing": 61801, + "methods algorithms": 32745, + "specifically designed": 52193, + "parsing approach": 39771, + "large subset": 29019, + "shows approach": 50762, + "approach practical": 3642, + "statistical machine": 52748, + "translation smt": 58678, + "parallel corpus": 39644, + "pair languages": 39152, + "term goal": 56238, + "problem study": 42670, + "parallel sentences": 39653, + "sentences wikipedia": 49806, + "using pos": 60864, + "main focus": 31441, + "syntactic features": 54303, + "features languages": 20610, + "human evaluation": 24142, + "evaluation performed": 18670, + "shows promising": 50795, + "results comparison": 47552, + "comparison baseline": 9491, + "baseline language": 6177, + "language learning": 28135, + "learning language": 29694, + "language learn": 28133, + "vast number": 61441, + "method finding": 32508, + "relative frequencies": 46097, + "propose way": 43702, + "use techniques": 60043, + "language speech": 28499, + "speech information": 52265, + "information included": 25917, + "conclude discussion": 9969, + "language use": 28567, + "use paper": 59970, + "perform natural": 40124, + "processing tasks": 42946, + "processing techniques": 42956, + "techniques based": 56065, + "based words": 6141, + "corpus using": 11455, + "using lexicon": 60769, + "question based": 44722, + "chinese social": 8319, + "social media": 51567, + "writing systems": 62992, + "use visual": 60072, + "knowledge understanding": 27638, + "understanding current": 59336, + "current events": 11977, + "analysis popular": 2718, + "specific problem": 52130, + "problem difficult": 42540, + "new ways": 37360, + "paper new": 39428, + "uses language": 60516, + "new efficient": 37179, + "efficient method": 16884, + "experimental evaluations": 19264, + "efficiency improvements": 16843, + "compared existing": 9405, + "existing techniques": 19156, + "technique used": 56049, + "used large": 60224, + "syntactic analysis": 54292, + "solution based": 51651, + "based idea": 5777, + "idea using": 24375, + "groups words": 23284, + "parts sentence": 39907, + "idea based": 24368, + "structure complex": 53093, + "set sentences": 50244, + "deep learning": 13700, + "learning models": 29750, + "considerable success": 10236, + "success natural": 53712, + "processing deep": 42866, + "deep architectures": 13683, + "representations lead": 46703, + "lead improvements": 29263, + "improvements various": 25110, + "various tasks": 61403, + "tasks difficult": 55590, + "difficult interpret": 15170, + "particularly difficult": 39879, + "difficult paper": 15179, + "makes sense": 31635, + "present analysis": 41844, + "known model": 27662, + "model produces": 34239, + "structural representations": 53083, + "representations text": 46770, + "certain tasks": 7946, + "significantly reduced": 51011, + "classification accuracy": 8427, + "using human": 60731, + "online content": 38356, + "methods identify": 32891, + "identify entities": 24421, + "entities unstructured": 18088, + "unstructured text": 59671, + "text machine": 56652, + "learning knowledge": 29691, + "extraction systems": 20117, + "web scale": 61892, + "massive text": 31888, + "corpora present": 11233, + "10 times": 52, + "times faster": 57249, + "nlp pipeline": 37509, + "pipeline consists": 40897, + "high performance": 23758, + "close state": 8691, + "art speech": 4411, + "speech pos": 52277, + "pos tagger": 41231, + "based named": 5895, + "entity recognizer": 18137, + "goal research": 22900, + "way present": 61827, + "applications demonstrate": 3198, + "demonstrate power": 13957, + "solving problems": 51705, + "problems natural": 42713, + "does require": 15968, + "large sets": 29010, + "sets training": 50309, + "used solve": 60308, + "problems field": 42701, + "field nlp": 20764, + "nlp problems": 37515, + "problem predicting": 42626, + "piece text": 40877, + "text based": 56452, + "produce text": 43015, + "text containing": 56510, + "results research": 47811, + "predict correct": 41637, + "time natural": 57182, + "summarization based": 53877, + "highly relevant": 23910, + "relevant sentences": 46234, + "approach solving": 3699, + "problems nlp": 42717, + "nlp knowledge": 37492, + "knowledge bases": 27412, + "applications benefit": 3186, + "easily accessible": 16532, + "relational knowledge": 46010, + "lack knowledge": 27896, + "knowledge new": 27560, + "new entities": 37188, + "entities relations": 18078, + "relations work": 46065, + "large unannotated": 29037, + "relationships entities": 46079, + "entities based": 18038, + "introduce neural": 26829, + "neural tensor": 37102, + "tensor network": 56223, + "model predicts": 34226, + "model improved": 33979, + "entity representations": 18144, + "representations word": 46788, + "word vectors": 62339, + "vectors learned": 61490, + "learned unsupervised": 29487, + "unsupervised fashion": 59696, + "entities present": 18072, + "model generalizes": 33927, + "outperforms existing": 38896, + "existing models": 19108, + "models problem": 35359, + "accuracy 75": 909, + "work deep": 62621, + "learning neural": 29778, + "neural networks": 37032, + "representations input": 46691, + "recent progress": 45337, + "progress field": 43097, + "efficient effective": 16869, + "effective methods": 16673, + "method efficient": 32475, + "important neural": 24747, + "network representations": 36795, + "representations method": 46717, + "method consists": 32439, + "propose novel": 43520, + "model architectures": 33577, + "continuous vector": 10854, + "vector representations": 61462, + "representations words": 46791, + "large data": 28865, + "quality representations": 44572, + "similarity task": 51124, + "compared previously": 9440, + "previously best": 42330, + "best performing": 6794, + "based different": 5680, + "types neural": 59105, + "observe large": 38136, + "large improvements": 28890, + "improvements accuracy": 25045, + "learn high": 29378, + "quality word": 44599, + "billion words": 7122, + "provide state": 44133, + "art performance": 4324, + "performance test": 40596, + "test set": 56369, + "semantic word": 49377, + "word similarities": 62309, + "native language": 36402, + "problems language": 42706, + "review recent": 48037, + "sufficient data": 53801, + "data learn": 12459, + "linguistic input": 30773, + "ability learn": 619, + "learn linguistic": 29390, + "language production": 28444, + "form meaning": 21326, + "simplicity approach": 51235, + "learn specific": 29427, + "linguistic constructions": 30759, + "al 2010": 2232, + "new learning": 37237, + "outperform existing": 38793, + "distributional models": 15665, + "models face": 35012, + "nlp text": 37556, + "source tools": 51814, + "using python": 60886, + "makes easy": 31621, + "specific tasks": 52155, + "tasks long": 55731, + "document corpus": 15781, + "corpus level": 11371, + "analytical results": 2801, + "easy use": 16568, + "web interface": 61888, + "english portuguese": 17858, + "portuguese language": 41226, + "features text": 20682, + "text extraction": 56577, + "token frequency": 57290, + "text search": 56753, + "conversational agent": 11038, + "urgent need": 59788, + "order test": 38656, + "application real": 3177, + "real users": 45118, + "wizard oz": 62096, + "capture interactions": 7685, + "understand people": 59308, + "rapid development": 44988, + "development natural": 14689, + "non experts": 37655, + "learning paradigm": 29796, + "understanding natural": 59370, + "classification problem": 8520, + "answers questions": 3112, + "art domain": 4250, + "domain approach": 16021, + "approach used": 3731, + "language interface": 28120, + "describes submission": 14233, + "based tree": 6113, + "using publicly": 60884, + "publicly available": 44334, + "accomplish task": 846, + "provided training": 44174, + "data built": 12196, + "translation model": 58630, + "language pair": 28365, + "approach work": 3741, + "carried experiments": 7770, + "experiments english": 19427, + "english italian": 17826, + "italian english": 27109, + "urdu language": 59784, + "improvement baseline": 24986, + "baseline bleu": 6159, + "kendall tau": 27285, + "detailed description": 14418, + "reproduce results": 46823, + "results possible": 47768, + "possible directions": 41322, + "limited languages": 30597, + "french spanish": 21665, + "problem effectively": 42547, + "using comparable": 60613, + "multilingual information": 36086, + "key issues": 27322, + "widely accepted": 61989, + "fact different": 20289, + "based metrics": 5854, + "proposed paper": 43875, + "paper method": 39425, + "cosine similarities": 11574, + "based ranking": 5972, + "experiments results": 19514, + "performs better": 40698, + "better traditional": 6981, + "frequency based": 21670, + "expressions used": 19812, + "used specific": 60309, + "specific contexts": 52061, + "fields machine": 20782, + "translation information": 58620, + "retrieval information": 47946, + "extraction text": 20122, + "text categorization": 56462, + "bilingual dictionary": 7107, + "translation cross": 58592, + "retrieval paper": 47961, + "paper addresses": 39255, + "addresses issues": 1812, + "alignment based": 2366, + "based multi": 5888, + "multi level": 35983, + "approach method": 3600, + "method computes": 32429, + "candidate sentence": 7579, + "words usually": 62542, + "enhance performance": 17918, + "corpus paper": 11398, + "experiment results": 19247, + "results multi": 47731, + "better performance": 6927, + "performance existing": 40330, + "existing method": 19090, + "graph language": 23145, + "network proposed": 36791, + "text structure": 56790, + "computer assisted": 9887, + "language learners": 28134, + "types language": 59097, + "comprehension tasks": 9777, + "field natural": 20762, + "challenges faced": 8047, + "key success": 27336, + "work natural": 62731, + "nlp rely": 37520, + "represent linguistic": 46476, + "linguistic phenomena": 30781, + "trees paper": 58769, + "graphs using": 23193, + "using graph": 60714, + "overcome problem": 39070, + "problem document": 42543, + "important terms": 24782, + "terms using": 56321, + "paper different": 39342, + "different stages": 15076, + "stop words": 52866, + "unique words": 59518, + "documents using": 15926, + "using term": 60985, + "tf idf": 56990, + "based minimum": 5856, + "approach reduce": 3669, + "selection method": 49144, + "accuracy experiments": 973, + "present results": 42000, + "research goal": 47046, + "goal automatically": 22877, + "automatically creating": 5156, + "available resources": 5364, + "resources natural": 47319, + "tasks machine": 55734, + "spanish language": 51945, + "existing english": 19065, + "english resources": 17865, + "approach consists": 3468, + "word senses": 62300, + "results comparing": 47551, + "wikipedia article": 62042, + "extracted wikipedia": 20027, + "results using": 47899, + "spanish japanese": 51944, + "japanese english": 27146, + "reports results": 46461, + "main goal": 31442, + "generality proposed": 22107, + "previously applied": 42328, + "work extend": 62663, + "apply technique": 3355, + "experiments showed": 19523, + "obtain good": 38173, + "tasks important": 55670, + "important step": 24775, + "classification english": 8459, + "previously known": 42336, + "achieved using": 1281, + "aspects linguistic": 4545, + "linguistic contexts": 30761, + "identify specific": 24445, + "specific lexical": 52104, + "complexity task": 9689, + "results despite": 47589, + "based classification": 5618, + "useful tool": 60392, + "large language": 28897, + "resources required": 47331, + "mt systems": 35925, + "monolingual bilingual": 35792, + "bilingual data": 7105, + "data web": 12773, + "building blocks": 7439, + "building language": 7450, + "web documents": 61886, + "rich textual": 48126, + "textual content": 56952, + "parallel data": 39645, + "available language": 5317, + "language order": 28363, + "experiments used": 19552, + "using sentence": 60923, + "sentence alignment": 49515, + "successfully used": 53751, + "used domain": 60155, + "systems work": 54673, + "results experimental": 47625, + "work develop": 62631, + "class based": 8392, + "results approach": 47502, + "human effort": 24136, + "effort required": 16930, + "development language": 14680, + "complex problem": 9647, + "information classification": 25780, + "proposes use": 43944, + "use automatically": 59830, + "semantic class": 49246, + "unsupervised clustering": 59686, + "clustering task": 8746, + "english results": 17867, + "different lexical": 14979, + "role information": 48308, + "sparse data": 51967, + "task achieving": 54878, + "holds promise": 23987, + "task performance": 55273, + "exist paper": 19016, + "approach overcome": 3632, + "based using": 6124, + "number natural": 38021, + "languages making": 28726, + "additionally approach": 1713, + "approach allows": 3416, + "allows automatic": 2451, + "user friendly": 60414, + "multilingual natural": 36102, + "work demonstrates": 62627, + "existing technologies": 19157, + "extension existing": 19848, + "languages domains": 28647, + "models play": 35323, + "play crucial": 40965, + "crucial role": 11910, + "sensitive changes": 49495, + "takes place": 54782, + "approach speech": 3701, + "speech language": 52268, + "model adaptation": 33532, + "self training": 49222, + "training language": 58144, + "model parameters": 34180, + "based automatically": 5590, + "particularly challenging": 39877, + "challenging settings": 8141, + "conversational speech": 11053, + "work propose": 62771, + "propose model": 43462, + "model considers": 33701, + "errors asr": 18233, + "asr output": 4559, + "instead using": 26468, + "using just": 60744, + "improve self": 24923, + "demonstrate improved": 13921, + "topic based": 57393, + "based language": 5802, + "language modeling": 28205, + "results best": 47525, + "training using": 58311, + "conversations paper": 11059, + "given question": 22775, + "question query": 44746, + "play important": 40971, + "answering question": 3092, + "answering questions": 3093, + "improves quality": 25154, + "candidate answers": 7569, + "word graph": 62214, + "graph model": 23148, + "given document": 22737, + "improved version": 24972, + "words non": 62468, + "words experimental": 62413, + "better state": 6969, + "art task": 4422, + "task answering": 54904, + "low level": 31156, + "lan guage": 27942, + "context question": 10701, + "developing methods": 14655, + "methods extract": 32859, + "extract useful": 20000, + "information large": 25943, + "large collections": 28857, + "collections documents": 8989, + "search engines": 48971, + "aim answer": 2137, + "answer question": 3050, + "documents contain": 15865, + "correct answer": 11465, + "task build": 54942, + "improving pre": 25190, + "analysis word": 2794, + "word ambiguity": 62111, + "ambiguity word": 2528, + "sense word": 49490, + "sentences paper": 49762, + "describes model": 14226, + "model uses": 34512, + "speech tagger": 52299, + "supervised unsupervised": 54064, + "unsupervised methods": 59711, + "methods combined": 32788, + "algorithm used": 2309, + "efficient accurate": 16858, + "word based": 62117, + "based domain": 5690, + "domain information": 16085, + "accuracy work": 1071, + "work evaluated": 62649, + "finding best": 20898, + "translation translation": 58697, + "translation natural": 58642, + "using automated": 60571, + "like india": 30477, + "process language": 42800, + "language paper": 28369, + "paper look": 39418, + "various machine": 61358, + "languages discuss": 28643, + "discuss various": 15483, + "various approaches": 61300, + "building machine": 7453, + "alignment text": 2385, + "human written": 24259, + "presents challenges": 42076, + "text alignment": 56427, + "problems including": 42702, + "individual words": 25588, + "new methods": 37256, + "based hidden": 5769, + "models specifically": 35526, + "problem demonstrate": 42531, + "summarization task": 53901, + "room improvement": 48340, + "contains different": 10493, + "different features": 14933, + "features including": 20603, + "modern standard": 35720, + "standard arabic": 52464, + "arabic msa": 4001, + "research uses": 47139, + "used classify": 60115, + "article aims": 4447, + "order extract": 38618, + "extract semantic": 19991, + "generation semantic": 22543, + "based parsing": 5934, + "parsing semantic": 39796, + "semantic tagging": 49361, + "semantic features": 49278, + "syntactic dependencies": 54299, + "applied text": 3302, + "text types": 56827, + "answering systems": 3098, + "relations expressed": 46031, + "relation labels": 45986, + "semantically related": 49388, + "paper reports": 39566, + "user feedback": 60413, + "empirical success": 17352, + "compositional semantics": 9749, + "consider types": 10225, + "types textual": 59123, + "models capable": 34802, + "capable capturing": 7617, + "address shortcomings": 1801, + "current models": 11988, + "models capture": 34805, + "solution propose": 51659, + "space representation": 51892, + "use representation": 59994, + "different variants": 15120, + "released open": 46177, + "open license": 38437, + "creative commons": 11752, + "nc sa": 36498, + "linguistic research": 30790, + "words corpus": 62391, + "word forms": 62207, + "based text": 6090, + "textual resources": 56978, + "translation techniques": 58688, + "probabilistic context": 42455, + "compared supervised": 9463, + "version text": 61557, + "text patterns": 56695, + "learning process": 29821, + "process automatic": 42759, + "automatic learning": 5101, + "learning techniques": 29907, + "patterns paper": 39973, + "paper apply": 39269, + "learning technique": 29906, + "based structural": 6063, + "polysemous words": 41120, + "using types": 61006, + "accuracy rates": 1034, + "knowledge time": 27630, + "problem finding": 42567, + "interface allows": 26660, + "allows user": 2482, + "ask questions": 4519, + "language large": 28132, + "number applications": 37982, + "interaction humans": 26601, + "end user": 17724, + "ease use": 16523, + "use various": 60070, + "million people": 33254, + "people world": 40044, + "available information": 5312, + "united nations": 59526, + "paper covers": 39309, + "brief overview": 7328, + "different components": 14871, + "advantages disadvantages": 1951, + "techniques used": 56148, + "used paper": 60259, + "fast effective": 20422, + "key phrases": 27326, + "main concepts": 31428, + "document used": 15841, + "used purpose": 60281, + "paper investigate": 39409, + "investigate use": 26993, + "use additional": 59815, + "features pre": 20643, + "improve automatic": 24825, + "key phrase": 27325, + "phrase extraction": 40839, + "features include": 20602, + "use signal": 60014, + "lead significant": 29270, + "accuracy results": 1039, + "document pre": 15819, + "sentences document": 49707, + "main content": 31429, + "standard set": 52529, + "set labeled": 50177, + "labeled documents": 27756, + "documents training": 15921, + "training evaluation": 58091, + "subjective nature": 53565, + "standard used": 52539, + "amazon mechanical": 2521, + "mechanical turk": 32092, + "obtain useful": 38198, + "improvements performance": 25091, + "shallow semantic": 50444, + "combination pre": 9047, + "scores propose": 48915, + "computational framework": 9842, + "framework identifying": 21537, + "new corpus": 37158, + "use evaluate": 59881, + "domain independent": 16084, + "key components": 27301, + "classifier achieves": 8592, + "close human": 8687, + "performance effective": 40312, + "domains use": 16299, + "use framework": 59895, + "framework study": 21606, + "achieve high": 1150, + "stack exchange": 52416, + "finally apply": 20837, + "preliminary analysis": 41800, + "management systems": 31691, + "learning data": 29578, + "data available": 12176, + "decision making": 13563, + "making easier": 31652, + "extraction key": 20073, + "provides framework": 44200, + "process machine": 42803, + "extraction classification": 20052, + "design new": 14292, + "popular word": 41199, + "pointwise mutual": 41083, + "information pmi": 26009, + "experiments large": 19454, + "available datasets": 5279, + "best known": 6776, + "distributional similarity": 15671, + "level document": 30103, + "document level": 15803, + "level concept": 30079, + "combined use": 9087, + "use document": 59869, + "performance gains": 40357, + "distributed word": 15629, + "word representations": 62286, + "word embeddings": 62156, + "embeddings recently": 17201, + "competitive performance": 9553, + "performance language": 40406, + "tasks work": 55968, + "work train": 62845, + "train word": 57659, + "100 languages": 61, + "languages using": 28816, + "demonstrate utility": 13997, + "embeddings using": 17240, + "features training": 20687, + "languages performance": 28750, + "performance competitive": 40261, + "near state": 36514, + "art methods": 4282, + "methods english": 32840, + "features captured": 20535, + "help researchers": 23587, + "applications machine": 3218, + "emerging research": 17272, + "source language": 51776, + "text target": 56803, + "overall translation": 39053, + "translation propose": 58662, + "propose use": 43694, + "hindi language": 23941, + "language current": 28015, + "current research": 12004, + "opinion mining": 38501, + "web resources": 61891, + "discussion forums": 15492, + "important problem": 24754, + "product reviews": 43045, + "produce summary": 43013, + "techniques developed": 56077, + "key tasks": 27338, + "analysis paper": 2711, + "morphologically rich": 35847, + "rich language": 48106, + "general approach": 22045, + "used development": 60149, + "token based": 57281, + "based given": 5754, + "information previous": 26020, + "determine best": 14555, + "evaluation machine": 18635, + "important research": 24760, + "field machine": 20758, + "structure words": 53149, + "words proper": 62484, + "plays significant": 41005, + "significant role": 50922, + "role improving": 48307, + "improving quality": 25194, + "quality machine": 44547, + "text english": 56556, + "perform automatic": 40070, + "automatic analysis": 5068, + "specifically collect": 52184, + "focus analysis": 21143, + "people using": 40042, + "using nlp": 60841, + "nlp methods": 37498, + "methods present": 32988, + "present series": 42008, + "key insights": 27321, + "data multiple": 12500, + "area research": 4149, + "analyzing language": 2842, + "based structure": 6064, + "50 000": 420, + "words set": 62510, + "words based": 62371, + "based proposed": 5959, + "clustering techniques": 8747, + "type data": 59051, + "set provided": 50229, + "years research": 63072, + "new techniques": 37339, + "development automatic": 14670, + "automatic machine": 5102, + "performance increase": 40389, + "present evaluation": 41905, + "evaluation human": 18624, + "automatic evaluation": 5081, + "evaluation metrics": 18646, + "sentence document": 49543, + "latent variable": 29143, + "variable model": 61223, + "model discover": 33771, + "semantic frames": 49279, + "analyze model": 2822, + "model learns": 34053, + "issues including": 27091, + "parameter learning": 39672, + "learning small": 29882, + "accuracy paper": 1019, + "paper establish": 39350, + "texts contain": 56867, + "concepts used": 9946, + "sentences containing": 49696, + "text according": 56422, + "pairs use": 39226, + "results demonstrate": 47573, + "demonstrate effectiveness": 13893, + "effectiveness method": 16789, + "clinical domain": 8669, + "gained increasing": 21916, + "increasing attention": 25444, + "vital role": 61692, + "decision support": 13569, + "sequence labeling": 49933, + "representations models": 46719, + "models post": 35332, + "events using": 18799, + "using current": 60641, + "art sequence": 4399, + "approach create": 3471, + "inference based": 25644, + "sentences natural": 49756, + "abstract concepts": 756, + "applied proposed": 3288, + "sentences semantic": 49782, + "processing algorithms": 42850, + "semantics words": 49421, + "knowledge used": 27643, + "provide valuable": 44151, + "public sentiment": 44328, + "based machine": 5826, + "limited use": 30630, + "twitter data": 59034, + "data research": 12609, + "presented paper": 42061, + "end framework": 17674, + "twitter corpus": 59033, + "approach machine": 3594, + "approach implemented": 3560, + "framework compared": 21473, + "compared using": 9470, + "good correlation": 22930, + "correlation results": 11527, + "approach large": 3582, + "large volumes": 29048, + "methods need": 32954, + "big data": 7089, + "data techniques": 12726, + "graph theoretic": 23174, + "theoretic analysis": 57011, + "analysis reveals": 2746, + "introduce method": 26821, + "method detect": 32458, + "given sequence": 22782, + "linear time": 30674, + "input sequence": 26331, + "approaches unsupervised": 3947, + "work paper": 62743, + "describes approach": 14218, + "increase number": 25419, + "sampling based": 48499, + "based alignment": 5563, + "alignment method": 2373, + "method approach": 32387, + "distribution used": 15656, + "leads better": 29306, + "better evaluation": 6886, + "evaluation results": 18700, + "results statistical": 47857, + "translation tasks": 58687, + "alignment approach": 2364, + "new data": 37160, + "contain significant": 10473, + "source knowledge": 51775, + "existing knowledge": 19078, + "level individual": 30132, + "individual sentences": 25579, + "levels sentence": 30248, + "100 000": 57, + "000 sentences": 11, + "subset sentences": 53609, + "inconsistent results": 25341, + "process extracting": 42783, + "word given": 62212, + "application natural": 3170, + "latest developments": 29157, + "algorithms results": 2340, + "results text": 47883, + "mixed effects": 33404, + "modeling linguistic": 34590, + "applications using": 3257, + "likelihood ratio": 30521, + "used text": 60328, + "considerable attention": 10228, + "attention given": 4753, + "emotion analysis": 17286, + "limited small": 30617, + "used generate": 60197, + "generate large": 22215, + "large high": 28884, + "propose solutions": 43641, + "word choice": 62126, + "choice question": 8335, + "help identify": 23569, + "sense level": 49487, + "level word": 30231, + "higher inter": 23828, + "degree semantic": 13814, + "processing including": 42875, + "including machine": 25272, + "manually created": 31771, + "automatic method": 5106, + "method identify": 32526, + "pairs based": 39171, + "based hypothesis": 5775, + "hypothesis pair": 24346, + "strongly related": 53073, + "human agreement": 24092, + "key features": 27312, + "features different": 20560, + "present automatic": 41852, + "methods paper": 32972, + "paper considers": 39304, + "estimating quality": 18380, + "translation outputs": 58653, + "human intervention": 24176, + "addressed using": 1806, + "various measures": 61360, + "quality automatic": 44493, + "produce good": 42984, + "produce results": 43005, + "level paper": 30172, + "features extracted": 20582, + "extracted input": 20013, + "input sentences": 26330, + "obtained based": 38203, + "based bayesian": 5596, + "bayesian inference": 6358, + "shared task": 50490, + "task total": 55440, + "tokens used": 57342, + "used shared": 60299, + "dependency trees": 14143, + "trees using": 58771, + "using heuristics": 60725, + "morphological analysis": 35839, + "analysis provided": 2732, + "end users": 17725, + "language tasks": 28521, + "extracting information": 20032, + "based application": 5568, + "language analysis": 27960, + "generates natural": 22349, + "focused using": 21233, + "task machine": 55196, + "language independent": 28105, + "language input": 28116, + "syntactically semantically": 54342, + "language textual": 28529, + "analysis emotion": 2655, + "emotion detection": 17289, + "sentiment emotion": 49842, + "task useful": 55460, + "issues like": 27093, + "analysis tools": 2781, + "digital libraries": 15212, + "task classification": 54950, + "classification algorithms": 8431, + "address specific": 1802, + "specific challenges": 52053, + "datasets used": 13468, + "classification task": 8561, + "test approach": 56332, + "digital library": 15213, + "representations semantic": 46751, + "space model": 51876, + "model suitable": 34427, + "algorithm able": 2259, + "able recognize": 719, + "highly similar": 23916, + "semantic composition": 49251, + "best models": 6783, + "significantly different": 50953, + "tasks model": 55748, + "matches performance": 31907, + "performance best": 40218, + "best previous": 6805, + "previous models": 42264, + "model consists": 33705, + "domain domain": 16051, + "various ways": 61417, + "model relations": 34299, + "paper develops": 39340, + "automate process": 5033, + "process generating": 42786, + "missing word": 33365, + "language structures": 28508, + "based large": 5805, + "large monolingual": 28912, + "monolingual data": 35798, + "languages small": 28791, + "data uses": 12764, + "distributed representation": 15623, + "linear mapping": 30659, + "vector spaces": 61470, + "spaces languages": 51910, + "languages despite": 28637, + "despite simplicity": 14390, + "surprisingly effective": 54186, + "achieve 90": 1107, + "words english": 62408, + "method makes": 32570, + "learning word": 29943, + "representations recently": 46747, + "recently seen": 45465, + "sequences word": 50030, + "word tokens": 62323, + "languages word": 28820, + "word segmentation": 62292, + "non trivial": 37688, + "trivial task": 58812, + "naturally occurring": 36474, + "data propose": 12569, + "propose learn": 43437, + "learn text": 29437, + "text representations": 56738, + "representations directly": 46640, + "directly raw": 15334, + "character sequences": 8225, + "sequences training": 50028, + "simple recurrent": 51204, + "recurrent network": 45620, + "network predict": 36786, + "network uses": 36821, + "hidden layer": 23639, + "demonstrate usefulness": 13995, + "learned text": 29484, + "text embeddings": 56549, + "embeddings use": 17237, + "character level": 8206, + "level text": 30223, + "labeling task": 27795, + "task recognizing": 55325, + "spans text": 51958, + "language code": 27991, + "using embeddings": 60672, + "substantially improve": 53636, + "improve baseline": 24826, + "baseline uses": 6221, + "character grams": 8204, + "highly multilingual": 23905, + "european union": 18431, + "multi label": 35975, + "label classification": 27695, + "manually labelled": 31786, + "labelled data": 27800, + "data automatically": 12172, + "automatically assign": 5143, + "ranking task": 44977, + "trained classifiers": 57688, + "parallel training": 39656, + "data languages": 12453, + "document collections": 15776, + "document representation": 15825, + "consistency human": 10267, + "process used": 42837, + "feature vector": 20508, + "various language": 61350, + "tasks including": 55677, + "including cross": 25245, + "cross lingual": 11828, + "plagiarism detection": 40933, + "detection sentence": 14521, + "sentence selection": 49638, + "available large": 5319, + "typically used": 59160, + "improve speed": 24926, + "recognition ner": 45518, + "new resource": 37304, + "use large": 59926, + "news media": 37407, + "work automated": 62580, + "official languages": 38309, + "languages particularly": 28748, + "particularly important": 39883, + "languages order": 28742, + "order capture": 38601, + "capture complementary": 7653, + "news content": 37393, + "extracted information": 20012, + "information present": 26018, + "present publicly": 41991, + "publicly accessible": 44333, + "discuss implications": 15469, + "able achieve": 668, + "representation speech": 46584, + "speaker listener": 52000, + "novel computational": 37785, + "words model": 62459, + "representations uses": 46784, + "key component": 27300, + "information real": 26040, + "method create": 32444, + "create large": 11705, + "used obtain": 60255, + "level annotations": 30063, + "data quality": 12581, + "improve semantic": 24924, + "semantic coherence": 49248, + "response paper": 47396, + "new hybrid": 37220, + "algorithm combines": 2266, + "character based": 8195, + "approaches presented": 3899, + "approach extended": 3530, + "distance metric": 15548, + "importance token": 24691, + "token level": 57295, + "large arabic": 28848, + "arabic dataset": 3996, + "dataset experimental": 12916, + "results proposed": 47781, + "types errors": 59086, + "different writing": 15136, + "writing styles": 62991, + "compared results": 9447, + "algorithms using": 2346, + "dataset proposed": 13039, + "achieving higher": 1410, + "relies heavily": 46266, + "analysis used": 2788, + "collections text": 8990, + "occurring words": 38282, + "words finally": 62419, + "finally compare": 20843, + "wider range": 62028, + "describes new": 14227, + "new freely": 37212, + "scale multilingual": 48600, + "multilingual news": 36105, + "analysis combined": 2630, + "20 different": 224, + "different scripts": 15059, + "used number": 60254, + "systems learn": 54546, + "translation results": 58670, + "details regarding": 14433, + "new variant": 37357, + "algorithm propose": 2294, + "using learned": 60766, + "recognition experiments": 45506, + "deep neural": 13735, + "network dnn": 36733, + "slightly better": 51435, + "widespread use": 62033, + "amounts text": 2557, + "words work": 62550, + "help people": 23583, + "growing number": 23299, + "available different": 5281, + "different natural": 15001, + "discuss approaches": 15460, + "automated approaches": 5037, + "addition propose": 1636, + "new architecture": 37136, + "used example": 60175, + "perform cross": 40083, + "time series": 57210, + "linguistic elements": 30766, + "people interact": 40030, + "words order": 62472, + "allow researchers": 2440, + "cognitive science": 8898, + "level analysis": 30060, + "end paper": 17692, + "computational efficiency": 9841, + "art entity": 4258, + "machine translated": 31343, + "approach paper": 3633, + "types entities": 59084, + "nouns verbs": 37746, + "statistical model": 52754, + "model nlp": 34131, + "text analysis": 56430, + "like information": 30478, + "text speech": 56784, + "speech synthesis": 52296, + "speech tag": 52298, + "approaches proposed": 3903, + "pos taggers": 41232, + "unigram bigram": 59494, + "tag set": 54725, + "accuracy 77": 910, + "translation research": 58669, + "evaluation important": 18626, + "mt output": 35923, + "based evaluation": 5708, + "evaluation english": 18615, + "research work": 47143, + "using different": 60653, + "different machine": 14982, + "like google": 30472, + "evaluation process": 18682, + "process using": 42838, + "using approaches": 60562, + "approaches human": 3840, + "evaluation automatic": 18578, + "automatic metric": 5108, + "semantic relationship": 49328, + "various types": 61413, + "words sentences": 62508, + "sentences documents": 49708, + "according semantic": 868, + "knowledge representations": 27592, + "representations support": 46766, + "intelligent agents": 26543, + "human ability": 24087, + "comprehensive survey": 9801, + "instances based": 26434, + "based semantic": 6014, + "extensively studied": 19919, + "political issues": 41110, + "paper analyze": 39265, + "written form": 62999, + "level features": 30120, + "significant results": 50919, + "significant changes": 50855, + "measured terms": 32070, + "usage different": 59800, + "different characters": 14860, + "different words": 15135, + "best knowledge": 6768, + "knowledge work": 27649, + "weakly supervised": 61859, + "learning scenario": 29857, + "resource rich": 47268, + "guide learning": 23337, + "learning languages": 29697, + "past approaches": 39929, + "gold labels": 22914, + "labels training": 27854, + "training propose": 58218, + "transfer model": 58405, + "model uncertainty": 34497, + "crf model": 11764, + "evaluated standard": 18548, + "chinese english": 8307, + "german english": 22665, + "datasets method": 13328, + "f1 scores": 20227, + "labeled data": 27738, + "accuracy supervised": 1056, + "labeled sentences": 27762, + "sentences furthermore": 49726, + "furthermore combined": 21807, + "labeled examples": 27758, + "yields significant": 63128, + "improvements state": 25100, + "supervised methods": 54018, + "methods achieving": 32736, + "achieving best": 1394, + "best reported": 6812, + "begin introducing": 6379, + "branch natural": 7300, + "constrained inference": 10366, + "way model": 61821, + "model long": 34072, + "relationships text": 46084, + "integer linear": 26499, + "linear programming": 30664, + "problem automatically": 42510, + "results propose": 47780, + "propose extension": 43383, + "language present": 28385, + "complex syntactic": 9666, + "syntax semantic": 54352, + "particular case": 39835, + "information words": 26163, + "general information": 22062, + "information theoretic": 26120, + "theoretic framework": 57013, + "information maximization": 25968, + "online user": 38392, + "semi automated": 49446, + "dataset real": 13053, + "knowledge corpus": 27428, + "corpus available": 11284, + "available real": 5355, + "preliminary results": 41804, + "results used": 47896, + "used baseline": 60100, + "research dataset": 47010, + "dataset released": 13061, + "structured knowledge": 53159, + "semantic interpretation": 49290, + "interpretation terms": 26736, + "terms semantic": 56314, + "compute similarity": 9880, + "order perform": 38646, + "similarity methods": 51105, + "structure information": 53109, + "approaches additionally": 3757, + "additionally present": 1728, + "based standard": 6054, + "standard benchmarks": 52473, + "evaluation measures": 18638, + "hundreds millions": 24297, + "beneficial downstream": 6555, + "downstream natural": 16343, + "applications question": 3239, + "summarization paper": 53895, + "new task": 37333, + "detection significantly": 14527, + "identifying key": 24460, + "large dataset": 28868, + "class task": 8411, + "task accuracy": 54874, + "accuracy 73": 907, + "class baseline": 8393, + "baseline finally": 6168, + "resources developed": 47299, + "purpose propose": 44410, + "propose study": 43652, + "study novel": 53422, + "novel supervised": 37932, + "supervised approach": 53961, + "approach learning": 3586, + "annotated training": 2925, + "training examples": 58094, + "proposed semantic": 43891, + "occurrence statistics": 38278, + "textual units": 56985, + "present efficient": 41896, + "efficient algorithm": 16860, + "learning semantic": 29866, + "models training": 35625, + "training sample": 58236, + "sufficiently large": 53810, + "large unstructured": 29042, + "coherent texts": 8919, + "texts approach": 56859, + "models specific": 35525, + "results extensive": 47629, + "small large": 51480, + "method effective": 32471, + "competitive state": 9566, + "semantic parsing": 49308, + "parsing framework": 39780, + "learning inference": 29681, + "inference framework": 25659, + "mapping natural": 31804, + "language formal": 28077, + "formal representation": 21351, + "representation meaning": 46549, + "translation evaluation": 58608, + "metrics proposed": 33191, + "proposed literature": 43800, + "require human": 46862, + "human reference": 24232, + "compare output": 9352, + "accurate results": 1086, + "text different": 56536, + "paper proposed": 39542, + "new human": 37219, + "quality text": 44588, + "translation text": 58690, + "text data": 56522, + "process information": 42793, + "language context": 28006, + "paper try": 39600, + "words according": 62360, + "paper used": 39602, + "way build": 61794, + "lexicon using": 30414, + "word input": 62218, + "novel semantic": 37914, + "based phrase": 5936, + "source target": 51799, + "valued vector": 61211, + "representations low": 46713, + "low dimensional": 31142, + "multi layer": 35980, + "layer neural": 29194, + "data learning": 12460, + "directly optimize": 15326, + "end end": 17635, + "end machine": 17681, + "experimental evaluation": 19262, + "tasks english": 55613, + "results new": 47740, + "model significantly": 34379, + "significantly improves": 50972, + "improves performance": 25141, + "phrase based": 40835, + "bleu points": 7209, + "discourse representation": 15398, + "language statements": 28505, + "art technologies": 4426, + "technologies natural": 56157, + "language syntax": 28515, + "given set": 22783, + "potential limitations": 41398, + "based reasoning": 5978, + "generation aims": 22413, + "new challenge": 37146, + "main topic": 31464, + "existing approaches": 19025, + "approaches neglect": 3882, + "hierarchical topic": 23695, + "topic structure": 57432, + "news corpus": 37395, + "generation paper": 22513, + "time dependent": 57143, + "generation model": 22494, + "detect different": 14438, + "topic information": 57410, + "structure used": 53146, + "used sentence": 60296, + "selection based": 49133, + "based topic": 6101, + "sentences selected": 49781, + "considering different": 10258, + "systems evaluate": 54489, + "evaluate long": 18467, + "performance comparison": 40260, + "comparison different": 9493, + "different systems": 15091, + "demonstrates effectiveness": 14029, + "effectiveness model": 16794, + "model terms": 34451, + "rouge metrics": 48352, + "metrics word": 33208, + "embeddings resulting": 17205, + "models shown": 35496, + "variety nlp": 61285, + "tasks architecture": 55506, + "difficult train": 15191, + "time consuming": 57127, + "instead propose": 26460, + "movie review": 35895, + "similar better": 51030, + "performance deep": 40280, + "embeddings provide": 17199, + "provide easy": 44057, + "embeddings specific": 17217, + "tasks paper": 55781, + "meaning representations": 32014, + "newly emerging": 37377, + "emerging field": 17271, + "task simple": 55378, + "sentence space": 49649, + "based baseline": 5594, + "interesting challenging": 26649, + "challenging problem": 8126, + "problem machine": 42600, + "learning community": 29564, + "distributed representations": 15625, + "meaning natural": 32005, + "natural way": 36470, + "data sparsity": 12683, + "sparsity problems": 51983, + "information semantic": 26076, + "discrete representations": 15427, + "proven useful": 43995, + "useful nlp": 60378, + "tasks recent": 55836, + "semantic representations": 49335, + "representations successfully": 46764, + "applications sentiment": 3248, + "work learning": 62707, + "learning shared": 29873, + "level representations": 30194, + "representations languages": 46699, + "combine approaches": 9062, + "method learning": 32563, + "learning distributed": 29596, + "multilingual setup": 36121, + "learns assign": 29952, + "aligned sentences": 2358, + "sentence aligned": 49514, + "word alignments": 62110, + "lingual document": 30698, + "document classification": 15770, + "task outperform": 55258, + "outperform previous": 38809, + "previous state": 42282, + "multiple language": 36234, + "pairs model": 39202, + "learns representations": 29972, + "representations capture": 46625, + "capture semantic": 7706, + "recursive neural": 45636, + "network models": 36767, + "words seen": 62503, + "tasks known": 55705, + "ability accurately": 592, + "accurately capture": 1092, + "capture aspects": 7651, + "linguistic meaning": 30778, + "model new": 34125, + "corpus constructed": 11305, + "logical reasoning": 30986, + "short sentences": 50565, + "representations generalize": 46675, + "generalize new": 22144, + "new types": 37352, + "learned representation": 29478, + "representation models": 46556, + "task generating": 55105, + "language nl": 28359, + "lexico syntactic": 30404, + "pattern matching": 39962, + "matching based": 31910, + "based techniques": 6086, + "light weight": 30454, + "lead accurate": 29255, + "learning linguistic": 29711, + "analysis using": 2790, + "dl based": 15752, + "framework learning": 21554, + "sentences non": 49758, + "sentences requires": 49778, + "tool called": 57360, + "observed significant": 38148, + "language query": 28456, + "given user": 22800, + "language semantic": 28478, + "logic based": 30979, + "query question": 44675, + "paper proposing": 39553, + "dataset present": 13033, + "flexible framework": 21110, + "probability estimates": 42475, + "kneser ney": 27381, + "training efficient": 58080, + "efficient approach": 16861, + "approach outperforms": 3622, + "outperforms state": 38946, + "baselines terms": 6309, + "terms perplexity": 56308, + "large corpora": 28862, + "bleu score": 7213, + "translation task": 58686, + "task recent": 55321, + "learning multilingual": 29771, + "multilingual word": 36134, + "usually relies": 61061, + "use word": 60073, + "translated sentences": 58558, + "sentences order": 49760, + "embeddings different": 17112, + "autoencoder model": 5027, + "model learning": 34052, + "bag word": 5502, + "word representation": 62283, + "representation given": 46525, + "given sentence": 22780, + "sentence encoded": 49548, + "representation extracted": 46515, + "translation evaluate": 58607, + "evaluate approach": 18437, + "multilingual document": 36079, + "classification performed": 8516, + "experiments observe": 19483, + "compares favorably": 9476, + "previously proposed": 42340, + "method exploits": 32499, + "level alignments": 30059, + "learn word": 29444, + "mining information": 33315, + "information integration": 25928, + "variety approaches": 61262, + "right wrong": 48144, + "certain degree": 7938, + "human behavior": 24114, + "selecting appropriate": 49122, + "task significant": 55375, + "significant challenge": 50853, + "domain expert": 16066, + "approach evaluated": 3520, + "comparison human": 9497, + "human judgments": 24184, + "average performance": 5412, + "approach study": 3706, + "used extract": 60183, + "success rate": 53721, + "extraction method": 20080, + "documents collected": 15863, + "collected different": 8961, + "different corpora": 14879, + "content different": 10519, + "languages text": 28804, + "applied small": 3293, + "small sets": 51501, + "sets languages": 50296, + "large self": 29008, + "providing training": 44254, + "data manually": 12483, + "tuning results": 58952, + "various multilingual": 61367, + "developing natural": 14658, + "complex text": 9670, + "online news": 38375, + "news articles": 37385, + "make easier": 31567, + "develop highly": 14591, + "mining applications": 33310, + "users select": 60480, + "german italian": 22672, + "portuguese spanish": 41227, + "turkish english": 58984, + "news domain": 37402, + "available systems": 5373, + "source translation": 51817, + "translation named": 58641, + "language making": 28148, + "making use": 31673, + "use separate": 60010, + "news titles": 37421, + "specific style": 52148, + "large volume": 29047, + "daily news": 12088, + "approach fully": 3541, + "large text": 29023, + "statistical language": 52744, + "approach enable": 3507, + "information needed": 25987, + "language comprehension": 28000, + "inferring semantic": 25711, + "free text": 21646, + "semi structured": 49453, + "user generated": 60415, + "especially relevant": 18297, + "relevant domain": 46212, + "pros cons": 43954, + "underlying semantic": 59276, + "expert annotations": 19572, + "different labels": 14962, + "using noisy": 60842, + "latent topic": 29141, + "model review": 34327, + "bayesian model": 6360, + "model joint": 34023, + "joint inference": 27172, + "semantically meaningful": 49387, + "evaluations demonstrate": 18756, + "demonstrate model": 13939, + "model substantially": 34422, + "substantially outperforms": 53644, + "outperforms alternative": 38865, + "single multiple": 51324, + "multiple documents": 36202, + "capabilities current": 7596, + "complex questions": 9652, + "questions need": 44796, + "factual information": 20321, + "information scattered": 26072, + "scattered different": 48680, + "different documents": 14903, + "documents specifically": 15914, + "temporal relations": 56192, + "complex question": 9651, + "novel aspect": 37769, + "easily extensible": 16542, + "present methodology": 41945, + "layer perform": 29201, + "evaluated compared": 18524, + "compared general": 9412, + "better results": 6956, + "evaluation measure": 18637, + "analysis texts": 2778, + "linguistic tools": 30803, + "study text": 53468, + "text example": 56562, + "facilitate research": 20275, + "aiming reduce": 2169, + "effectively reduce": 16754, + "accurately identify": 1095, + "set possible": 50219, + "possible causes": 41319, + "involves identifying": 27019, + "investigate approaches": 26942, + "approaches exploit": 3820, + "automatically constructed": 5152, + "using simple": 60943, + "simple heuristic": 51177, + "second approach": 48996, + "identification text": 24400, + "learn models": 29399, + "using models": 60808, + "models label": 35158, + "heuristic based": 23627, + "approach given": 3548, + "given sufficient": 22790, + "sufficient training": 53806, + "data outperform": 12525, + "outperform baseline": 38780, + "baseline significantly": 6210, + "bilingual parallel": 7113, + "words high": 62430, + "high probability": 23766, + "intermediate representation": 26676, + "words introduce": 62439, + "word expressions": 62203, + "compare performance": 9353, + "performance different": 40292, + "text segments": 56757, + "algorithm paper": 2290, + "sentence compression": 49530, + "naturally capture": 36473, + "capture structural": 7712, + "decoding framework": 13631, + "model trained": 34467, + "large margin": 28903, + "framework experimental": 21512, + "results sentence": 47823, + "bring significant": 7334, + "art model": 4292, + "model task": 34445, + "quality information": 44534, + "extraction paper": 20090, + "examples domain": 18897, + "available paper": 5339, + "model predicting": 34222, + "model explored": 33861, + "experiments demonstrate": 19401, + "improve f1": 24852, + "language semantics": 28479, + "requires access": 46912, + "vast amounts": 61437, + "common sense": 9197, + "world knowledge": 62943, + "work field": 62665, + "based purely": 5966, + "manual efforts": 31738, + "method called": 32409, + "grained semantic": 23044, + "derived wikipedia": 14205, + "explicitly represent": 19647, + "represent meaning": 46477, + "meaning text": 32019, + "based concepts": 5634, + "evaluate effectiveness": 18454, + "results significant": 47843, + "improvements previous": 25093, + "art tasks": 4423, + "use natural": 59957, + "human users": 24255, + "specific entity": 52080, + "paper novel": 39429, + "approach proposed": 3657, + "proposed identify": 43793, + "identified using": 24405, + "set syntactic": 50254, + "syntactic rules": 54321, + "identification results": 24396, + "results comparable": 47542, + "comparable obtained": 9300, + "human efforts": 24138, + "text automated": 56448, + "automated manner": 5050, + "aspects text": 4552, + "tasks text": 55931, + "approach measuring": 3599, + "implicit semantic": 24664, + "approach exploits": 3529, + "approach introduce": 3576, + "new measure": 37247, + "measure semantic": 32062, + "validate method": 61180, + "method evaluate": 32489, + "performance semantic": 40549, + "similarity relatedness": 51116, + "word analogy": 62112, + "evaluating performance": 18566, + "performance method": 40435, + "method measuring": 32574, + "text semantic": 56758, + "relatedness tasks": 45960, + "tasks sentence": 55871, + "sentence sentence": 49641, + "sentence similarity": 49647, + "recognition experimental": 45505, + "evaluation shows": 18719, + "method outperforms": 32597, + "method semantic": 32647, + "used data": 60135, + "based hybrid": 5774, + "hybrid approaches": 24310, + "approaches paper": 3890, + "describes method": 14225, + "small parallel": 51491, + "rules based": 48391, + "statistical mt": 52757, + "extracted sentence": 20019, + "conducted using": 10097, + "quality improved": 44532, + "word translation": 62324, + "translation transfer": 58695, + "using hand": 60720, + "method present": 32619, + "entirely unsupervised": 18034, + "rules applied": 48390, + "argument structure": 4174, + "sentence plays": 49620, + "plays critical": 40991, + "critical role": 11791, + "systems semantic": 54627, + "semantic dependency": 49268, + "pipeline framework": 40900, + "real applications": 45099, + "maintaining competitive": 31489, + "parsing word": 39805, + "word pair": 62260, + "pair classification": 39147, + "problem using": 42683, + "using maximum": 60792, + "entropy classifier": 18161, + "feature space": 20506, + "space use": 51902, + "achieves state": 1378, + "performance evaluation": 40327, + "evaluation data": 18602, + "task pipeline": 55276, + "understanding generation": 59347, + "especially resource": 18298, + "resource constraint": 47214, + "present semantic": 42004, + "clustering approach": 8738, + "document clusters": 15774, + "measure similarity": 32063, + "constituent words": 10357, + "candidate phrase": 7574, + "using vector": 61017, + "statistical models": 52756, + "point wise": 41052, + "wise mutual": 62082, + "outperforms competing": 38886, + "fully unsupervised": 21748, + "unlabeled text": 59582, + "standard maximum": 52502, + "maximum likelihood": 31971, + "markov model": 31847, + "task performs": 55275, + "inductive bias": 25610, + "large model": 28910, + "model capacity": 33646, + "learning objective": 29785, + "non parametric": 37676, + "orthographic features": 38755, + "rare words": 45000, + "develop efficient": 14584, + "efficient learning": 16881, + "computationally intensive": 9876, + "standard training": 52536, + "training provide": 58221, + "provide open": 44108, + "source implementation": 51772, + "experiments diverse": 19419, + "diverse languages": 15705, + "achieve significant": 1191, + "compared previous": 9433, + "previous methods": 42261, + "methods task": 33068, + "extraction using": 20130, + "information various": 26154, + "heuristic rules": 23631, + "rules training": 48396, + "training machine": 58162, + "algorithm called": 2265, + "random forest": 44875, + "similarity features": 51096, + "identification task": 24398, + "task approach": 54908, + "approach presented": 3649, + "used identify": 60206, + "running text": 48406, + "extraction important": 20071, + "applications ranging": 3241, + "summarization semantic": 53899, + "semantic search": 49342, + "document clustering": 15773, + "graph based": 23101, + "large domain": 28874, + "domain training": 16215, + "training corpus": 57962, + "approaches knowledge": 3851, + "online systems": 38388, + "systems remains": 54617, + "remains largely": 46337, + "paper experiment": 39356, + "noun phrase": 37741, + "analyze performance": 2823, + "performance benchmark": 40211, + "benchmark datasets": 6446, + "methods results": 33023, + "results competitive": 47553, + "better strong": 6971, + "strong unsupervised": 53057, + "unsupervised baselines": 59684, + "baselines propose": 6288, + "propose lexical": 43440, + "related event": 45905, + "data different": 12281, + "languages especially": 28658, + "internal structure": 26690, + "phenomena like": 40811, + "conversational systems": 11054, + "systems previous": 54596, + "work applied": 62567, + "learning approaches": 29522, + "acquire new": 1442, + "words approaches": 62367, + "approaches shown": 3919, + "shown promise": 50740, + "issues related": 27103, + "behavior human": 6392, + "human machine": 24205, + "psycholinguistic studies": 44289, + "studies shown": 53301, + "eye movement": 20176, + "acquisition process": 1448, + "previous unsupervised": 42298, + "systems generally": 54512, + "domain vocabulary": 16226, + "different functions": 14940, + "conversation context": 11030, + "context important": 10653, + "address issues": 1769, + "developed new": 14637, + "new approaches": 37134, + "approaches incorporate": 3848, + "approaches context": 3788, + "results shown": 47840, + "contextual information": 10770, + "information significantly": 26086, + "performance propose": 40502, + "novel language": 37850, + "independent approach": 25494, + "approach improving": 3566, + "improve translation": 24935, + "language given": 28092, + "limited number": 30600, + "taking advantage": 54788, + "word order": 62254, + "improve word": 24939, + "poor language": 41137, + "english using": 17897, + "absolute gain": 742, + "points respectively": 41079, + "improvement best": 24990, + "approaches using": 3950, + "using additional": 60552, + "additional data": 1664, + "lexical entailment": 30363, + "proposed strategy": 43904, + "context vectors": 10743, + "problem learning": 42594, + "relations using": 46062, + "using supervised": 60969, + "relation classification": 45967, + "recent state": 45347, + "designed capture": 14311, + "contexts word": 10756, + "represents word": 46820, + "learning training": 29918, + "feature vectors": 20509, + "additionally introduce": 1722, + "introduce approach": 26779, + "approach new": 3612, + "differences similarities": 14828, + "set reference": 50235, + "approaches use": 3948, + "use vector": 60071, + "semantics based": 49399, + "extensive evaluation": 19868, + "evaluation approaches": 18576, + "different datasets": 14889, + "datasets proposed": 13381, + "performs significantly": 40713, + "significantly better": 50939, + "approaches datasets": 3794, + "datasets dataset": 13207, + "dataset significantly": 13087, + "significantly worse": 51017, + "semantic relation": 49326, + "spoken languages": 52363, + "languages world": 28825, + "world research": 62955, + "translation language": 58624, + "pair paper": 39156, + "paper focuses": 39381, + "art chinese": 4232, + "popular approaches": 41156, + "approaches machine": 3867, + "available parallel": 5342, + "explore alternative": 19686, + "pivot language": 40918, + "use english": 59878, + "english arabic": 17775, + "based smt": 6042, + "language chinese": 27988, + "direct translation": 15260, + "objective work": 38108, + "community work": 9277, + "work important": 62682, + "languages given": 28681, + "applications require": 3246, + "pay attention": 39990, + "arabic text": 4005, + "text snippets": 56775, + "string based": 52992, + "approaches standard": 3924, + "topic segmentation": 57428, + "shown useful": 50758, + "useful natural": 60376, + "applications present": 3233, + "labeling tasks": 27796, + "conversations propose": 11061, + "approach extends": 3531, + "recent graph": 45313, + "methods nlp": 32959, + "novel unsupervised": 37946, + "unsupervised models": 59713, + "models exploit": 34998, + "novel graph": 37833, + "supervised model": 54020, + "model combines": 33667, + "topic features": 57406, + "random walk": 44892, + "models respectively": 35451, + "different sources": 15074, + "empirical evaluation": 17323, + "performed best": 40659, + "highly correlated": 23890, + "correlated human": 11510, + "human annotations": 24103, + "learning allows": 29514, + "allows use": 2481, + "use training": 60055, + "data language": 12452, + "level alignment": 30058, + "sentences parallel": 49766, + "corpora work": 11258, + "work explore": 62657, + "explore use": 19748, + "autoencoder based": 5026, + "methods cross": 32805, + "bag words": 5503, + "words representations": 62497, + "sentences languages": 49746, + "languages fact": 28669, + "issues propose": 27099, + "propose compare": 43324, + "compare different": 9335, + "different variations": 15121, + "setting propose": 50343, + "leads significant": 29325, + "empirically investigate": 17363, + "problem cross": 42526, + "given language": 22755, + "generalize different": 22138, + "achieving 10": 1389, + "percentage point": 40052, + "improvements best": 25054, + "reported results": 46454, + "knowledge resources": 27598, + "tasks compared": 55547, + "compared monolingual": 9423, + "like wordnet": 30514, + "bilingual dictionaries": 7106, + "typically provide": 59150, + "structured information": 53156, + "range possible": 44928, + "word paper": 62262, + "improve quality": 24911, + "extraction module": 20084, + "chinese data": 8303, + "respectively study": 47384, + "study focus": 53379, + "different real": 15044, + "field paper": 20765, + "paper represent": 39567, + "goal text": 22903, + "speech tts": 52313, + "particular language": 39850, + "natural sounding": 36465, + "like hindi": 30474, + "task identify": 55121, + "quality output": 44558, + "speech paper": 52275, + "increase efficiency": 25413, + "text furthermore": 56589, + "comparative study": 9323, + "developing countries": 14650, + "learning classification": 29555, + "identify suitable": 24447, + "text demonstrate": 56530, + "term frequency": 56236, + "achieve comparable": 1121, + "comparable performance": 9301, + "performance number": 40462, + "research finally": 47039, + "finally demonstrate": 20849, + "order increase": 38628, + "increase performance": 25420, + "performance accuracy": 40178, + "accuracy present": 1029, + "new algorithm": 37125, + "algorithm model": 2285, + "grammatical rules": 23076, + "despite fact": 14361, + "input data": 26263, + "success learning": 53706, + "learning model": 29743, + "use data": 59859, + "common form": 9176, + "distributions given": 15676, + "representation used": 46600, + "used capture": 60112, + "capture meaning": 7697, + "language utterances": 28572, + "recently new": 45445, + "gradually increasing": 23016, + "increasing number": 25457, + "little effort": 30875, + "based analysis": 5565, + "usage word": 59809, + "distributions word": 15678, + "word class": 62128, + "using twitter": 61004, + "tested using": 56400, + "usage patterns": 59804, + "used create": 60132, + "short answer": 50550, + "task topic": 55439, + "modeling approaches": 34560, + "approaches applied": 3764, + "performing systems": 40689, + "leverage lexical": 30276, + "level syntactic": 30219, + "syntactic information": 54304, + "score given": 48849, + "nlp community": 37474, + "largest corpus": 29093, + "overview methods": 39113, + "task using": 55463, + "data explore": 12348, + "explore extent": 19706, + "features contribute": 20548, + "scoring task": 48940, + "task way": 55468, + "manual effort": 31737, + "challenging task": 8143, + "constraints model": 10375, + "performance hand": 40371, + "hard constraints": 23441, + "popular technique": 41192, + "prediction given": 41711, + "given existing": 22742, + "existing algorithms": 19022, + "perform prediction": 40130, + "performing inference": 40679, + "inference given": 25660, + "automatically generate": 5174, + "optimization problem": 38553, + "problem training": 42678, + "training allows": 57931, + "obtain substantial": 38197, + "substantial gains": 53620, + "gains accuracy": 21931, + "accuracy new": 1015, + "new challenging": 37148, + "extraction dataset": 20055, + "al 2012": 2234, + "frequency inverse": 21674, + "inverse document": 26927, + "document frequency": 15795, + "frequency tf": 21677, + "demonstrate use": 13993, + "relational data": 46003, + "useful identifying": 60367, + "identifying relevant": 24464, + "relevant words": 46245, + "words terms": 62528, + "parser based": 39759, + "syntax semantics": 54353, + "words word": 62547, + "vectors generated": 61486, + "individual domains": 25566, + "domains approach": 16235, + "approach general": 3542, + "adapted new": 1554, + "new domains": 37176, + "domains propose": 16286, + "texts compared": 56865, + "texts experiments": 56879, + "experiments methods": 19466, + "reliably identify": 46256, + "identify different": 24420, + "types texts": 59122, + "hierarchical structures": 23694, + "used methods": 60236, + "methods information": 32904, + "retrieval natural": 47959, + "explore application": 19687, + "problems using": 42737, + "present novel": 41966, + "metric using": 33129, + "text matching": 56656, + "using arabic": 60564, + "test case": 56334, + "different sentences": 15064, + "classification sentence": 8542, + "important information": 24734, + "extraction task": 20118, + "task applications": 54905, + "systems multi": 54565, + "methods work": 33104, + "work explored": 62660, + "new multi": 37262, + "event types": 18789, + "f1 average": 20181, + "problem low": 42598, + "results adding": 47490, + "adding new": 1596, + "single label": 51310, + "label multi": 27715, + "relationship words": 46075, + "researchers developed": 47151, + "language tools": 28532, + "application area": 3160, + "area natural": 4142, + "language structure": 28506, + "play key": 40974, + "key role": 27332, + "way solve": 61830, + "sentence existing": 49554, + "approach suited": 3712, + "huge data": 24072, + "data machine": 12472, + "larger data": 29072, + "data better": 12189, + "better accuracy": 6844, + "training proposed": 58220, + "approach takes": 3716, + "sentence input": 49571, + "dependency relation": 14135, + "tree like": 58748, + "structure using": 53147, + "using hybrid": 60733, + "hybrid approach": 24309, + "proposed tool": 43915, + "quality existing": 44519, + "huge number": 24074, + "contain rich": 10469, + "order analyze": 38594, + "time space": 57216, + "space paper": 51879, + "massive data": 31883, + "order verify": 38661, + "verify effectiveness": 61535, + "manually annotate": 31755, + "set conduct": 50123, + "results macro": 47709, + "plays crucial": 40993, + "development machine": 14683, + "systems order": 54576, + "output human": 38977, + "human translation": 24249, + "various automatic": 61306, + "automatic metrics": 5109, + "different metrics": 14991, + "metrics used": 33206, + "free word": 21647, + "adjectives adverbs": 1846, + "work adopt": 62560, + "complex network": 9641, + "perform comparative": 40077, + "comparative analysis": 9319, + "english polish": 17857, + "observed real": 38146, + "problem mapping": 42606, + "english present": 17859, + "aim research": 2159, + "research make": 47071, + "common semantic": 9196, + "abstract syntax": 763, + "automatic extraction": 5091, + "annotated sentences": 2915, + "provide unified": 44147, + "method comparing": 32426, + "semantic syntactic": 49358, + "represent sentences": 46480, + "convolutional architecture": 11101, + "convolutional neural": 11112, + "max pooling": 31950, + "pooling operation": 41128, + "varying length": 61431, + "short long": 50559, + "long range": 31018, + "range relations": 44931, + "binary multi": 7151, + "distant supervision": 15557, + "network achieves": 36693, + "achieves excellent": 1323, + "excellent performance": 18954, + "performance tasks": 40594, + "strongest baseline": 53066, + "step text": 52830, + "main purpose": 31454, + "different grammatical": 14944, + "forms word": 21377, + "noun adjective": 37740, + "uses information": 60514, + "metrics measuring": 33180, + "applications based": 3185, + "corpus training": 11448, + "domains require": 16289, + "size corpus": 51378, + "newly added": 37369, + "higher correlation": 23817, + "domains different": 16247, + "method gives": 32519, + "metrics different": 33160, + "different data": 14885, + "used metric": 60237, + "metrics evaluation": 33163, + "plays vital": 41007, + "manually automatically": 31765, + "manual evaluation": 31739, + "evaluation time": 18740, + "use automatic": 59829, + "hindi english": 23938, + "english hindi": 17820, + "data provided": 12575, + "provided input": 44164, + "output using": 39008, + "using various": 61016, + "metrics like": 33177, + "like bleu": 30465, + "bleu meteor": 7205, + "results human": 47663, + "human ranking": 24220, + "able answer": 674, + "long standing": 31030, + "standing goal": 52551, + "promising progress": 43174, + "progress recently": 43115, + "recently achieved": 45401, + "logical forms": 30984, + "approaches effective": 3803, + "cost large": 11587, + "large amounts": 28832, + "amounts human": 2550, + "human labeled": 24186, + "paper instead": 39398, + "learning map": 29721, + "feature representations": 20500, + "method trained": 32686, + "trained new": 57830, + "optimization procedure": 38554, + "stochastic gradient": 52855, + "gradient descent": 23006, + "followed fine": 21257, + "step using": 52835, + "using weak": 61019, + "weak supervision": 61850, + "supervision provided": 54090, + "empirically demonstrate": 17358, + "model capture": 33647, + "major improvements": 31511, + "method able": 32354, + "trained similar": 57870, + "weakly labeled": 61857, + "data present": 12556, + "distributional hypothesis": 15663, + "multilingual data": 36076, + "space embeddings": 51857, + "embeddings models": 17176, + "models leverage": 35182, + "embeddings semantically": 17209, + "semantically equivalent": 49386, + "models rely": 35430, + "rely word": 46305, + "number diverse": 37995, + "extend approach": 19819, + "approach learn": 3585, + "representations document": 46642, + "models cross": 34874, + "tasks outperforming": 55779, + "outperforming prior": 38859, + "prior state": 42413, + "qualitative analysis": 44471, + "analysis study": 2770, + "method leverage": 32565, + "chinese character": 8299, + "character embedding": 8200, + "similar semantic": 51063, + "existing chinese": 19045, + "word character": 62124, + "paper gap": 39386, + "learning continuous": 29571, + "continuous representation": 10850, + "neural architecture": 36931, + "architecture effectively": 4044, + "effectively learn": 16746, + "chinese word": 8326, + "existing embedding": 19062, + "embedding learning": 17036, + "official language": 38308, + "languages spoken": 28794, + "texts especially": 56877, + "supervised information": 53990, + "information form": 25884, + "form word": 21341, + "word clusters": 62130, + "recently neural": 45441, + "network based": 36703, + "models explored": 35002, + "generate highly": 22210, + "highly informative": 23902, + "words known": 62442, + "known word": 27670, + "embeddings paper": 17186, + "new form": 37208, + "leverage information": 30270, + "information relevant": 26051, + "representations use": 46781, + "use neural": 59960, + "neural word": 37111, + "embeddings achieve": 17077, + "achieve state": 1203, + "achieves f1": 1325, + "f1 score": 20194, + "score 90": 48825, + "conll 2003": 10168, + "better previous": 6945, + "public data": 44311, + "foreign language": 21297, + "language properties": 28447, + "language work": 28583, + "work provide": 62795, + "provide empirical": 44060, + "empirical evidence": 17326, + "strong correlation": 53022, + "structural features": 53078, + "english second": 17870, + "native languages": 36403, + "languages leverage": 28710, + "structure directly": 53098, + "text perform": 56696, + "target languages": 54831, + "method achieves": 32360, + "prediction task": 41742, + "task result": 55344, + "highly competitive": 23883, + "methods rely": 33013, + "develop model": 14598, + "level model": 30162, + "space using": 51903, + "tokens given": 57326, + "proposed recently": 43886, + "used combination": 60117, + "extracted relations": 20018, + "using set": 60933, + "application method": 3168, + "model simultaneously": 34389, + "simultaneously learns": 51273, + "alignments model": 2391, + "model captures": 33648, + "semantic context": 49260, + "context prior": 10693, + "advantage approach": 1938, + "approach demonstrated": 3481, + "outperform prior": 38815, + "published state": 44373, + "based systems": 6078, + "hand paper": 23395, + "semi automatic": 49447, + "construction method": 10427, + "corpora domain": 11195, + "text annotation": 56433, + "text fragments": 56586, + "word occurrences": 62253, + "latent semantics": 29135, + "computed using": 9884, + "different large": 14972, + "scale text": 48630, + "hierarchical clustering": 23663, + "identify common": 24417, + "structures text": 53196, + "large size": 29011, + "quite limited": 44831, + "limited coverage": 30576, + "coverage paper": 11651, + "approach extracting": 3534, + "high coverage": 23723, + "crowd sourced": 11881, + "providing new": 44251, + "new state": 37323, + "art performances": 4354, + "unsupervised settings": 59732, + "tasks using": 55954, + "approach experiments": 3527, + "media data": 32165, + "work carried": 62594, + "data base": 12184, + "speech text": 52308, + "works studied": 62908, + "used training": 60336, + "training procedure": 58215, + "general problem": 22083, + "problem approach": 42505, + "local optimum": 30948, + "search algorithm": 48962, + "independent feature": 25498, + "new direction": 37172, + "basic idea": 6329, + "quite simple": 44832, + "set results": 50242, + "results better": 47526, + "language knowledge": 28125, + "knowledge paper": 27564, + "present comparison": 41868, + "occurrence networks": 38275, + "occurring sentence": 38281, + "terms average": 56269, + "shortest path": 50592, + "path length": 39948, + "furthermore perform": 21831, + "perform analysis": 40068, + "based results": 5997, + "results point": 47766, + "used developing": 60148, + "challenge developing": 7978, + "increasing accuracy": 25442, + "accuracy efficiency": 966, + "efficiency proposed": 16853, + "accurate efficient": 1077, + "systems question": 54609, + "question answer": 44685, + "community members": 9267, + "largely unknown": 29066, + "factors like": 20311, + "present case": 41861, + "extract high": 19975, + "model predict": 34220, + "significantly improving": 50980, + "research psychology": 47105, + "conclude future": 9970, + "open problems": 38440, + "problems paper": 42718, + "texts complex": 56866, + "text level": 56649, + "level experiments": 30115, + "text number": 56679, + "number words": 38056, + "obtained results": 38221, + "results showed": 47837, + "standard approach": 52462, + "texts results": 56919, + "fixed length": 21076, + "features bag": 20529, + "despite popularity": 14374, + "words features": 62418, + "ordering words": 38667, + "paragraph vector": 39637, + "algorithm learns": 2283, + "variable length": 61222, + "texts sentences": 56923, + "sentences paragraphs": 49765, + "represents document": 46817, + "dense vector": 14084, + "trained predict": 57841, + "predict words": 41661, + "words document": 62401, + "words models": 62460, + "models empirical": 34953, + "paragraph vectors": 39638, + "models techniques": 35588, + "techniques text": 56143, + "representations finally": 46669, + "achieve new": 1174, + "classification sentiment": 8543, + "analysis tasks": 2775, + "scalable method": 48548, + "method integrating": 32548, + "based probabilistic": 5954, + "model approach": 33572, + "evaluated context": 18525, + "efficient implementation": 16876, + "results range": 47796, + "range languages": 44921, + "languages demonstrate": 28635, + "representations perform": 46735, + "perform word": 40160, + "similarity tasks": 51125, + "lead substantial": 29277, + "rich languages": 48107, + "languages large": 28707, + "large vocabularies": 29045, + "models obtain": 35271, + "obtain improvements": 38178, + "improvements bleu": 25055, + "relative baseline": 46089, + "baseline using": 6222, + "using gram": 60713, + "models present": 35346, + "terms categories": 56274, + "using measure": 60795, + "wikipedia corpus": 62046, + "directed graph": 15267, + "provides unique": 44231, + "apply method": 3333, + "corpus evaluate": 11331, + "increase 10": 25403, + "10 compared": 37, + "compared standard": 9456, + "propose unsupervised": 43691, + "unsupervised method": 59710, + "available form": 5294, + "based networks": 5900, + "different time": 15100, + "time points": 57193, + "obtain word": 38199, + "conduct thorough": 10067, + "thorough evaluation": 57059, + "evaluation proposed": 18684, + "proposed methodology": 43834, + "evaluation indicates": 18629, + "correctly identify": 11492, + "approach applied": 3419, + "like word": 30512, + "article investigate": 4453, + "network structure": 36808, + "window size": 62065, + "point increase": 41046, + "combinatorial optimization": 9057, + "recently applied": 45406, + "method practical": 32616, + "nlp literature": 37494, + "problems machine": 42711, + "inference models": 25671, + "models significantly": 35506, + "graphical models": 23184, + "models chinese": 34812, + "phonetic information": 40827, + "enhance text": 17925, + "text model": 56667, + "model obtain": 34137, + "obtain better": 38162, + "standard nlp": 52514, + "mining methods": 33318, + "methods evaluate": 32844, + "linear svm": 30672, + "non parallel": 37674, + "tasks article": 55509, + "transductive learning": 58347, + "corpus method": 11379, + "method requires": 32639, + "requires small": 46951, + "small labeled": 51477, + "labeled corpus": 27737, + "corpus large": 11369, + "large unlabeled": 29038, + "unlabeled corpus": 59563, + "corpus build": 11289, + "build high": 7404, + "performance classifier": 40235, + "corpus experimental": 11337, + "results combining": 47538, + "method effectively": 32472, + "performance machine": 40426, + "related information": 45911, + "project aims": 43132, + "resourced language": 47288, + "goal project": 22897, + "parts speech": 39910, + "model languages": 34039, + "resourced languages": 47289, + "terms based": 56270, + "languages lack": 28702, + "able automatically": 677, + "quality produced": 44565, + "texts based": 56861, + "manually written": 31789, + "translated texts": 58561, + "distinguish types": 15605, + "texts similar": 56926, + "language quality": 28454, + "professional translators": 43061, + "limited time": 30626, + "available present": 5346, + "fluent natural": 21131, + "domain experts": 16068, + "aim generate": 2148, + "generate fluent": 22202, + "fluent coherent": 21129, + "multi sentence": 36005, + "texts multiple": 56904, + "multiple languages": 36236, + "domain dependent": 16045, + "use stage": 60025, + "resources available": 47292, + "produces significantly": 43034, + "resources created": 47297, + "multiple natural": 36253, + "languages important": 28690, + "applications data": 3194, + "heterogeneous data": 23617, + "data sources": 12678, + "factors including": 20309, + "linguistic characteristics": 30753, + "characteristics paper": 8242, + "language languages": 28131, + "framework uses": 21621, + "based new": 5909, + "proposed framework": 43778, + "case studies": 7796, + "substantial improvements": 53622, + "main components": 31427, + "constraints used": 10380, + "better representation": 6953, + "yield better": 63090, + "results present": 47774, + "novel framework": 37826, + "demonstrate capabilities": 13877, + "english korean": 17829, + "korean language": 27675, + "specific prior": 52129, + "knowledge training": 27632, + "model supports": 34432, + "parsing generation": 39781, + "generation present": 22521, + "human evaluations": 24158, + "limited domain": 30582, + "explore various": 19753, + "using task": 60978, + "task definition": 54998, + "kernel based": 27289, + "computing similarity": 9906, + "learning task": 29904, + "task semantic": 55352, + "semantic knowledge": 49291, + "knowledge experiments": 27473, + "experiments suggest": 19536, + "results various": 47903, + "outperforming baselines": 38847, + "baselines large": 6275, + "comparable current": 9294, + "results semantic": 47821, + "task understanding": 55455, + "understanding meaning": 59363, + "individual word": 25587, + "meaning word": 32020, + "approach semantics": 3682, + "semantics word": 49420, + "word represented": 62291, + "context vector": 10742, + "generation problem": 22526, + "given context": 22729, + "000 000": 2, + "potential solutions": 41408, + "algorithm generates": 2278, + "time paper": 57187, + "novel neural": 37882, + "network model": 36765, + "rnn encoder": 48191, + "encoder decoder": 17495, + "recurrent neural": 45622, + "networks rnn": 36907, + "length vector": 30037, + "vector representation": 61461, + "decoder proposed": 13613, + "proposed model": 43842, + "model jointly": 34024, + "jointly trained": 27222, + "trained maximize": 57786, + "conditional probability": 10002, + "target sequence": 54840, + "given source": 22787, + "source sequence": 51797, + "additional feature": 1668, + "log linear": 30973, + "linear model": 30660, + "model qualitatively": 34263, + "semantically syntactically": 49395, + "translation applications": 58577, + "knowledge language": 27540, + "knowledge encoded": 27458, + "domain models": 16114, + "models approach": 34721, + "example based": 18876, + "approach topic": 3722, + "topic paper": 57421, + "paper paper": 39438, + "improve accuracy": 24823, + "accuracy translation": 1066, + "approach improve": 3561, + "select appropriate": 49099, + "10 million": 46, + "computational resources": 9858, + "focus natural": 21185, + "svm based": 54232, + "method experiments": 32497, + "results results": 47814, + "dependency features": 14120, + "features related": 20653, + "effects different": 16825, + "recent developments": 45303, + "given rise": 22778, + "build knowledge": 7406, + "attempts learn": 4699, + "eye tracking": 20177, + "user study": 60451, + "text source": 56778, + "source domain": 51765, + "difficult task": 15187, + "task key": 55152, + "key problems": 27330, + "approach automatically": 3426, + "proof concept": 43236, + "use cases": 59839, + "extraction knowledge": 20074, + "framework called": 21467, + "framework used": 21620, + "language documents": 28034, + "extracted knowledge": 20014, + "text generated": 56591, + "approach brings": 3436, + "text considered": 56507, + "main challenges": 31426, + "paper provides": 39558, + "translation multilingual": 58640, + "different approach": 14839, + "non linguistic": 37660, + "digital world": 15216, + "automated machine": 5048, + "long way": 31048, + "languages hindi": 28687, + "translation approach": 58578, + "language research": 28473, + "shows significant": 50801, + "direction paper": 15272, + "paper methods": 39426, + "specifically look": 52214, + "methods detect": 32819, + "texts propose": 56913, + "propose methods": 43458, + "methods handle": 32883, + "specific use": 52168, + "analysis results": 2743, + "results experiments": 47627, + "evaluate impact": 18463, + "process paper": 42814, + "major problem": 31519, + "language makes": 28147, + "instead just": 26454, + "ongoing work": 38351, + "art technique": 4424, + "extract information": 19978, + "information texts": 26118, + "texts provide": 56915, + "provide different": 44053, + "practical solution": 41474, + "social sciences": 51604, + "methods tools": 33075, + "tools automatically": 57377, + "automatically extract": 5168, + "information natural": 25983, + "texts language": 56897, + "fall short": 20374, + "fail consider": 20331, + "mixed language": 33406, + "code switching": 8861, + "monolingual english": 35801, + "english speakers": 17881, + "given new": 22764, + "new class": 37150, + "model yields": 34551, + "validate model": 61181, + "present evidence": 41906, + "english english": 17800, + "task automatic": 54922, + "classification text": 8573, + "predefined categories": 41624, + "problem text": 42674, + "studied different": 53222, + "different communities": 14869, + "processing data": 42865, + "retrieval text": 47974, + "classification important": 8479, + "information management": 25965, + "tasks like": 55723, + "like topic": 30509, + "topic identification": 57409, + "language identification": 28098, + "performance text": 40598, + "patterns used": 39977, + "improvement text": 25034, + "extraction step": 20114, + "structure learning": 53116, + "work nlp": 62736, + "information phrase": 26007, + "answering information": 3075, + "detailed comparison": 14417, + "emotion recognition": 17293, + "speech features": 52263, + "used speech": 60310, + "features work": 20699, + "work evaluate": 62648, + "frame level": 21439, + "level feature": 30119, + "feature extraction": 20485, + "features paper": 20637, + "ongoing research": 38350, + "morphology syntax": 35851, + "20 languages": 228, + "knowledge particular": 27567, + "word units": 62328, + "semantic level": 49293, + "demonstrate approach": 13865, + "rich morphology": 48113, + "taking inspiration": 54789, + "takes advantage": 54778, + "inflected forms": 25718, + "available http": 5304, + "multilingual corpus": 36073, + "corpus study": 11439, + "study paper": 53426, + "presents overview": 42099, + "language developed": 28027, + "level information": 30133, + "information fusion": 25887, + "including lexical": 25267, + "range linguistic": 44922, + "semantic structures": 49357, + "model hmm": 33962, + "language natural": 28353, + "processing task": 42945, + "task speech": 55406, + "grammatical features": 23071, + "analysis language": 2687, + "statistical based": 52738, + "probability given": 42476, + "corpus linguistic": 11373, + "automatically extracted": 5170, + "90 accuracy": 552, + "based knowledge": 5796, + "knowledge graph": 27496, + "graph nodes": 23155, + "nodes represent": 37593, + "various real": 61382, + "given data": 22733, + "entity linking": 18115, + "task mapping": 55206, + "entities extracted": 18051, + "present knowledge": 41934, + "inherently difficult": 26205, + "unstructured nature": 59670, + "limited context": 30575, + "multiple entities": 36210, + "task report": 55336, + "art systems": 4420, + "entity disambiguation": 18102, + "inference approach": 25642, + "approach compare": 3454, + "base population": 5548, + "text analytics": 56431, + "experimental setup": 19326, + "model compute": 33691, + "probability distribution": 42473, + "generated words": 22335, + "representation methods": 46551, + "application areas": 3161, + "network systems": 36810, + "use nlp": 59964, + "analysis systems": 2773, + "extract classify": 19969, + "using small": 60947, + "word embedding": 62142, + "embedding method": 17040, + "supervised tasks": 54056, + "maps words": 31812, + "words occurring": 62471, + "similar contexts": 51034, + "embeddings including": 17149, + "including recent": 25292, + "framework multilingual": 21567, + "parsing results": 39795, + "embeddings tasks": 17225, + "tasks investigate": 55696, + "results multilingual": 47732, + "embeddings languages": 17159, + "languages available": 28604, + "available public": 5351, + "public use": 44329, + "propose approach": 43296, + "language family": 28067, + "parameters language": 39703, + "compared classical": 9391, + "based predictive": 5947, + "non monotonic": 37663, + "order support": 38654, + "auxiliary information": 5232, + "multiple sets": 36282, + "different possible": 15027, + "addition new": 1628, + "new content": 37155, + "tv shows": 59000, + "ner systems": 36682, + "systems need": 54569, + "models new": 35256, + "preliminary study": 41808, + "focus entity": 21160, + "entity type": 18152, + "collected twitter": 8969, + "evaluation sets": 18713, + "entities corresponding": 18042, + "final model": 20823, + "model shows": 34375, + "strong evidence": 53028, + "set entities": 50144, + "entities training": 18086, + "biomedical text": 7177, + "text enables": 56550, + "language process": 28392, + "attempt address": 4678, + "address challenge": 1742, + "large annotated": 28843, + "systems training": 54657, + "expert annotators": 19573, + "periods time": 40727, + "time recent": 57203, + "recent studies": 45349, + "turk amt": 58981, + "generate high": 22206, + "quality annotations": 44490, + "pubmed abstracts": 44380, + "based simple": 6037, + "increases number": 25437, + "quality results": 44576, + "valuable tool": 61206, + "shared knowledge": 50474, + "widely adopted": 61991, + "using monte": 60813, + "monte carlo": 35826, + "network network": 36772, + "widespread adoption": 62031, + "results shed": 47832, + "language change": 27986, + "syntax trees": 54356, + "input tokens": 26350, + "syntax tree": 54355, + "form set": 21334, + "infer new": 25637, + "new knowledge": 37230, + "knowledge present": 27575, + "distributional semantic": 15669, + "models improves": 35115, + "improves existing": 25128, + "important ways": 24791, + "gold standards": 22921, + "development models": 14687, + "adjective noun": 1844, + "noun verb": 37743, + "performance models": 40440, + "unlike existing": 59594, + "standard evaluations": 52492, + "automatic approaches": 5071, + "art models": 4293, + "models perform": 35309, + "future improvements": 21876, + "representation learning": 46536, + "learning architectures": 29524, + "architectures work": 4130, + "present application": 41846, + "proposed unsupervised": 43920, + "keyword extraction": 27350, + "extraction algorithm": 20047, + "language domain": 28036, + "method language": 32557, + "set non": 50203, + "heavily depends": 23530, + "automatic approach": 5070, + "statistical properties": 52761, + "short text": 50571, + "text messages": 56657, + "messages social": 32324, + "communication channel": 9247, + "additional challenges": 1656, + "degrade performance": 13806, + "performance traditional": 40603, + "using real": 60895, + "set large": 50181, + "media corpus": 32164, + "analyze effectiveness": 2813, + "effectiveness machine": 16788, + "order detect": 38608, + "explored different": 19757, + "detection using": 14539, + "using text": 60988, + "text normalization": 56678, + "validity proposed": 61200, + "baseline approaches": 6155, + "approaches provide": 3904, + "provide comparative": 44029, + "study neural": 53418, + "based occurrence": 5924, + "different semantic": 15061, + "semantic spaces": 49353, + "compositional models": 9746, + "models test": 35593, + "approaches tasks": 3936, + "tasks involving": 55698, + "additionally evaluate": 1719, + "spaces using": 51913, + "larger scale": 29087, + "paraphrase detection": 39739, + "dialogue act": 14764, + "provides method": 44213, + "method improving": 32536, + "contrast previous": 10881, + "relatively simple": 46128, + "models work": 35687, + "work use": 62852, + "robust model": 48255, + "linear regression": 30667, + "approach model": 3602, + "model independent": 33997, + "texts using": 56941, + "using non": 60843, + "non standard": 37683, + "standard words": 52542, + "features non": 20631, + "scientific text": 48771, + "conducted different": 10080, + "used features": 60188, + "features second": 20660, + "standard deviation": 52485, + "features representation": 20656, + "algorithms used": 2345, + "experiments best": 19367, + "results achieved": 47486, + "using feature": 60692, + "feature set": 20504, + "accuracy 87": 919, + "features highly": 20597, + "experiments neural": 19479, + "neural machine": 36967, + "unlike traditional": 59611, + "traditional statistical": 57547, + "translation neural": 58644, + "translation aims": 58576, + "aims building": 2179, + "building single": 7470, + "single neural": 51325, + "network jointly": 36752, + "translation performance": 58656, + "models proposed": 35373, + "encoder decoders": 17509, + "encoder encodes": 17512, + "source sentence": 51795, + "decoder generates": 13596, + "use fixed": 59893, + "decoder architecture": 13586, + "architecture propose": 4079, + "propose extend": 43381, + "allowing model": 2446, + "model automatically": 33591, + "parts source": 39908, + "predicting target": 41683, + "approach achieve": 3390, + "performance comparable": 40246, + "existing state": 19146, + "based task": 6083, + "task english": 55049, + "recently introduced": 45434, + "systems suffer": 54644, + "suffer significant": 53781, + "significant drop": 50864, + "long sentences": 31024, + "sentences unlike": 49800, + "way address": 61790, + "form final": 21320, + "relatively new": 46125, + "purely neural": 44397, + "networks neural": 36881, + "translation models": 58633, + "decoder encoder": 13590, + "correct translation": 11477, + "representation paper": 46564, + "translation using": 58701, + "models rnn": 35463, + "newly proposed": 37379, + "network neural": 36773, + "relatively short": 46127, + "performance degrades": 40283, + "length sentence": 30034, + "furthermore proposed": 21836, + "convolutional network": 11107, + "network learns": 36759, + "report describes": 46430, + "development nlp": 14693, + "easy access": 16557, + "common nlp": 9187, + "nlp data": 37477, + "semantic graph": 49281, + "triples extracted": 58805, + "semantic graphs": 49282, + "using information": 60736, + "translating natural": 58566, + "text describing": 56531, + "predictive power": 41778, + "related posts": 45925, + "posts twitter": 41375, + "geographical location": 22649, + "tasks language": 55708, + "significantly outperform": 50988, + "majority class": 31528, + "performance improved": 40382, + "complex natural": 9638, + "topic modeling": 57415, + "textual features": 56965, + "design implement": 14286, + "semantics preserving": 49412, + "languages make": 28723, + "use unlabeled": 60064, + "problem solved": 42661, + "present unique": 42049, + "algorithms based": 2322, + "based extensive": 5721, + "extensive empirical": 19862, + "empirical analysis": 17319, + "text input": 56629, + "using pre": 60865, + "size required": 51398, + "algorithms work": 2347, + "best possible": 6801, + "text problem": 56710, + "simple word": 51227, + "presents algorithm": 42069, + "processing algorithm": 42849, + "algorithm perform": 2291, + "perform comprehensive": 40081, + "growing concern": 23293, + "approaches mainly": 3869, + "mainly focus": 31472, + "focus developing": 21154, + "developing automatic": 14648, + "methods help": 32886, + "help users": 23593, + "work used": 62853, + "finally proposed": 20878, + "voting scheme": 61743, + "different approaches": 14840, + "approaches improve": 3842, + "improve classification": 24829, + "classification performance": 8514, + "000 tweets": 13, + "users use": 60486, + "typically contain": 59139, + "nlp tools": 37558, + "used english": 60164, + "order make": 38637, + "make sense": 31595, + "individual tokens": 25584, + "data presents": 12557, + "presents findings": 42085, + "techniques applied": 56059, + "data twitter": 12746, + "approach effective": 3500, + "effective tool": 16705, + "content important": 10529, + "important source": 24773, + "data given": 12391, + "language patterns": 28377, + "specific domain": 52072, + "domain data": 16038, + "biomedical texts": 7178, + "reported performance": 46453, + "performance terms": 40595, + "terms precision": 56309, + "recall score": 45246, + "dependency parse": 14125, + "investigation reveals": 27006, + "play vital": 40980, + "degraded performance": 13808, + "supervised classification": 53967, + "classification models": 8496, + "models learned": 35178, + "labeled unlabeled": 27770, + "classification natural": 8506, + "data expensive": 12339, + "expensive difficult": 19208, + "human experts": 24164, + "wide spread": 61977, + "classification study": 8558, + "explore idea": 19709, + "past work": 39937, + "political science": 41112, + "model texts": 34461, + "explicitly model": 19642, + "demonstrate benefits": 13875, + "approach improved": 3563, + "prediction ability": 41689, + "ability perform": 630, + "corpora paper": 11229, + "new tool": 37345, + "analysis tool": 2780, + "corpus finally": 11342, + "study linguistic": 53407, + "linguistic variation": 30810, + "data common": 12224, + "common nouns": 9189, + "tree structures": 58760, + "method creating": 32446, + "applied language": 3276, + "dictionary used": 14808, + "used define": 60143, + "systems text": 54653, + "extraction accuracy": 20043, + "data use": 12758, + "explores use": 19775, + "use machine": 59941, + "approaches specifically": 3923, + "decision tree": 13570, + "nearest neighbour": 36524, + "na ive": 36358, + "documents task": 15917, + "task automatically": 54923, + "predefined set": 41625, + "methods applied": 32752, + "applied english": 3272, + "studies conducted": 53252, + "bangla language": 5524, + "documents order": 15900, + "methods produce": 32994, + "satisfactory performance": 48524, + "using variety": 61015, + "text resources": 56743, + "data easy": 12308, + "words appear": 62365, + "appear frequently": 3138, + "brown corpus": 7371, + "natural logic": 36460, + "tasks remains": 55851, + "remains open": 46342, + "open question": 38441, + "question possible": 44744, + "possible train": 41338, + "address question": 1795, + "question using": 44757, + "using neural": 60832, + "models learning": 35179, + "learning embeddings": 29614, + "tensor networks": 56224, + "networks experiments": 36853, + "experiments evaluate": 19430, + "models ability": 34650, + "simulated data": 51260, + "positive results": 41294, + "results promising": 47779, + "promising future": 43166, + "representations applied": 46618, + "dependency syntax": 14141, + "text used": 56833, + "better suited": 6972, + "sets based": 50283, + "selection criteria": 49136, + "annotation scheme": 2968, + "scheme based": 48728, + "semi automatically": 49449, + "structure based": 53091, + "set machine": 50189, + "improved translation": 24969, + "words edges": 62404, + "sentence new": 49606, + "dependencies work": 14115, + "work paves": 62745, + "paves way": 39986, + "translation data": 58595, + "main challenge": 31425, + "active learning": 1473, + "challenge test": 8019, + "applying natural": 3370, + "information access": 25749, + "news text": 37419, + "number new": 38022, + "new challenges": 37147, + "context dependent": 10610, + "dynamic nature": 16488, + "tagging named": 54744, + "recognition entity": 45501, + "work new": 62735, + "dataset conduct": 12857, + "conduct empirical": 10038, + "number state": 38038, + "improve state": 24927, + "manual automatic": 31733, + "discuss advantages": 15458, + "paradigm task": 39630, + "translation nmt": 58645, + "translation shown": 58676, + "shown promising": 50741, + "traditional approaches": 57511, + "nmt systems": 37577, + "relatively small": 46129, + "vocabulary oov": 61707, + "oov word": 38406, + "effective technique": 16703, + "problem train": 42677, + "train nmt": 57621, + "data augmented": 12169, + "output word": 39010, + "word alignment": 62109, + "word target": 62319, + "target sentence": 54838, + "corresponding word": 11561, + "word source": 62312, + "sentence information": 49570, + "word using": 62332, + "using dictionary": 60652, + "task method": 55211, + "method provides": 32629, + "improvement bleu": 24991, + "does use": 15981, + "use technique": 60042, + "best result": 6814, + "result achieved": 47433, + "task present": 55287, + "inference algorithms": 25641, + "significantly reducing": 51013, + "reducing computation": 45704, + "features sequence": 20664, + "high confidence": 23717, + "small fraction": 51473, + "parameter estimation": 39669, + "present experiments": 41911, + "run time": 48403, + "errors introduced": 18242, + "automating process": 5207, + "methods investigate": 32908, + "combining methods": 9115, + "methods using": 33095, + "using random": 60891, + "random forests": 44879, + "performance supervised": 40589, + "typically require": 59154, + "require training": 46894, + "data investigate": 12441, + "methods unsupervised": 33092, + "requiring large": 46963, + "amounts training": 2560, + "data experiments": 12346, + "experiments reveal": 19515, + "small data": 51469, + "data sufficient": 12707, + "information sources": 26097, + "building domain": 7442, + "labor intensive": 27863, + "process study": 42831, + "study present": 53436, + "present semi": 42005, + "approach building": 3438, + "wikipedia articles": 62043, + "wide coverage": 61962, + "domain ontology": 16123, + "media texts": 32184, + "texts significant": 56925, + "significant information": 50894, + "areas including": 4153, + "unfortunately existing": 59452, + "existing solutions": 19144, + "tasks named": 55757, + "texts usually": 56942, + "perform poorly": 40128, + "tweets using": 59026, + "annotated data": 2881, + "sets experiments": 50293, + "better fit": 6893, + "recognition performance": 45524, + "different settings": 15068, + "task given": 55107, + "paper suggest": 39583, + "suggest method": 53823, + "parsing based": 39773, + "based supervised": 6068, + "learning used": 29928, + "algorithm select": 2300, + "sentence furthermore": 49562, + "results encouraging": 47606, + "approach automatic": 3425, + "automatic detection": 5078, + "develop simple": 14612, + "framework capable": 21468, + "analyzing texts": 2847, + "language embedded": 28041, + "samples used": 48493, + "related language": 45914, + "word2vec model": 62350, + "mikolov et": 33240, + "attracted great": 4878, + "great attention": 23199, + "attention recent": 4816, + "words learned": 62446, + "word2vec models": 62351, + "semantic meanings": 49299, + "useful various": 60397, + "various nlp": 61372, + "similar techniques": 51072, + "process word": 42840, + "embedding models": 17045, + "provides detailed": 44192, + "models including": 35118, + "continuous bag": 10841, + "skip gram": 51419, + "optimization techniques": 38559, + "techniques including": 56100, + "including hierarchical": 25260, + "negative sampling": 36634, + "understanding model": 59364, + "lot research": 31119, + "words vector": 62543, + "distributional approaches": 15662, + "number tasks": 38043, + "language usually": 28571, + "level meaning": 30159, + "fundamental task": 21792, + "task nlp": 55240, + "methods learning": 32924, + "linguistic units": 30807, + "neural models": 36973, + "models suitable": 35561, + "semantically rich": 49391, + "rich representations": 48115, + "representations representations": 46748, + "multiple state": 36289, + "models apply": 34719, + "representations various": 46786, + "tasks nlp": 55767, + "efficiency paper": 16849, + "explore effect": 19703, + "better semantic": 6963, + "input word": 26358, + "positive effect": 41279, + "deep models": 13728, + "models explore": 35001, + "graphical model": 23183, + "uses text": 60540, + "based dialog": 5678, + "model user": 34511, + "model infer": 33999, + "learn context": 29351, + "paper analyse": 39263, + "different texts": 15099, + "datasets furthermore": 13281, + "furthermore compare": 21808, + "results existing": 47623, + "language similar": 28486, + "similar languages": 51050, + "languages conclude": 28621, + "significant progress": 50915, + "semantic parsers": 49306, + "representation introduce": 46531, + "techniques tackle": 56140, + "tackle problems": 54711, + "style semantic": 53496, + "eliminates need": 16989, + "fully exploit": 21725, + "order better": 38598, + "better guide": 6895, + "graph generation": 23140, + "structured representation": 53173, + "representation input": 46529, + "text pre": 56701, + "vectors representing": 61497, + "vector representing": 61464, + "logistic regression": 30994, + "regression classifier": 45813, + "compare method": 9345, + "method constructing": 32441, + "training effective": 58077, + "disambiguation tasks": 15362, + "theoretic approach": 57012, + "confounding factors": 10150, + "gives rise": 22809, + "spurious correlations": 52388, + "stylistic features": 53510, + "features propose": 20650, + "propose test": 43666, + "topic conversation": 57398, + "problem proposed": 42635, + "high scores": 23800, + "representation word": 46605, + "models use": 35650, + "use single": 60018, + "study effects": 53366, + "using multiple": 60822, + "approach offers": 3617, + "objective project": 38101, + "answering using": 3102, + "additional modules": 1689, + "questions work": 44816, + "network cnn": 36719, + "structure data": 53095, + "image data": 24534, + "structure word": 53148, + "accurate prediction": 1084, + "prediction instead": 41712, + "using low": 60780, + "dimensional word": 15240, + "vectors input": 61487, + "input directly": 26266, + "directly apply": 15306, + "data leads": 12458, + "small text": 51505, + "image text": 24547, + "convolution layer": 11093, + "layer proposed": 29204, + "combine multiple": 9069, + "higher accuracy": 23812, + "comparison state": 9506, + "based limited": 5814, + "corpus english": 11329, + "point view": 41051, + "text use": 56832, + "tagging task": 54752, + "systems focus": 54506, + "focus small": 21201, + "small set": 51499, + "large fine": 28879, + "label set": 27726, + "dramatic improvements": 16385, + "improvements downstream": 25070, + "downstream tasks": 16352, + "labeled training": 27766, + "data existing": 12336, + "existing fine": 19069, + "systems obtain": 54573, + "automatically using": 5205, + "entities types": 18087, + "depends context": 14162, + "generalization propose": 22128, + "propose task": 43659, + "task context": 54976, + "local context": 30931, + "context sentence": 10713, + "new resources": 37305, + "task 12": 54867, + "provide baseline": 44012, + "data develop": 12277, + "language specifically": 28498, + "specifically model": 52217, + "english model": 17844, + "model language": 34036, + "word question": 62274, + "question model": 44736, + "important task": 24777, + "task natural": 55233, + "processing used": 42964, + "applications automatic": 3184, + "scale applications": 48554, + "applications previous": 3234, + "datasets paper": 13360, + "build large": 7408, + "dataset million": 12995, + "challenges real": 8074, + "world scenarios": 62957, + "cost function": 11582, + "learning problem": 29817, + "feature learning": 20494, + "based deep": 5672, + "model complicated": 33682, + "novel feature": 37821, + "based neural": 5901, + "network outperforms": 36776, + "outperforms methods": 38908, + "features specifically": 20671, + "best performance": 6789, + "performance model": 40438, + "model surpasses": 34433, + "baseline significant": 6209, + "relative improvement": 46101, + "research topics": 47133, + "decades ago": 13541, + "set languages": 50180, + "empirical studies": 17349, + "entirely different": 18032, + "knowledge transfer": 27633, + "purpose language": 44403, + "results recent": 47800, + "years witnessed": 63081, + "based question": 5970, + "systems systems": 54646, + "web information": 61887, + "information produce": 26025, + "systems designed": 54475, + "answering named": 3083, + "analysis approach": 2615, + "convert input": 11072, + "input question": 26324, + "results performance": 47764, + "respectively furthermore": 47372, + "easily applied": 16537, + "applied new": 3286, + "new languages": 37233, + "time human": 57162, + "relational semantics": 46012, + "end employ": 17633, + "results knowledge": 47687, + "base completion": 5541, + "representations trained": 46773, + "recent works": 45376, + "predictive models": 41776, + "embeddings trained": 17233, + "corresponding words": 11562, + "present systematic": 42033, + "systematic study": 54404, + "study use": 53470, + "good performance": 22936, + "performance word": 40631, + "dimensionality reduction": 15242, + "encoding function": 17566, + "function used": 21761, + "used infer": 60212, + "unseen words": 59659, + "clear advantage": 8651, + "models train": 35601, + "train new": 57620, + "allocation lda": 2431, + "addition proposed": 1639, + "proposed use": 43922, + "recently developed": 45419, + "method unsupervised": 32692, + "approach improves": 3564, + "improves interpretability": 25134, + "allows better": 2452, + "computational performance": 9853, + "results future": 47643, + "applied improve": 3275, + "problem word": 42687, + "method generating": 32517, + "search large": 48974, + "approach generating": 3547, + "generating word": 22405, + "efficient compared": 16865, + "boosted performance": 7259, + "performance natural": 40451, + "tasks usually": 55956, + "words multiple": 62462, + "negative effect": 36617, + "representations language": 46698, + "simple model": 51193, + "model enables": 33816, + "recent techniques": 45358, + "vectors represent": 61496, + "able effectively": 691, + "computationally efficient": 9873, + "efficient manner": 16882, + "words bow": 62374, + "common approach": 9164, + "used feature": 60186, + "training classifier": 57951, + "number features": 38005, + "information loss": 25960, + "information lost": 25961, + "overcome limitation": 39066, + "model provide": 34255, + "provide good": 44081, + "word vector": 62334, + "propose average": 43309, + "representations obtain": 46727, + "obtain representations": 38188, + "means clustering": 32039, + "semantic concepts": 49254, + "model outperforms": 34156, + "similar results": 51062, + "results traditional": 47886, + "model far": 33881, + "method integrate": 32546, + "lines work": 30689, + "work unsupervised": 62851, + "semantics semantic": 49413, + "relations text": 46059, + "consists components": 10321, + "semantic role": 49337, + "role labeling": 48311, + "labeling model": 27787, + "given rich": 22777, + "rich set": 48122, + "syntactic lexical": 54307, + "model relies": 34305, + "predict argument": 41635, + "annotated resources": 2911, + "method performs": 32613, + "performs par": 40709, + "induction methods": 25607, + "unlike previous": 59598, + "incorporate prior": 25361, + "prior linguistic": 42405, + "language neural": 28356, + "models learn": 35174, + "representations embeddings": 46648, + "embeddings capture": 17091, + "capture rich": 7705, + "rich linguistic": 48110, + "embeddings learned": 17164, + "learned neural": 29470, + "models recently": 35410, + "model embeddings": 33807, + "models outperform": 35283, + "monolingual models": 35804, + "models tasks": 35586, + "require knowledge": 46864, + "syntactic role": 54320, + "desirable properties": 14344, + "languages finally": 28673, + "method training": 32687, + "neural translation": 37108, + "models large": 35164, + "vocabulary expansion": 61701, + "algorithm results": 2298, + "embedding spaces": 17063, + "online demo": 38361, + "web page": 61889, + "analyses indicate": 2598, + "based embeddings": 5696, + "embeddings used": 17238, + "used applications": 60090, + "according similarity": 869, + "monolingual embeddings": 35800, + "embeddings better": 17089, + "inter word": 26590, + "word relatedness": 62277, + "zero shot": 63152, + "representations extracted": 46665, + "extracted text": 20022, + "learn general": 29373, + "mapping functions": 31802, + "feature spaces": 20507, + "vectors used": 61499, + "high proportion": 23767, + "propose simple": 43631, + "simple method": 51191, + "leads consistent": 29310, + "consistent improvements": 10278, + "shot experiments": 50615, + "experiments cross": 19393, + "image retrieval": 24546, + "domains present": 16284, + "distributed vector": 15627, + "representation based": 46494, + "gradient based": 23003, + "based training": 6103, + "gives state": 22810, + "dimension reduction": 15224, + "current work": 12028, + "provides interesting": 44207, + "better capturing": 6861, + "dot product": 16318, + "cosine similarity": 11575, + "decision boundaries": 13560, + "density based": 14092, + "learning representations": 29842, + "gaussian distributions": 22012, + "various word": 61418, + "investigate ability": 26938, + "embeddings model": 17175, + "explore novel": 19721, + "present hierarchical": 41924, + "document model": 15812, + "model architecture": 33576, + "architecture designed": 4041, + "document structure": 15835, + "using model": 60806, + "model use": 34507, + "computer vision": 9894, + "identify extract": 24422, + "topic relevant": 57426, + "sentences introduce": 49740, + "evaluation technique": 18736, + "automatic sentence": 5123, + "consuming human": 10446, + "validation data": 61193, + "investigate problem": 26977, + "relation learning": 45987, + "algorithm takes": 2304, + "good predictors": 22939, + "experiments task": 19540, + "specific embeddings": 52076, + "quality efficiency": 44513, + "space based": 51850, + "based best": 5602, + "second based": 48998, + "faster convergence": 20435, + "existing translation": 19166, + "model specific": 34401, + "framework introduce": 21548, + "dimensional feature": 15232, + "lexical resource": 30380, + "applications paper": 3227, + "approach construct": 3469, + "applying machine": 3364, + "method construct": 32440, + "effectiveness proposed": 16803, + "competitive result": 9560, + "compared english": 9403, + "sets respectively": 50305, + "generating novel": 22386, + "textual description": 56959, + "interesting problem": 26653, + "vision natural": 61641, + "processing paper": 42918, + "able generate": 696, + "sentences given": 49729, + "given sample": 22779, + "image model": 24539, + "model strong": 34412, + "image representation": 24544, + "representation generated": 46524, + "previously trained": 42353, + "trained convolutional": 57696, + "phrases used": 40856, + "given image": 22747, + "simple language": 51183, + "model produce": 34238, + "given test": 22793, + "models achieves": 34683, + "achieves comparable": 1312, + "comparable results": 9308, + "results recently": 47802, + "dataset speech": 13098, + "topic natural": 57419, + "nlp task": 37530, + "task language": 55159, + "words important": 62432, + "systems used": 54662, + "used new": 60251, + "new applications": 37130, + "lexical categories": 30355, + "words use": 62539, + "class words": 8414, + "closed class": 8696, + "syntactical features": 54337, + "used research": 60291, + "ability approach": 594, + "knowledge human": 27518, + "shows performance": 50791, + "used syntactic": 60320, + "texts including": 56891, + "current challenges": 11965, + "data representation": 12602, + "inter intra": 26582, + "standard data": 52481, + "data format": 12371, + "new publicly": 37292, + "simple powerful": 51202, + "set metrics": 50193, + "metrics quantify": 33194, + "recognition propose": 45527, + "novel metrics": 37871, + "proposed metrics": 43840, + "quantitative analysis": 44616, + "analysis based": 2621, + "visual information": 61656, + "multimodal models": 36153, + "representations learning": 46707, + "learning predict": 29813, + "set words": 50278, + "visual representations": 61667, + "linguistic visual": 30812, + "visual features": 61655, + "models achieve": 34666, + "performance variety": 40619, + "shot setup": 50644, + "model training": 34479, + "models discover": 34924, + "paving way": 39988, + "meaning paper": 32009, + "paper concerned": 39294, + "nearest neighbor": 36518, + "neighbor search": 36659, + "model ranked": 34269, + "space provides": 51889, + "provides important": 44204, + "information different": 25812, + "used word": 60353, + "sense induction": 49485, + "used determine": 60146, + "define set": 13779, + "models provide": 35379, + "known semantic": 27666, + "attracting attention": 4892, + "building recent": 7465, + "shot learning": 50624, + "paying attention": 39992, + "data containing": 12249, + "implicitly learn": 24668, + "achieve better": 1115, + "linguistic representation": 30788, + "approach performs": 3640, + "performs comparably": 40702, + "outperforms various": 38959, + "significantly improve": 50962, + "object recognition": 38083, + "languages exhibit": 28660, + "lexicon induction": 30411, + "research effort": 47026, + "research improving": 47054, + "level models": 30163, + "models similar": 35511, + "approach outperform": 3620, + "outperform word": 38833, + "challenge machine": 7994, + "used approach": 60091, + "approach apply": 3421, + "apply word": 3356, + "model lm": 34069, + "sentence words": 49673, + "best list": 6777, + "present methods": 41946, + "methods deep": 32814, + "visual modalities": 61660, + "audio visual": 4933, + "study approach": 53327, + "uni modal": 59460, + "deep networks": 13734, + "networks trained": 36917, + "trained separately": 57864, + "hidden layers": 23640, + "deep network": 13733, + "fusion model": 21858, + "model achieves": 33510, + "second present": 49017, + "new deep": 37170, + "network architecture": 36699, + "architecture uses": 4096, + "softmax layer": 51632, + "class specific": 8410, + "rate reduction": 45015, + "task develop": 55016, + "agglutinative languages": 2070, + "structure natural": 53120, + "language sentence": 28480, + "information word": 26159, + "help understand": 23592, + "understand language": 59301, + "literature survey": 30863, + "understand different": 59291, + "languages various": 28818, + "techniques paper": 56117, + "survey research": 54218, + "research papers": 47090, + "network language": 36753, + "train large": 57600, + "address questions": 1796, + "respect model": 47348, + "model size": 34391, + "set size": 50247, + "computational costs": 9840, + "analysis shows": 2758, + "relative word": 46112, + "word error": 62200, + "asr task": 4562, + "recently released": 45461, + "billion word": 7121, + "word language": 62221, + "language modelling": 28223, + "bleu point": 7208, + "prediction language": 41714, + "models generate": 35057, + "generate target": 22253, + "phrases words": 40858, + "model dependency": 33749, + "solving problem": 51704, + "model attempts": 33583, + "attempts solve": 4700, + "sub problems": 53528, + "model determine": 33759, + "scale monolingual": 48598, + "data order": 12523, + "alleviate data": 2402, + "sparsity problem": 51982, + "experiments chinese": 19371, + "english translation": 17893, + "using syntactic": 60973, + "framework supports": 21610, + "syntactic tags": 54332, + "language training": 28535, + "training texts": 58298, + "focus work": 21214, + "types semantic": 59115, + "including named": 25278, + "pre existing": 41502, + "described paper": 14214, + "significantly outperformed": 50993, + "baseline model": 6185, + "highest scores": 23857, + "scores reported": 48918, + "english test": 17889, + "supports hypothesis": 54142, + "information improve": 25913, + "quality based": 44496, + "language propose": 28448, + "model combination": 33664, + "self organizing": 49201, + "12 million": 109, + "semantic consistency": 49257, + "showed high": 50665, + "level semantic": 30205, + "time periods": 57192, + "level method": 30160, + "highly scalable": 23914, + "results popular": 47767, + "popular datasets": 41162, + "datasets task": 13452, + "paper uses": 39603, + "uses natural": 60523, + "tree based": 58741, + "based regression": 5985, + "methods combination": 32786, + "algorithm outperforms": 2289, + "model addresses": 33543, + "sentence embedding": 49546, + "hot topic": 24030, + "processing research": 42933, + "research using": 47140, + "using recurrent": 60900, + "networks long": 36870, + "long short": 31027, + "memory lstm": 32258, + "lstm cells": 31254, + "ability capture": 597, + "capture long": 7693, + "lstm rnn": 31281, + "richer information": 48129, + "layer network": 29193, + "network provides": 36792, + "representation sentence": 46578, + "sentence paper": 49612, + "supervised manner": 54015, + "web search": 61894, + "analysis performed": 2715, + "works model": 62898, + "embedding vector": 17070, + "vector used": 61472, + "different applications": 14838, + "automatic keyword": 5098, + "detection topic": 14536, + "network perform": 36781, + "document retrieval": 15828, + "difficult language": 15172, + "embedding vectors": 17071, + "search task": 48987, + "shown significantly": 50751, + "generates sentence": 22355, + "retrieval tasks": 47972, + "tasks comparison": 55550, + "method paper": 32607, + "significantly outperforms": 50996, + "order features": 38620, + "extend previous": 19826, + "corpus order": 11396, + "surface syntactic": 54155, + "achieving absolute": 1391, + "web text": 61900, + "address problems": 1791, + "state transducers": 52712, + "representation allows": 46489, + "use explore": 59885, + "classification experiments": 8469, + "dataset results": 13068, + "compared approaches": 9380, + "terms accuracy": 56263, + "accuracy recall": 1035, + "recall f1": 45240, + "models great": 35070, + "great progress": 23212, + "progress improving": 43100, + "models predict": 35341, + "predict target": 41656, + "target translation": 54854, + "translation source": 58679, + "context information": 10658, + "does depend": 15942, + "paper explore": 39361, + "prediction propose": 41731, + "based convolutional": 5650, + "network learn": 36758, + "learn sentence": 29421, + "sentence semantic": 49639, + "representations sentence": 46752, + "feature representation": 20499, + "feed forward": 20711, + "forward neural": 21405, + "network better": 36711, + "translations using": 58711, + "local global": 30938, + "global information": 22831, + "scale experiments": 48572, + "experiments method": 19462, + "method obtain": 32591, + "strong baseline": 53002, + "model augmented": 33588, + "augmented neural": 4982, + "joint model": 27178, + "model superior": 34428, + "sequence information": 49932, + "information time": 26124, + "lstm networks": 31276, + "strong results": 53046, + "results variety": 47901, + "sequence modeling": 49955, + "modeling tasks": 34629, + "lstm structure": 31283, + "linear chain": 30649, + "syntactic properties": 54315, + "tree lstm": 58749, + "tree structured": 58759, + "existing systems": 19153, + "lstm baselines": 31248, + "baselines tasks": 6308, + "tasks predicting": 55805, + "predicting semantic": 41680, + "task sentiment": 55360, + "stanford sentiment": 52560, + "systems usually": 54664, + "usually use": 61072, + "use linear": 59934, + "linear combination": 30653, + "features model": 20623, + "model quality": 34264, + "quality translation": 44592, + "model current": 33733, + "propose non": 43518, + "non linear": 37659, + "interaction features": 26598, + "training non": 58195, + "linear models": 30661, + "models discuss": 34927, + "discuss possible": 15478, + "learning performance": 29802, + "performance experimental": 40335, + "features hierarchical": 20595, + "method produce": 32622, + "complex task": 9668, + "paper make": 39421, + "make attempt": 31540, + "develop general": 14590, + "general framework": 22061, + "tasks define": 55573, + "using measures": 60796, + "compare simple": 9365, + "learning problems": 29818, + "represent input": 46474, + "input texts": 26347, + "effect performance": 16616, + "researchers practitioners": 47163, + "formulating problem": 21390, + "sequential model": 50046, + "based optimization": 5929, + "optimization technique": 38558, + "models competitive": 34839, + "based latent": 5809, + "variable models": 61224, + "models neural": 35252, + "topic classification": 57394, + "problems approach": 42695, + "black box": 7190, + "text require": 56740, + "require manual": 46877, + "manual tuning": 31752, + "knowledge shown": 27610, + "tasks approaches": 55504, + "variety knowledge": 61274, + "knowledge proposed": 27580, + "approach robust": 3677, + "discussed paper": 15486, + "propose regularization": 43600, + "regularization terms": 45844, + "conduct extensive": 10049, + "robustness proposed": 48294, + "proposed methods": 43835, + "methods experimental": 32850, + "demonstrate proposed": 13963, + "methods obtain": 32964, + "remarkable improvements": 46357, + "baselines present": 6287, + "task particular": 55270, + "automatically generated": 5176, + "generated speech": 22320, + "alignment using": 2388, + "using state": 60960, + "art visual": 4438, + "deep convolutional": 13688, + "technique outperforms": 56041, + "based keyword": 5794, + "proposed neural": 43868, + "devlin et": 14730, + "al 2014": 2236, + "source context": 51757, + "context window": 10744, + "achieving state": 1424, + "relevant source": 46235, + "source information": 51773, + "target information": 54819, + "context entire": 10625, + "unified representation": 59477, + "representation target": 46589, + "tasks proposed": 55822, + "model achieve": 33502, + "points average": 41067, + "understand meaning": 59304, + "explore methods": 19714, + "number methods": 38017, + "defined word": 13788, + "word ordering": 62255, + "areas research": 4158, + "framework generating": 21530, + "data training": 12741, + "training model": 58178, + "highly effective": 23897, + "extraction technique": 20120, + "method translation": 32691, + "capture context": 7655, + "curriculum learning": 12043, + "learning strategy": 29897, + "strategy train": 52952, + "train model": 57606, + "model classify": 33661, + "phrase sentence": 40845, + "level context": 30081, + "context using": 10741, + "using training": 60995, + "approach significantly": 3692, + "propose neural": 43488, + "response generator": 47394, + "decoder framework": 13594, + "decoding process": 13640, + "process based": 42761, + "latent representation": 29131, + "encoding decoding": 17564, + "conversation data": 11032, + "study shows": 53460, + "grammatically correct": 23082, + "appropriate responses": 3966, + "outperforming state": 38860, + "state arts": 52696, + "retrieval based": 47941, + "translation question": 58666, + "short texts": 50573, + "problem called": 42515, + "called deep": 7543, + "setting approach": 50317, + "structure test": 53140, + "matching problem": 31919, + "wang et": 61765, + "al 2013": 2235, + "including using": 25318, + "using dependency": 60650, + "trees based": 58767, + "large margins": 28907, + "vanishing gradient": 61219, + "network capture": 36716, + "range dependencies": 44912, + "traditional neural": 57537, + "semantic matching": 49297, + "model internal": 34014, + "internal structures": 26691, + "step goal": 52811, + "propose convolutional": 43340, + "vision speech": 61643, + "proposed models": 43862, + "patterns different": 39967, + "matching tasks": 31923, + "tasks different": 55589, + "tasks demonstrates": 55581, + "demonstrates efficacy": 14032, + "efficacy proposed": 16834, + "explore usage": 19747, + "words second": 62502, + "provide robust": 44126, + "principled approach": 42386, + "statistical structure": 52764, + "language remains": 28466, + "words frequency": 62421, + "jensen shannon": 27153, + "shannon divergence": 50447, + "suggest future": 53818, + "end neural": 17688, + "based architectures": 5576, + "en fr": 17417, + "factors success": 20315, + "availability high": 5249, + "quality parallel": 44561, + "work investigate": 62697, + "monolingual corpora": 35793, + "corpora neural": 11226, + "translation compared": 58589, + "based hierarchical": 5770, + "resource language": 47232, + "task chinese": 54949, + "targeted tasks": 54860, + "tasks parallel": 55791, + "high resource": 23791, + "resource languages": 47238, + "bleu scores": 7214, + "analysis important": 2678, + "studies word": 53312, + "word structure": 62316, + "nlp research": 37521, + "analysis techniques": 2776, + "techniques popular": 56121, + "day day": 13501, + "morphological structure": 35844, + "structure work": 53150, + "based finite": 5737, + "model lstm": 34079, + "principled way": 42389, + "language image": 28100, + "text fundamental": 56588, + "achieving performance": 1417, + "performance better": 40219, + "architecture named": 4067, + "word sequence": 62306, + "sequence prediction": 49967, + "different previous": 15033, + "work neural": 62734, + "modeling generation": 34579, + "rnn lstm": 48201, + "instead use": 26465, + "use convolutional": 59855, + "predict word": 41660, + "different existing": 14924, + "networks language": 36868, + "model effectively": 33798, + "designed task": 14332, + "task argue": 54912, + "dependencies model": 14108, + "model fast": 33882, + "easy train": 16566, + "experiments text": 19545, + "powerful approach": 41433, + "unstructured textual": 59674, + "textual data": 56956, + "data unstructured": 12756, + "electronic medical": 16969, + "model single": 34390, + "score based": 48835, + "common phenomenon": 9191, + "structural semantic": 53084, + "paper firstly": 39376, + "examples finally": 18903, + "method apply": 32385, + "input words": 26359, + "features words": 20698, + "related data": 45895, + "domain semantic": 16152, + "extended version": 19839, + "languages work": 28822, + "work addresses": 62558, + "addresses problem": 1814, + "number language": 38014, + "spanish portuguese": 51946, + "spanish chinese": 51939, + "proficiency levels": 43067, + "measure based": 32045, + "algorithm works": 2312, + "unsupervised word": 59748, + "embeddings shown": 17215, + "problem unsupervised": 42679, + "representative models": 46799, + "embeddings observe": 17182, + "observe consistent": 38130, + "improvements languages": 25077, + "analyze effect": 2812, + "effect various": 16623, + "embeddings downstream": 17118, + "results paper": 47757, + "set methods": 50192, + "words like": 62449, + "normalization methods": 37706, + "learn rich": 29416, + "rich semantic": 48119, + "recent nlp": 45328, + "research developing": 47017, + "developing models": 14657, + "learn useful": 29443, + "representations phrases": 46736, + "bridging gap": 7326, + "language embedding": 28042, + "models effectively": 34947, + "general knowledge": 22063, + "tasks neural": 55763, + "better existing": 6887, + "commercial systems": 9155, + "systems rely": 54616, + "specific engineering": 52078, + "results highlight": 47657, + "effectiveness neural": 16799, + "neural embedding": 36948, + "definition based": 13793, + "models understand": 35645, + "networks dnns": 36846, + "significant performance": 50902, + "language recognition": 28464, + "recognition tasks": 45543, + "possible using": 41340, + "using single": 60945, + "approach shown": 3688, + "substantial performance": 53626, + "performance improvements": 40386, + "recognition task": 45542, + "recognition evaluation": 45504, + "constituency trees": 10353, + "based convolution": 5649, + "architecture allows": 4025, + "output layer": 38981, + "enables effective": 17439, + "tasks sentiment": 55873, + "analysis question": 2736, + "outperforms previous": 38919, + "results including": 47673, + "existing neural": 19116, + "shedding light": 50530, + "based style": 6067, + "recent times": 45360, + "involving human": 27025, + "human bias": 24117, + "new metric": 37257, + "proposed metric": 43839, + "different people": 15022, + "metric human": 33117, + "classification process": 8522, + "process experimental": 42778, + "performance using": 40617, + "novel metric": 37870, + "different human": 14950, + "human expert": 24162, + "paper contributes": 39308, + "joint embedding": 27167, + "embedding model": 17043, + "pair entities": 39151, + "texts proposed": 56914, + "dimensional vector": 15237, + "make accurate": 31539, + "accurate predictions": 1085, + "performance approach": 40196, + "cutting edge": 12068, + "experiments model": 19467, + "achieves significant": 1364, + "relation extraction": 45972, + "extraction present": 20095, + "work identify": 62681, + "identify relevant": 24440, + "semantic levels": 49294, + "inference text": 25699, + "features like": 20616, + "common words": 9211, + "approaches reported": 3914, + "binary classification": 7144, + "traditional chinese": 57512, + "experiments test": 19543, + "individual features": 25568, + "interesting results": 26655, + "written texts": 63013, + "textual properties": 56975, + "paper study": 39578, + "representation text": 46592, + "text graph": 56611, + "law distribution": 29173, + "distribution experiments": 15638, + "metrics correlate": 33154, + "authorship attribution": 5010, + "particular types": 39869, + "words information": 62436, + "applications involving": 3214, + "better language": 6907, + "model propose": 34245, + "amounts data": 2546, + "smaller training": 51526, + "significant reduction": 50916, + "text useful": 56834, + "useful learning": 60374, + "domain text": 16209, + "using social": 60951, + "useful data": 60359, + "space specifically": 51899, + "methods developed": 32823, + "validated using": 61187, + "using high": 60728, + "quality datasets": 44507, + "state affairs": 52573, + "context paper": 10684, + "art natural": 4306, + "studied paper": 53231, + "systems dealing": 54471, + "human cognition": 24122, + "intelligent systems": 26544, + "robust automatic": 48240, + "exploit large": 19659, + "extremely difficult": 20156, + "unsupervised machine": 59706, + "probabilistic models": 42467, + "models text": 35595, + "text recently": 56728, + "standard approaches": 52463, + "approaches relying": 3912, + "difficult scale": 15186, + "report present": 46443, + "present empirical": 41897, + "variational inference": 61248, + "scheme applied": 48727, + "online inference": 38371, + "qualitative results": 44481, + "model need": 34120, + "accurately model": 1096, + "propose self": 43614, + "self adaptive": 49173, + "sentence model": 49602, + "representations suitable": 46765, + "suitable task": 53860, + "task hand": 55112, + "models benchmark": 34765, + "sets word": 50312, + "unlabelled data": 59587, + "data shown": 12659, + "shown high": 50717, + "paper perform": 39440, + "extrinsic evaluation": 20170, + "evaluation popular": 18675, + "embedding methods": 17041, + "sequence labelling": 49941, + "task based": 54929, + "representations using": 46785, + "training instances": 58135, + "sufficient achieve": 53799, + "achieve competitive": 1125, + "competitive results": 9561, + "results word": 47912, + "embeddings lead": 17162, + "oov words": 38407, + "words domain": 62403, + "little difference": 30873, + "tasks consider": 55554, + "analysis document": 2653, + "used document": 60154, + "called emph": 7545, + "open data": 38416, + "space word": 51904, + "nlp especially": 37485, + "especially given": 18277, + "given recent": 22776, + "recent methods": 45319, + "work assumes": 62576, + "single vector": 51356, + "vector word": 61474, + "word type": 62325, + "tasks present": 55807, + "learns multiple": 29967, + "multiple embeddings": 36208, + "embeddings word": 17246, + "performing word": 40693, + "learning non": 29783, + "context task": 10730, + "task demonstrate": 55000, + "billion tokens": 7120, + "base kb": 5543, + "new facts": 37200, + "making inferences": 31657, + "multi hop": 35969, + "presents approach": 42072, + "network rnn": 36796, + "vector embeddings": 61452, + "binary relation": 7153, + "unseen training": 59658, + "training time": 58299, + "time single": 57214, + "high capacity": 23709, + "predict new": 41648, + "compositional model": 9745, + "new dataset": 37163, + "method improves": 32534, + "leveraging pre": 30336, + "pre trained": 41520, + "trained embeddings": 57716, + "related concepts": 45889, + "related entities": 45904, + "entities given": 18054, + "given topic": 22797, + "wikipedia text": 62055, + "compute semantic": 9879, + "given query": 22774, + "study examine": 53372, + "important entities": 24722, + "entities relationships": 18079, + "classification used": 8578, + "lot attention": 31114, + "attention recently": 4818, + "recently popular": 45447, + "canonical correlation": 7590, + "correlation analysis": 11518, + "approaches learn": 3858, + "learn joint": 29385, + "joint representation": 27187, + "approaches outperform": 3888, + "approaches task": 3935, + "task transfer": 55445, + "transfer learning": 58374, + "approach called": 3440, + "learned using": 29488, + "approaches recent": 3908, + "recent advances": 45280, + "map words": 31798, + "rich information": 48102, + "language representation": 28468, + "approach simple": 3695, + "work language": 62702, + "probability model": 42481, + "million sentences": 33257, + "yelp reviews": 63085, + "user posts": 60433, + "temporal evolution": 56186, + "constructed using": 10417, + "graph structure": 23170, + "allows easy": 2459, + "benchmark dataset": 6443, + "propose concept": 43327, + "concept level": 9924, + "cause model": 7885, + "model utilized": 34518, + "event related": 18787, + "related tweets": 45950, + "results dataset": 47568, + "sina weibo": 51280, + "baseline methods": 6183, + "corpus specifically": 11435, + "specifically proposed": 52225, + "uses lexical": 60519, + "lexical patterns": 30376, + "automatically identify": 5181, + "compared model": 9420, + "evaluated using": 18553, + "current standard": 12010, + "novel evaluation": 37819, + "evaluation set": 18712, + "shown good": 50711, + "representations natural": 46722, + "language vocabulary": 28580, + "paper summarizes": 39586, + "applying neural": 3372, + "models task": 35585, + "similarity evaluation": 51094, + "depending task": 14158, + "task introduce": 55144, + "task achieved": 54876, + "models previously": 35354, + "texts russian": 56921, + "national corpus": 36396, + "models outperforming": 35290, + "larger corpora": 29070, + "especially true": 18308, + "trained larger": 57772, + "performance high": 40372, + "semantic vectors": 49375, + "learned way": 29491, + "way used": 61836, + "used variety": 60346, + "variety linguistic": 61278, + "linguistic tasks": 30800, + "exciting field": 18970, + "field study": 20770, + "matrix multiplication": 31942, + "recognition algorithm": 45491, + "rewriting systems": 48080, + "running time": 48407, + "currently best": 12032, + "mildly context": 33245, + "combinatory categorial": 9059, + "approach detection": 3485, + "associated text": 4625, + "additional information": 1675, + "lead improved": 29260, + "methods relation": 33011, + "using distributional": 60663, + "distributional information": 15664, + "using approach": 60561, + "approach cross": 3473, + "validation accuracy": 61192, + "accuracy dataset": 956, + "dataset improved": 12960, + "human labeling": 24188, + "labeling results": 27792, + "score 86": 48821, + "pairs present": 39207, + "approach extract": 3533, + "information text": 26117, + "critically important": 11799, + "range domains": 44916, + "documents propose": 15905, + "based entity": 5706, + "comprehensive set": 9798, + "set common": 50120, + "highly robust": 23913, + "text approach": 56438, + "finally present": 20875, + "rise social": 48155, + "identify new": 24432, + "new opportunities": 37276, + "level approach": 30064, + "detection sentiment": 14522, + "sentiment polarity": 49854, + "linguistically motivated": 30817, + "motivated features": 35866, + "accuracy furthermore": 980, + "furthermore introduce": 21824, + "introduce automatic": 26784, + "collected annotated": 8954, + "dialectal arabic": 14748, + "arabic tweets": 4007, + "performance levels": 40417, + "framework different": 21495, + "types features": 59088, + "popular social": 41187, + "data rich": 12618, + "increasing popularity": 25459, + "detection algorithms": 14458, + "data time": 12734, + "users provide": 60476, + "systems widely": 54670, + "systems literature": 54551, + "algorithms including": 2327, + "presents results": 42102, + "correlate human": 11503, + "collected crowdsourcing": 8957, + "based weighted": 6132, + "multiple domains": 36204, + "domains language": 16267, + "sequence approach": 49907, + "benchmarking datasets": 6508, + "tagging accuracy": 54735, + "advancing research": 1934, + "high cost": 23721, + "implemented evaluated": 24648, + "crowdsourcing approach": 11889, + "approach produce": 3653, + "existing corpus": 19049, + "agreement human": 2106, + "human annotators": 24105, + "data annotation": 12136, + "annotation guidelines": 2953, + "work text": 62841, + "generation task": 22558, + "task used": 55459, + "object categories": 38081, + "introduce dataset": 26796, + "annotated natural": 2907, + "language descriptions": 28022, + "learn data": 29356, + "textual descriptions": 56960, + "method successfully": 32674, + "generation previous": 22522, + "approach human": 3556, + "evaluation task": 18734, + "automated metric": 5053, + "strongly correlates": 53070, + "correlates human": 11514, + "framework allows": 21457, + "aspects language": 4543, + "syntactic structures": 54330, + "structures sentence": 53194, + "extra information": 19962, + "information conveyed": 25793, + "current paper": 11995, + "paper extend": 39371, + "extend framework": 19822, + "framework order": 21576, + "information using": 26147, + "study analyzes": 53325, + "european parliament": 18430, + "evolved time": 18839, + "considering context": 10256, + "analyzed using": 2836, + "using new": 60837, + "modeling method": 34597, + "matrix factorization": 31941, + "findings suggest": 20917, + "paper overview": 39437, + "overview shared": 39116, + "segmentation speech": 49087, + "micro blog": 33222, + "dataset shared": 13082, + "task consists": 54973, + "task sub": 55417, + "sub tasks": 53534, + "systems different": 54479, + "resources introduce": 47307, + "dataset task": 13113, + "participating systems": 39824, + "test results": 56364, + "results online": 47751, + "available open": 5337, + "questions designed": 44783, + "designed evaluate": 14313, + "evaluate human": 18462, + "human intelligence": 24172, + "comprehension questions": 9775, + "measure human": 32053, + "multiple senses": 36279, + "especially deep": 18271, + "learning technologies": 29908, + "quite challenging": 44826, + "simply applying": 51247, + "applying existing": 3362, + "performance mainly": 40430, + "complex relations": 9656, + "tackle challenges": 54700, + "challenges propose": 8071, + "framework consisting": 21478, + "build classifier": 7389, + "specific type": 52166, + "novel word": 37956, + "method considers": 32435, + "type questions": 59067, + "questions propose": 44800, + "relation representations": 45995, + "representations experimental": 46660, + "shown proposed": 50744, + "framework outperform": 21577, + "methods solving": 33047, + "study results": 53454, + "uses deep": 60503, + "step closer": 52803, + "closer human": 8710, + "article present": 4454, + "approaches attempt": 3768, + "interpretable way": 26732, + "detecting different": 14446, + "information capturing": 25776, + "knowledge learned": 27547, + "related topics": 45948, + "possible approaches": 41315, + "generation new": 22506, + "research explore": 47032, + "conducted series": 10093, + "similarity sentences": 51120, + "comparison results": 9504, + "sequence sequence": 49978, + "translation methods": 58629, + "based generation": 5748, + "model recently": 34282, + "recently shown": 45466, + "results tasks": 47878, + "text image": 56621, + "image captioning": 24531, + "work approach": 62571, + "quality terms": 44587, + "terms bleu": 56272, + "applicability models": 3154, + "task input": 55139, + "generation approach": 22420, + "able significantly": 725, + "bi directional": 6999, + "directional long": 15281, + "lstm neural": 31277, + "networks use": 36920, + "alignment information": 2370, + "conventional approaches": 11000, + "propose employ": 43366, + "architectures learning": 4115, + "modal interactions": 33459, + "words question": 62491, + "layer learn": 29187, + "representation classification": 46498, + "demonstrate efficacy": 13905, + "qa datasets": 44450, + "significantly outperforming": 50994, + "data method": 12484, + "models words": 35686, + "time models": 57180, + "used end": 60162, + "scores obtained": 48911, + "obtained method": 38215, + "features combined": 20539, + "recent success": 45355, + "application neural": 3173, + "networks model": 36875, + "model various": 34527, + "architecture neural": 4068, + "layers capture": 29219, + "capture important": 7681, + "higher order": 23834, + "interaction network": 26609, + "multitask learning": 36323, + "network parameters": 36780, + "arabic english": 3999, + "english chinese": 17782, + "low high": 31153, + "present approaches": 41848, + "speech signals": 52294, + "transition probabilities": 58542, + "networks dnn": 36845, + "approach combined": 3451, + "adaptation method": 1526, + "gains natural": 21938, + "long texts": 31044, + "texts like": 56900, + "longer documents": 31050, + "documents challenging": 15862, + "recurrent networks": 45621, + "networks models": 36876, + "models paper": 35298, + "step generation": 52810, + "task training": 55443, + "lstm long": 31271, + "auto encoder": 5014, + "lstm model": 31273, + "reconstruct original": 45578, + "using standard": 60959, + "standard metrics": 52505, + "models able": 34653, + "preserve syntactic": 42117, + "discourse coherence": 15387, + "generating coherent": 22367, + "coherent text": 8918, + "text units": 56831, + "footnote code": 21279, + "code models": 8834, + "networks successfully": 36914, + "tasks resulting": 55862, + "models difficult": 34919, + "building sentence": 7469, + "models nlp": 35263, + "introduce simple": 26860, + "information flow": 25880, + "test methods": 56357, + "methods sentiment": 33033, + "wide applications": 61959, + "outperform simple": 38819, + "learning distinct": 29595, + "models vector": 35672, + "representations multi": 46720, + "methods proposed": 32996, + "understanding tasks": 59408, + "introduce multi": 26825, + "based chinese": 5616, + "embeddings language": 17157, + "test performance": 56361, + "model speech": 34403, + "recognition sentiment": 45535, + "analysis semantic": 2749, + "relation identification": 45984, + "embeddings improve": 17147, + "tasks speech": 55904, + "various forms": 61345, + "information tasks": 26114, + "tasks results": 55863, + "highlight importance": 23862, + "models real": 35401, + "quality quantity": 44569, + "articles wikipedia": 4483, + "varies greatly": 61257, + "translation tools": 58692, + "specific needs": 52117, + "content wikipedia": 10571, + "studies rely": 53295, + "paper compare": 39291, + "compare data": 9334, + "data acquisition": 12115, + "self reported": 49203, + "age gender": 2046, + "accuracy text": 1061, + "giving best": 22813, + "introduce corpus": 26792, + "scale corpus": 48560, + "corpus annotated": 11274, + "annotated using": 2928, + "using amazon": 60556, + "inherent difficulty": 26202, + "annotation task": 2973, + "linguistic variables": 30809, + "level annotated": 30061, + "mental state": 32292, + "investigate feasibility": 26959, + "experiments present": 19492, + "present models": 41948, + "models predicting": 35342, + "distributional word": 15672, + "relations empirical": 46024, + "various data": 61320, + "results potential": 47769, + "potential use": 41410, + "embedding words": 17073, + "gained lot": 21918, + "methods provide": 33000, + "provide efficient": 44059, + "unclear paper": 59238, + "paper argue": 39271, + "ranking problem": 44975, + "metrics based": 33141, + "insight propose": 26384, + "attention mechanism": 4770, + "robustness noise": 48289, + "compared state": 9458, + "art word": 4440, + "embedding techniques": 17067, + "significant margin": 50898, + "million tokens": 33259, + "performs existing": 40707, + "similarity benchmark": 51086, + "available general": 5296, + "abstract meaning": 760, + "representation amr": 46490, + "open domain": 38419, + "rich semantics": 48121, + "fields like": 20781, + "event extraction": 18784, + "generation typically": 22571, + "dictionary lookup": 14807, + "robust learning": 48252, + "learning stage": 29892, + "generalize better": 22137, + "previous approach": 42237, + "classifier improve": 8599, + "improve previous": 24909, + "art result": 4368, + "end performance": 17695, + "used human": 60204, + "modeling human": 34582, + "language abilities": 27948, + "scale neural": 48605, + "working memory": 62869, + "memory model": 32270, + "network takes": 36811, + "takes input": 54781, + "input neural": 26304, + "flow information": 21119, + "components neural": 9721, + "gating mechanisms": 22007, + "capable learning": 7624, + "priori knowledge": 42430, + "role different": 48304, + "based interface": 5791, + "using open": 60848, + "open ended": 38429, + "incremental learning": 25483, + "output sentences": 38999, + "sentences expressing": 49719, + "range language": 44920, + "visually grounded": 61687, + "textual visual": 56986, + "visual input": 61657, + "input model": 26300, + "gated recurrent": 21996, + "recurrent unit": 45627, + "uses multi": 60521, + "multi task": 36015, + "task objective": 55244, + "visual representation": 61666, + "representations individual": 46689, + "visual scenes": 61669, + "learns effectively": 29958, + "sequential structure": 50051, + "structure semantic": 53133, + "online social": 38384, + "problem classifying": 42520, + "sentiment user": 49865, + "user comments": 60406, + "comments news": 9146, + "cover wide": 11648, + "domains including": 16261, + "particular domain": 39843, + "diverse topics": 15723, + "holistic view": 23990, + "useful applications": 60356, + "paper formulate": 39384, + "formulate problem": 21385, + "problem entity": 42550, + "entity specific": 18150, + "novel features": 37822, + "features specific": 20670, + "news comments": 37392, + "results models": 47728, + "outperform state": 38822, + "art baselines": 4222, + "neural sequence": 37096, + "sequence model": 49952, + "task essential": 55053, + "based encoder": 5698, + "decoder model": 13600, + "memory recurrent": 32279, + "networks lstm": 36872, + "language instructions": 28118, + "action sequences": 1457, + "based representation": 5993, + "model focus": 33903, + "focus sentence": 21197, + "contrast existing": 10875, + "methods model": 32947, + "specific annotations": 52043, + "achieves best": 1304, + "results reported": 47806, + "single sentence": 51333, + "dataset competitive": 12852, + "results limited": 47701, + "limited training": 30627, + "training multi": 58181, + "model series": 34363, + "components model": 9719, + "high accuracy": 23707, + "accuracy model": 1008, + "generative models": 22599, + "models allowing": 34708, + "fast accurate": 20420, + "inference propose": 25686, + "propose efficient": 43362, + "efficient decoding": 16867, + "decoding algorithm": 13625, + "beam size": 6370, + "uncertainty model": 59231, + "jointly predicting": 27218, + "pos tags": 41236, + "model obtains": 34140, + "obtains better": 38242, + "model performing": 34200, + "learning large": 29698, + "large unlabelled": 29041, + "corpus model": 11382, + "distilling knowledge": 15586, + "new research": 37301, + "research topic": 47130, + "performance particularly": 40482, + "tasks propose": 55818, + "specific knowledge": 52093, + "set high": 50164, + "dimensional embeddings": 15231, + "reduce model": 45671, + "model complexity": 33681, + "efficiency performance": 16850, + "performance experiments": 40337, + "experiments tasks": 19541, + "tasks reveal": 55865, + "directly training": 15339, + "structured neural": 53166, + "networks encode": 36849, + "models best": 34778, + "sequence based": 49912, + "sequence models": 49958, + "models like": 35186, + "compositional structure": 9750, + "tasks clear": 55539, + "data demonstrate": 12271, + "artificial data": 4489, + "data task": 12724, + "lstm based": 31243, + "based sequence": 6027, + "model learn": 34050, + "tree structure": 58758, + "large training": 29028, + "training sets": 58253, + "structure paper": 53126, + "method proposed": 32626, + "audio features": 4927, + "features generated": 20590, + "process data": 42767, + "valence arousal": 61170, + "attributes like": 4908, + "training test": 58290, + "test sets": 56377, + "feature weighting": 20510, + "nearest neighbors": 36522, + "quality research": 44574, + "total number": 57475, + "published papers": 44370, + "paramount importance": 39736, + "paper devise": 39341, + "89 accuracy": 546, + "systematic analysis": 54390, + "successful application": 53733, + "methods improve": 32894, + "performance identifying": 40377, + "artificially generated": 4502, + "vision language": 61637, + "language long": 28141, + "intelligence ai": 26535, + "images videos": 24558, + "available corpora": 5272, + "propose set": 43628, + "quality metrics": 44551, + "metrics evaluating": 33162, + "language datasets": 28018, + "datasets using": 13474, + "using complex": 60614, + "complex language": 9631, + "different strengths": 15082, + "data attention": 12147, + "performance range": 40518, + "range tasks": 44937, + "image caption": 24530, + "adaptation model": 1528, + "model used": 34508, + "used machine": 60229, + "task applied": 54906, + "alleviate issue": 2408, + "long inputs": 31017, + "finally propose": 20876, + "adverse drug": 1998, + "extraction information": 20072, + "crucial task": 11913, + "task detecting": 55012, + "detecting classifying": 14445, + "report analysis": 46426, + "analysis complex": 2634, + "terms time": 56318, + "impact quality": 24605, + "quality data": 44505, + "data analysis": 12132, + "robust language": 48251, + "reasonable performance": 45173, + "approach promising": 3655, + "baseline paper": 6199, + "dynamic time": 16492, + "demonstrated proposed": 14016, + "considerable performance": 10234, + "improvement existing": 25004, + "certain types": 7949, + "types information": 59093, + "extraction tasks": 20119, + "traditional rule": 57541, + "knowledge general": 27490, + "traditional text": 57552, + "based rules": 6004, + "task involving": 55149, + "dialogue corpus": 14769, + "corpus dataset": 11317, + "dataset containing": 12865, + "containing million": 10484, + "multi turn": 36035, + "turn dialogues": 58990, + "100 million": 62, + "resource research": 47266, + "research building": 46996, + "models make": 35213, + "amounts unlabeled": 2562, + "data dataset": 12269, + "dataset multi": 13001, + "state tracking": 52711, + "challenge datasets": 7975, + "dataset provide": 13043, + "provide benchmark": 44016, + "benchmark performance": 6486, + "performance task": 40593, + "consider task": 10221, + "learning control": 29574, + "language barrier": 27972, + "environments challenging": 18176, + "deep reinforcement": 13746, + "reinforcement learning": 45864, + "framework jointly": 21551, + "jointly learn": 27199, + "state representations": 52708, + "framework enables": 21502, + "text descriptions": 56533, + "capture semantics": 7710, + "baselines using": 6317, + "using bag": 60580, + "outperforms baselines": 38873, + "demonstrating importance": 14054, + "manually annotated": 31756, + "mining sentiment": 33322, + "different sets": 15067, + "000 words": 15, + "200 words": 235, + "annotated resource": 2910, + "based tensor": 6087, + "query candidate": 44663, + "train classifier": 57573, + "classifier using": 8609, + "vectors using": 61500, + "simple features": 51169, + "features achieves": 20517, + "achieves average": 1303, + "average f1": 5406, + "score 40": 48792, + "dataset comparable": 12848, + "count based": 11611, + "paper improve": 39396, + "performance recurrent": 40524, + "rnn language": 48194, + "model incorporating": 33992, + "incorporating syntactic": 25394, + "relevant contexts": 46205, + "10 points": 50, + "points accuracy": 41066, + "achieve results": 1186, + "comparable state": 9311, + "task consider": 54969, + "number text": 38045, + "distributional properties": 15666, + "properties data": 43258, + "approaches allow": 3761, + "information high": 25905, + "high low": 23751, + "space embedding": 51856, + "data consider": 12238, + "data contrast": 12252, + "addition use": 1648, + "query based": 44662, + "deeper understanding": 13760, + "computational approaches": 9834, + "opinion analysis": 38499, + "model sentiment": 34356, + "given different": 22736, + "variety languages": 61277, + "communication humans": 9249, + "provide natural": 44103, + "survey existing": 54206, + "english end": 17799, + "goal article": 22876, + "article provide": 4459, + "provide common": 44028, + "researchers interested": 47160, + "design decisions": 14272, + "network approaches": 36698, + "approaches recently": 3909, + "achieved state": 1273, + "distance dependencies": 15543, + "model exploit": 33858, + "exploit various": 19668, + "model improves": 33982, + "tasks achieves": 55489, + "achieves highest": 1337, + "work area": 62572, + "transfer based": 58353, + "developed english": 14629, + "little work": 30890, + "languages currently": 28630, + "focus designing": 21152, + "using transfer": 60997, + "structure parsing": 53127, + "generate text": 22254, + "language better": 27979, + "require large": 46868, + "aligned data": 2355, + "data translation": 12745, + "available languages": 5318, + "languages transfer": 28810, + "knowledge languages": 27543, + "languages source": 28792, + "language target": 28518, + "online information": 38372, + "make possible": 31585, + "possible directly": 41323, + "information expressed": 25852, + "expressed text": 19801, + "order able": 38585, + "new natural": 37266, + "automatically extracting": 5171, + "extracting relevant": 20035, + "relevant information": 46220, + "pieces information": 40880, + "information obtain": 25993, + "use unsupervised": 60066, + "tasks demonstrate": 55575, + "morphological features": 35841, + "features compared": 20542, + "considered paper": 10250, + "concept relation": 9925, + "focus task": 21205, + "task extracting": 55072, + "belong different": 6417, + "propose semi": 43619, + "supervised method": 54017, + "manually defined": 31774, + "linguistic patterns": 30779, + "automatically learned": 5188, + "search results": 48983, + "results addition": 47491, + "concepts based": 9932, + "saliency scores": 48438, + "method generates": 32516, + "results high": 47654, + "addresses question": 1816, + "online discussion": 38362, + "relative importance": 46100, + "task proposed": 55304, + "proposed based": 43742, + "shows importance": 50783, + "importance different": 24681, + "community paper": 9268, + "method solve": 32662, + "answering task": 3099, + "task employ": 55042, + "model calculate": 33639, + "question similarity": 44750, + "extracting features": 20030, + "propose learning": 43438, + "learning rank": 29831, + "algorithm train": 2305, + "ranking tasks": 44978, + "tasks experimental": 55629, + "similarity model": 51108, + "baseline systems": 6213, + "bring improvements": 7331, + "answer sentence": 3054, + "previous systems": 42295, + "systems standard": 54638, + "set paper": 50211, + "analysis task": 2774, + "binary tree": 7156, + "using hierarchical": 60727, + "fast pace": 20428, + "systems based": 54439, + "methods order": 32969, + "order ensure": 38615, + "lattice rescoring": 29165, + "models suffer": 35556, + "solution use": 51663, + "single pass": 51329, + "second pass": 49014, + "hinge loss": 23946, + "beam search": 6365, + "approach gives": 3549, + "decoding performance": 13637, + "instead consider": 26446, + "approach inspired": 3573, + "structured prediction": 53170, + "obtain high": 38175, + "model second": 34341, + "texts work": 56947, + "showed method": 50666, + "method learn": 32562, + "learn embedding": 29367, + "analysis provide": 2731, + "provide thorough": 44144, + "models document": 34934, + "document similarity": 15833, + "better methods": 6917, + "methods propose": 32995, + "embeddings vector": 17243, + "useful semantic": 60387, + "results date": 47572, + "structured documents": 53154, + "previous works": 42318, + "works focused": 62891, + "focused modeling": 21227, + "recently methods": 45438, + "text specific": 56782, + "build general": 7400, + "remains important": 46334, + "terms model": 56300, + "efficiency propose": 16852, + "method model": 32578, + "framework leverages": 21557, + "information learn": 25950, + "topic word": 57435, + "word distributions": 62140, + "topic distributions": 57403, + "mining tasks": 33326, + "efficient variational": 16909, + "inference method": 25668, + "em algorithm": 17000, + "parameters propose": 39718, + "propose large": 43433, + "results effectiveness": 47602, + "effectiveness efficiency": 16778, + "efficiency robustness": 16855, + "model state": 34408, + "methods document": 32827, + "document modeling": 15813, + "prediction text": 41746, + "semantic embeddings": 49274, + "dimensional data": 15228, + "classification work": 8583, + "learning vector": 29936, + "embedding space": 17062, + "vectors word": 61501, + "better comparable": 6864, + "unsupervised text": 59742, + "text embedding": 56548, + "attracting increasing": 4893, + "methods usually": 33096, + "learning tasks": 29905, + "tasks possible": 55801, + "learn representation": 29414, + "unsupervised way": 59745, + "information available": 25766, + "available task": 5375, + "task low": 55194, + "dimensional representations": 15234, + "applicable different": 3156, + "tasks particularly": 55794, + "tuned task": 58889, + "gap proposing": 21979, + "supervised representation": 54035, + "text labeled": 56639, + "levels word": 30251, + "occurrence information": 38274, + "dimensional embedding": 15230, + "words documents": 62402, + "particular task": 39865, + "task compared": 54960, + "compared recent": 9445, + "fewer parameters": 20739, + "knowledge source": 27612, + "existing work": 19173, + "structured tables": 53177, + "using question": 60890, + "answer pairs": 3044, + "central challenge": 7917, + "domain results": 16149, + "results open": 47752, + "set relations": 50237, + "obtains significant": 38255, + "improvements natural": 25084, + "baselines evaluation": 6257, + "created new": 11730, + "world languages": 62945, + "language interactions": 28119, + "model needs": 34121, + "entailment relations": 18006, + "models external": 35006, + "interactions results": 26620, + "model present": 34227, + "parsing method": 39784, + "based parser": 5933, + "lstm recurrent": 31279, + "networks learn": 36869, + "parsing model": 39786, + "model benefits": 33617, + "level relation": 30189, + "networks cnn": 36837, + "feature engineering": 20483, + "based cnn": 5623, + "cnn based": 8760, + "especially long": 18283, + "distance dependency": 15544, + "pairs paper": 39205, + "simple framework": 51173, + "based recurrent": 5982, + "based model": 5857, + "semeval 2010": 49425, + "task dataset": 54992, + "dataset introduce": 12968, + "experiments different": 19414, + "rnn based": 48183, + "performance relation": 40528, + "learning long": 29712, + "makes suitable": 31637, + "suitable real": 53858, + "models model": 35228, + "network encoder": 36737, + "attention based": 4714, + "achieves word": 1387, + "rate wer": 45016, + "model 10": 33481, + "tweets tweets": 59023, + "period time": 40724, + "tweet sentiment": 59006, + "different groups": 14949, + "sentences collected": 49689, + "different categories": 14857, + "bengali text": 6597, + "model useful": 34510, + "sentences contain": 49695, + "sentences different": 49705, + "structures semantic": 53193, + "classification machine": 8490, + "learning information": 29682, + "existing research": 19136, + "research efforts": 47027, + "approached problem": 3749, + "problem introducing": 42587, + "user queries": 60440, + "web using": 61901, + "leveraging linguistic": 30331, + "using datasets": 60645, + "datasets consisting": 13194, + "results confirmed": 47557, + "develop language": 14592, + "question propose": 44745, + "focus modeling": 21182, + "study impact": 53387, + "shared language": 50475, + "energy function": 17750, + "best strategy": 6825, + "shared languages": 50476, + "tasks current": 55567, + "propose language": 43432, + "language style": 28511, + "easier process": 16528, + "negatively impact": 36644, + "conduct studies": 10063, + "causal relationship": 7877, + "evaluations using": 18771, + "models sequence": 35484, + "sequence tagging": 50002, + "tagging models": 54743, + "models include": 35117, + "bidirectional lstm": 7077, + "lstm bi": 31250, + "bi lstm": 7012, + "crf layer": 11763, + "layer lstm": 29189, + "lstm crf": 31257, + "layer bi": 29181, + "work apply": 62569, + "nlp benchmark": 37468, + "past future": 39932, + "input features": 26279, + "use sentence": 60008, + "produce state": 43011, + "investigate effect": 26953, + "analysis sa": 2747, + "common language": 9182, + "classification using": 8580, + "using generated": 60704, + "previously generated": 42334, + "ive bayes": 27137, + "using bidirectional": 60594, + "bidirectional lstms": 7079, + "traditional word": 57556, + "type model": 59062, + "model requires": 34313, + "fixed set": 21079, + "set parameters": 50214, + "model despite": 33755, + "yield state": 63101, + "results language": 47690, + "modeling speech": 34625, + "processing problem": 42926, + "problem model": 42608, + "model selection": 34346, + "approaches rely": 3911, + "grid search": 23248, + "coarse grained": 8784, + "methods allow": 32746, + "allow efficient": 2436, + "efficient model": 16886, + "gaussian processes": 22018, + "tree kernels": 58746, + "better prediction": 6942, + "prediction performance": 41727, + "performance compared": 40249, + "search framework": 48972, + "framework proposed": 21587, + "languages provide": 28761, + "provide strong": 44135, + "pairs existing": 39186, + "models suited": 35562, + "model latent": 34045, + "likelihood training": 30522, + "model address": 33542, + "approximate inference": 3976, + "model contrastive": 33716, + "scales large": 48645, + "existing generative": 19072, + "models exploiting": 35000, + "used automatic": 60097, + "gram based": 23050, + "does consider": 15938, + "entropy based": 18158, + "words method": 62457, + "health conditions": 23513, + "make inferences": 31577, + "concept text": 9928, + "required achieve": 46898, + "achieve propose": 1183, + "propose adapt": 43281, + "adapt existing": 1502, + "evaluate proposed": 18489, + "using collection": 60610, + "results combination": 47536, + "based mt": 5887, + "real valued": 45119, + "relatively low": 46124, + "network named": 36771, + "shown encode": 50705, + "encode semantic": 17468, + "length vectors": 30038, + "experimental evidence": 19266, + "evidence using": 18825, + "specific corpus": 52064, + "technique text": 56047, + "distributional representations": 15668, + "order produce": 38648, + "area nlp": 4144, + "based rich": 5999, + "sentence embeddings": 49547, + "jointly learned": 27200, + "context furthermore": 10644, + "furthermore word": 21843, + "process evaluate": 42776, + "qualitatively quantitatively": 44484, + "effectiveness framework": 16780, + "results state": 47855, + "mt metrics": 35919, + "evaluating different": 18558, + "tend produce": 56206, + "humans usually": 24292, + "recall precision": 45244, + "biases present": 7059, + "present data": 41881, + "data does": 12293, + "knowledge types": 27637, + "training unlabeled": 58308, + "initially trained": 26231, + "trained standard": 57881, + "adaptation domain": 1523, + "problem solving": 42662, + "statistical dependencies": 52740, + "joint probability": 27185, + "key semantic": 27333, + "concepts text": 9944, + "embedding based": 17014, + "networks nlp": 36882, + "strategies including": 52906, + "embeddings embedding": 17123, + "hyperparameter tuning": 24338, + "combining different": 9110, + "neural nlp": 37081, + "models existing": 34987, + "existing word": 19170, + "methods models": 32948, + "methods typically": 33086, + "solved using": 51695, + "singular value": 51360, + "information addition": 25754, + "global latent": 22833, + "latent factors": 29126, + "model generative": 33937, + "way incorporate": 61810, + "propose generative": 43401, + "easy interpret": 16564, + "serve basis": 50075, + "model inference": 34000, + "experiments common": 19375, + "common benchmark": 9166, + "models language": 35161, + "tasks translation": 55944, + "sub word": 53537, + "improve model": 24870, + "model performance": 34185, + "help overcome": 23582, + "overcome data": 39060, + "attain better": 4667, + "related words": 45954, + "improves models": 25137, + "performance provides": 40510, + "applied tasks": 3301, + "modelling word": 34646, + "models automatically": 34745, + "list words": 30841, + "models limited": 35189, + "leads improvements": 29318, + "improvements task": 25105, + "networks shown": 36909, + "shown improve": 50720, + "settings paper": 50387, + "layers introduce": 29225, + "modeling demonstrate": 34569, + "demonstrate ability": 13859, + "models machine": 35206, + "models maintain": 35211, + "models natural": 35244, + "posterior distribution": 41359, + "user trust": 60453, + "analysis present": 2723, + "method analyze": 32380, + "used models": 60241, + "sampling algorithm": 48497, + "confidence intervals": 10113, + "datasets comprising": 13186, + "explore impact": 19710, + "conduct series": 10060, + "series analyses": 50059, + "prediction experiments": 41707, + "experiments datasets": 19398, + "datasets results": 13409, + "present general": 41919, + "bipartite graph": 7181, + "proposed document": 43759, + "set using": 50275, + "nlp approaches": 37464, + "provide limited": 44098, + "linguistic rules": 30792, + "non expert": 37653, + "general categories": 22047, + "interactive interface": 26629, + "user studies": 60450, + "nlp non": 37506, + "produce high": 42985, + "quality labels": 44541, + "release source": 46167, + "majority languages": 31531, + "languages considered": 28623, + "alternative hypothesis": 2503, + "context approach": 10584, + "instruction following": 26480, + "explicitly modeling": 19644, + "level compositional": 30078, + "level structure": 30217, + "diverse set": 15717, + "set benchmark": 50114, + "benchmark tasks": 6498, + "tasks task": 55925, + "outperform strong": 38824, + "baselines achieve": 6226, + "representations useful": 46783, + "capturing semantic": 7742, + "applied variety": 3308, + "tasks especially": 55617, + "especially english": 18275, + "english work": 17907, + "english word": 17905, + "similarity text": 51126, + "effectiveness models": 16796, + "language article": 27968, + "present survey": 42032, + "field computational": 20752, + "aim provide": 2158, + "provide comprehensive": 44036, + "comprehensive overview": 9795, + "use social": 60020, + "social interaction": 51564, + "demonstrate potential": 13956, + "research communities": 47001, + "showing large": 50681, + "scale data": 48562, + "driven methods": 16429, + "methods widely": 33100, + "complement existing": 9584, + "challenge methods": 7995, + "open challenges": 38413, + "models discrete": 34925, + "discrete latent": 15422, + "shown beneficial": 50696, + "applications work": 3260, + "work exploit": 62656, + "syntactic dependency": 54300, + "used additional": 60082, + "information potentially": 26012, + "capturing fine": 7733, + "fine grain": 20924, + "evaluate word": 18518, + "recognition semantic": 45534, + "observe improvements": 38135, + "cases results": 7813, + "models advantage": 34698, + "nmt models": 37576, + "models typically": 35639, + "fixed vocabulary": 21084, + "open vocabulary": 38466, + "problem previous": 42628, + "effective approach": 16629, + "nmt model": 37575, + "model capable": 33644, + "subword units": 53688, + "based intuition": 5792, + "segmentation based": 49080, + "byte pair": 7514, + "pair encoding": 39149, + "models improve": 35110, + "english russian": 17869, + "exploratory analysis": 19682, + "propose end": 43369, + "neural encoder": 36950, + "joint task": 27190, + "surface realization": 54154, + "model encodes": 33819, + "network utilizes": 36823, + "utilizes novel": 61115, + "novel coarse": 37783, + "coarse fine": 8782, + "small subset": 51504, + "decoder generate": 13595, + "generate free": 22203, + "free form": 21640, + "selection generation": 49141, + "generation results": 22541, + "generation benchmark": 22426, + "dataset despite": 12893, + "despite using": 14403, + "using specialized": 60956, + "features linguistic": 20618, + "resources using": 47339, + "perform series": 40138, + "model components": 33684, + "generalizability model": 22109, + "competitive better": 9545, + "used derive": 60144, + "surface features": 54150, + "features document": 20562, + "rhetorical structure": 48088, + "structure theory": 53143, + "level sentiment": 30209, + "local information": 30942, + "information discourse": 25817, + "based sentiment": 6022, + "offers significant": 38304, + "classification based": 8437, + "algorithm improve": 2280, + "input query": 26323, + "linear interpolation": 30657, + "information experiments": 25849, + "english respectively": 17866, + "respectively experimental": 47366, + "achieves consistent": 1320, + "art method": 4280, + "accuracy speech": 1049, + "languages limited": 28715, + "speech resources": 52291, + "lack data": 27882, + "data data": 12267, + "language study": 28510, + "study develop": 53359, + "develop techniques": 14616, + "techniques extracting": 56086, + "data particular": 12538, + "domain corpora": 16033, + "different dialects": 14899, + "data selection": 12633, + "selection strategies": 49152, + "cross entropy": 11822, + "data improve": 12418, + "performance baseline": 40208, + "baseline machine": 6181, + "report preliminary": 46442, + "preliminary experiments": 41803, + "experiments using": 19553, + "using automatically": 60576, + "automatically translated": 5203, + "data additional": 12121, + "additional training": 1706, + "setting multi": 50332, + "multi perspective": 35999, + "constructed semantic": 10416, + "perform detailed": 40087, + "analysis evaluation": 2660, + "ted talks": 56160, + "applications human": 3209, + "used task": 60323, + "research direction": 47019, + "validate quality": 61184, + "previous results": 42274, + "results supervised": 47870, + "additional datasets": 1665, + "datasets languages": 13311, + "word formation": 62206, + "embeddings enable": 17126, + "present unsupervised": 42050, + "unsupervised approach": 59681, + "semantic vector": 49373, + "based analyses": 5564, + "translation experiments": 58613, + "experiments semantic": 19517, + "framework developed": 21494, + "framework make": 21559, + "text form": 56583, + "text framework": 56587, + "random test": 44890, + "possible improve": 41329, + "text prior": 56709, + "need developing": 36555, + "development paper": 14697, + "sliding window": 51431, + "training process": 58217, + "method incorporating": 32540, + "conducted compare": 10076, + "content paper": 10544, + "theoretical understanding": 57026, + "embeddings semantic": 17208, + "new evaluation": 37191, + "evaluation tasks": 18735, + "contrast prior": 10885, + "random walks": 44893, + "embedding algorithms": 17009, + "semantic language": 49292, + "studied problem": 53232, + "document using": 15842, + "information document": 25819, + "devise method": 14725, + "method make": 32568, + "make predictions": 31587, + "domains like": 16270, + "paper analyzes": 39267, + "problem specifically": 42665, + "assess performance": 4581, + "extensive experimental": 19872, + "evaluation effectiveness": 18613, + "art methodologies": 4281, + "pave way": 39984, + "select set": 49111, + "features order": 20634, + "detection problem": 14512, + "problem task": 42673, + "based twitter": 6114, + "problem propose": 42631, + "content based": 10514, + "widely available": 61994, + "available resource": 5363, + "resource limited": 47249, + "good quality": 22940, + "motivates use": 35879, + "use statistical": 60029, + "statistical translation": 52767, + "systems unfortunately": 54660, + "quantity quality": 44639, + "quality training": 44590, + "data limited": 12465, + "limited availability": 30571, + "especially languages": 18281, + "text domains": 56544, + "research present": 47095, + "various text": 61405, + "specific domains": 52073, + "approach domain": 3496, + "domain neural": 16120, + "translation present": 58659, + "present research": 41999, + "different training": 15104, + "training methods": 58175, + "translation used": 58699, + "medical data": 32200, + "parallel text": 39654, + "used basis": 60102, + "used analysis": 60085, + "performs slightly": 40716, + "does perform": 15964, + "perform significantly": 40140, + "segmentation task": 49088, + "useful resource": 60385, + "research explores": 47034, + "mining data": 33312, + "corpora task": 11248, + "method building": 32407, + "corpora wikipedia": 11257, + "sentence pairs": 49611, + "pairs new": 39204, + "effects various": 16829, + "various training": 61409, + "medical texts": 32211, + "texts various": 56943, + "models development": 34911, + "bleu nist": 7207, + "models hierarchical": 35085, + "hierarchical models": 23679, + "different alignment": 14834, + "alignment methods": 2374, + "automatic data": 5076, + "bilingual evaluation": 7108, + "evaluation understudy": 18743, + "understudy bleu": 59428, + "evaluation perform": 18668, + "perform experiments": 40102, + "existing evaluation": 19067, + "makes existing": 31622, + "correlation human": 11520, + "human translators": 24251, + "word orders": 62256, + "accuracy machine": 1003, + "systems nlp": 54572, + "tools text": 57386, + "tasks requiring": 55857, + "research proposes": 47102, + "text domain": 56543, + "semantic text": 49364, + "structure analysis": 53089, + "compared sentence": 9450, + "training settings": 58254, + "language various": 28577, + "effects data": 16824, + "morphological information": 35842, + "results neural": 47739, + "models powerful": 35335, + "powerful tool": 41448, + "models generally": 35055, + "relies availability": 46265, + "diverse training": 15724, + "specialised domains": 52026, + "obtaining large": 38236, + "corpus limited": 11372, + "limited range": 30607, + "entities knowledge": 18060, + "model integrates": 34011, + "diverse data": 15697, + "information achieve": 25751, + "recent models": 45321, + "link prediction": 30828, + "training labels": 58143, + "learning different": 29589, + "detailed information": 14427, + "information web": 26157, + "daily basis": 12084, + "common knowledge": 9181, + "research shown": 47119, + "shown possible": 50734, + "obtain similar": 38192, + "lines research": 30688, + "method combines": 32421, + "model successfully": 34424, + "information language": 25939, + "years neural": 63065, + "emerged powerful": 17262, + "yielding state": 63111, + "image recognition": 24543, + "recognition speech": 45538, + "processing recently": 42931, + "perspective natural": 40775, + "input encoding": 26273, + "forward networks": 21404, + "convolutional networks": 11109, + "networks recurrent": 36903, + "measured using": 32071, + "language means": 28149, + "different contexts": 14876, + "results applying": 47501, + "provide evidence": 44063, + "proved effective": 43987, + "fixed dimensional": 21075, + "space resulting": 51894, + "embeddings need": 17178, + "word types": 62326, + "old new": 38327, + "task best": 54935, + "best approach": 6748, + "pairs train": 39222, + "produce embeddings": 42981, + "pairs different": 39180, + "performs similarly": 40715, + "best previously": 6806, + "previously published": 42343, + "published results": 44372, + "introduces novel": 26894, + "approach tackle": 3714, + "systems currently": 54467, + "sentences approach": 49681, + "disambiguation problem": 15359, + "using concepts": 60617, + "enable use": 17430, + "extract latent": 19984, + "use latent": 59929, + "label data": 27701, + "silver standard": 51025, + "set train": 50268, + "classifier predict": 8602, + "predict topic": 41659, + "topic distribution": 57402, + "current sentence": 12006, + "sentence experimental": 49555, + "results large": 47694, + "domain proposed": 16139, + "effective predicting": 16687, + "shows potential": 50793, + "interactive setting": 26632, + "common use": 9209, + "classifiers model": 8619, + "words recently": 62493, + "shown perform": 50732, + "scenario small": 48689, + "small number": 51486, + "referring expressions": 45761, + "network extract": 36742, + "image features": 24536, + "positional information": 41274, + "information model": 25975, + "achieves performance": 1352, + "conceptually simpler": 9957, + "explore role": 19734, + "paper define": 39315, + "networks approach": 36831, + "networks multi": 36877, + "multi party": 35996, + "supervised training": 54061, + "learn task": 29434, + "embeddings words": 17249, + "data new": 12510, + "words test": 62529, + "embeddings task": 17224, + "loss objective": 31100, + "objective function": 38089, + "analysis experimental": 2662, + "properties word": 43272, + "embeddings proposed": 17197, + "controlled experiments": 10984, + "relations word": 46063, + "demonstrated using": 14025, + "model experiments": 33852, + "level noise": 30169, + "distribution word": 15659, + "space defined": 51853, + "non zero": 37691, + "languages use": 28814, + "order words": 38663, + "test hypothesis": 56349, + "differences human": 14822, + "efficient processing": 16892, + "gain insights": 21910, + "research issues": 47059, + "studies utilized": 53309, + "sources text": 51841, + "shared multiple": 50481, + "similar approaches": 51029, + "newspaper articles": 37430, + "work analyze": 62565, + "methods generating": 32878, + "based linear": 5815, + "interpretable results": 26730, + "randomly selected": 44904, + "topical structure": 57440, + "structure documents": 53101, + "documents related": 15909, + "method promising": 32624, + "learning common": 29563, + "representations multiple": 46721, + "multiple views": 36310, + "data typically": 12749, + "using parallel": 60853, + "work address": 62555, + "world scenario": 62956, + "specific downstream": 52074, + "downstream applications": 16332, + "cross modal": 11863, + "performance multilingual": 40447, + "available multilingual": 5329, + "dataset created": 12872, + "largest publicly": 29100, + "corpus use": 11453, + "previously reported": 42346, + "independent evaluation": 25497, + "data second": 12631, + "evaluate performances": 18485, + "performances various": 40652, + "bi lstms": 7016, + "dataset create": 12871, + "predictions multiple": 41765, + "multiple models": 36251, + "models ensemble": 34970, + "performance achieves": 40180, + "dataset finally": 12929, + "finally discuss": 20852, + "discuss future": 15466, + "corpus text": 11443, + "text quality": 56722, + "semantic structural": 49354, + "processed using": 42843, + "terms quality": 56312, + "available web": 5390, + "used ground": 60200, + "used order": 60257, + "order evaluate": 38616, + "evaluate results": 18501, + "various systems": 61402, + "systems experiments": 54499, + "data processing": 12565, + "used compare": 60119, + "quality scores": 44578, + "english translations": 17894, + "relatively high": 46116, + "score 64": 48800, + "approach multi": 3605, + "multi language": 35979, + "given target": 22791, + "sequence generation": 49925, + "generation models": 22495, + "aligned english": 2356, + "sentences significant": 49786, + "meteor scores": 32351, + "scores models": 48909, + "trained multiple": 57820, + "languages compared": 28618, + "corpora propose": 11234, + "corpora study": 11247, + "text span": 56780, + "experiment different": 19237, + "perform error": 40098, + "analysis component": 2635, + "using multi": 60816, + "accuracy 74": 908, + "held test": 23545, + "reveals interesting": 48018, + "interesting findings": 26650, + "best systems": 6830, + "bidirectional long": 7075, + "shown effective": 50702, + "data speech": 12688, + "properties natural": 43268, + "study propose": 53442, + "set state": 50252, + "accuracy achieved": 934, + "features approach": 20523, + "based sparse": 6050, + "relations different": 46022, + "different syntactic": 15090, + "parameters model": 39709, + "network different": 36732, + "parameters different": 39691, + "different degrees": 14895, + "parameters languages": 39704, + "determined using": 14560, + "approach captures": 3443, + "geographical regions": 22650, + "approaches primarily": 3900, + "primarily focused": 42363, + "lexical variation": 30395, + "method identifies": 32525, + "extend recently": 19830, + "approach explicitly": 3528, + "explicitly account": 19630, + "model study": 34419, + "online data": 38358, + "tweets twitter": 59024, + "spanning different": 51952, + "finally using": 20886, + "100 years": 66, + "rational speech": 45026, + "speech acts": 52252, + "model treats": 34492, + "shown capture": 50698, + "knowledge data": 27432, + "data address": 12123, + "address concerns": 1751, + "activation function": 1467, + "opens new": 38482, + "new application": 37129, + "application domains": 3165, + "learning effectively": 29609, + "expression generation": 19805, + "task showing": 55372, + "performance achieved": 40179, + "statistical parsing": 52759, + "adapting models": 1566, + "models languages": 35163, + "difficult languages": 15173, + "make significant": 31597, + "leads improved": 29315, + "approaches compared": 3786, + "dependency parser": 14128, + "score compared": 48841, + "model f1": 33874, + "score 89": 48824, + "constituency parsing": 10351, + "arabic twitter": 4008, + "entity names": 18122, + "topic detection": 57400, + "detection propose": 14513, + "propose approaches": 43298, + "approaches tackle": 3933, + "tackle issue": 54704, + "relevant tweets": 46244, + "given entity": 22741, + "task shown": 55373, + "shown competitive": 50699, + "task according": 54873, + "according evaluation": 858, + "approach sentence": 3685, + "formulate task": 21386, + "programming ilp": 43085, + "techniques proposed": 56126, + "introduce novel": 26842, + "orders magnitude": 38670, + "magnitude faster": 31415, + "evaluation demonstrates": 18607, + "does degrade": 15941, + "single best": 51286, + "results introduce": 47683, + "sequence learning": 49944, + "learning deep": 29582, + "output sequence": 39000, + "lstm network": 31275, + "automatically create": 5154, + "features represent": 20655, + "learns make": 29964, + "make decision": 31564, + "used input": 60215, + "learning attention": 29525, + "natural sentences": 36464, + "experiments indicate": 19446, + "sentences training": 49797, + "outperformed baseline": 38836, + "sentences terms": 49793, + "based lstm": 5825, + "designed predict": 14328, + "generation probability": 22525, + "time step": 57224, + "generated based": 22270, + "modeling power": 34611, + "achieves results": 1359, + "results current": 47566, + "achieving competitive": 1400, + "effective modeling": 16675, + "documents study": 15916, + "applied various": 3309, + "tagging tasks": 54753, + "specific features": 52084, + "solution uses": 51664, + "uses set": 60535, + "set task": 50257, + "task independent": 55133, + "independent features": 25499, + "internal representations": 26689, + "representations learnt": 46708, + "text tasks": 56806, + "explore different": 19698, + "different neural": 15005, + "network architectures": 36700, + "question making": 44735, + "regularization methods": 45840, + "popularly used": 41209, + "used train": 60332, + "train deep": 57577, + "study different": 53361, + "different context": 14875, + "performance compare": 40248, + "models multimodal": 35240, + "computational tools": 9868, + "cases using": 7816, + "historical information": 23960, + "information extract": 25857, + "person names": 40750, + "information integrated": 25927, + "non negative": 37667, + "negative matrix": 36625, + "held data": 23544, + "data able": 12104, + "able train": 728, + "data important": 12417, + "important practical": 24753, + "data usually": 12766, + "training algorithm": 57929, + "algorithm using": 2311, + "training experiments": 58101, + "results use": 47895, + "loss function": 31093, + "information perform": 26005, + "small amounts": 51462, + "thousand words": 57077, + "model real": 34275, + "data taking": 12719, + "data source": 12674, + "data improves": 12421, + "based relevant": 5992, + "estimation method": 18382, + "study focused": 53380, + "focused automatic": 21216, + "automatic identification": 5097, + "extracted using": 20025, + "based reference": 5984, + "lexical diversity": 30362, + "model does": 33784, + "level lexical": 30150, + "recent past": 45332, + "studies published": 53291, + "demonstrate simple": 13976, + "important context": 24714, + "hope paper": 24011, + "models analysis": 34710, + "analysis data": 2641, + "data natural": 12503, + "translation proposed": 58663, + "approaches typically": 3945, + "typically use": 59159, + "representing information": 46812, + "represent information": 46473, + "information original": 25997, + "entities introduce": 18058, + "new test": 37341, + "test language": 56353, + "unlike standard": 59610, + "task predicting": 55283, + "frequency words": 21681, + "range state": 44935, + "different way": 15127, + "representations long": 46712, + "art neural": 4308, + "text encoded": 56551, + "single words": 51358, + "meaningful information": 32024, + "window based": 62064, + "self supervision": 49220, + "performance paper": 40473, + "propose structured": 43651, + "label sequence": 27725, + "structured input": 53158, + "special case": 52014, + "structured learning": 53162, + "proposed perform": 43877, + "aim extract": 2147, + "unstructured texts": 59673, + "identify main": 24429, + "particular text": 39866, + "text crucial": 56520, + "research studies": 47123, + "order determine": 38609, + "task goal": 55108, + "tweet dataset": 59004, + "dataset using": 13130, + "data unsupervised": 12757, + "topic drift": 57404, + "key challenge": 27297, + "provide analysis": 44007, + "target dataset": 54806, + "approaches model": 3875, + "level recurrent": 30187, + "network gcn": 36747, + "training generative": 58114, + "accuracy evaluation": 969, + "resource english": 47226, + "extraction generation": 20070, + "approach leverages": 3589, + "extract set": 19994, + "patterns based": 39965, + "based data": 5663, + "work studies": 62829, + "multimodal data": 36145, + "given piece": 22769, + "corresponding semantic": 11557, + "image video": 24548, + "end introduce": 17676, + "training objective": 58196, + "objective learning": 38092, + "learning multimodal": 29772, + "present extensive": 41914, + "extensive results": 19911, + "embeddings various": 17242, + "promise approach": 43155, + "characters words": 8255, + "information provides": 26036, + "using joint": 60743, + "model target": 34442, + "modeled sequence": 34555, + "sequence word": 50018, + "benefit approach": 6559, + "challenges associated": 8033, + "languages model": 28730, + "results par": 47761, + "models complex": 34840, + "complex compositional": 9617, + "language challenging": 27985, + "challenging language": 8106, + "provides strong": 44230, + "performance underlying": 40612, + "underlying models": 59272, + "models truly": 35634, + "truly understanding": 58826, + "visual content": 61652, + "multi modal": 35988, + "visual question": 61663, + "answering vqa": 3103, + "questions specifically": 44810, + "level semantics": 30206, + "dataset language": 12976, + "language priors": 28390, + "better chance": 6862, + "balanced dataset": 5515, + "approach matches": 3598, + "dataset outperforms": 13022, + "dataset recent": 13055, + "advances neural": 1918, + "deep latent": 13698, + "distributions latent": 15677, + "inference network": 25673, + "discrete text": 15429, + "different text": 15097, + "standard test": 52532, + "test corpora": 56338, + "selection model": 49146, + "model employs": 33814, + "layer attention": 29178, + "mechanism extract": 32119, + "answer pair": 3043, + "answering benchmarks": 3065, + "benchmarks model": 6531, + "paper exploit": 39360, + "language variation": 28573, + "key idea": 27315, + "novel attention": 37770, + "architecture attention": 4027, + "review data": 48028, + "data transfer": 12743, + "setting task": 50351, + "task settings": 55367, + "tasks example": 55623, + "trained language": 57758, + "used recognize": 60287, + "little training": 30886, + "learning cross": 29576, + "learning shows": 29876, + "abstract features": 757, + "features learned": 20615, + "learned deep": 29455, + "models transfer": 35626, + "data distributions": 12291, + "data types": 12748, + "types model": 59101, + "model structures": 34417, + "model types": 34496, + "review paper": 48034, + "highlight potential": 23869, + "interesting research": 26654, + "research field": 47036, + "improvements machine": 25080, + "tested method": 56396, + "corpora based": 11180, + "corpora data": 11189, + "corpus sentences": 11427, + "refer entities": 45733, + "entities domain": 18045, + "validation results": 61195, + "results accurate": 47484, + "explicit implicit": 19615, + "implicit relationships": 24663, + "data critical": 12261, + "perform complex": 40080, + "reasoning tasks": 45228, + "tasks efficient": 55603, + "model report": 34307, + "report state": 46447, + "answering tasks": 3100, + "proved difficult": 43986, + "approaches learning": 3859, + "learning representation": 29841, + "al 2015": 2237, + "created large": 11728, + "trade offs": 57502, + "memory usage": 32286, + "reduce memory": 45669, + "memory footprint": 32255, + "art embedding": 4251, + "impact performance": 24604, + "representation better": 46496, + "better interpretability": 6904, + "attention lately": 4761, + "impressive performance": 24811, + "numerous natural": 38067, + "tasks semantic": 55870, + "similarity measurement": 51103, + "despite success": 14394, + "methods consider": 32799, + "relational structure": 46013, + "semantic lexicons": 49296, + "learnt using": 29980, + "integrating knowledge": 26523, + "knowledge semantic": 27606, + "propose joint": 43425, + "corpus proposed": 11410, + "statistically significantly": 52775, + "outperforms previously": 38928, + "methods incorporating": 32901, + "incorporating semantic": 25393, + "datasets semantic": 13414, + "computational semantics": 9861, + "multi domain": 35954, + "words proposed": 62487, + "domain texts": 16210, + "obtained state": 38224, + "pairs using": 39228, + "training target": 58283, + "translation investigate": 58623, + "use monolingual": 59952, + "work combines": 62601, + "models separately": 35482, + "separately trained": 49883, + "explore strategies": 19739, + "train monolingual": 57612, + "data automatic": 12171, + "data obtain": 12519, + "low resourced": 31197, + "results fine": 47635, + "tuning domain": 58907, + "monolingual parallel": 35809, + "topic specific": 57431, + "framework built": 21466, + "classification methods": 8493, + "methods currently": 32808, + "wide use": 61978, + "intensive methods": 26559, + "methods latent": 32921, + "mental health": 32290, + "documents high": 15884, + "set phrases": 50218, + "method extended": 32500, + "computational time": 9867, + "word lists": 62241, + "methods literature": 32930, + "literature present": 30860, + "methods offer": 32966, + "unstructured documents": 59668, + "knowledge form": 27485, + "entities context": 18041, + "set training": 50269, + "data using": 12765, + "context features": 10638, + "new patterns": 37280, + "evaluated proposed": 18543, + "proposed english": 43766, + "language dataset": 28017, + "documents corpus": 15868, + "work leverage": 62708, + "extract structured": 19996, + "used reduce": 60288, + "reduce data": 45657, + "data requirements": 12606, + "particular demonstrate": 39841, + "discover new": 15407, + "words vectors": 62544, + "data science": 12629, + "new york": 37364, + "approach natural": 3609, + "self contained": 49192, + "networks used": 36921, + "fundamental natural": 21781, + "learning general": 29659, + "embeddings based": 17086, + "based supervision": 6071, + "textual similarity": 56981, + "similarity datasets": 51091, + "distribution training": 15654, + "complex architectures": 9616, + "networks perform": 36890, + "perform best": 40071, + "data domain": 12294, + "domain scenarios": 16150, + "averaging model": 5427, + "model competitive": 33679, + "extremely efficient": 20157, + "use order": 59968, + "conduct experiments": 10042, + "experiments supervised": 19537, + "supervised nlp": 54027, + "classification word": 8582, + "pretrained sentence": 42181, + "using prior": 60873, + "feature extractor": 20489, + "leads performance": 29322, + "entailment tasks": 18008, + "tasks release": 55847, + "hope serve": 24013, + "new baseline": 37141, + "universal sentence": 59546, + "used method": 60235, + "method computing": 32430, + "upper bound": 59773, + "theoretical analysis": 57018, + "language data": 28016, + "data generated": 12384, + "ways improve": 61842, + "novel type": 37944, + "representations make": 46715, + "sensitive word": 49505, + "demonstrated effective": 14004, + "effective capturing": 16634, + "unsupervised representation": 59724, + "unlabeled corpora": 59562, + "corpora learn": 11215, + "learn similar": 29423, + "similar representations": 51061, + "information important": 25912, + "important knowledge": 24739, + "knowledge help": 27513, + "category information": 7862, + "information documents": 25820, + "representations learn": 46704, + "wise manner": 62081, + "models word": 35683, + "tasks evaluate": 55619, + "learned word": 29492, + "models demonstrated": 34894, + "capable achieving": 7615, + "achieving remarkable": 1420, + "remarkable performance": 46358, + "performance sentence": 40550, + "work combine": 62600, + "combine strengths": 9074, + "novel unified": 37945, + "unified model": 59474, + "sentence representation": 49631, + "cnn extract": 8767, + "level phrase": 30176, + "phrase representations": 40844, + "representations fed": 46668, + "network lstm": 36763, + "obtain sentence": 38189, + "capture local": 7691, + "sentence semantics": 49640, + "proposed architecture": 43738, + "classification question": 8526, + "outperforms cnn": 38881, + "cnn lstm": 8772, + "achieve excellent": 1134, + "problem modeling": 42609, + "relationship modeling": 46071, + "modeling structured": 34626, + "prediction problem": 41729, + "supervised framework": 53988, + "framework learn": 21553, + "partially labeled": 39809, + "use set": 60013, + "knowledge investigate": 27534, + "demonstrate framework": 13914, + "framework outperforms": 21578, + "outperforms competitive": 38887, + "competitive baselines": 9542, + "translation slt": 58677, + "major challenge": 31503, + "sentence segmentation": 49637, + "downstream language": 16340, + "explore problem": 19728, + "problem identifying": 42579, + "sentence boundaries": 49522, + "recognition systems": 45541, + "span annotations": 51919, + "specific words": 52177, + "words characters": 62378, + "analyze text": 2829, + "languages single": 28790, + "single model": 51317, + "model small": 34393, + "small vocabulary": 51512, + "multilingual models": 36098, + "results similar": 47848, + "training datasets": 58054, + "external data": 19932, + "sources models": 51836, + "problem joint": 42588, + "linguistic semantic": 30793, + "text provide": 56719, + "competitive baseline": 9540, + "solve data": 51678, + "especially data": 18269, + "does exist": 15945, + "combined model": 9081, + "employ word": 17395, + "models additional": 34693, + "experimental result": 19267, + "result shows": 47451, + "method significantly": 32653, + "outperforms conventional": 38890, + "lexicon model": 30413, + "model phrase": 34209, + "results chinese": 47532, + "using english": 60676, + "powerful models": 41437, + "models widely": 35681, + "widely applied": 61993, + "modeling translation": 34632, + "notable improvements": 37717, + "remains challenge": 46326, + "machine translations": 31393, + "translations paper": 58710, + "model directly": 33769, + "prediction experimental": 41705, + "various baseline": 61307, + "systems automatically": 54437, + "automatically recognize": 5198, + "essay scoring": 18316, + "detection classification": 14467, + "new feature": 37202, + "accuracy accuracy": 933, + "features useful": 20690, + "present dataset": 41883, + "dataset manually": 12991, + "texts order": 56907, + "automatic methods": 5107, + "relation type": 45998, + "type prediction": 59064, + "computational analysis": 9832, + "given pair": 22766, + "negative neutral": 36627, + "relationship text": 46073, + "embeddings build": 17090, + "symbolic knowledge": 54268, + "previous neural": 42265, + "performance reasoning": 40521, + "tasks having": 55662, + "unseen entities": 59648, + "learn embeddings": 29368, + "like human": 30475, + "using computational": 60615, + "computational linguistic": 9844, + "features trained": 20686, + "linguistic features": 30771, + "features results": 20659, + "presents end": 42082, + "model named": 34116, + "neural generative": 36959, + "generative question": 22608, + "questions based": 44775, + "model built": 33638, + "framework sequence": 21597, + "trained corpus": 57699, + "triples knowledge": 58806, + "questions answers": 44772, + "demonstrates proposed": 14039, + "model outperform": 34152, + "based qa": 5967, + "qa model": 44451, + "model neural": 34123, + "dialogue model": 14778, + "trained data": 57703, + "data real": 12585, + "real systems": 45112, + "texts particular": 56911, + "network methods": 36764, + "novel models": 37876, + "network representation": 36794, + "way proposed": 61828, + "relevant topics": 46242, + "methods outperform": 32970, + "outperform traditional": 38829, + "textual representation": 56976, + "understand human": 59296, + "process human": 42788, + "human natural": 24209, + "research provide": 47103, + "reading comprehension": 45080, + "scientific papers": 48766, + "different styles": 15085, + "models analyze": 34712, + "makes difficult": 31619, + "people different": 40028, + "existing machine": 19087, + "designing new": 14341, + "paper attempt": 39275, + "attempt improve": 4688, + "set language": 50179, + "czech english": 12078, + "english vietnamese": 17901, + "settings language": 50379, + "data adaptation": 12119, + "adaptation techniques": 1543, + "techniques employed": 56081, + "train language": 57597, + "models develop": 34908, + "corpora use": 11253, + "explored use": 19767, + "use domain": 59870, + "alignment models": 2376, + "models unsupervised": 35649, + "used bleu": 60109, + "metrics results": 33198, + "indicate approach": 25524, + "positive impact": 41282, + "quality multilingual": 44554, + "data systems": 12717, + "systems limited": 54550, + "improvements current": 25065, + "computation time": 9830, + "various domains": 61329, + "mined data": 33277, + "quality propose": 44566, + "minimum risk": 33307, + "risk training": 48164, + "training end": 58085, + "unlike conventional": 59592, + "likelihood estimation": 30518, + "experiments approach": 19355, + "approach achieves": 3393, + "applied neural": 3285, + "novel concept": 37786, + "learning generate": 29660, + "target corpus": 54804, + "implicit relations": 24662, + "relations concepts": 46020, + "datasets measuring": 13327, + "compared prior": 9441, + "statistical significance": 52762, + "methods study": 33057, + "methods measuring": 32943, + "task entity": 55051, + "entity extraction": 18106, + "tool developed": 57362, + "model utilizes": 34519, + "information like": 25954, + "pos tag": 41229, + "features enhance": 20571, + "submitted runs": 53585, + "models based": 34753, + "datasets released": 13399, + "project develop": 43133, + "problem developing": 42537, + "text datasets": 56525, + "multiple classes": 36183, + "question text": 44752, + "produces best": 43026, + "corpora text": 11249, + "sentence representations": 49633, + "paper concerns": 39295, + "good candidate": 22928, + "provide preliminary": 44114, + "gaining popularity": 21929, + "systematic comparison": 54393, + "introduce efficient": 26802, + "method popular": 32615, + "popular benchmarks": 41159, + "speed accuracy": 52320, + "provide information": 44089, + "limited present": 30606, + "present graph": 41923, + "based semi": 6018, + "automatically construct": 5150, + "small seed": 51498, + "size high": 51385, + "11 languages": 89, + "languages addition": 28594, + "automatically created": 5155, + "features improve": 20600, + "performance downstream": 40305, + "tagging dependency": 54738, + "pair sentences": 39158, + "critical issue": 11784, + "paraphrase identification": 39741, + "work deals": 62620, + "individual task": 25581, + "task fine": 55086, + "tuning specific": 58957, + "models sentence": 35480, + "manually designed": 31775, + "sentences make": 49752, + "make contributions": 31554, + "applied wide": 3311, + "variety tasks": 61292, + "modeling sentence": 34622, + "propose attention": 43301, + "takes consideration": 54779, + "sentence pair": 49609, + "pair representations": 39157, + "specific semantic": 52143, + "relations propose": 46053, + "approach models": 3604, + "directed acyclic": 15264, + "acyclic graph": 1492, + "mechanism using": 32148, + "make sure": 31602, + "proposed design": 43757, + "design allows": 14262, + "validated results": 61186, + "task artificial": 54914, + "paper target": 39595, + "generation framework": 22465, + "framework framework": 21524, + "framework consists": 21479, + "including topic": 25311, + "understanding sentence": 59399, + "run experiments": 48401, + "chinese corpus": 8302, + "easily adapted": 16535, + "remaining challenges": 46323, + "particular linguistic": 39852, + "model problem": 34236, + "character sequence": 8224, + "problem present": 42627, + "model solving": 34398, + "trained supervised": 57886, + "supervised semi": 54039, + "supervised settings": 54045, + "seven datasets": 50417, + "languages achieve": 28590, + "generation recent": 22537, + "recent language": 45315, + "models especially": 34973, + "networks rnns": 36908, + "possible generate": 41327, + "generate natural": 22221, + "probability language": 42478, + "translation summarization": 58683, + "summarization question": 53896, + "systems existing": 54495, + "typically learn": 59147, + "generated texts": 22326, + "particular word": 39873, + "approaches solve": 3922, + "model provided": 34256, + "model variants": 34523, + "position sentence": 41269, + "results generated": 47647, + "negative sentences": 36635, + "translate english": 58550, + "established new": 18358, + "sentence translation": 49663, + "methods achieve": 32728, + "movie reviews": 35896, + "document embeddings": 15788, + "embeddings methods": 17174, + "document vectors": 15844, + "predicting words": 41687, + "imdb movie": 24570, + "review dataset": 48029, + "dataset shows": 13085, + "shows model": 50789, + "robust results": 48265, + "model combined": 33666, + "models source": 35521, + "code model": 8833, + "single word": 51357, + "pair wise": 39160, + "model multi": 34108, + "sentence multiple": 49604, + "multiple words": 36313, + "defined categories": 13783, + "combination words": 9054, + "correct meaning": 11470, + "pair model": 39155, + "model recognize": 34284, + "entailment contradiction": 18000, + "model tree": 34493, + "element wise": 16973, + "combine information": 9067, + "information individual": 25922, + "results model": 47720, + "existing sentence": 19140, + "sentence encoding": 49551, + "approaches large": 3854, + "systems built": 54444, + "systems languages": 54542, + "used building": 60111, + "building systems": 7473, + "systems best": 54441, + "type systems": 59071, + "resources english": 47300, + "english target": 17887, + "using language": 60752, + "sequence neural": 49961, + "potentially useful": 41420, + "information introduce": 25931, + "function neural": 21758, + "neural mt": 36986, + "information source": 26094, + "target sentences": 54839, + "implement model": 24636, + "ranking method": 44971, + "method introduce": 32550, + "models offers": 35275, + "consistent performance": 10281, + "performance boost": 40222, + "standard lstm": 52498, + "lstm attention": 31241, + "noisy training": 37626, + "data way": 12772, + "labels example": 27819, + "corpus improve": 11359, + "multi source": 36006, + "model train": 34466, + "combination methods": 9043, + "provide extensive": 44070, + "affects performance": 2026, + "performance wide": 40627, + "finding optimal": 20900, + "tasks benefit": 55522, + "simple concatenation": 51144, + "learned different": 29456, + "additional contribution": 1662, + "contribution propose": 10945, + "learns word": 29978, + "code mixed": 8823, + "bengali english": 6593, + "english tamil": 17886, + "tamil english": 54794, + "overall accuracy": 39033, + "accuracy 70": 904, + "obtains highest": 38251, + "highest average": 23850, + "tasks understanding": 55946, + "challenge paper": 8004, + "propose recurrent": 43597, + "memory network": 32272, + "rnn architecture": 48180, + "discover underlying": 15409, + "patterns data": 39966, + "network large": 36756, + "english dataset": 17793, + "dataset additionally": 12805, + "perform depth": 40086, + "depth analysis": 14184, + "analysis various": 2791, + "various linguistic": 61356, + "sentence coherence": 49528, + "art large": 4272, + "specific architectures": 52045, + "used sequence": 60297, + "paper enhance": 39349, + "model label": 34034, + "label dependencies": 27702, + "encoded vector": 17485, + "initial state": 26218, + "methods predict": 32987, + "predict label": 41643, + "information input": 25924, + "slot filling": 51441, + "filling task": 20803, + "essential component": 18323, + "understanding using": 59415, + "art f1": 4261, + "score 95": 48829, + "data consists": 12244, + "sequences sentences": 50025, + "sentences word": 49807, + "word labels": 62220, + "translations languages": 58709, + "data noisy": 12514, + "use source": 60022, + "learning new": 29781, + "leads significantly": 29328, + "performance multi": 40445, + "source transfer": 51816, + "gaussian noise": 22016, + "online learning": 38373, + "learning finally": 29644, + "present corpus": 41878, + "quantitative qualitative": 44623, + "including non": 25285, + "addition approach": 1601, + "features speech": 20672, + "based solutions": 6047, + "number people": 38026, + "closed set": 8699, + "human speech": 24241, + "mechanism evaluate": 32115, + "result poor": 47447, + "approach adopted": 3412, + "time cost": 57140, + "cost paper": 11590, + "novel deep": 37802, + "model deep": 33742, + "network used": 36820, + "acoustic features": 1434, + "features input": 20604, + "aware training": 5475, + "framework multi": 21566, + "learning domain": 29604, + "performance gain": 40356, + "approach self": 3681, + "training semi": 58244, + "automatically classifying": 5148, + "adding additional": 1593, + "context language": 10664, + "tasks training": 55939, + "training does": 58070, + "does lead": 15956, + "lead performance": 29266, + "training beneficial": 57944, + "step process": 52824, + "process developing": 42771, + "classifier able": 8590, + "able adapt": 672, + "adapt new": 1506, + "art nlp": 4316, + "sequences words": 50031, + "words generated": 62425, + "model takes": 34439, + "grammatical correctness": 23068, + "model aims": 33556, + "automated approach": 5036, + "approach evaluate": 3519, + "evaluate quality": 18495, + "quality generated": 44524, + "evaluation generated": 18623, + "designed use": 14336, + "learning tools": 29914, + "reasoning natural": 45208, + "reasoning model": 45204, + "models provided": 35380, + "detailed analyses": 14411, + "users need": 60470, + "need know": 36573, + "trained human": 57747, + "human raters": 24222, + "core concepts": 11147, + "models important": 35107, + "parameters models": 39710, + "models easily": 34944, + "easily overfit": 16549, + "data sparse": 12680, + "popular solution": 41189, + "alignment paper": 2379, + "propose framework": 43392, + "framework generalizes": 21527, + "performance data": 40275, + "examine various": 18871, + "gives best": 22803, + "nlp information": 37491, + "multiple word": 36312, + "sparse coding": 51966, + "approach applies": 3420, + "using variant": 61013, + "arora et": 4193, + "al 2016": 2238, + "used verify": 60349, + "surface form": 54151, + "form input": 21322, + "input outputs": 26311, + "description generation": 14243, + "generation natural": 22503, + "recently received": 45460, + "space provide": 51888, + "review existing": 48032, + "models highlighting": 35089, + "image datasets": 24535, + "datasets evaluation": 13255, + "machine generated": 31299, + "future directions": 21871, + "task loss": 55193, + "learning present": 29814, + "standard reference": 52521, + "translation experiment": 58610, + "improves translation": 25166, + "approaches employ": 3805, + "feedback learning": 20717, + "news events": 37403, + "information prior": 26021, + "time inference": 57165, + "gibbs sampling": 22688, + "sampling procedure": 48506, + "encouraging results": 17606, + "retrieval model": 47955, + "clustering method": 8741, + "method competitive": 32428, + "competitive best": 9544, + "extractive summarization": 20139, + "summarization aims": 53876, + "selecting set": 49129, + "sentences source": 49788, + "critical issues": 11785, + "methods designed": 32818, + "designed model": 14325, + "far aware": 20396, + "motivated observations": 35871, + "major contributions": 31507, + "methods directly": 32826, + "increase diversity": 25412, + "set representative": 50239, + "relevant given": 46219, + "cover important": 11646, + "important sub": 24776, + "make step": 31600, + "step forward": 52808, + "document sentence": 15829, + "framework enhance": 21506, + "empirical evaluations": 17325, + "methods tend": 33070, + "tend make": 56203, + "relationships word": 46086, + "clustering algorithm": 8736, + "benefit using": 6574, + "predict sentiment": 41655, + "sentiment given": 49846, + "short time": 50574, + "information linguistic": 25956, + "invariant representations": 26921, + "information specifically": 26101, + "specifically demonstrate": 52190, + "information embedded": 25827, + "verify hypothesis": 61540, + "achieved high": 1239, + "accuracy using": 1068, + "structures paper": 53191, + "reduction word": 45724, + "provide insight": 44092, + "mechanism performs": 32135, + "parsing approaches": 39772, + "domain question": 16141, + "ask question": 4518, + "paraphrase generation": 39740, + "semantic parser": 49305, + "evaluation experiments": 18620, + "dataset performance": 13026, + "improves strong": 25165, + "strong baselines": 53008, + "like news": 30493, + "news article": 37384, + "provide quantitative": 44117, + "learning experiments": 29636, + "experiments showing": 19524, + "key task": 27337, + "parametric models": 39731, + "types linguistic": 59099, + "data approaches": 12144, + "approaches apply": 3765, + "takes form": 54780, + "using synthetic": 60974, + "synthetic data": 54370, + "set real": 50232, + "real datasets": 45102, + "proposed test": 43913, + "types data": 59080, + "sequence level": 49949, + "better handling": 6897, + "input propose": 26321, + "propose machine": 43445, + "machine reading": 31340, + "neural attention": 36934, + "single sequence": 51335, + "architecture experiments": 4048, + "experiments language": 19451, + "analysis natural": 2703, + "language inference": 28108, + "inference model": 25670, + "matches outperforms": 31906, + "huge volume": 24079, + "volume data": 61728, + "opinions social": 38506, + "social networking": 51595, + "networking sites": 36825, + "like twitter": 30510, + "twitter facebook": 59037, + "allow people": 2439, + "lot work": 31124, + "data survey": 12714, + "provide survey": 44140, + "comparative analyses": 9318, + "approaches evaluation": 3815, + "metrics using": 33207, + "algorithms like": 2329, + "data streams": 12696, + "introduces new": 26893, + "distribution words": 15660, + "languages task": 28800, + "task text": 55434, + "levels text": 30249, + "units meaning": 59533, + "development methods": 14686, + "independence assumption": 25492, + "used prediction": 60267, + "analysis propose": 2728, + "exhibit better": 19001, + "model words": 34542, + "words texts": 62531, + "study suggests": 53464, + "study various": 53474, + "words propose": 62486, + "accuracy high": 985, + "research works": 47144, + "level approaches": 30065, + "approaches natural": 3879, + "handling rare": 23428, + "architecture utilizes": 4098, + "recurrent layers": 45617, + "validate proposed": 61183, + "model large": 34040, + "tasks compare": 55546, + "comparable performances": 9305, + "toolkit provides": 57372, + "incorporate social": 25364, + "social scientists": 51605, + "topic modelling": 57417, + "raw corpus": 45034, + "processing input": 42878, + "based vector": 6128, + "model conditional": 33694, + "unsupervised topic": 59743, + "models output": 35293, + "output space": 39003, + "methods estimating": 32843, + "words languages": 62444, + "shared embedding": 50466, + "estimation methods": 18383, + "data require": 12604, + "require parallel": 46882, + "evaluation method": 18639, + "shown correlate": 50700, + "correlate better": 11501, + "previous ones": 42268, + "network trained": 36815, + "new loss": 37241, + "classification loss": 8487, + "obtain promising": 38183, + "networks natural": 36879, + "inference achieve": 25639, + "achieve similar": 1198, + "similar performance": 51058, + "vector embedding": 61451, + "feature embeddings": 20481, + "embeddings feature": 17136, + "use information": 59912, + "approach results": 3676, + "embeddings achieved": 17078, + "sampling methods": 48503, + "methods recent": 33007, + "recent empirical": 45309, + "research focused": 47041, + "learned language": 29463, + "data simple": 12665, + "range semantic": 44933, + "identifying relationships": 24463, + "key aspect": 27295, + "scientific knowledge": 48763, + "knowledge graphs": 27504, + "medical domain": 32202, + "domain make": 16108, + "make language": 31579, + "electronic health": 16967, + "health records": 23517, + "perform knowledge": 40117, + "graph completion": 23115, + "relationships tokens": 46085, + "knowledge reasoning": 27585, + "multiple choice": 36180, + "choice questions": 8337, + "task obtain": 55246, + "useful researchers": 60384, + "researchers working": 47171, + "diverse range": 15712, + "extraction question": 20100, + "provide solution": 44131, + "answer given": 3036, + "present unified": 42047, + "max margin": 31949, + "framework learns": 21555, + "shows framework": 50778, + "outperforms strong": 38948, + "wealth information": 61873, + "task domain": 55032, + "entity resolution": 18145, + "studied literature": 53228, + "rank based": 44948, + "proposed solutions": 43898, + "specialized knowledge": 52034, + "current systems": 12016, + "systems fine": 54505, + "grained entity": 23031, + "entity typing": 18155, + "type labels": 59058, + "entity mentions": 18120, + "entity mention": 18118, + "define new": 13776, + "task label": 55156, + "label noise": 27718, + "examples given": 18907, + "set candidate": 50117, + "labels individual": 27833, + "entity types": 18154, + "unique challenges": 59510, + "task propose": 55299, + "mentions text": 32310, + "text features": 56579, + "semantically close": 49381, + "training example": 58093, + "manner using": 31727, + "learned embeddings": 29457, + "embeddings text": 17228, + "margin based": 31819, + "based loss": 5823, + "robust noisy": 48260, + "noisy labels": 37622, + "models type": 35637, + "experiments public": 19500, + "datasets demonstrate": 13210, + "effectiveness robustness": 16811, + "improvement accuracy": 24982, + "accuracy compared": 948, + "best method": 6779, + "way using": 61837, + "knowledge unsupervised": 27641, + "methods identifying": 32892, + "sequence data": 49918, + "related fields": 45908, + "level hierarchical": 30129, + "word learning": 62223, + "cognitive processes": 8895, + "type token": 59073, + "token frequencies": 57289, + "enables model": 17443, + "propose train": 43677, + "uni directional": 59459, + "paper shows": 39573, + "apply natural": 3340, + "methods classification": 32781, + "prediction target": 41741, + "computer aided": 9886, + "hand crafted": 23385, + "levels abstraction": 30236, + "model incorporate": 33990, + "incorporate contextual": 25348, + "contextual features": 10769, + "topics model": 57454, + "specific nlp": 52119, + "tasks word": 55965, + "prediction results": 41737, + "corpora english": 11196, + "english documents": 17797, + "google news": 22955, + "models baseline": 34761, + "baseline lstm": 6180, + "lstm models": 31274, + "accuracy improvements": 991, + "news dataset": 37398, + "demonstrates significant": 14041, + "like question": 30494, + "dialog systems": 14760, + "tasks effective": 55600, + "effective way": 16712, + "extract meaningful": 19986, + "sentences produce": 49770, + "matching score": 31921, + "inspired success": 26417, + "based extracted": 5723, + "recognition model": 45513, + "successfully identify": 53746, + "identify salient": 24441, + "demonstrate superiority": 13984, + "baselines work": 6321, + "text clustering": 56493, + "use small": 60019, + "design novel": 14293, + "novel objective": 37888, + "clustering process": 8744, + "optimize objective": 38563, + "data unlabeled": 12753, + "data iteratively": 12442, + "representation current": 46502, + "current neural": 11991, + "results datasets": 47569, + "method works": 32710, + "better text": 6979, + "clustering methods": 8742, + "linear discriminant": 30655, + "discriminant analysis": 15436, + "performance degradation": 40282, + "data size": 12667, + "presents solution": 42105, + "development data": 14672, + "data required": 12605, + "improved accuracy": 24944, + "topics existing": 57450, + "given small": 22786, + "seed words": 49045, + "uses neural": 60525, + "related terms": 45944, + "built pre": 7489, + "methods focus": 32869, + "parts input": 39905, + "meanings sentences": 32035, + "sentences work": 49809, + "model represents": 34311, + "sentence word": 49671, + "cnn model": 8773, + "model employed": 33812, + "capture features": 7670, + "similarity score": 51117, + "model gets": 33938, + "task achieves": 54877, + "event data": 18780, + "syntactic level": 54306, + "syntactic parse": 54309, + "new structure": 37327, + "method learns": 32564, + "relevant task": 46237, + "embeddings generated": 17142, + "reach state": 45053, + "words annotated": 62364, + "types lexical": 59098, + "information sentiment": 26080, + "training order": 58198, + "key challenges": 27299, + "challenges natural": 8062, + "domains languages": 16268, + "investigate robustness": 26984, + "detection systems": 14531, + "fundamental tasks": 21794, + "tasks information": 55686, + "corpus task": 11442, + "preserve original": 42115, + "systematic evaluation": 54396, + "rnn architectures": 48181, + "outperform best": 38785, + "relative error": 46094, + "setting achieve": 50315, + "performance cross": 40270, + "cross domain": 11813, + "domain setting": 16156, + "traditional methods": 57529, + "methods similar": 33042, + "similar task": 51070, + "task named": 55231, + "efficient inference": 16877, + "parsing language": 39783, + "modeling experiments": 34575, + "experiments provide": 19499, + "provide better": 44020, + "model better": 33624, + "profound impact": 43074, + "sentence using": 49667, + "models auto": 34742, + "predicting correct": 41674, + "data corpus": 12253, + "hope work": 24017, + "results based": 47518, + "method efficiently": 32476, + "empirical comparisons": 17321, + "sample data": 48448, + "methods studied": 33056, + "models developed": 34909, + "mainly english": 31471, + "european languages": 18429, + "multiple corpora": 36188, + "models evaluated": 34977, + "task knowledge": 55153, + "task work": 55472, + "vectors trained": 61498, + "comparable accuracy": 9288, + "single corpus": 51289, + "corpus trained": 11447, + "trained models": 57793, + "combination multiple": 9045, + "learning order": 29791, + "methods analyzing": 32749, + "activation patterns": 1468, + "use multi": 59953, + "architecture consisting": 4036, + "trained predicting": 57842, + "visual scene": 61668, + "corresponding input": 11553, + "predicting word": 41686, + "method estimate": 32488, + "final prediction": 20827, + "sensitive information": 49500, + "selective attention": 49163, + "input token": 26349, + "differently depending": 15148, + "furthermore propose": 21833, + "time steps": 57226, + "term dependencies": 56233, + "identified text": 24404, + "text word": 56845, + "key step": 27334, + "ner using": 36685, + "word boundary": 62121, + "provide richer": 44125, + "representations jointly": 46695, + "yield significant": 63098, + "jointly training": 27223, + "absolute improvement": 744, + "results main": 47710, + "using character": 60601, + "combination convolutional": 9035, + "based attention": 5579, + "based bidirectional": 5607, + "bidirectional recurrent": 7081, + "results source": 47852, + "representations order": 46730, + "applying method": 3366, + "available pre": 5344, + "trained word": 57915, + "leads new": 29321, + "dataset method": 12992, + "downstream task": 16350, + "task dialogue": 55019, + "dialogue state": 14784, + "domains introduce": 16262, + "novel simple": 37923, + "convolution neural": 11096, + "cnn architecture": 8758, + "architecture multi": 4066, + "embeddings sentence": 17211, + "sentence classification": 49525, + "input embedding": 26271, + "sets model": 50297, + "model simpler": 34387, + "time furthermore": 57159, + "baseline models": 6188, + "models article": 34727, + "algorithm detect": 2268, + "results presented": 47775, + "different studies": 15084, + "problem studied": 42669, + "size paper": 51392, + "propose combine": 43323, + "combine different": 9065, + "supervised classifiers": 53970, + "training evaluating": 58090, + "restaurant reviews": 47413, + "measure robustness": 32061, + "methods robust": 33025, + "preprocessing steps": 41829, + "applications text": 3252, + "context aware": 10589, + "solve tasks": 51692, + "approach make": 3595, + "semantic structure": 49355, + "output representation": 38996, + "variational autoencoder": 61243, + "image classification": 24533, + "order reduce": 38651, + "reduce computational": 45653, + "computational complexity": 9838, + "optimization method": 38549, + "reduction techniques": 45721, + "ag news": 2044, + "improves classification": 25119, + "compared pure": 9443, + "achieves competitive": 1316, + "advanced methods": 1889, + "methods state": 33051, + "methods large": 32916, + "developments field": 14712, + "media analysis": 32158, + "combining information": 9112, + "media posts": 32180, + "users explore": 60463, + "multiple criteria": 36189, + "environment propose": 18173, + "despite lack": 14371, + "direct supervision": 15258, + "available speech": 5369, + "text similar": 56770, + "model implemented": 33974, + "ground truth": 23251, + "achieves 20": 1286, + "outperforming previous": 38855, + "10 absolute": 35, + "require pre": 46883, + "pre specified": 41514, + "shown great": 50713, + "methods require": 33017, + "types training": 59124, + "class classifier": 8398, + "large labeled": 28893, + "set based": 50111, + "features limited": 20617, + "certain domains": 7939, + "using linguistic": 60772, + "related knowledge": 45913, + "novel joint": 37845, + "mentions using": 32311, + "using representations": 60909, + "representations framework": 46673, + "crafted features": 11679, + "new domain": 37175, + "language furthermore": 28080, + "specific context": 52060, + "context representation": 10705, + "representation experiments": 46514, + "genres news": 22643, + "discussion forum": 15491, + "systems trained": 54656, + "data results": 12615, + "domains general": 16258, + "historical texts": 23962, + "approach fails": 3537, + "paper assess": 39274, + "benchmark task": 6497, + "evaluate domain": 18452, + "unsupervised domain": 59694, + "outperforms word": 38961, + "methods better": 32772, + "data necessary": 12506, + "necessary build": 36529, + "early detection": 16511, + "relevant data": 46207, + "data train": 12738, + "public health": 44321, + "prior systems": 42416, + "media twitter": 32185, + "effective data": 16641, + "data difficult": 12284, + "difficult acquire": 15155, + "aims address": 2172, + "forest classifier": 21301, + "data gathered": 12379, + "state level": 52701, + "level statistics": 30216, + "art accuracy": 4209, + "level data": 30095, + "recent approaches": 45292, + "based artificial": 5577, + "results short": 47834, + "systems leverage": 54547, + "texts model": 56903, + "dialog act": 14751, + "performing tasks": 40691, + "support new": 54121, + "digital humanities": 15210, + "new field": 37204, + "task textual": 55437, + "tasks textual": 55933, + "studies illustrate": 53270, + "pretrained word": 42194, + "tasks small": 55895, + "scale multi": 48599, + "agent based": 2054, + "step paper": 52819, + "novel question": 37904, + "mathematical framework": 31933, + "explore question": 19730, + "automatically predict": 5192, + "model features": 33884, + "features use": 20688, + "extracted features": 20010, + "hybrid model": 24319, + "parser model": 39760, + "25 times": 335, + "models integrated": 35137, + "data little": 12468, + "loss accuracy": 31082, + "entailment task": 18007, + "task significantly": 55376, + "encoding models": 17572, + "method jointly": 32555, + "jointly learning": 27201, + "phrase embeddings": 40838, + "types embeddings": 59083, + "scoring function": 48933, + "jointly optimized": 27211, + "embeddings experiments": 17133, + "experiments apply": 19353, + "joint learning": 27174, + "method task": 32679, + "human ratings": 24224, + "previous best": 42248, + "best model": 6781, + "ensemble technique": 17981, + "improves results": 25156, + "tasks existing": 55625, + "models focus": 35037, + "data directly": 12287, + "directly use": 15340, + "novel end": 37814, + "model source": 34399, + "structure model": 53118, + "model attention": 33585, + "mechanism enables": 32111, + "english japanese": 17827, + "dataset demonstrate": 12883, + "considerably outperforms": 10242, + "outperforms sequence": 38941, + "favorably state": 20456, + "complementary approaches": 9587, + "methods supervised": 33061, + "current best": 11963, + "path based": 39945, + "research attention": 46987, + "dependency paths": 14134, + "methods extend": 32857, + "improving state": 25197, + "task sentence": 55357, + "recognizing textual": 45559, + "memory networks": 32274, + "tasks similar": 55890, + "similar model": 51054, + "perspective propose": 40777, + "comparing performance": 9483, + "performance common": 40243, + "convolutional recurrent": 11115, + "recurrent attention": 45609, + "tasks datasets": 55570, + "problem evaluating": 42554, + "models propose": 35369, + "new datasets": 37168, + "currently used": 12040, + "introduce unified": 26875, + "source software": 51798, + "tasks enables": 55610, + "trained sentence": 57862, + "model set": 34365, + "set new": 50199, + "dialogue dataset": 14771, + "present deep": 41885, + "hierarchical recurrent": 23685, + "network sequence": 36803, + "recurrent units": 45629, + "character word": 8226, + "tags model": 54758, + "independent language": 25500, + "extend model": 19825, + "task cross": 54986, + "joint training": 27191, + "results multiple": 47733, + "languages benchmark": 28607, + "demonstrate multi": 13947, + "training improve": 58125, + "problem sequence": 42648, + "sequence seq2seq": 49975, + "seq2seq learning": 49899, + "language communication": 27995, + "humans tend": 24289, + "based seq2seq": 6024, + "learning propose": 29823, + "new model": 37259, + "decoder structure": 13615, + "word generation": 62211, + "copying mechanism": 11138, + "sub sequences": 53531, + "sequences input": 50022, + "summarization tasks": 53902, + "tasks study": 55914, + "selection methods": 49145, + "number different": 37994, + "tagging parsing": 54746, + "parsing experiments": 39779, + "experiments languages": 19453, + "models higher": 35087, + "works better": 62879, + "final results": 20830, + "art languages": 4271, + "languages time": 28805, + "reducing number": 45712, + "recently works": 45476, + "domain natural": 16117, + "methods word": 33102, + "advanced state": 1893, + "art various": 4437, + "propose scalable": 43611, + "present experimental": 41909, + "demonstrate performance": 13954, + "performance proposed": 40504, + "gram method": 23056, + "demonstrate better": 13876, + "better approach": 6850, + "learned representations": 29479, + "representations propose": 46743, + "pipeline models": 40905, + "use hidden": 59906, + "test time": 56387, + "languages universal": 28812, + "universal dependencies": 59538, + "art graph": 4266, + "generation tasks": 22562, + "text conditioned": 56504, + "structured unstructured": 53179, + "arbitrary number": 4014, + "number input": 38010, + "effective training": 16706, + "using framework": 60700, + "framework address": 21451, + "problem generating": 42573, + "language structured": 28507, + "create new": 11711, + "corpus demonstrate": 11319, + "allows model": 2472, + "relatively rare": 46126, + "useful research": 60383, + "applications study": 3250, + "obtaining data": 38232, + "makes good": 31623, + "research task": 47127, + "order build": 38600, + "based wikipedia": 6133, + "comparable data": 9296, + "evaluation quality": 18691, + "second method": 49011, + "given domain": 22739, + "order train": 38657, + "past decade": 39930, + "scale supervised": 48628, + "learning researchers": 29845, + "corpora available": 11179, + "factoid question": 20296, + "evaluated human": 18533, + "human evaluators": 24161, + "using automatic": 60573, + "metrics including": 33174, + "translation sentence": 58674, + "similarity metrics": 51107, + "evaluation criteria": 18600, + "question generation": 44729, + "baseline furthermore": 6170, + "generated questions": 22310, + "comparable quality": 9307, + "real human": 45103, + "human generated": 24167, + "standing problem": 52552, + "recently researchers": 45464, + "word text": 62320, + "neural net": 36991, + "better capture": 6857, + "patterns text": 39975, + "lack training": 27922, + "data words": 12776, + "label propagation": 27721, + "demonstrate state": 13977, + "results especially": 47615, + "semantic textual": 49365, + "similarity sts": 51122, + "evaluate semantic": 18503, + "assessing quality": 4588, + "information encoded": 25830, + "set evaluating": 50147, + "pairs annotated": 39168, + "annotated semantic": 2913, + "models gram": 35068, + "data freely": 12373, + "available recent": 5357, + "good capturing": 22929, + "capturing linguistic": 7738, + "linguistic regularities": 30787, + "simple linear": 51185, + "words different": 62398, + "proposed learning": 43799, + "learning document": 29602, + "document representations": 15826, + "structure learned": 53115, + "question design": 44727, + "new document": 37174, + "analogy task": 2584, + "semantic regularities": 49324, + "models results": 35456, + "results reveal": 47815, + "based document": 5689, + "representations work": 46792, + "work better": 62589, + "conventional methods": 11007, + "work examines": 62652, + "demonstrate language": 13927, + "driven model": 16430, + "data target": 12720, + "languages furthermore": 28679, + "language typology": 28543, + "factors contribute": 20307, + "form text": 21338, + "semantic patterns": 49317, + "sentiment polarities": 49853, + "data semantic": 12637, + "used achieve": 60079, + "achieve goal": 1142, + "popular topic": 41195, + "purpose study": 44411, + "context sentiment": 10715, + "systems operate": 54575, + "access large": 826, + "collection documents": 8983, + "documents work": 15930, + "explore task": 19740, + "incorporating external": 25385, + "domains training": 16298, + "data scarce": 12623, + "extraction new": 20089, + "using reinforcement": 60903, + "learns select": 29973, + "select optimal": 49109, + "based contextual": 5643, + "reward function": 48067, + "demonstrate significantly": 13975, + "outperforms traditional": 38955, + "methods use": 33093, + "best accuracy": 6744, + "regression model": 45814, + "model actually": 33530, + "online health": 38368, + "various applications": 61299, + "annotated dataset": 2889, + "dataset supervised": 13108, + "classifiers based": 8612, + "cnn models": 8774, + "task classifying": 54952, + "cnn classifier": 8764, + "analysis topic": 2782, + "embedding word": 17072, + "neural model": 36971, + "trained using": 57908, + "visual objects": 61662, + "given textual": 22796, + "multilayer perceptron": 36059, + "perceptron mlp": 40058, + "embedding layer": 17033, + "mentioned text": 32300, + "text work": 56847, + "work contributes": 62612, + "contributes new": 10937, + "method select": 32644, + "features fine": 20587, + "tuning method": 58926, + "improves f1": 25129, + "f1 measure": 20186, + "task semeval": 55354, + "semeval 2013": 49426, + "attempt solve": 4691, + "prepositional phrase": 41820, + "motivation work": 35885, + "trained english": 57722, + "present technique": 42037, + "male female": 31681, + "generated approach": 22268, + "potential errors": 41389, + "lines code": 30687, + "using generic": 60706, + "goal natural": 22892, + "text passages": 56694, + "understanding work": 59418, + "investigate machine": 26965, + "machine comprehension": 31298, + "limited size": 30616, + "dataset neural": 13008, + "neural approach": 36929, + "simple neural": 51199, + "using manually": 60788, + "level sentence": 30207, + "networks operate": 36886, + "embedding representations": 17058, + "text trained": 56817, + "designed help": 14319, + "data parallel": 12535, + "model sets": 34366, + "sets new": 50298, + "feature engineered": 20482, + "neural approaches": 36930, + "discriminative model": 15446, + "single document": 51296, + "model selects": 34347, + "summary based": 53913, + "learned large": 29465, + "units text": 59536, + "improve cross": 24837, + "cross sentence": 11869, + "trained end": 57720, + "outperforms prior": 38931, + "linguistic quality": 30786, + "generally considered": 22164, + "models outperformed": 35289, + "target words": 54857, + "set improvement": 50168, + "learning multi": 29769, + "parallel sentence": 39652, + "outperform monolingual": 38804, + "monolingual counterparts": 35795, + "available test": 5376, + "models lms": 35198, + "network nn": 36774, + "models represent": 35438, + "superior performances": 53940, + "paper examine": 39353, + "chinese speech": 8321, + "task visual": 55467, + "range nlp": 44926, + "tasks visual": 55963, + "multimodal tasks": 36156, + "tasks image": 55668, + "description text": 14249, + "augments existing": 4993, + "existing multimodal": 19113, + "labels propose": 27846, + "using textual": 60989, + "embeddings textual": 17229, + "embeddings perform": 17188, + "supervised setting": 54044, + "sentence relation": 49629, + "relation modeling": 45988, + "pairs quality": 39212, + "complex semantic": 9658, + "challenge propose": 8009, + "architecture jointly": 4056, + "leverage pre": 30282, + "representations inputs": 46692, + "lstm learn": 31269, + "approach consistently": 3465, + "methods standard": 33050, + "standard evaluation": 52490, + "evaluation datasets": 18605, + "datasets natural": 13341, + "help language": 23574, + "classifiers different": 8614, + "different error": 14919, + "error types": 18231, + "models motivated": 35234, + "issues present": 27098, + "present neural": 41955, + "approach language": 3581, + "core component": 11145, + "network attention": 36701, + "approach dataset": 3477, + "noisy user": 37628, + "generated text": 22325, + "collected english": 8962, + "model method": 34099, + "f_ score": 20234, + "demonstrate training": 13992, + "training network": 58187, + "used millions": 60239, + "millions people": 33264, + "share information": 50457, + "variety domains": 61268, + "tailored specific": 54769, + "specific user": 52170, + "depend availability": 14098, + "existing unsupervised": 19167, + "unsupervised semi": 59728, + "approaches focused": 3830, + "focused identifying": 21224, + "contrast work": 10892, + "work proposes": 62792, + "data achieve": 12110, + "performance fine": 40349, + "grained analysis": 23021, + "discuss limitations": 15472, + "word speech": 62314, + "domains demonstrate": 16245, + "task existing": 55061, + "existing ones": 19122, + "modern nlp": 35716, + "rely heavily": 46286, + "engineered features": 17764, + "combine word": 9075, + "word contextual": 62133, + "features combination": 20538, + "large numbers": 28928, + "reduce parameter": 45678, + "parameter space": 39680, + "improve prediction": 24906, + "furthermore investigate": 21825, + "investigate methods": 26966, + "tasks relation": 55845, + "approaches language": 3853, + "uses simple": 60536, + "driven approaches": 16421, + "training approach": 57934, + "approach requires": 3675, + "behavior model": 6393, + "time compared": 57124, + "using existing": 60684, + "unknown target": 59558, + "easier train": 16529, + "train character": 57571, + "based ones": 5925, + "english czech": 17791, + "models handle": 35076, + "words best": 62372, + "best achieves": 6746, + "achieves new": 1348, + "20 bleu": 222, + "character models": 8220, + "models successfully": 35553, + "learn generate": 29374, + "language complex": 27998, + "english source": 17876, + "ensemble approach": 17970, + "search query": 48980, + "based entities": 5705, + "entities compared": 18039, + "compared traditional": 9466, + "keyword based": 27349, + "model identify": 33970, + "approach instead": 3574, + "varying complexity": 61427, + "collect real": 8951, + "models contextual": 34860, + "like wikipedia": 30511, + "approach utilizes": 3735, + "approach data": 3476, + "set created": 50132, + "set contains": 50129, + "use supervised": 60034, + "semantics model": 49407, + "trained wikipedia": 57914, + "micro averaged": 33220, + "score 97": 48830, + "approach question": 3662, + "question multiple": 44738, + "relevance score": 46194, + "relevance scores": 46195, + "statistical modeling": 52755, + "model gives": 33940, + "baseline approach": 6154, + "traditional nlp": 57538, + "asr transcripts": 4564, + "essential task": 18335, + "models enable": 34958, + "joint multi": 27181, + "multilingual neural": 36104, + "model character": 33652, + "dimensional vectors": 15239, + "process results": 42828, + "based classifier": 5620, + "produce human": 42988, + "error patterns": 18221, + "amr graphs": 2574, + "manually crafted": 31769, + "method selecting": 32645, + "amr graph": 2573, + "level neural": 30167, + "amr parser": 2575, + "semeval 2016": 49428, + "represent words": 46486, + "words embeddings": 62406, + "embeddings machine": 17169, + "machine learned": 31305, + "learned vector": 29490, + "embeddings produced": 17195, + "large multilingual": 28914, + "higher previous": 23838, + "commonsense knowledge": 9234, + "deep language": 13695, + "received lot": 45262, + "hindered lack": 23927, + "lack proper": 27907, + "proper evaluation": 43251, + "evaluation framework": 18621, + "framework paper": 21580, + "paper attempts": 39276, + "problem new": 42616, + "cloze test": 8726, + "evaluation corpus": 18599, + "everyday life": 18804, + "story generation": 52880, + "generation experimental": 22456, + "baselines state": 6302, + "high score": 23799, + "understanding paper": 59378, + "corpora recent": 11237, + "based architecture": 5575, + "datasets showing": 13424, + "showing significant": 50689, + "framework neural": 21571, + "data scenarios": 12628, + "effective low": 16667, + "train high": 57595, + "model transfer": 34485, + "transfer learned": 58373, + "learned parameters": 29471, + "method improve": 32529, + "average bleu": 5403, + "word replacement": 62282, + "performance low": 40423, + "resource machine": 47252, + "syntax based": 54347, + "additionally using": 1737, + "art low": 4277, + "translation word": 58703, + "ambiguous words": 2530, + "text large": 56645, + "used approaches": 60092, + "approaches perform": 3894, + "word used": 62331, + "performance work": 40632, + "features derived": 20556, + "global features": 22828, + "embeddings results": 17206, + "network classifiers": 36718, + "based long": 5821, + "accuracy 95": 927, + "knowledge extraction": 27480, + "graph construction": 23117, + "shallow parsing": 50443, + "design hierarchical": 14285, + "entity knowledge": 18111, + "finally experimental": 20856, + "results prove": 47787, + "prove method": 43981, + "effective communication": 16636, + "structural properties": 53081, + "based framework": 5740, + "framework modeling": 21564, + "narrative text": 36383, + "text modeling": 56668, + "network use": 36819, + "use sentiment": 60009, + "network framework": 36744, + "unique characteristics": 59511, + "social interactions": 51565, + "study problem": 53439, + "english code": 17786, + "mixed social": 33410, + "data developed": 12278, + "knowledge attempt": 27398, + "bit ly": 7186, + "models presenting": 35349, + "presenting novel": 42066, + "novel multi": 37877, + "existing open": 19124, + "systems typically": 54658, + "conversation propose": 11035, + "context candidate": 10596, + "aims determine": 2186, + "problem nlp": 42617, + "applications recently": 3245, + "recently deep": 45414, + "problem significant": 42655, + "improvements achieved": 25046, + "propose view": 43701, + "idea propose": 24372, + "deep architecture": 13682, + "constructed capture": 10407, + "capture word": 7722, + "local interactions": 30943, + "score calculated": 48838, + "exact matching": 18854, + "dynamic programming": 16489, + "demonstrate attention": 13872, + "grammatical error": 23069, + "automated evaluation": 5041, + "scientific writing": 48773, + "decoder models": 13601, + "particularly effective": 39880, + "based counterpart": 5656, + "highest performing": 23855, + "representation important": 46527, + "long history": 31014, + "work knowledge": 62701, + "graph embedding": 23130, + "entity classification": 18099, + "representation method": 46550, + "method knowledge": 32556, + "generative process": 22606, + "semantic units": 49372, + "2016 task": 261, + "task 10": 54866, + "primarily based": 42361, + "based combination": 5625, + "combination word": 9053, + "works using": 62916, + "networks based": 36833, + "boost performance": 7256, + "selection problem": 49148, + "works used": 62915, + "used deep": 60141, + "methods like": 32928, + "rnn cnn": 48190, + "end learning": 17680, + "similarity metric": 51106, + "metric learning": 33119, + "tokens proposed": 57334, + "model demonstrates": 33746, + "qa dataset": 44449, + "memory bi": 32243, + "networks recently": 36902, + "proven successful": 43994, + "little known": 30879, + "input representations": 26327, + "representations target": 46767, + "languages data": 28632, + "data sizes": 12668, + "novel bi": 37779, + "auxiliary loss": 5233, + "obtains state": 38258, + "22 languages": 318, + "especially morphologically": 18287, + "analysis suggests": 2772, + "semeval 2015": 49427, + "annotation process": 2961, + "shown approach": 50695, + "sparsity issues": 51980, + "dataset obtain": 13015, + "cluster based": 8733, + "adaptive training": 1581, + "method deep": 32452, + "used decoding": 60140, + "method large": 32558, + "spontaneous speech": 52370, + "task evaluated": 55056, + "baseline word": 6224, + "11 relative": 90, + "relative reduction": 46110, + "language challenge": 27984, + "response problem": 47399, + "build training": 7431, + "information parallel": 26003, + "labelling task": 27806, + "rich features": 48100, + "features finally": 20586, + "labelled training": 27804, + "data translating": 12744, + "universal schema": 59545, + "textual patterns": 56974, + "base construction": 5542, + "entity pairs": 18124, + "relations represented": 46054, + "generalization unseen": 22133, + "unseen text": 59656, + "work step": 62827, + "step propose": 52825, + "explicit entity": 19614, + "entity pair": 18123, + "representations instead": 46693, + "instead learning": 26455, + "representations entity": 46653, + "benchmark demonstrate": 6461, + "match performance": 31899, + "comparable model": 9298, + "model explicit": 33856, + "attention relation": 4820, + "types demonstrate": 59081, + "seen training": 49066, + "training present": 58214, + "performance according": 40177, + "text genres": 56609, + "annotation automatic": 2936, + "rater agreement": 45019, + "memory neural": 32276, + "attracted wide": 4889, + "lstm architecture": 31239, + "architecture consists": 4037, + "mechanism paper": 32133, + "accurately predict": 1097, + "cognitive process": 8894, + "article study": 4461, + "corpus work": 11461, + "tasks address": 55494, + "words single": 62515, + "unrelated words": 59634, + "phrase generation": 40840, + "alignment word": 2389, + "handcrafted features": 23400, + "single attention": 51285, + "characteristics specific": 8243, + "limits effectiveness": 30641, + "effectiveness tasks": 16816, + "propose architecture": 43299, + "architecture based": 4028, + "based gated": 5744, + "attention pooling": 4809, + "determine given": 14556, + "given task": 22792, + "apply framework": 3329, + "support research": 54123, + "linguistic cues": 30762, + "detection social": 14528, + "difficult identify": 15169, + "classification approach": 8435, + "lda topic": 29251, + "approach extracts": 3535, + "information topic": 26128, + "dataset self": 13076, + "collected dataset": 8959, + "methods terms": 33071, + "averaged f1": 5421, + "called word": 7556, + "embedding language": 17032, + "languages natural": 28736, + "dataset collection": 12846, + "variety language": 61275, + "language families": 28066, + "use parallel": 59971, + "languages perform": 28749, + "12 different": 105, + "human subjects": 24246, + "model humans": 33966, + "similarity languages": 51099, + "dialect identification": 14746, + "work word": 62859, + "task furthermore": 55099, + "lstm language": 31265, + "basic model": 6332, + "model outperforming": 34155, + "produce novel": 42995, + "fine tune": 20947, + "questions ask": 44773, + "context self": 10709, + "models character": 34811, + "model designed": 33753, + "overcome problems": 39072, + "inter sentence": 26586, + "representation level": 46546, + "learning scheme": 29859, + "order alleviate": 38593, + "network end": 36738, + "end fashion": 17673, + "quantitatively qualitatively": 44633, + "act classification": 1452, + "capture implicit": 7680, + "implicit explicit": 24659, + "semantics sentence": 49414, + "performance end": 40317, + "generating text": 22400, + "written using": 63014, + "python library": 44440, + "finnish english": 21062, + "tasks significant": 55888, + "obtained best": 38204, + "better training": 6983, + "various components": 61316, + "semantic alignment": 49232, + "multiclass classification": 36047, + "results algorithm": 47495, + "systems terms": 54650, + "dataset terms": 13115, + "terms overall": 56304, + "overall score": 39049, + "analysis social": 2761, + "important challenging": 24706, + "classification data": 8450, + "data requires": 12607, + "requires modeling": 46944, + "modeling various": 34635, + "various contexts": 61318, + "social context": 51559, + "use hierarchical": 59907, + "hierarchical lstm": 23676, + "rich contexts": 48094, + "particularly long": 39884, + "range context": 44908, + "context experimental": 10633, + "perform sentiment": 40137, + "single sentences": 51334, + "textual context": 56954, + "sentences language": 49745, + "reducing time": 45713, + "time required": 57205, + "higher degree": 23821, + "present set": 42010, + "set relevant": 50238, + "relevant aspects": 46199, + "set context": 50130, + "achieved average": 1218, + "related context": 45890, + "method evaluated": 32490, + "context independent": 10657, + "work mainly": 62718, + "mainly focused": 31473, + "datasets experiment": 13262, + "manual annotation": 31731, + "data publicly": 12579, + "publicly released": 44362, + "context existing": 10632, + "analysis scientific": 2748, + "tools help": 57379, + "various textual": 61407, + "summarization systems": 53900, + "selection approach": 49132, + "content input": 10531, + "input document": 26268, + "maps input": 31811, + "important ones": 24750, + "ones used": 38344, + "used classification": 60114, + "features introduce": 20607, + "introduce different": 26798, + "identify important": 24425, + "important concepts": 24713, + "select informative": 49107, + "informative content": 26170, + "extensive evaluations": 19869, + "suggest using": 53832, + "models alleviate": 34706, + "alleviate issues": 2410, + "scale chinese": 48557, + "english task": 17888, + "enhanced model": 17933, + "language phenomena": 28380, + "requires high": 46932, + "time large": 57172, + "large memory": 28908, + "usage paper": 59803, + "issue introducing": 27064, + "output vocabulary": 39009, + "time memory": 57177, + "target vocabulary": 54855, + "model bilingual": 33631, + "traditional machine": 57526, + "model experimental": 33850, + "scale english": 48569, + "achieves better": 1307, + "better translation": 6985, + "performance bleu": 40221, + "models recurrent": 35416, + "sequences different": 50021, + "using shared": 60935, + "shared representations": 50486, + "cross linguistically": 11862, + "shared feature": 50471, + "intrinsic evaluation": 26768, + "evaluation downstream": 18611, + "phonetic features": 40826, + "models ii": 35101, + "higher quality": 23840, + "quality learned": 44545, + "bayesian optimization": 6361, + "representations features": 46667, + "ranking function": 44970, + "variety downstream": 61269, + "entity information": 18110, + "large knowledge": 28891, + "meaningful semantic": 32027, + "framework handle": 21532, + "patterns training": 39976, + "data test": 12727, + "proven difficult": 43992, + "limited datasets": 30580, + "contrast human": 10877, + "human behaviour": 24115, + "similar tasks": 51071, + "tasks provide": 55826, + "feedforward neural": 20719, + "network paper": 36779, + "does contain": 15939, + "new results": 37306, + "semantics natural": 49408, + "lexical meaning": 30372, + "data report": 12601, + "health issues": 23516, + "limited lack": 30596, + "lack large": 27901, + "data labeled": 12448, + "present large": 41936, + "set novel": 50204, + "analysis methods": 2696, + "methods measure": 32942, + "applying techniques": 3379, + "datasets focused": 13279, + "certain topic": 7947, + "datasets contain": 13197, + "contain words": 10477, + "various categories": 61312, + "similarity scores": 51118, + "scores words": 48931, + "work discuss": 62639, + "evaluation procedure": 18680, + "humans provide": 24285, + "provide list": 44099, + "commonalities differences": 9213, + "human judgements": 24181, + "pairwise similarity": 39240, + "believe proposed": 6411, + "datasets test": 13456, + "model score": 34338, + "slightly modified": 51437, + "search decoder": 48968, + "practical advantages": 41456, + "pieces evidence": 40879, + "language question": 28457, + "based evidence": 5712, + "model integrate": 34009, + "baselines demonstrate": 6251, + "demonstrate benefit": 13874, + "comprehension model": 9767, + "task small": 55381, + "small datasets": 51471, + "datasets research": 13404, + "new open": 37274, + "open dataset": 38417, + "news based": 37389, + "apply proposed": 3347, + "entailment model": 18002, + "model similar": 34384, + "test questions": 56363, + "dataset improve": 12959, + "improve neural": 24877, + "automatic post": 5114, + "problem achieve": 42496, + "different models": 14996, + "model allowing": 33561, + "output source": 39002, + "string matching": 52993, + "used control": 60128, + "translation output": 58652, + "data generate": 12383, + "unseen test": 59655, + "submitted shared": 53587, + "task large": 55163, + "micro blogging": 33223, + "based recent": 5979, + "advances deep": 1909, + "models detect": 34905, + "score 92": 48827, + "including social": 25300, + "media platforms": 32176, + "platforms twitter": 40956, + "paper explored": 39368, + "speech act": 52251, + "set manually": 50190, + "method achieved": 32358, + "performance average": 40205, + "score 70": 48805, + "different granularities": 14945, + "type specific": 59070, + "specific topic": 52162, + "perform novel": 40126, + "analysis existing": 2661, + "existing model": 19107, + "model previously": 34234, + "previously shown": 42350, + "learns identify": 29961, + "relations present": 46051, + "model exploits": 33859, + "analysis model": 2697, + "models literature": 35192, + "multiple data": 36190, + "separate encoders": 49875, + "information sentence": 26078, + "propose deep": 43351, + "architecture model": 4063, + "specifically introduce": 52209, + "informative features": 26172, + "features experiments": 20577, + "large datasets": 28869, + "communication channels": 9248, + "situational awareness": 51370, + "processing social": 42938, + "availability data": 5246, + "human annotated": 24095, + "present human": 41925, + "corpora collected": 11183, + "19 different": 185, + "train machine": 57603, + "learning classifiers": 29557, + "word2vec word": 62352, + "variations paper": 61251, + "data corresponding": 12254, + "data information": 12430, + "approach order": 3619, + "speaker information": 51999, + "experiments proposed": 19493, + "method achieve": 32355, + "points improvement": 41076, + "publicly release": 44359, + "data manual": 12482, + "annotation propose": 2963, + "propose interactive": 43421, + "interactive multimodal": 26631, + "natural text": 36468, + "referential games": 45755, + "language need": 28355, + "provide promising": 44115, + "agents trained": 2065, + "trained way": 57910, + "analysis annotation": 2611, + "text semi": 56760, + "automatic annotation": 5069, + "annotation tool": 2976, + "salient features": 48440, + "outperform standard": 38821, + "standard models": 52507, + "models distinguish": 34930, + "model novel": 34133, + "novel embedding": 37811, + "extraction based": 20050, + "dependency path": 14133, + "specifically method": 52216, + "treated sequence": 58734, + "embedding features": 17029, + "context dependency": 10609, + "extraction experimental": 20065, + "results semeval": 47822, + "features achieve": 20515, + "method incorporates": 32539, + "yields better": 63117, + "extraction models": 20083, + "sentence given": 49566, + "propose variational": 43700, + "generates target": 22358, + "hidden representations": 23645, + "representations source": 46758, + "source sentences": 51796, + "model introduces": 34020, + "continuous latent": 10847, + "model underlying": 34498, + "underlying semantics": 59277, + "guide generation": 23334, + "generation target": 22557, + "perform efficient": 40094, + "posterior inference": 41361, + "scale training": 48631, + "build neural": 7416, + "lower bound": 31207, + "german translation": 22678, + "baselines paper": 6284, + "interactions multiple": 26618, + "levels granularity": 30240, + "words sub": 62524, + "attention network": 4795, + "learn interactions": 29384, + "attention matrix": 4769, + "soft attention": 51620, + "attention weight": 4851, + "convolution based": 11092, + "based learned": 5810, + "incorporate semantic": 25363, + "achieves substantial": 1382, + "prediction accuracy": 41690, + "component models": 9707, + "information multiple": 25980, + "multiple systems": 36297, + "systems improve": 54526, + "approach different": 3489, + "cold start": 8928, + "lingual entity": 30701, + "object detection": 38082, + "detection tasks": 14533, + "tasks obtain": 55773, + "obtain new": 38181, + "detection task": 14532, + "approach evaluation": 3522, + "metrics accuracy": 33134, + "accuracy precision": 1026, + "nlp evaluation": 37486, + "collecting large": 8977, + "number human": 38009, + "human responses": 24235, + "model compares": 33675, + "performance human": 40375, + "able provide": 716, + "performance standard": 40573, + "accuracy score": 1041, + "systems developed": 54478, + "data human": 12410, + "translation image": 58618, + "generation systems": 22556, + "according automatic": 853, + "metrics bleu": 33144, + "methods detecting": 32820, + "domain corpus": 16034, + "general language": 22064, + "language corpus": 28009, + "domain present": 16136, + "tools available": 57378, + "modeling single": 34623, + "multi agent": 35937, + "specific data": 52065, + "data case": 12199, + "receives input": 45270, + "structure present": 53129, + "convergence speed": 11025, + "preference learning": 41790, + "model generate": 33929, + "asking questions": 4524, + "attention model": 4787, + "model conditioned": 33695, + "model evaluated": 33843, + "model help": 33955, + "performance measured": 40434, + "indicate model": 25527, + "architectures using": 4129, + "improves performances": 25147, + "cloze style": 8724, + "documents model": 15896, + "gated attention": 21994, + "architecture novel": 4070, + "mechanism based": 32102, + "specific representations": 52138, + "results benchmarks": 47523, + "benchmarks task": 6545, + "cnn daily": 8765, + "daily mail": 12087, + "news stories": 37417, + "dataset effectiveness": 12904, + "ablation study": 659, + "study comparing": 53342, + "code available": 8792, + "dominant approach": 16306, + "model increases": 33995, + "convolutional layers": 11105, + "report improvements": 46437, + "tasks best": 55524, + "processing propose": 42929, + "uses attention": 60491, + "stanford natural": 52556, + "inference snli": 25691, + "snli dataset": 51551, + "obtain state": 38193, + "magnitude fewer": 31416, + "order information": 38629, + "intra sentence": 26760, + "sentence attention": 49518, + "order account": 38587, + "yields improvements": 63126, + "automatically answer": 5142, + "questions like": 44794, + "provide rich": 44124, + "base propose": 5549, + "deep recurrent": 13744, + "neural embeddings": 36949, + "achieves accuracy": 1302, + "largest public": 29099, + "outperforms current": 38891, + "propose enhance": 43371, + "decoder neural": 13605, + "external memory": 19950, + "representation source": 46581, + "designed better": 14310, + "capture information": 7682, + "set neural": 50198, + "low frequency": 31151, + "sentence propose": 49625, + "method alleviate": 32377, + "using attention": 60566, + "attention vector": 4850, + "model select": 34343, + "probabilities model": 42471, + "methods combine": 32787, + "experiments corpora": 19391, + "given english": 22740, + "automatic alignment": 5067, + "suffers data": 53789, + "small size": 51502, + "data english": 12323, + "paper formalize": 39383, + "alignment problem": 2380, + "based syntax": 6077, + "address data": 1752, + "experiments verify": 19559, + "method english": 32483, + "results significantly": 47846, + "interactions different": 26616, + "consistently performs": 10309, + "expectation maximization": 19194, + "languages current": 28629, + "paid attention": 39142, + "play significant": 40978, + "data captured": 12198, + "measure extent": 32052, + "processing semantic": 42936, + "data achieved": 12112, + "various topics": 61408, + "composition model": 9740, + "dependency based": 14118, + "strong ability": 52999, + "art wide": 4439, + "completion task": 9613, + "report new": 46439, + "extraction tool": 20124, + "designed extract": 14314, + "based sequential": 6029, + "sequential labeling": 50044, + "automatic manual": 5103, + "provide access": 44003, + "access training": 830, + "compare traditional": 9372, + "compare models": 9349, + "models traditional": 35600, + "distinct tasks": 15595, + "tasks sequence": 55877, + "treebank ptb": 58764, + "corpora results": 11240, + "effective paper": 16682, + "words learning": 62447, + "ability predict": 633, + "better word": 6993, + "based embedding": 5695, + "models help": 35083, + "better human": 6898, + "research neural": 47080, + "language agnostic": 27956, + "apply neural": 3342, + "task arabic": 54909, + "compare standard": 9366, + "extensive comparison": 19859, + "comparison using": 9510, + "various configurations": 61317, + "perform comparably": 40075, + "domain test": 16204, + "world deployment": 62937, + "achieved impressive": 1244, + "impressive results": 24815, + "using little": 60774, + "external linguistic": 19948, + "learning capability": 29551, + "mt models": 35921, + "models does": 34935, + "does make": 15957, + "easily incorporated": 16543, + "layer encoder": 29184, + "attentional encoder": 4857, + "features addition": 20518, + "dependency labels": 14124, + "labels input": 27834, + "english romanian": 17868, + "quality according": 44487, + "perplexity bleu": 40738, + "nlp tool": 37557, + "proven effective": 43993, + "effective text": 16704, + "generation sequence": 22545, + "local word": 30953, + "work introduce": 62691, + "training scheme": 58238, + "learn global": 29376, + "training loss": 58159, + "efficient training": 16904, + "highly optimized": 23908, + "baselines different": 6253, + "different sequence": 15066, + "sequence tasks": 50010, + "use deep": 59863, + "actor critic": 1485, + "learning rl": 29851, + "domain expertise": 16067, + "remove need": 46375, + "markov decision": 31845, + "decision processes": 13568, + "practical deployment": 41462, + "data efficiently": 12314, + "text understanding": 56829, + "understanding machine": 59362, + "mapping words": 31807, + "aware word": 5477, + "remains challenging": 46327, + "training large": 58147, + "tasks new": 55765, + "datasets propose": 13379, + "dictionary learning": 14806, + "mechanism learn": 32126, + "learn good": 29377, + "learning phase": 29804, + "embeddings extracted": 17134, + "tasks test": 55929, + "pre training": 41566, + "critical information": 11783, + "vast majority": 61440, + "materials methods": 31927, + "methods introduce": 32907, + "systems compare": 54454, + "systems datasets": 54470, + "dataset largest": 12980, + "identification dataset": 24385, + "performance previously": 40499, + "engineering paper": 17769, + "effective model": 16674, + "approach leverage": 3588, + "network shared": 36804, + "shared words": 50511, + "words enables": 62407, + "size model": 51390, + "effective use": 16709, + "approach standard": 3702, + "standard datasets": 52484, + "data employ": 12319, + "external resources": 19952, + "resources knowledge": 47308, + "crafted rules": 11682, + "systems employ": 54484, + "making difficult": 31651, + "knowledge achieve": 27387, + "introduce data": 26794, + "conditional independence": 9995, + "train sequence": 57630, + "rnn model": 48202, + "model structural": 34414, + "parsing datasets": 39777, + "datasets leading": 13315, + "leading new": 29293, + "dataset models": 13000, + "models comparable": 34834, + "task benchmark": 54932, + "task reinforcement": 55326, + "action space": 1458, + "fixed window": 21085, + "directional lstm": 15283, + "different experimental": 14925, + "language essential": 28053, + "grounded language": 23261, + "present effective": 41895, + "representation model": 46552, + "conditional language": 9996, + "modeling task": 34628, + "probing model": 42492, + "model output": 34168, + "training prediction": 58213, + "limited applicability": 30569, + "prediction approach": 41694, + "encoder trained": 17544, + "given training": 22798, + "instead model": 26457, + "multi aspect": 35940, + "aspect sentiment": 4533, + "annotated test": 2920, + "test cases": 56335, + "illustrate method": 24517, + "multi way": 36041, + "enables zero": 17452, + "zero resource": 63151, + "multilingual model": 36096, + "model translate": 34490, + "pivot based": 40917, + "learning al": 29510, + "networks cnns": 36838, + "manually labeled": 31781, + "minimal effort": 33287, + "tuning task": 58965, + "al strategies": 2248, + "contrast traditional": 10891, + "based uncertainty": 6117, + "uncertainty sampling": 59232, + "learning discriminative": 29594, + "approach document": 3493, + "jointly considering": 27193, + "representations model": 46718, + "stochastic process": 52857, + "embeddings best": 17088, + "problem question": 42640, + "reasoning multiple": 45207, + "multiple facts": 36216, + "propose query": 43596, + "context sentences": 10714, + "sentence time": 49658, + "time experiments": 57156, + "produces state": 43035, + "tasks real": 55834, + "oriented dialog": 38696, + "dataset addition": 12803, + "formulation allows": 21393, + "time complexity": 57125, + "training inference": 58129, + "remarkable progress": 46360, + "rely parallel": 46296, + "usually limited": 61057, + "quality coverage": 44503, + "especially low": 18284, + "training nmt": 58194, + "corpora using": 11256, + "target target": 54845, + "target source": 54842, + "models serve": 35486, + "language source": 28490, + "language experiments": 28061, + "dataset approach": 12812, + "systems present": 54595, + "evaluate new": 18478, + "model natural": 34117, + "generation nlg": 22507, + "current generation": 11979, + "generation context": 22439, + "context user": 10740, + "use standard": 60026, + "present information": 41928, + "results users": 47898, + "length information": 30027, + "cognitive load": 8892, + "compared base": 9382, + "learned policy": 29473, + "prior approaches": 42393, + "common linguistic": 9184, + "sentences multiple": 49755, + "multiple target": 36298, + "representation work": 46607, + "context neural": 10680, + "decoder architectures": 13587, + "end goal": 17675, + "specifically consider": 52187, + "consider case": 10208, + "available training": 5379, + "stage model": 52434, + "model converts": 33719, + "jointly learns": 27202, + "representation evaluate": 46510, + "evaluate model": 18472, + "model tasks": 34446, + "architectures paper": 4119, + "end method": 17683, + "generating short": 22396, + "semantically diverse": 49385, + "learning architecture": 29523, + "generated content": 22276, + "modified version": 35732, + "learning context": 29569, + "large space": 29015, + "larger context": 29069, + "simpler models": 51231, + "models faster": 35020, + "model finally": 33888, + "datasets develop": 13226, + "high number": 23755, + "ill formed": 24510, + "instead relying": 26462, + "relying solely": 46310, + "data samples": 12620, + "comprehensive data": 9785, + "model created": 33729, + "created using": 11734, + "denoising autoencoder": 14065, + "data providing": 12577, + "use high": 59908, + "topics using": 57464, + "able model": 707, + "need pre": 36588, + "spam detection": 51917, + "algorithms proposed": 2336, + "approach achieving": 3404, + "97 accuracy": 571, + "understanding requires": 59394, + "requires deep": 46922, + "deep semantic": 13749, + "discourse information": 15390, + "discriminatively trained": 15450, + "trained neural": 57825, + "generate embeddings": 22197, + "using perplexity": 60857, + "helps improve": 23608, + "systems complex": 54456, + "consider information": 10211, + "information traditional": 26129, + "traditional media": 57528, + "ignoring rich": 24502, + "provided user": 44175, + "optimization framework": 38548, + "framework designed": 21491, + "automatic evaluations": 5090, + "datasets cover": 13199, + "produces informative": 43032, + "systems human": 54522, + "agreement disagreement": 2105, + "detection online": 14508, + "online discussions": 38364, + "segment level": 49075, + "existing general": 19070, + "sentiment lexicons": 49851, + "performance evaluate": 40325, + "tagging model": 54742, + "online debates": 38360, + "model shown": 34374, + "shown outperform": 50731, + "datasets example": 13257, + "egyptian arabic": 16946, + "efforts focused": 16939, + "using tools": 60991, + "model class": 33658, + "features neural": 20628, + "model unsupervised": 34504, + "ranked second": 44958, + "data deep": 12270, + "coverage semantic": 11652, + "opposite direction": 38518, + "position paper": 41268, + "dataset evaluate": 12910, + "computational models": 9851, + "models simply": 35513, + "art language": 4268, + "novel benchmark": 37776, + "challenging test": 8158, + "encourage development": 17591, + "development new": 14692, + "new models": 37261, + "context natural": 10678, + "tasks explore": 55633, + "introduce general": 26808, + "various dimensions": 61326, + "highly related": 23909, + "terms evaluation": 56285, + "better baseline": 6852, + "optimal results": 38531, + "provide qualitative": 44116, + "provide set": 44128, + "order generate": 38622, + "score 67": 48802, + "domain general": 16077, + "general semantic": 22090, + "current approaches": 11960, + "approaches largely": 3856, + "largely rely": 29064, + "additional supervision": 1701, + "generalize domains": 22142, + "present generative": 41920, + "demonstrate application": 13864, + "sentence generation": 49565, + "generation work": 22581, + "novel application": 37755, + "prediction present": 41728, + "work introduced": 62694, + "prediction addition": 41691, + "limitations work": 30559, + "work examine": 62651, + "level embeddings": 30107, + "hot encoding": 24029, + "large multi": 28913, + "class multi": 8407, + "demonstrate efficiency": 13908, + "performance benefits": 40215, + "significant portion": 50912, + "performance automatic": 40202, + "noisy environments": 37617, + "training strategy": 58275, + "strategy called": 52929, + "multi stage": 36010, + "stage training": 52445, + "use method": 59946, + "training samples": 58237, + "methods evaluated": 32845, + "end speech": 17710, + "wall street": 61762, + "street journal": 52968, + "journal corpus": 27228, + "compared conventional": 9398, + "training method": 58173, + "core problem": 11154, + "space previous": 51885, + "work relied": 62803, + "dataset increase": 12965, + "increase coverage": 25410, + "model combining": 33669, + "recurrent convolutional": 45611, + "highway network": 23924, + "directional recurrent": 15285, + "network bi": 36712, + "outperforms common": 38882, + "models cnn": 34819, + "cnn rnn": 8775, + "task analysis": 54900, + "sequence length": 49947, + "good representation": 22941, + "long text": 31043, + "problem requires": 42645, + "select correct": 49102, + "answering model": 3081, + "key insight": 27320, + "semantic parses": 49307, + "neural baselines": 36941, + "users paper": 60472, + "challenges involved": 8056, + "prediction models": 41720, + "novel semi": 37915, + "supervised neural": 54024, + "set consisting": 50124, + "shown superior": 50756, + "successfully deployed": 53743, + "production systems": 43052, + "advantages proposed": 1954, + "user engagement": 60410, + "proposed task": 43908, + "propose systematic": 43656, + "analyze behavior": 2806, + "behavior models": 6394, + "models step": 35536, + "models attention": 34734, + "attention attention": 4713, + "despite recent": 14381, + "hidden representation": 23644, + "understanding models": 59366, + "models studied": 35542, + "hidden state": 23647, + "noise work": 37607, + "visual analysis": 61648, + "focus understanding": 21210, + "tool allows": 57356, + "focus local": 21176, + "domain use": 16223, + "tool analyzing": 57357, + "properties dataset": 43259, + "tool used": 57368, + "domain different": 16048, + "novel hybrid": 37839, + "model generalization": 33921, + "generalization ability": 22114, + "ability neural": 628, + "layer model": 29190, + "task neural": 55237, + "learning recently": 29835, + "promising paradigm": 43171, + "paradigm machine": 39623, + "description paper": 14246, + "recently published": 45459, + "used neural": 60249, + "build systems": 7429, + "2016 shared": 259, + "shared tasks": 50508, + "tasks automatic": 55514, + "dimensions word": 15245, + "structures natural": 53189, + "measure quality": 32060, + "just like": 27251, + "positive correlation": 41278, + "correlation model": 11526, + "model downstream": 33790, + "downstream semantic": 16349, + "evaluation tool": 18741, + "space semantic": 51897, + "models considered": 34849, + "structure propose": 53130, + "simple baselines": 51140, + "computing power": 9904, + "text make": 56654, + "essential building": 18322, + "learning unsupervised": 29925, + "learn semantics": 29420, + "words entities": 62409, + "aim learn": 2154, + "model shared": 34368, + "strategy improves": 52937, + "unsupervised framework": 59698, + "existing domain": 19060, + "chen et": 8284, + "al 2011": 2233, + "specific constraints": 52058, + "features evaluate": 20572, + "task outperforms": 55260, + "outperforms unsupervised": 38957, + "baselines existing": 6258, + "summarization method": 53890, + "rouge score": 48354, + "reach competitive": 45045, + "models need": 35248, + "knowledge distillation": 27439, + "models domains": 34938, + "standard knowledge": 52495, + "level prediction": 30180, + "novel sequence": 37919, + "eliminate need": 16986, + "teacher model": 55992, + "model best": 33620, + "student model": 53212, + "loss performance": 31101, + "performance significantly": 40561, + "trained knowledge": 57755, + "greedy decoding": 23243, + "model 13": 33483, + "13 times": 128, + "times fewer": 57250, + "parameters original": 39713, + "concepts methods": 9938, + "representing words": 46815, + "nodes connected": 37590, + "semantically similar": 49392, + "number studies": 38040, + "studies carried": 53251, + "pattern recognition": 39963, + "using traditional": 60993, + "recognition process": 45526, + "process addition": 42755, + "representations based": 46622, + "based bipartite": 5613, + "problem approaches": 42506, + "approaches consider": 3787, + "consider possible": 10216, + "context target": 10729, + "results revealed": 47817, + "excellent results": 18955, + "method outperformed": 32596, + "small training": 51506, + "training dataset": 58052, + "dataset available": 12819, + "method useful": 32695, + "useful improve": 60368, + "models popular": 35329, + "various semantic": 61388, + "semantic phenomena": 49318, + "novel probabilistic": 37897, + "advances machine": 1915, + "learning particular": 29799, + "based logical": 5818, + "networks finally": 36855, + "demonstrate feasibility": 13911, + "vocabulary set": 61711, + "trained novel": 57835, + "novel applications": 37756, + "outside nlp": 39025, + "words existing": 62412, + "training systems": 58282, + "network data": 36728, + "transfer paper": 58412, + "proposed benchmark": 43744, + "dataset showing": 13084, + "resulting significant": 47476, + "representations documents": 46643, + "vectors jointly": 61488, + "tokens using": 57343, + "hierarchical framework": 23670, + "model document": 33782, + "learn continuous": 29353, + "similar documents": 51038, + "learning user": 29930, + "user specific": 60448, + "specific vectors": 52171, + "news data": 37397, + "indicate proposed": 25531, + "outperforming current": 38849, + "margin paper": 31822, + "models dialogue": 34912, + "experiments standard": 19530, + "rnn models": 48203, + "models state": 35533, + "dataset specific": 13096, + "architectures used": 4128, + "performance benchmarks": 40213, + "models close": 34818, + "continuous representations": 10851, + "challenging requires": 8136, + "model given": 33939, + "does model": 15960, + "directly trained": 15338, + "manually engineered": 31778, + "perform task": 40152, + "analysis performance": 2714, + "objective reduce": 38102, + "performance does": 40300, + "compared common": 9392, + "common practice": 9192, + "future context": 21864, + "specific applications": 52044, + "applications real": 3242, + "visual data": 61654, + "data previous": 12560, + "propose probabilistic": 43592, + "jointly leveraging": 27205, + "leveraging text": 30339, + "text images": 56623, + "crafted feature": 11678, + "design end": 14279, + "set existing": 50151, + "large gap": 28882, + "biomedical clinical": 7172, + "texts research": 56918, + "research articles": 46986, + "subject research": 53557, + "extraction process": 20097, + "texts existing": 56878, + "use manually": 59944, + "methods create": 32804, + "features fed": 20585, + "results methods": 47719, + "methods highly": 32889, + "highly dependent": 23891, + "quality user": 44596, + "designed features": 14316, + "curse dimensionality": 12048, + "work focus": 62668, + "focus extracting": 21163, + "learn features": 29370, + "features automatically": 20528, + "reduce dependency": 45658, + "manual feature": 31741, + "good model": 22935, + "model relation": 34298, + "clinical text": 8674, + "expert knowledge": 19583, + "quality features": 44523, + "role determining": 48303, + "word dependencies": 62136, + "sentence work": 49674, + "focus reducing": 21194, + "propose domain": 43357, + "domain invariant": 16090, + "particular propose": 39859, + "features employ": 20569, + "performance obtained": 40465, + "models obtained": 35272, + "performance classification": 40234, + "scores sentence": 48920, + "training use": 58310, + "scores given": 48902, + "compared popular": 9430, + "romanian english": 48334, + "learning directly": 29592, + "feature weights": 20511, + "advantage large": 1940, + "large body": 28852, + "body work": 7242, + "work machine": 62716, + "interpretable model": 26725, + "results small": 47851, + "small performance": 51492, + "scale sentence": 48623, + "sentence length": 49576, + "providing fine": 44244, + "novel human": 37838, + "representation scheme": 46576, + "experiment language": 19240, + "generating data": 22371, + "dialogue history": 14776, + "dialogue turn": 14792, + "dialogue acts": 14765, + "score furthermore": 48846, + "furthermore model": 21829, + "used original": 60258, + "finer granularity": 21039, + "particularly problematic": 39887, + "topic related": 57424, + "approaches work": 3956, + "applications need": 3224, + "nature texts": 36491, + "purpose task": 44412, + "computational language": 9843, + "statistical approach": 52736, + "logical structure": 30990, + "linguistic theory": 30802, + "questions remain": 44804, + "strong performance": 53040, + "tasks research": 55858, + "performance multiple": 40450, + "multiple types": 36307, + "prediction based": 41696, + "corpora language": 11212, + "methods traditional": 33077, + "use research": 59996, + "research demonstrate": 47014, + "effectively used": 16760, + "used downstream": 60157, + "permutation invariant": 40733, + "different prior": 15036, + "regression problem": 45816, + "problem deep": 42530, + "progress deep": 43095, + "simple domain": 51147, + "method neural": 32588, + "supervised domain": 53980, + "improving generalization": 25181, + "generalization performance": 22126, + "performance target": 40591, + "target domain": 54812, + "domain using": 16225, + "using source": 60955, + "domain dataset": 16041, + "datasets labeled": 13308, + "generation existing": 22455, + "tune model": 58857, + "dataset training": 13122, + "training source": 58264, + "source dataset": 51761, + "dataset design": 12891, + "domain target": 16199, + "adaptation technique": 1542, + "technique proposed": 56044, + "trained cross": 57700, + "entropy loss": 18162, + "datasets performance": 13370, + "improvements domain": 25069, + "text sources": 56779, + "representations word2vec": 46790, + "enables users": 17451, + "sources like": 51834, + "map word": 31797, + "capture linguistic": 7690, + "concepts like": 9937, + "technique uses": 56050, + "based observation": 5921, + "words representation": 62496, + "additionally propose": 1729, + "propose represent": 43605, + "special tokens": 52022, + "continuous vectors": 10856, + "reveal proposed": 48012, + "proposed approaches": 43737, + "quality neural": 44557, + "systems significantly": 54633, + "create high": 11698, + "humans machines": 24280, + "promising applications": 43159, + "content given": 10527, + "challenges developing": 8039, + "developing systems": 14663, + "work serves": 62817, + "advances field": 1911, + "annotations used": 3005, + "used directly": 60152, + "directly paper": 15330, + "approach sequence": 3686, + "correct errors": 11467, + "using explicit": 60686, + "000 tokens": 12, + "exceeds state": 18950, + "resource settings": 47275, + "models utilizing": 35665, + "novel extension": 37820, + "extension work": 19849, + "using target": 60977, + "results consistent": 47559, + "quality language": 44542, + "context model": 10674, + "model extended": 33864, + "generalization capabilities": 22118, + "complex reasoning": 9655, + "scale knowledge": 48581, + "bases kbs": 6325, + "relations entities": 46026, + "entities entity": 18047, + "task achieve": 54875, + "relations shared": 46057, + "art code": 4236, + "code data": 8798, + "github io": 22717, + "space possible": 51881, + "possible learn": 41331, + "learn efficient": 29366, + "augment existing": 4940, + "parsing models": 39787, + "global model": 22835, + "model non": 34132, + "new objective": 37271, + "objective encourages": 38086, + "tiny fraction": 57262, + "accuracy f1": 975, + "finds optimal": 20919, + "propose effective": 43360, + "statistical word": 52768, + "models novel": 35268, + "alignment training": 2386, + "additional signal": 1698, + "decoder network": 13603, + "network novel": 36775, + "mt quality": 35924, + "general domain": 22052, + "speech translation": 52311, + "systems outperforms": 54578, + "order control": 38604, + "use multiple": 59956, + "level sequence": 30210, + "task studies": 55415, + "words current": 62393, + "addresses issue": 1811, + "task datasets": 54993, + "developed machine": 14631, + "models data": 34880, + "sentence language": 49575, + "representation enables": 46507, + "text context": 56513, + "compared text": 9465, + "model context": 33711, + "final outcome": 20824, + "using recent": 60897, + "outperform text": 38828, + "data text": 12729, + "performs best": 40697, + "sensitivity analysis": 49507, + "style reading": 53495, + "data greatly": 12395, + "present chinese": 41865, + "comprehension datasets": 9764, + "datasets consist": 13192, + "dataset propose": 13037, + "problem aims": 42501, + "attention words": 4853, + "words query": 62490, + "baselines public": 6291, + "public datasets": 44314, + "comprehension task": 9776, + "type based": 59049, + "line research": 30646, + "representations linguistic": 46711, + "models lack": 35159, + "required train": 46906, + "share parameters": 50460, + "methods enable": 32837, + "enable zero": 17432, + "words training": 62535, + "embeddings neural": 17179, + "applications particular": 3231, + "models classify": 34816, + "media messages": 32171, + "service providers": 50093, + "30 different": 356, + "achieve accuracy": 1109, + "accuracy 85": 917, + "using lstm": 60781, + "traditional techniques": 57551, + "vary different": 61421, + "models deployed": 34899, + "customer support": 12058, + "meta information": 32335, + "information diverse": 25818, + "sentiment text": 49863, + "information features": 25873, + "problems present": 42720, + "tasks evaluation": 55622, + "evaluation word": 18748, + "embeddings outperform": 17184, + "architectures based": 4103, + "achieving new": 1415, + "systems natural": 54567, + "tasks ranging": 55833, + "modeling language": 34588, + "paper simple": 39574, + "simple baseline": 51139, + "baseline achieves": 6152, + "51 relative": 432, + "improvement compared": 24998, + "model datasets": 33737, + "yields competitive": 63121, + "results second": 47819, + "second dataset": 49002, + "dataset study": 13104, + "study investigates": 53399, + "investigates use": 27003, + "sequence features": 49921, + "reduce manual": 45668, + "effectiveness compared": 16771, + "set baseline": 50112, + "representation approach": 46492, + "performance unsupervised": 40614, + "demonstrate significant": 13972, + "improvements terms": 25107, + "using unsupervised": 61011, + "approaches literature": 3864, + "captured existing": 7725, + "allows effectively": 2461, + "effectively leverage": 16747, + "meaning language": 32004, + "methods open": 32967, + "models arbitrary": 34724, + "information making": 25964, + "models combine": 34826, + "significantly improved": 50970, + "model provides": 34257, + "using mean": 60794, + "mean field": 31991, + "entailment based": 17999, + "networks proven": 36899, + "effective natural": 16678, + "answering machine": 3079, + "technical report": 56019, + "present detailed": 41889, + "process input": 42794, + "limited dependence": 30581, + "syntactic tree": 54334, + "structured model": 53165, + "middle ground": 33236, + "mechanism applied": 32099, + "showing model": 50682, + "model achieved": 33507, + "different nlp": 15009, + "art recurrent": 4363, + "continuous word": 10857, + "tasks popular": 55799, + "popular models": 41173, + "train models": 57610, + "appear training": 3139, + "data evaluate": 12329, + "representations different": 46639, + "tasks comparing": 55549, + "representations vectors": 46787, + "research literature": 47067, + "challenges understanding": 8079, + "understanding previous": 59383, + "information collected": 25781, + "collected data": 8958, + "promising research": 43175, + "novel methodology": 37868, + "capable extracting": 7619, + "extracting meaningful": 20034, + "study role": 53456, + "bilingual word": 7115, + "performance second": 40547, + "language similarity": 28487, + "similarity target": 51123, + "language additionally": 27954, + "languages results": 28775, + "numerous applications": 38065, + "hot topics": 24031, + "results time": 47885, + "ability recognize": 640, + "content web": 10570, + "propose paper": 43579, + "cross document": 11812, + "turn used": 58991, + "performances tasks": 40649, + "demonstrating potential": 14055, + "potential approach": 41382, + "problems learning": 42708, + "learning search": 29862, + "imitation learning": 24577, + "training highly": 58121, + "processing existing": 42870, + "pairs propose": 39210, + "propose label": 43431, + "tease apart": 56015, + "improve robustness": 24919, + "key ingredient": 27318, + "performance furthermore": 40355, + "network layers": 36757, + "increasing model": 25454, + "complexity inference": 9677, + "inference time": 25700, + "probabilistic modeling": 42466, + "number parameters": 38024, + "mechanism automatically": 32101, + "automatically learns": 5190, + "embeddings particular": 17187, + "learning procedure": 29819, + "improvement word": 25039, + "syntactic tasks": 54333, + "parsing using": 39804, + "joint models": 27180, + "development online": 14695, + "online communication": 38354, + "information support": 26109, + "extract relevant": 19988, + "goal develop": 22881, + "based experiment": 5714, + "specific aspects": 52047, + "frame task": 21441, + "task multi": 55224, + "investigate performance": 26973, + "different classification": 14863, + "evaluate different": 18451, + "different architectures": 14841, + "hierarchical approach": 23654, + "approach leads": 3584, + "superior results": 53942, + "model makes": 34086, + "cognitive linguistics": 8891, + "consists set": 10328, + "relevant context": 46204, + "analyze effects": 2814, + "mechanism neural": 32131, + "results natural": 47736, + "input representation": 26326, + "model works": 34546, + "growing research": 23302, + "research automated": 46988, + "text news": 56676, + "able identify": 699, + "time event": 57152, + "time time": 57232, + "annotated documents": 2892, + "documents domain": 15873, + "domain news": 16122, + "research focuses": 47042, + "utilizes context": 61113, + "learning specifically": 29889, + "generate dialogue": 22192, + "using rnn": 60911, + "information dialogue": 25811, + "conversational agents": 11039, + "sentence ordering": 49608, + "critical task": 11795, + "focused improving": 21225, + "collect large": 8945, + "driven approach": 16420, + "source codes": 51754, + "dataset paper": 13023, + "tweets model": 59017, + "lstm encoder": 31260, + "decoder trained": 13616, + "trained model": 57791, + "using methods": 60802, + "generated model": 22299, + "representations generated": 46677, + "method presented": 32620, + "presented used": 42063, + "model time": 34463, + "scientific publications": 48768, + "higher complexity": 23816, + "including vocabulary": 25320, + "introduced new": 26887, + "terms vocabulary": 56322, + "analyze dataset": 2809, + "dataset composed": 12853, + "year period": 63047, + "generating synthetic": 22398, + "models methods": 35224, + "generation multiple": 22502, + "inference process": 25685, + "hierarchical classification": 23662, + "similar domains": 51040, + "selection propose": 49150, + "sets results": 50306, + "baseline comparison": 6163, + "comparison existing": 9495, + "language environment": 28051, + "despite large": 14372, + "cross cultural": 11810, + "linguistic theories": 30801, + "learning offers": 29788, + "achieve human": 1158, + "human like": 24199, + "data enable": 12320, + "building effective": 7443, + "data computational": 12230, + "models address": 34695, + "privacy preserving": 42441, + "evaluated different": 18529, + "different linguistic": 14980, + "linguistic levels": 30776, + "machines humans": 31398, + "external sources": 19954, + "present current": 41880, + "existing literature": 19085, + "specific components": 52057, + "order promote": 38649, + "theory practice": 57038, + "improve attention": 24824, + "accuracy neural": 1014, + "task model": 55215, + "art traditional": 4431, + "primary goal": 42371, + "bias language": 7029, + "focus particular": 21189, + "theoretical empirical": 57019, + "primary task": 42373, + "task unsupervised": 55457, + "unsupervised grammar": 59699, + "grammar induction": 23063, + "induction task": 25608, + "extract salient": 19990, + "particularly hard": 39882, + "hard problem": 23450, + "help improving": 23572, + "model build": 33635, + "build model": 7412, + "algorithm efficiently": 2272, + "examine effectiveness": 18863, + "methods particularly": 32977, + "current language": 11981, + "models significant": 35505, + "ability encode": 606, + "factual knowledge": 20322, + "acquire knowledge": 1441, + "knowledge provided": 27582, + "knowledge related": 27587, + "performance generating": 40365, + "smaller number": 51524, + "art cnn": 4235, + "models good": 35066, + "purpose work": 44414, + "work empirically": 62643, + "empirically study": 17367, + "semantically coherent": 49382, + "model observe": 34136, + "observation propose": 38123, + "using clustering": 60606, + "problem used": 42681, + "datasets experimental": 13263, + "usefulness approach": 60400, + "performance close": 40237, + "methods semantic": 33030, + "methods proven": 32998, + "useful tasks": 60390, + "nlp natural": 37504, + "corpus word": 11459, + "word2vec glove": 62349, + "results corpus": 47562, + "range different": 44915, + "attracted research": 4886, + "recently gained": 45427, + "gained popularity": 21921, + "embeddings typically": 17236, + "rare unseen": 44998, + "propose improve": 43412, + "embeddings incorporating": 17150, + "word features": 62204, + "embeddings directly": 17113, + "framework word": 21627, + "prior distribution": 42397, + "distribution latent": 15643, + "corpus approach": 11278, + "approach yields": 3744, + "poses major": 41249, + "languages significant": 28785, + "differences word": 14831, + "approach utilizing": 3736, + "art statistical": 4413, + "clusters based": 8751, + "hierarchical structure": 23692, + "models generating": 35060, + "used nlp": 60252, + "tasks high": 55664, + "high computational": 23713, + "greedy algorithm": 23242, + "clustering algorithms": 8737, + "sub optimal": 53526, + "ability produce": 635, + "quality human": 44529, + "human understandable": 24252, + "preprocessing feature": 41826, + "feature generation": 20492, + "generation steps": 22552, + "tuning parameters": 58939, + "parameters used": 39727, + "number clusters": 37989, + "yielded significant": 63105, + "performance resulting": 40538, + "words concepts": 62384, + "dimensional spaces": 15236, + "addition existing": 1614, + "model extend": 33863, + "certain conditions": 7937, + "content multiple": 10540, + "standard methods": 52504, + "query translation": 44679, + "embeddings method": 17173, + "method captures": 32412, + "similar context": 51033, + "use dictionary": 59866, + "various methods": 61361, + "difficult obtain": 15178, + "resource scarce": 47271, + "retrieval evaluation": 47944, + "proposed word": 43926, + "google translate": 22959, + "fail detect": 20333, + "relevant documents": 46211, + "proposed address": 43712, + "performance superior": 40588, + "approaches present": 3898, + "query document": 44665, + "word mover": 62249, + "mover distance": 35892, + "measures proposed": 32079, + "method relies": 32638, + "helps identify": 23607, + "document method": 15811, + "data approach": 12143, + "mean average": 31989, + "world dataset": 62934, + "dataset collected": 12845, + "combine semantic": 9072, + "method leads": 32561, + "directional attention": 15276, + "forward backward": 21402, + "memory component": 32247, + "model implicitly": 33976, + "capture high": 7676, + "12 languages": 108, + "showing proposed": 50687, + "scores languages": 48906, + "languages introduce": 28697, + "dataset evaluation": 12912, + "evaluation resource": 18698, + "semantic category": 49243, + "research existing": 47031, + "existing large": 19083, + "compare human": 9343, + "automatic systems": 5127, + "huge gap": 24073, + "gap human": 21963, + "distributional representation": 15667, + "models substantial": 35547, + "substantial differences": 53617, + "models overcome": 35296, + "systems article": 54434, + "results case": 47528, + "recognition ocr": 45522, + "models possible": 35331, + "results evaluated": 47618, + "methods recently": 33009, + "corpus consisting": 11302, + "models tested": 35594, + "individual models": 25574, + "corpora including": 11209, + "manual transcription": 31751, + "cultural heritage": 11937, + "robust word": 48268, + "word processing": 62271, + "propose word": 43706, + "robust performance": 48262, + "furthermore demonstrate": 21813, + "experiment human": 19239, + "human reading": 24229, + "model domain": 33788, + "adaptation approaches": 1520, + "domain shared": 16158, + "transfer knowledge": 58369, + "explore multi": 19717, + "adaptation multiple": 1529, + "multiple tasks": 36299, + "tasks simultaneously": 55892, + "representations better": 46624, + "generalize domain": 22141, + "framework domain": 21497, + "tasks chinese": 55537, + "experiments multi": 19471, + "adaptation task": 1540, + "tasks social": 55896, + "correct output": 11471, + "technique language": 56037, + "language vision": 28578, + "problem problem": 42630, + "language emerging": 28044, + "human understanding": 24253, + "test model": 56358, + "model diverse": 33781, + "diverse domains": 15700, + "generic language": 22630, + "causal language": 7875, + "work opens": 62742, + "opens door": 38481, + "usually require": 61064, + "text aligned": 56426, + "aligned word": 2359, + "hypothesis propose": 24347, + "propose scheme": 43612, + "trained source": 57877, + "trained target": 57889, + "using adversarial": 60554, + "results discuss": 47594, + "lingual sentence": 30726, + "language known": 28126, + "adapted languages": 1553, + "human rights": 24236, + "language adaptation": 27952, + "different genres": 14943, + "typical text": 59133, + "scale natural": 48603, + "understanding task": 59407, + "task publicly": 55311, + "available dataset": 5278, + "task predict": 55282, + "reading text": 45090, + "articles task": 4481, + "task contains": 54975, + "contains rich": 10503, + "classification extraction": 8470, + "extraction sub": 20116, + "tasks making": 55739, + "end models": 17686, + "models deep": 34888, + "compare various": 9376, + "classification information": 8480, + "answering models": 3082, + "models supporting": 35566, + "performing model": 40681, + "accuracy 71": 905, + "current nlp": 11993, + "systems propose": 54604, + "propose graph": 43405, + "human curated": 24130, + "art automatic": 4218, + "systems evaluation": 54491, + "languages experiment": 28663, + "training classifiers": 57952, + "results information": 47682, + "information pos": 26010, + "data reveals": 12617, + "novel strategy": 37930, + "processing analyze": 42851, + "online conversations": 38357, + "medical conditions": 32199, + "learning work": 29946, + "relevant content": 46203, + "data exists": 12338, + "length sentences": 30035, + "common methods": 9186, + "methods include": 32898, + "averaging word": 5428, + "hidden states": 23648, + "networks lstms": 36873, + "sentence vectors": 49669, + "tasks pre": 55802, + "training context": 57959, + "context deep": 10606, + "information capture": 25774, + "encoded representations": 17484, + "content word": 10572, + "ability train": 645, + "using representation": 60908, + "analyzing different": 2841, + "analysis sheds": 2755, + "sheds light": 50532, + "relative strengths": 46111, + "resulting representations": 47475, + "data analytics": 12133, + "used widely": 60352, + "explosive growth": 19780, + "data challenges": 12204, + "process large": 42801, + "data finally": 12363, + "efficient methods": 16885, + "improve query": 24913, + "usage large": 59802, + "lower memory": 31217, + "memory requirements": 32282, + "large neural": 28916, + "systems task": 54647, + "translation use": 58698, + "training improving": 58128, + "deep model": 13727, + "memory efficient": 32254, + "understanding context": 59334, + "increasingly important": 25473, + "information nlp": 25992, + "resources languages": 47310, + "3rd workshop": 391, + "metrics use": 33205, + "use cross": 59857, + "universal speech": 59548, + "approaches results": 3916, + "new strategies": 37325, + "open information": 38433, + "making processes": 31665, + "diverse datasets": 15699, + "datasets analysis": 13149, + "completely different": 9606, + "employ different": 17377, + "different methodologies": 14988, + "krippendorff alpha": 27677, + "used measure": 60233, + "graph models": 23149, + "used social": 60306, + "lot progress": 31117, + "evaluate approaches": 18440, + "learn deep": 29357, + "train test": 57646, + "test models": 56359, + "using crowdsourcing": 60640, + "demonstrate models": 13945, + "text compared": 56500, + "released public": 46181, + "public access": 44303, + "useful training": 60394, + "training word": 58314, + "effective framework": 16653, + "framework automatic": 21459, + "based universal": 6119, + "universal dependency": 59539, + "framework effective": 21499, + "human scores": 24239, + "based contexts": 5642, + "time results": 57207, + "english training": 17892, + "training setup": 58255, + "outperform previously": 38813, + "proposed context": 43746, + "results context": 47561, + "humans read": 24286, + "attempts explain": 4697, + "architecture combines": 4035, + "combines neural": 9099, + "encoding input": 17567, + "words possible": 62482, + "corpus showing": 11429, + "accurately predicts": 1099, + "features human": 20598, + "able detect": 687, + "interactions paper": 26619, + "sentence sequence": 49645, + "words instead": 62438, + "sequence sentence": 49973, + "sentence proposed": 49626, + "difficult learn": 15174, + "problem work": 42689, + "prove effectiveness": 43980, + "evaluate tasks": 18511, + "including word": 25321, + "model powerful": 34216, + "word document": 62141, + "weight matrix": 61919, + "embedding input": 17031, + "model offer": 34142, + "offer new": 38294, + "methods lead": 32922, + "variety neural": 61284, + "models finally": 35024, + "reduce size": 45680, + "spoken content": 52351, + "text content": 56512, + "content difficult": 10520, + "difficult time": 15189, + "highly attractive": 23880, + "develop machine": 14594, + "key information": 27317, + "english propose": 17861, + "architecture task": 4089, + "initial results": 26216, + "shown word": 50760, + "level attention": 30066, + "robust sentence": 48266, + "attention task": 4833, + "field research": 20769, + "consists multiple": 10324, + "exponential growth": 19782, + "modeling approach": 34559, + "high scoring": 23801, + "techniques propose": 56125, + "end present": 17697, + "particular application": 39832, + "study provides": 53448, + "provides novel": 44216, + "time resource": 57206, + "analysis automatic": 2619, + "models derived": 34901, + "observed results": 38147, + "face challenge": 20240, + "benchmarks demonstrate": 6514, + "benchmark corpora": 6438, + "performance drops": 40310, + "limited set": 30613, + "english newswire": 17852, + "current practice": 11997, + "practice training": 41486, + "data single": 12666, + "single domain": 51297, + "data non": 12515, + "non obvious": 37672, + "data combining": 12222, + "robust models": 48256, + "daily lives": 12086, + "number documents": 37996, + "language detection": 28025, + "texts important": 56889, + "algorithms paper": 2332, + "specific information": 52090, + "detection results": 14519, + "choose best": 8344, + "detection short": 14526, + "approaches include": 3846, + "svm logistic": 54235, + "based modified": 5885, + "model include": 33987, + "goal improving": 22889, + "approaches evaluated": 3813, + "non latin": 37658, + "algorithm evaluated": 2274, + "language conduct": 28001, + "conduct case": 10029, + "conversational text": 11055, + "distantly supervised": 15561, + "like language": 30480, + "addition analyze": 1600, + "analyze quality": 2824, + "existing language": 19081, + "like text": 30508, + "release new": 46158, + "corpus tweets": 11450, + "tweets containing": 59012, + "create word": 11720, + "related tasks": 45943, + "size data": 51379, + "algorithm does": 2270, + "does need": 15962, + "need training": 36597, + "training able": 57921, + "far know": 20402, + "results nlp": 47743, + "collect dataset": 8941, + "labeled set": 27763, + "propose generate": 43398, + "using sequence": 60930, + "f1 improvement": 20183, + "improvement non": 25010, + "point improvement": 41045, + "generation explore": 22459, + "models identify": 35097, + "reddit posts": 45644, + "key aspects": 27296, + "predictive model": 41775, + "model analyze": 33564, + "evidence suggests": 18821, + "users different": 60459, + "statistical measures": 52751, + "similarity based": 51083, + "based human": 5772, + "representation results": 46575, + "representation semantic": 46577, + "results incorporating": 47674, + "improves correlation": 25120, + "various different": 61325, + "used recent": 60286, + "different learning": 14974, + "model sentences": 34355, + "selection training": 49157, + "training compared": 57954, + "models pre": 35338, + "method shows": 32651, + "different benchmarks": 14852, + "datasets exhibit": 13258, + "process obtain": 42812, + "use graph": 59902, + "degrades performance": 13810, + "parameter optimization": 39674, + "level convolutional": 30089, + "datasets relatively": 13397, + "relatively large": 46118, + "conneau et": 10170, + "effective learning": 16665, + "representations unlike": 46779, + "learning parameters": 29798, + "words obtained": 62469, + "modeling based": 34561, + "popular language": 41166, + "evidence based": 18808, + "significantly reduces": 51012, + "highly beneficial": 23881, + "objective paper": 38098, + "common type": 9207, + "effectively represent": 16756, + "clinical practice": 8672, + "showed proposed": 50670, + "available various": 5389, + "popular task": 41190, + "processing work": 42968, + "work goal": 62676, + "goal predict": 22895, + "train simple": 57633, + "addition present": 1634, + "methods deal": 32813, + "common problem": 9193, + "traditional linguistic": 57525, + "research methods": 47073, + "speakers paper": 52008, + "progress language": 43101, + "specific types": 52167, + "types social": 59118, + "relatively little": 46121, + "little evidence": 30876, + "evidence support": 18822, + "important resource": 24765, + "people make": 40031, + "make informed": 31578, + "prior works": 42427, + "review text": 48042, + "problem detecting": 42534, + "review different": 48031, + "results generally": 47646, + "generally outperform": 22168, + "measures used": 32081, + "used prior": 60274, + "method extends": 32501, + "customer reviews": 12056, + "sentential context": 49811, + "aspect based": 4527, + "analysis modeling": 2698, + "outperforms non": 38915, + "non hierarchical": 37656, + "art multilingual": 4304, + "multilingual multi": 36100, + "domain datasets": 16042, + "hand engineered": 23392, + "features external": 20580, + "task use": 55458, + "point scale": 41050, + "embeddings contain": 17101, + "sentiment information": 49847, + "classification ranking": 8528, + "negative sentiment": 36636, + "propose improvements": 43414, + "order address": 38590, + "extraction multi": 20085, + "results languages": 47693, + "domain pairs": 16127, + "present computational": 41875, + "methods establish": 32842, + "main results": 31458, + "language translations": 28540, + "language characteristics": 27987, + "different original": 15014, + "original ones": 38721, + "supervised text": 54058, + "domain trained": 16214, + "evaluation scenarios": 18707, + "highly accurate": 23878, + "task suggest": 55424, + "method determining": 32460, + "use labels": 59921, + "improving accuracy": 25172, + "suggest simple": 53830, + "mixed domain": 33403, + "domain related": 16145, + "related features": 45907, + "original translated": 38737, + "reasonable accuracy": 45172, + "complex multi": 9636, + "multi faceted": 35957, + "great value": 23221, + "practice paper": 41485, + "approach developing": 3488, + "use concept": 59849, + "academic papers": 793, + "topic words": 57436, + "lexical similarity": 30386, + "perspective model": 40774, + "used model": 60240, + "studies evaluate": 53260, + "directions research": 15300, + "research lack": 47061, + "lack parallel": 27904, + "important challenge": 24704, + "common solution": 9200, + "quality translations": 44593, + "examine use": 18870, + "quality phrase": 44562, + "limited parallel": 30602, + "constraints based": 10372, + "direct model": 15256, + "process automatically": 42760, + "emerged new": 17261, + "new paradigm": 37277, + "learning paper": 29793, + "sequence using": 50016, + "decoder attention": 13588, + "proposed encoder": 43762, + "achieve significantly": 1195, + "significantly higher": 50960, + "efficiently train": 16920, + "train neural": 57616, + "word distribution": 62139, + "time approach": 57117, + "approach reduces": 3670, + "reduces computational": 45688, + "particularly suited": 39892, + "word approach": 62113, + "achieving accuracy": 1392, + "com facebookresearch": 9011, + "generation produce": 22530, + "language previous": 28387, + "language problem": 28391, + "language introduce": 28122, + "introduce task": 26868, + "specific methods": 52110, + "approach neural": 3611, + "output neural": 38988, + "main problems": 31453, + "able handle": 698, + "reduce training": 45682, + "time systems": 57228, + "different outputs": 15016, + "based nmt": 5915, + "setup work": 50411, + "work investigates": 62699, + "style topic": 53502, + "online communities": 38355, + "content style": 10562, + "hybrid word": 24323, + "model topic": 34464, + "specific topics": 52163, + "growing demand": 23294, + "led great": 29989, + "limited supervision": 30621, + "approaches extract": 3822, + "learning setting": 29870, + "approach applying": 3422, + "supervision cross": 54079, + "approach graph": 3550, + "graph representation": 23161, + "sentences extract": 49721, + "extract features": 19974, + "features multiple": 20626, + "accuracy robustness": 1040, + "learn accurate": 29343, + "sentence relations": 49630, + "sentiment topic": 49864, + "york times": 63140, + "ethical concerns": 18416, + "negative impact": 36620, + "time automatic": 57119, + "provided model": 44165, + "need labeled": 36574, + "data form": 12370, + "propose iterative": 43424, + "text classifier": 56491, + "based transfer": 6104, + "additionally demonstrate": 1716, + "benefits proposed": 6588, + "evaluation multiple": 18659, + "datasets different": 13228, + "provide systematic": 44141, + "design features": 14283, + "language design": 28023, + "framework provides": 21589, + "challenges evaluation": 8044, + "complexity human": 9676, + "way work": 61838, + "task spoken": 55407, + "using sets": 60934, + "extracted speech": 20021, + "used form": 60194, + "representations speech": 46761, + "encode information": 17464, + "paper construct": 39305, + "encodes information": 17561, + "data make": 12477, + "technique known": 56036, + "viable alternative": 61570, + "alternative model": 2506, + "prediction score": 41738, + "neural end": 36952, + "present number": 41981, + "networks evaluate": 36850, + "performance similar": 40562, + "use external": 59886, + "external language": 19946, + "model decoding": 33741, + "researchers investigated": 47161, + "problem extracting": 42563, + "mining techniques": 33327, + "proposing new": 43946, + "process models": 42806, + "leverage unsupervised": 30297, + "little human": 30877, + "human involvement": 24179, + "automatically label": 5185, + "use case": 59838, + "demonstrates usefulness": 14049, + "usefulness proposed": 60401, + "software library": 51639, + "integrated existing": 26513, + "achieves promising": 1354, + "trained parallel": 57837, + "used translate": 60343, + "test sentences": 56368, + "propose dynamic": 43359, + "fine tunes": 20974, + "work small": 62825, + "data obtained": 12520, + "similarity search": 51119, + "sentence extensive": 49558, + "demonstrate method": 13934, + "performance especially": 40323, + "similar sentences": 51066, + "sentences available": 49683, + "available new": 5332, + "online forums": 38367, + "set known": 50175, + "approach knowledge": 3580, + "based small": 6041, + "set initial": 50170, + "effective detecting": 16644, + "time new": 57184, + "lda based": 29249, + "model social": 34395, + "data like": 12463, + "nature natural": 36484, + "researchers applying": 47149, + "product review": 43044, + "making challenging": 31647, + "modeling target": 34627, + "target specific": 54843, + "approaches propose": 3902, + "new formulation": 37209, + "prior information": 42402, + "model utilizing": 34520, + "existing public": 19131, + "data conduct": 12236, + "million tweets": 33260, + "provides useful": 44232, + "representations fine": 46670, + "grained word": 23048, + "implicitly learned": 24669, + "text resulting": 56746, + "questions does": 44785, + "climate change": 8666, + "complex relationships": 9657, + "refers task": 45763, + "task converting": 54980, + "possible ways": 41341, + "context study": 10726, + "written spoken": 63010, + "ranking model": 44973, + "language universal": 28565, + "purpose multilingual": 44407, + "multilingual semantic": 36117, + "tagger using": 54732, + "using deep": 60646, + "uses word": 60543, + "character representations": 8223, + "includes novel": 25232, + "semantic tags": 49362, + "prior results": 42412, + "results english": 47609, + "information considered": 25786, + "constructing knowledge": 10421, + "plain texts": 40939, + "contexts entities": 10751, + "dynamically select": 16499, + "informative sentences": 26176, + "sentences corresponding": 49699, + "corresponding entities": 11550, + "propose sequential": 43626, + "multiple sentences": 36281, + "sentence entity": 49552, + "network encode": 36736, + "model measure": 34097, + "build text": 7430, + "representations entities": 46652, + "method tasks": 32680, + "indicates method": 25539, + "information knowledge": 25936, + "word problems": 62270, + "word problem": 62269, + "added existing": 1590, + "existing datasets": 19054, + "datasets make": 13324, + "enable accurate": 17420, + "used metrics": 60238, + "future evaluations": 21873, + "amr text": 2577, + "generation generate": 22468, + "meaning given": 32002, + "essential step": 18334, + "approaches heavily": 3838, + "heavily rely": 23535, + "rely hand": 46283, + "features domain": 20563, + "specific resources": 52140, + "difficult collect": 15159, + "reason paper": 45169, + "neural architectures": 36933, + "text experimental": 56564, + "task benchmarks": 54933, + "model representation": 34309, + "construction model": 10429, + "outperforms recent": 38937, + "work low": 62715, + "data potentially": 12550, + "endangered languages": 17732, + "languages training": 28808, + "step making": 52814, + "using dynamic": 60670, + "trained jointly": 57753, + "jointly using": 27225, + "using expectation": 60685, + "extremely low": 20163, + "resource scenario": 47272, + "model performs": 34201, + "baseline introduce": 6176, + "training decoding": 58055, + "output different": 38967, + "models instead": 35135, + "attention weights": 4852, + "generation experiments": 22458, + "gains baseline": 21933, + "learning utilize": 29934, + "utilize pre": 61100, + "currently exist": 12034, + "usage social": 59807, + "paper release": 39564, + "basic language": 6330, + "resource training": 47284, + "training development": 58063, + "development test": 14706, + "data resources": 12611, + "report baseline": 46427, + "baseline results": 6206, + "data resource": 12610, + "web services": 61897, + "qa task": 44459, + "relevant concepts": 46202, + "answers given": 3109, + "recall measure": 45243, + "achieved second": 1267, + "require manually": 46878, + "models performances": 35319, + "requires expert": 46926, + "expensive recent": 19217, + "systematic way": 54406, + "near optimal": 36508, + "compared random": 9444, + "models yield": 35691, + "yield best": 63089, + "best performances": 6793, + "sequence characters": 49914, + "predicted model": 41669, + "demo available": 13846, + "visual textual": 61671, + "textual representations": 56977, + "quality models": 44552, + "models standard": 35531, + "standard semantic": 52524, + "sequential models": 50047, + "outperform recent": 38817, + "including ones": 25287, + "best configuration": 6758, + "representations deep": 46635, + "data released": 12597, + "80 accuracy": 521, + "higher accuracies": 23811, + "model easily": 33796, + "additional context": 1659, + "approach makes": 3596, + "use sequence": 60011, + "conversation threads": 11036, + "work addressing": 62559, + "different assumptions": 14845, + "twitter datasets": 59036, + "datasets collected": 13177, + "non sequential": 37682, + "introducing novel": 26902, + "novel way": 37952, + "networks achieved": 36829, + "hybrid architecture": 24311, + "architecture proposed": 4080, + "study using": 53473, + "performance rnn": 40540, + "task experimental": 55065, + "performance datasets": 40277, + "does outperform": 15963, + "outperform models": 38803, + "used domains": 60156, + "domains existing": 16251, + "read sentences": 45067, + "different weights": 15130, + "sentences end": 49709, + "end propose": 17699, + "attention models": 4788, + "derived using": 14204, + "reading time": 45091, + "methods significantly": 33039, + "art sentence": 4397, + "sequence transduction": 50014, + "based generative": 5749, + "sentences generate": 49727, + "parsing tasks": 39800, + "domains limited": 16271, + "limited access": 30562, + "data extend": 12349, + "synthetically generated": 54388, + "forms work": 21379, + "annotation tools": 2978, + "annotation projection": 2962, + "applicable wide": 3158, + "languages provides": 28762, + "external information": 19937, + "demonstrate validity": 13998, + "great success": 23217, + "tasks previous": 55810, + "work investigated": 62698, + "generate concise": 22186, + "length paper": 30031, + "results learning": 47698, + "based input": 5785, + "work improving": 62685, + "improving efficiency": 25178, + "efficiency neural": 16848, + "models adopted": 34697, + "candidates given": 7586, + "based selection": 6010, + "decoding time": 13650, + "single cpu": 51291, + "words input": 62437, + "conventional machine": 11004, + "agent learns": 2057, + "make decisions": 31565, + "setting experiments": 50322, + "experiments state": 19532, + "baselines language": 6274, + "pairs demonstrate": 39178, + "generic text": 22633, + "text representation": 56737, + "training parameters": 58205, + "representation new": 46563, + "new text": 37343, + "work information": 62687, + "benchmark paper": 6485, + "use rich": 60001, + "events paper": 18796, + "paper outlines": 39435, + "outperforms models": 38910, + "basic units": 6334, + "training small": 58263, + "selectional preferences": 49160, + "sets different": 50288, + "vector models": 61458, + "variety features": 61272, + "error prone": 18222, + "require domain": 46850, + "greatly reduce": 23237, + "reduce effort": 45660, + "framework leverage": 21556, + "leverage large": 30273, + "problem limited": 42596, + "limited labeled": 30592, + "task experiments": 55067, + "better compared": 6866, + "features using": 20692, + "using unlabeled": 61009, + "tweets labeled": 59016, + "classifiers use": 8626, + "present quantitative": 41995, + "text contains": 56511, + "datasets small": 13433, + "step direction": 52805, + "times larger": 57253, + "larger training": 29089, + "training new": 58192, + "model original": 34151, + "recent attempts": 45294, + "version dataset": 61552, + "human baseline": 24113, + "baseline provided": 6204, + "human study": 24244, + "despite advances": 14355, + "advances natural": 1916, + "short informal": 50556, + "information needs": 25988, + "online fashion": 38366, + "models using": 35655, + "twitter dataset": 59035, + "smt neural": 51544, + "translation directions": 58602, + "efficient neural": 16889, + "demonstrate current": 13886, + "linguistic expressions": 30769, + "systems model": 54562, + "relations hold": 46034, + "representations generate": 46676, + "semantics different": 49402, + "model generation": 33935, + "generation component": 22436, + "collected online": 8965, + "classifier based": 8594, + "based fuzzy": 5743, + "used analyze": 60086, + "alternative methods": 2505, + "classification social": 8554, + "categories used": 7850, + "date paper": 13493, + "propose innovative": 43418, + "based expert": 5718, + "novel concepts": 37787, + "outlier detection": 38772, + "help detect": 23556, + "annotation errors": 2947, + "improve overall": 24882, + "diverse real": 15713, + "sets demonstrate": 50287, + "embeddings demonstrated": 17109, + "embeddings obtained": 17183, + "salient information": 48441, + "results benchmark": 47520, + "outperform original": 38808, + "medical text": 32210, + "poses challenges": 41245, + "fast growing": 20425, + "challenging lack": 8105, + "lack labeled": 27897, + "labeled dataset": 27754, + "sources external": 51829, + "external knowledge": 19938, + "knowledge multiple": 27555, + "token representations": 57304, + "representations single": 46757, + "real application": 45098, + "score performance": 48864, + "representation techniques": 46591, + "overcome challenges": 39059, + "propose data": 43346, + "training technique": 58288, + "nn model": 37580, + "best score": 6820, + "art average": 4219, + "average score": 5417, + "argue need": 4165, + "experiments sentiment": 19519, + "approach text": 3720, + "level understanding": 30228, + "developing automated": 14647, + "automated tools": 5064, + "dominant paradigm": 16308, + "models parameters": 35306, + "train better": 57568, + "systems recent": 54613, + "mobile devices": 33448, + "access internet": 825, + "limited resources": 30610, + "high memory": 23753, + "languages remains": 28770, + "big challenge": 7088, + "context machine": 10671, + "deal problem": 13519, + "standard machine": 52499, + "obtains best": 38241, + "neural based": 36938, + "classification results": 8538, + "work large": 62704, + "large research": 28955, + "research project": 47100, + "decoder based": 13589, + "context use": 10738, + "use contextual": 59852, + "contextual data": 10760, + "framework method": 21562, + "models investigate": 35146, + "investigate behavior": 26944, + "work semantic": 62816, + "linking model": 30835, + "language gap": 28081, + "particular present": 39858, + "doctor patient": 15763, + "suitable training": 53861, + "variety methods": 61280, + "fully automatic": 21714, + "augmenting training": 4989, + "points absolute": 41065, + "network approach": 36697, + "approach predicting": 3647, + "networks directly": 36844, + "performing better": 40673, + "better feature": 6891, + "models previous": 35353, + "reduce accuracy": 45648, + "accuracy gap": 983, + "model providing": 34258, + "success neural": 53714, + "systems especially": 54488, + "new theoretical": 37344, + "structure results": 53132, + "theoretical foundation": 57020, + "encoding sentence": 17575, + "true false": 58820, + "selection experimental": 49137, + "baselines achieves": 6228, + "investigate task": 26989, + "automatically identifying": 5182, + "multimodal information": 36150, + "propose predictive": 43590, + "using long": 60778, + "analyze results": 2826, + "based real": 5976, + "new simple": 37316, + "sentiment label": 49849, + "model keeps": 34028, + "nlp techniques": 37553, + "measures degree": 32076, + "proposed evaluated": 43769, + "based metric": 5853, + "metric proposed": 33123, + "studies model": 53282, + "metrics paper": 33186, + "paper empirically": 39345, + "empirically explore": 17362, + "explore effects": 19705, + "skip connections": 51418, + "present comprehensive": 41870, + "comprehensive experiments": 9791, + "using gated": 60701, + "based novel": 5919, + "successfully train": 53749, + "new instances": 37226, + "languages develop": 28638, + "extensively used": 19920, + "learn better": 29348, + "processing models": 42892, + "information syntactic": 26110, + "auxiliary task": 5240, + "data come": 12223, + "trained text": 57893, + "learns predict": 29968, + "features given": 20591, + "data modalities": 12487, + "visual context": 61653, + "context given": 10648, + "lower layers": 31213, + "higher layers": 23829, + "used support": 60319, + "multilingual nlp": 36106, + "multilingual tasks": 36125, + "systematic survey": 54405, + "inspire future": 26402, + "research natural": 47077, + "agents able": 2062, + "lm using": 30915, + "glove word": 22860, + "idea training": 24374, + "unit gru": 59522, + "used lstm": 60228, + "prediction network": 41723, + "network designed": 36731, + "tends produce": 56216, + "produce coherent": 42976, + "use lstm": 59940, + "trained solely": 57876, + "using external": 60687, + "improvements predicting": 25092, + "art parsers": 4323, + "improvement previous": 25017, + "corpus machine": 11375, + "reasoning models": 45205, + "shown remarkable": 50746, + "context end": 10623, + "end trainable": 17717, + "promising performance": 43172, + "performance simple": 40563, + "tasks multi": 55752, + "remain challenging": 46313, + "complex interactions": 9630, + "family models": 20389, + "current progress": 12002, + "field computer": 20754, + "learning perspective": 29803, + "learned end": 29458, + "supervision signal": 54093, + "experiments significant": 19527, + "challenging tasks": 8156, + "tasks 20": 55484, + "dataset use": 13125, + "dataset datasets": 12880, + "datasets model": 13334, + "developed nlp": 14638, + "commercial use": 9156, + "based decoding": 5670, + "drop replacement": 16443, + "main contribution": 31430, + "experimental analysis": 19258, + "works different": 62884, + "min max": 33273, + "variety existing": 61271, + "kl divergence": 27378, + "entailment datasets": 18001, + "highest performance": 23854, + "performance combined": 40242, + "combined sentence": 9084, + "achieved great": 1237, + "challenges model": 8060, + "recurrent architecture": 45607, + "overall semantic": 39050, + "groups different": 23280, + "network sentiment": 36802, + "information better": 25769, + "models publicly": 35386, + "available document": 5282, + "analysis datasets": 2643, + "datasets online": 13353, + "systems large": 54543, + "number training": 38049, + "source framework": 51771, + "framework data": 21486, + "data preparation": 12554, + "evaluation methodology": 18641, + "ultimate goal": 59191, + "foster research": 21412, + "achieves overall": 1351, + "f_1 score": 20237, + "set evaluation": 50148, + "vietnamese language": 61592, + "typically employ": 59140, + "acoustic models": 1438, + "models compared": 34836, + "gaussian mixture": 22013, + "mixture models": 33422, + "based acoustic": 5555, + "resource constrained": 47212, + "different layers": 14973, + "study demonstrates": 53357, + "fewer model": 20737, + "network using": 36822, + "number model": 38018, + "adaptation data": 1522, + "data result": 12613, + "free approach": 21638, + "save time": 48534, + "time effort": 57148, + "improve text": 24933, + "standard neural": 52513, + "models bring": 34793, + "improvements tasks": 25106, + "context used": 10739, + "improves recall": 25155, + "perform qualitative": 40131, + "models lower": 35204, + "lower perplexity": 31220, + "human reasoning": 24231, + "reasoning paper": 45212, + "based memory": 5843, + "memory augmented": 32240, + "approach involves": 3578, + "neural semantic": 37092, + "accuracy previous": 1030, + "obtained single": 38223, + "datasets explore": 13266, + "languages compare": 28617, + "increase bleu": 25406, + "used languages": 60223, + "systems results": 54625, + "extensive experimentation": 19876, + "spanning multiple": 51955, + "model hierarchical": 33958, + "hierarchical representation": 23689, + "single embedding": 51298, + "character character": 8199, + "ended questions": 17738, + "intelligent agent": 26542, + "recent open": 45329, + "semantic understanding": 49370, + "generating plausible": 22387, + "novel task": 37933, + "task answer": 54903, + "list candidate": 30839, + "experiment various": 19256, + "including neural": 25282, + "high recall": 23788, + "performs competitively": 40703, + "rarely seen": 45006, + "core idea": 11149, + "idea design": 24369, + "method leverages": 32566, + "character model": 8219, + "correct translations": 11478, + "linguistics cognitive": 30820, + "raises questions": 44863, + "development advanced": 14667, + "explore effectiveness": 19704, + "improves baseline": 25115, + "bengali hindi": 6595, + "general domains": 22057, + "domains multi": 16276, + "work demonstrate": 62625, + "inference steps": 25695, + "examples different": 18896, + "performance benefit": 40214, + "reasoning process": 45217, + "models compare": 34835, + "established methods": 18356, + "methods represent": 33015, + "research fields": 47038, + "fields including": 20780, + "approaches adapt": 3755, + "tasks perform": 55795, + "methods including": 32899, + "lack annotated": 27872, + "task particularly": 55271, + "network applied": 36696, + "previously established": 42333, + "set texts": 50265, + "new high": 37218, + "english indian": 17824, + "training testing": 58295, + "representation sentences": 46579, + "sentences important": 49736, + "important text": 24783, + "tasks involve": 55697, + "propose series": 43627, + "series novel": 50067, + "learning latent": 29701, + "latent representations": 29132, + "representations sentences": 46753, + "sentence inter": 49572, + "ways using": 61845, + "sampling method": 48502, + "computational power": 9854, + "achieve fine": 1140, + "make code": 31548, + "code publicly": 8850, + "text systems": 56800, + "complex data": 9620, + "techniques data": 56072, + "natural human": 36411, + "language common": 27994, + "common way": 9210, + "way human": 61806, + "human human": 24168, + "users interact": 60468, + "general data": 22050, + "methods finally": 32865, + "opportunities future": 38512, + "research progress": 47099, + "progress text": 43118, + "task requiring": 55341, + "broader context": 7363, + "comprehension models": 9768, + "models constrained": 34854, + "context improve": 10654, + "knowledge needed": 27558, + "present submission": 42029, + "task corpus": 54981, + "results shared": 47830, + "low performance": 31163, + "need develop": 36554, + "extracting entities": 20029, + "types text": 59121, + "text important": 56624, + "entity relation": 18139, + "relied human": 46262, + "corpora training": 11251, + "pipeline systems": 40906, + "systems require": 54619, + "require additional": 46841, + "additional human": 1673, + "human expertise": 24163, + "joint extraction": 27170, + "context agnostic": 10582, + "poses unique": 41255, + "challenges task": 8078, + "novel domain": 37807, + "algorithm extract": 2276, + "joint optimization": 27182, + "problem learn": 42593, + "capture cross": 7659, + "relations experiments": 46030, + "domains news": 16279, + "news biomedical": 37390, + "improvement f1": 25005, + "entities short": 18084, + "model make": 34084, + "learning strategies": 29896, + "methods public": 33001, + "movies tv": 35900, + "methods adapt": 32737, + "adapt different": 1501, + "types entity": 59085, + "outperform current": 38789, + "methods trained": 33080, + "tasks introduce": 55694, + "introduce word": 26878, + "corpus created": 11314, + "freely accessible": 21651, + "related text": 45945, + "text explore": 56569, + "vectors capture": 61481, + "parameter tuning": 39681, + "embeddings competitive": 17098, + "results outperform": 47753, + "performance release": 40531, + "release corpus": 46148, + "corpus data": 11316, + "available hope": 5303, + "used future": 60195, + "future studies": 21896, + "paper demonstrates": 39317, + "new arabic": 37135, + "dataset trained": 13121, + "process known": 42799, + "rely human": 46288, + "leverage knowledge": 30271, + "explore method": 19713, + "method incorporate": 32538, + "improves state": 25163, + "identification systems": 24397, + "submitted results": 53584, + "languages mixed": 28729, + "mixed english": 33405, + "tagging techniques": 54754, + "work languages": 62703, + "novel problem": 37898, + "knowledge specific": 27616, + "end approach": 17612, + "answer options": 3042, + "finally approach": 20838, + "structured query": 53172, + "question human": 44732, + "historical data": 23958, + "semantically annotated": 49380, + "features train": 20684, + "demonstrate viability": 14000, + "overall approach": 39034, + "dimensional convolutional": 15227, + "representation encoder": 46508, + "receptive field": 45479, + "attains state": 4675, + "quadratic time": 44465, + "statistical power": 52760, + "power neural": 41428, + "symbolic reasoning": 54269, + "neural symbolic": 37100, + "model maps": 34092, + "presents challenge": 42075, + "challenge community": 7971, + "given large": 22757, + "general text": 22094, + "generated using": 22332, + "near future": 36504, + "experiments data": 19396, + "different rnn": 15054, + "errors produced": 18248, + "achieve level": 1166, + "level accuracy": 30056, + "huge amounts": 24069, + "amounts annotated": 2544, + "annotated text": 2923, + "model open": 34144, + "source data": 51760, + "providing novel": 44252, + "novel data": 37796, + "used work": 60354, + "data neural": 12509, + "progress past": 43109, + "bilingual sentence": 7114, + "needed training": 36605, + "training human": 58122, + "data bottleneck": 12193, + "dual learning": 16461, + "learning mechanism": 29723, + "dual task": 16462, + "dual tasks": 16463, + "generate informative": 22213, + "train translation": 57654, + "mechanism use": 32146, + "task agent": 54889, + "output model": 38986, + "reconstruction error": 45582, + "using policy": 60861, + "policy gradient": 41094, + "gradient methods": 23010, + "data 10": 12101, + "task common": 54957, + "way train": 61832, + "resulting increased": 47467, + "framework investigate": 21550, + "investigate different": 26951, + "different choices": 14861, + "translation accuracy": 58573, + "little impact": 30878, + "detection natural": 14505, + "detection approach": 14459, + "fixed size": 21080, + "method fully": 32513, + "encode sentence": 17469, + "sentence fragment": 49561, + "size representation": 51396, + "entity label": 18112, + "tasks methods": 55747, + "traditional sequence": 57543, + "presents empirical": 42081, + "empirical comparison": 17320, + "published date": 44369, + "related unrelated": 45951, + "poor performance": 41140, + "second subtask": 49025, + "multiple semantic": 36278, + "models called": 34801, + "inspired work": 26418, + "models simple": 35512, + "using fewer": 60694, + "learning settings": 29871, + "corpus results": 11422, + "various domain": 61328, + "documents large": 15892, + "large document": 28871, + "used perform": 60260, + "standard dataset": 52483, + "evaluated multiple": 18539, + "performance dataset": 40276, + "performance sentiment": 40552, + "models treat": 35633, + "allows models": 2474, + "models create": 34871, + "mentions entities": 32304, + "dialogue generation": 14773, + "discourse context": 15388, + "words experiments": 62415, + "selection text": 49156, + "text entailment": 56557, + "framework performs": 21582, + "level matching": 30158, + "using convolutional": 60628, + "particularly focus": 39881, + "focus different": 21155, + "datasets evaluate": 13252, + "functions based": 21769, + "better standard": 6968, + "present domain": 41894, + "network train": 36814, + "learning network": 29776, + "speech dataset": 52257, + "parameters trained": 39724, + "trained network": 57824, + "datasets train": 13459, + "different characteristics": 14859, + "multiple source": 36283, + "methods research": 33020, + "propose memory": 43449, + "models incorporate": 35121, + "memory mechanism": 32269, + "continuous data": 10843, + "function based": 21752, + "method baseline": 32400, + "including sentiment": 25297, + "question type": 44753, + "type classification": 59050, + "large pool": 28937, + "documents paper": 15901, + "lack standard": 27913, + "make publicly": 31590, + "provide gold": 44080, + "set entity": 50145, + "entity related": 18138, + "related articles": 45887, + "articles propose": 4475, + "directional lstms": 15284, + "encode entire": 17462, + "compared recurrent": 9446, + "temporal dependencies": 56184, + "wmt 16": 62099, + "translation achieve": 58574, + "competitive accuracy": 9537, + "accuracy state": 1052, + "results wmt": 47911, + "task models": 55220, + "obtain accuracy": 38159, + "deep lstm": 13726, + "wmt 14": 62098, + "14 english": 138, + "accuracy strong": 1055, + "language task": 28520, + "counter intuitive": 11614, + "task challenging": 54948, + "challenging humans": 8100, + "candidate words": 7582, + "used conjunction": 60125, + "level tasks": 30222, + "tasks unlike": 55950, + "dependency information": 14123, + "relationships sentences": 46083, + "embeddings fixed": 17139, + "discrimination tasks": 15441, + "training approaches": 57935, + "relatively unexplored": 46135, + "work particular": 62744, + "word classification": 62129, + "contrastive loss": 10912, + "siamese network": 50818, + "network training": 36817, + "models addition": 34692, + "use recurrent": 59990, + "models unlike": 35647, + "direct models": 15257, + "models produce": 35362, + "produce outputs": 42996, + "output distribution": 38968, + "distribution using": 15657, + "decoder experimental": 13591, + "outperform direct": 38792, + "significantly benefit": 50938, + "texts key": 56895, + "nlp problem": 37514, + "build evaluate": 7398, + "sequence framework": 49923, + "strongly outperforms": 53072, + "prior methods": 42408, + "task novel": 55243, + "scientific articles": 48755, + "furthermore work": 21844, + "work shows": 62823, + "useful text": 60391, + "representations obtained": 46728, + "level logical": 30153, + "art pre": 4357, + "methods sentence": 33032, + "data context": 12251, + "gold data": 22912, + "time use": 57235, + "similarity different": 51092, + "based observations": 5923, + "manual inspection": 31743, + "approaches suffer": 3930, + "suffer shortcomings": 53780, + "utilize large": 61097, + "order minimize": 38639, + "words resulting": 62500, + "simple mechanism": 51190, + "copy mechanism": 11133, + "able exploit": 693, + "handle vocabulary": 23419, + "algorithm exploits": 2275, + "systems submitted": 54643, + "linking el": 30834, + "consists modules": 10323, + "candidate generation": 7572, + "best achieved": 6745, + "achieved f1": 1231, + "focus text": 21206, + "platforms paper": 40954, + "model incorporates": 33991, + "online debate": 38359, + "macro average": 31402, + "data significantly": 12662, + "model design": 33752, + "accuracy english": 968, + "models showing": 35495, + "regardless language": 45796, + "suggest new": 53826, + "using gold": 60708, + "goal improve": 22888, + "achieve higher": 1154, + "performance measure": 40433, + "understanding long": 59361, + "past decades": 39931, + "models depend": 34898, + "level annotation": 30062, + "words able": 62359, + "simple models": 51195, + "obtain competitive": 38166, + "study attempt": 53329, + "attempt build": 4682, + "corpus arabic": 11280, + "corpus includes": 11360, + "news sources": 37416, + "source input": 51774, + "source texts": 51809, + "score experiments": 48844, + "correlation coefficient": 11519, + "label space": 27729, + "use target": 60039, + "ranking algorithm": 44965, + "metrics automatically": 33140, + "automatically select": 5200, + "learning text": 29910, + "unified semantic": 59478, + "model entity": 33830, + "relation embeddings": 45971, + "including entity": 25256, + "entity prediction": 18125, + "prediction relation": 41735, + "relation prediction": 45992, + "significantly consistently": 50949, + "consistently improve": 10294, + "compared baselines": 9387, + "models achieved": 34674, + "achieved success": 1277, + "models gain": 35049, + "globally normalized": 22851, + "crf models": 11765, + "models mainly": 35209, + "prediction work": 41753, + "compare model": 9347, + "model different": 33765, + "models known": 35156, + "tasks experiments": 55631, + "previously unseen": 42355, + "novel architecture": 37768, + "alternative word": 2511, + "mechanism model": 32129, + "proposed attention": 43740, + "number trainable": 38047, + "trainable parameters": 57665, + "tackle challenge": 54697, + "identify useful": 24450, + "text end": 56555, + "propose knowledge": 43428, + "knowledge enhanced": 27463, + "hybrid neural": 24321, + "model fuses": 33914, + "knowledge word": 27648, + "representations knowledge": 46697, + "units gru": 59531, + "network generate": 36748, + "model extends": 33865, + "extends existing": 19843, + "global context": 22823, + "sentences evaluation": 49712, + "matching models": 31916, + "models particularly": 35308, + "use recent": 59988, + "advances representation": 1923, + "overall task": 39051, + "model end": 33821, + "end differentiable": 17630, + "documents similar": 15913, + "measure improve": 32056, + "relevant tasks": 46238, + "tasks document": 55594, + "answering paper": 3087, + "documents topic": 15919, + "novel mechanism": 37861, + "research social": 47121, + "models solve": 35519, + "order understand": 38658, + "evaluate representations": 18499, + "extent model": 19923, + "model properties": 34244, + "multiple classifiers": 36185, + "nlp previous": 37513, + "works mainly": 62896, + "using pipeline": 60859, + "approach address": 3410, + "problem uses": 42682, + "pointer network": 41058, + "alleviate error": 2407, + "error propagation": 18223, + "propagation problem": 43246, + "utilize contextual": 61087, + "information experimental": 25847, + "paper available": 39277, + "contains approximately": 10492, + "style information": 53488, + "important topic": 24786, + "quality large": 44543, + "different corpus": 14880, + "corpus analysis": 11273, + "frequency analysis": 21669, + "data aim": 12127, + "present state": 42022, + "methods build": 32774, + "analyses examine": 2596, + "examine effect": 18862, + "necessary sufficient": 36534, + "achieving high": 1408, + "high classification": 23710, + "generative modeling": 22598, + "language state": 28503, + "performance investigate": 40400, + "linguistic perspective": 30780, + "model data": 33734, + "explicit modeling": 19621, + "crucial achieving": 11894, + "performance attention": 40201, + "providing support": 44253, + "data feature": 12361, + "designing better": 14339, + "tasks recently": 55841, + "development deep": 14674, + "model model": 34104, + "representation document": 46504, + "model generating": 33934, + "models terms": 35591, + "experiments analyze": 19351, + "key points": 27328, + "including model": 25274, + "types tasks": 59120, + "tasks argue": 55508, + "generate good": 22205, + "introduce joint": 26814, + "existing document": 19059, + "model recurrent": 34288, + "learning setup": 29872, + "learn complex": 29350, + "despite usefulness": 14402, + "models affected": 34701, + "study effect": 53364, + "shown strong": 50754, + "task similar": 55377, + "model building": 33636, + "smaller models": 51523, + "learning helps": 29668, + "research linguistics": 47066, + "length text": 30036, + "models usually": 35660, + "features feature": 20583, + "classification compared": 8443, + "tasks specifically": 55903, + "highest accuracy": 23849, + "classification fine": 8472, + "grained classification": 23026, + "news social": 37414, + "single language": 51312, + "focuses specific": 21243, + "manual curation": 31735, + "major languages": 31514, + "model detecting": 33757, + "high frequency": 23739, + "results number": 47746, + "direct use": 15261, + "use input": 59913, + "word information": 62217, + "leaf nodes": 29333, + "better representations": 6954, + "learning emerged": 29615, + "active research": 1478, + "document embedding": 15787, + "classification document": 8457, + "process produce": 42818, + "contributions paper": 10955, + "method named": 32584, + "background information": 5491, + "increasing importance": 25452, + "automatically recognized": 5199, + "specific texts": 52159, + "methods best": 32771, + "best suited": 6828, + "main reasons": 31456, + "reasons lack": 45236, + "tool support": 57366, + "open datasets": 38418, + "datasets average": 13164, + "processing time": 42958, + "time experimental": 57154, + "comparison reveals": 9505, + "best average": 6750, + "datasets available": 13162, + "best methods": 6780, + "new encoder": 37183, + "decoder approach": 13585, + "model learned": 34051, + "map input": 31794, + "vector using": 61473, + "including sentence": 25296, + "sentence prediction": 49621, + "hierarchical encoder": 23667, + "predict multiple": 41647, + "training models": 58179, + "sentence encoder": 49549, + "superiority proposed": 53953, + "competing methods": 9530, + "evaluated large": 18535, + "datasets present": 13374, + "complex words": 9672, + "test samples": 56366, + "space efficient": 51855, + "relations knowledge": 46039, + "representation knowledge": 46533, + "entities entities": 18046, + "information entities": 25837, + "models encode": 34962, + "valuable information": 61202, + "text description": 56532, + "gating mechanism": 22006, + "unified architecture": 59467, + "experiments models": 19470, + "tasks source": 55899, + "available github": 5297, + "learning bias": 29548, + "words low": 62451, + "early stages": 16515, + "written non": 63006, + "received increasing": 45258, + "years number": 63066, + "number annotated": 37981, + "approaches limited": 3863, + "consuming expensive": 10445, + "expensive work": 19224, + "propose utilize": 43697, + "utilize unlabeled": 61105, + "detection models": 14502, + "negative training": 36638, + "data introduce": 12440, + "introduce attention": 26782, + "use reinforcement": 59992, + "learning learn": 29703, + "predicted using": 41672, + "benefit learning": 6565, + "phrases different": 40850, + "different conventional": 14878, + "abstractive summarization": 772, + "previous sequence": 42276, + "seq2seq models": 49902, + "single decoder": 51294, + "seq2seq model": 49901, + "model fuse": 33913, + "specific vocabulary": 52172, + "final output": 20825, + "datasets result": 13408, + "approaches terms": 3938, + "connectionist temporal": 10182, + "temporal classification": 56181, + "allow model": 2437, + "model allows": 33562, + "generating natural": 22383, + "languages particular": 28747, + "proposed novel": 43871, + "novel approaches": 37767, + "text sequences": 56766, + "addressing problem": 1822, + "problem long": 42597, + "range dependency": 44914, + "detection human": 14490, + "focus identifying": 21169, + "content internet": 10532, + "understanding content": 59333, + "key steps": 27335, + "using available": 60579, + "available annotated": 5261, + "annotated datasets": 2890, + "datasets like": 13317, + "models fail": 35015, + "fail generalize": 20338, + "differ significantly": 14815, + "domains large": 16269, + "training robust": 58234, + "recognition models": 45514, + "models key": 35152, + "adapt models": 1505, + "available domains": 5283, + "methods effectively": 32833, + "effectively adapt": 16720, + "domains using": 16300, + "using distributed": 60662, + "analyze linguistic": 2821, + "identify key": 24426, + "linguistic insights": 30774, + "performance domains": 40304, + "methods capture": 32777, + "capture domain": 7666, + "global semantics": 22842, + "knowledge learn": 27546, + "ner models": 36679, + "previous baselines": 42246, + "baselines domain": 6254, + "approach identify": 3558, + "multilingual context": 36070, + "language expression": 28064, + "extraction pipeline": 20093, + "pos tagged": 41230, + "regular expression": 45831, + "false positives": 20383, + "demonstrated effectiveness": 14005, + "tasks tasks": 55927, + "study possible": 53433, + "models multiple": 35241, + "embeddings finally": 17137, + "use previous": 59981, + "work uses": 62854, + "small corpus": 51468, + "new neural": 37269, + "using negative": 60831, + "compare proposed": 9360, + "improving previous": 25193, + "success deep": 53697, + "reasoning requires": 45222, + "requires complex": 46919, + "recent neural": 45324, + "approaches attempted": 3769, + "typically limited": 59148, + "synthetic tasks": 54383, + "framework integrates": 21547, + "non differentiable": 37647, + "parsing dataset": 39776, + "task requires": 55339, + "requires significant": 46950, + "text wikipedia": 56844, + "wikipedia knowledge": 62049, + "effective tasks": 16702, + "memory representations": 32281, + "mimic iii": 33269, + "networks predict": 36895, + "models improved": 35112, + "bidirectional encoder": 7067, + "encoder attention": 17491, + "attention decoder": 4733, + "tokens sentence": 57335, + "adequacy fluency": 1831, + "approaches entity": 3810, + "entity identification": 18109, + "boundary detection": 7283, + "types natural": 59103, + "level representation": 30192, + "frame problem": 21440, + "architecture performs": 4076, + "models bilstm": 34789, + "linearly number": 30685, + "expensive data": 19207, + "learning aims": 29509, + "reduce cost": 45655, + "confidence scores": 10118, + "likelihood based": 30517, + "based active": 5556, + "methods shown": 33036, + "understood work": 59424, + "learning end": 29622, + "reduce number": 45676, + "number samples": 38034, + "random sampling": 44887, + "inspired recent": 26412, + "explore ways": 19754, + "model highly": 33961, + "language level": 28136, + "maintaining performance": 31496, + "processing word": 42967, + "languages high": 28684, + "able correctly": 684, + "present training": 42043, + "data sample": 12619, + "semantically correct": 49383, + "model fits": 33900, + "literature propose": 30861, + "benchmarks approach": 6510, + "typically requires": 59156, + "accuracy result": 1038, + "good margin": 22934, + "analysis work": 2795, + "daily life": 12085, + "linguistic markers": 30777, + "methods text": 33072, + "classification topic": 8576, + "modeling text": 34631, + "analysis applied": 2614, + "personal stories": 40759, + "techniques word": 56152, + "crowd sourcing": 11882, + "combined word": 9089, + "resources wordnet": 47340, + "intrinsic evaluations": 26769, + "applications word": 3259, + "community question": 9272, + "yes questions": 63087, + "stage framework": 52431, + "framework perform": 21581, + "leverage existing": 30267, + "pu learning": 44301, + "positive unlabeled": 41300, + "unlabeled examples": 59573, + "binary classifier": 7148, + "using distant": 60659, + "learning help": 29667, + "answers using": 3113, + "social platforms": 51599, + "information multi": 25978, + "turn dialogue": 58989, + "specifically propose": 52222, + "propose hierarchical": 43409, + "softmax classifier": 51630, + "classification evaluate": 8464, + "classification real": 8529, + "method capture": 32411, + "capture contextual": 7656, + "systems recently": 54615, + "encoding bpe": 17563, + "paper presented": 39467, + "work applying": 62570, + "main idea": 31443, + "improve performances": 24904, + "performance highly": 40374, + "models computing": 34843, + "representations specifically": 46760, + "monolingual multilingual": 35805, + "allows perform": 2475, + "perform unsupervised": 40158, + "training embeddings": 58082, + "semantic compositionality": 49252, + "trained unsupervised": 57907, + "multilingual embeddings": 36081, + "multiple variants": 36309, + "information methods": 25971, + "methods process": 32993, + "process context": 42764, + "methods incorporate": 32900, + "methods fine": 32867, + "modeling process": 34614, + "global local": 22834, + "report accuracy": 46424, + "improved classification": 24945, + "model future": 33916, + "using global": 60707, + "based order": 5930, + "achieve improvement": 1164, + "english social": 17874, + "media websites": 32190, + "strategy using": 52955, + "release dataset": 46151, + "finally identify": 20863, + "recently attention": 45407, + "plays key": 41001, + "score function": 48845, + "decoding step": 13646, + "model greatly": 33947, + "greatly increases": 23235, + "complexity paper": 9686, + "proposing novel": 43947, + "attention framework": 4751, + "model step": 34411, + "step experiments": 52806, + "conventional attention": 11001, + "networks attention": 36832, + "attention mechanisms": 4784, + "different dimensions": 14900, + "dataset perform": 13025, + "uses large": 60518, + "set linguistic": 50185, + "morphological complexity": 35840, + "linguistics research": 30824, + "tense aspect": 56220, + "everyday language": 18803, + "language usage": 28566, + "paper aim": 39260, + "aim analyze": 2136, + "vocabulary used": 61717, + "200 000": 234, + "based tool": 6099, + "media users": 32188, + "derived large": 14201, + "labeling framework": 27785, + "representations proposed": 46744, + "accuracy trained": 1064, + "approach train": 3723, + "language systems": 28516, + "learning relies": 29839, + "study learning": 53405, + "word meanings": 62246, + "better reflect": 6951, + "simple strategy": 51212, + "document aligned": 15766, + "address challenges": 1747, + "challenges applying": 8031, + "solve challenges": 51676, + "multiple translations": 36306, + "selection models": 49147, + "pairs experimental": 39187, + "models multilingual": 35237, + "predictions language": 41761, + "contrast propose": 10887, + "using continuous": 60623, + "improve inference": 24864, + "inference language": 25664, + "multilingual language": 36088, + "approaches automatic": 3771, + "manner paper": 31721, + "provide depth": 44047, + "evaluation existing": 18616, + "carefully designed": 7762, + "experiments explore": 19435, + "based evaluations": 5710, + "evaluations results": 18769, + "metrics work": 33209, + "international conference": 26695, + "score 79": 48814, + "systems participated": 54584, + "media content": 32163, + "substantial research": 53630, + "ensemble learning": 17975, + "information detect": 25809, + "accuracy significantly": 1047, + "majority baseline": 31526, + "emotions expressed": 17303, + "applied natural": 3284, + "come cost": 9128, + "interpretability paper": 26717, + "effects model": 16826, + "set input": 50171, + "model decision": 33738, + "comprehensive analysis": 9782, + "multiple nlp": 36256, + "linguistic feature": 30770, + "prediction proposed": 41732, + "methodology offers": 32719, + "model decisions": 33739, + "analysis neural": 2706, + "learning sequence": 29869, + "unsupervised task": 59739, + "task supervised": 55425, + "final layer": 20822, + "task auxiliary": 54926, + "task architecture": 54910, + "shows improvements": 50786, + "percentage points": 40053, + "points f1": 41072, + "problem computational": 42522, + "training corpora": 57961, + "grained fine": 23035, + "score 76": 48811, + "neural method": 36969, + "method transfer": 32689, + "learning source": 29887, + "tasks aspects": 55510, + "target labels": 54822, + "class labels": 8406, + "labels documents": 27817, + "select relevant": 49110, + "applied target": 3299, + "adversarial training": 1988, + "different baselines": 14849, + "baselines model": 6279, + "rapid growth": 44990, + "growth social": 23310, + "need automated": 36547, + "limited work": 30634, + "work reported": 62806, + "model developed": 33761, + "rich source": 48123, + "increase accuracy": 25404, + "taken account": 54773, + "does directly": 15943, + "experiment model": 19242, + "model english": 33823, + "data german": 12390, + "data jointly": 12443, + "jointly modeling": 27207, + "additionally investigate": 1724, + "finally study": 20881, + "structured attention": 53152, + "attention neural": 4799, + "models incorporating": 35122, + "structural information": 53079, + "information propagation": 26028, + "treebank dataset": 58763, + "model persian": 34208, + "persian english": 40745, + "persian language": 40746, + "language best": 27977, + "hyper parameters": 24327, + "persian dataset": 40744, + "enhance word": 17928, + "alignment model": 2375, + "entity entity": 18105, + "entity embeddings": 18104, + "investigate state": 26986, + "aware model": 5461, + "entity level": 18114, + "complementary information": 9589, + "baseline fine": 6169, + "information entity": 25838, + "entity descriptions": 18100, + "improves multi": 25138, + "entities paper": 18070, + "response generation": 47392, + "learning especially": 29628, + "framework propose": 21585, + "evaluation propose": 18683, + "models online": 35276, + "human judgement": 24180, + "results modeling": 47727, + "sentence matching": 49599, + "phrase representation": 40843, + "single framework": 51304, + "data evaluation": 12331, + "standard practice": 52516, + "method build": 32406, + "training experiment": 58098, + "demonstrate modeling": 13944, + "crowd workers": 11883, + "level quality": 30185, + "quality domain": 44512, + "data scale": 12622, + "supervision propose": 54089, + "learning mtl": 29767, + "tasks shared": 55883, + "task learn": 55167, + "learn mapping": 29396, + "mtl model": 35932, + "model tested": 34454, + "various levels": 61355, + "asr model": 4558, + "model results": 34321, + "especially useful": 18311, + "novel decoding": 37801, + "decoding approach": 13627, + "based continuous": 5646, + "using gradient": 60711, + "right right": 48142, + "right left": 48140, + "leads substantial": 29331, + "models typical": 35638, + "end conduct": 17621, + "evaluation compare": 18591, + "produced state": 43021, + "systems language": 54541, + "language directions": 28029, + "error categories": 18217, + "systems neural": 54570, + "models conditional": 34844, + "perform tasks": 40153, + "analysis speech": 2765, + "nlp researchers": 37523, + "called textit": 7555, + "detection identify": 14491, + "category based": 7861, + "preliminary work": 41809, + "limited task": 30623, + "compared simple": 9452, + "simple bag": 51138, + "based skip": 6039, + "higher probability": 23839, + "accuracy time": 1062, + "model distinguish": 33779, + "resources present": 47327, + "goal identify": 22887, + "improvements multiple": 25083, + "multiple baselines": 36171, + "use specific": 60023, + "representations improves": 46686, + "identifying important": 24459, + "representations especially": 46654, + "matching approach": 31909, + "efficient transfer": 16905, + "methods training": 33081, + "network long": 36761, + "learning schemes": 29860, + "small user": 51511, + "user data": 60407, + "methods especially": 32841, + "dialogue data": 14770, + "methods successfully": 33058, + "aspects paper": 4550, + "different domain": 14904, + "modeling using": 34634, + "using fine": 60695, + "ensemble models": 17980, + "similar gains": 51043, + "able outperform": 709, + "understanding nlu": 59373, + "nlu tasks": 37571, + "tasks shallow": 55882, + "semantic slot": 49351, + "current deep": 11969, + "labeling problem": 27789, + "labels paper": 27843, + "alternative approach": 2497, + "promote development": 43190, + "resources language": 47309, + "availability large": 5252, + "large parallel": 28932, + "report performance": 46441, + "use text": 60046, + "ones obtained": 38340, + "pearson correlation": 40005, + "analysis indicates": 2682, + "based interactions": 5790, + "based specific": 6051, + "task related": 55328, + "based distribution": 5686, + "approaches data": 3792, + "difficulty task": 15202, + "task lack": 55158, + "competing approaches": 9529, + "sentiments expressed": 49870, + "address propose": 1794, + "propose augment": 43304, + "datasets systems": 13450, + "use traditional": 60052, + "features perform": 20640, + "handle complex": 23407, + "structured data": 53153, + "benefit various": 6575, + "benefits using": 6590, + "text relations": 56733, + "new cross": 37159, + "participants asked": 39813, + "based english": 5703, + "computer generated": 9889, + "corpus provided": 11413, + "provided dataset": 44160, + "contain errors": 10461, + "hurt performance": 24304, + "different input": 14956, + "effectively improve": 16739, + "translation errors": 58606, + "additionally method": 1725, + "knowledge target": 27624, + "modal attention": 33453, + "different parts": 15020, + "decoder hidden": 13597, + "different strategies": 15081, + "features compare": 20541, + "impact adding": 24589, + "models report": 35436, + "evaluated data": 18526, + "domain dialogue": 16047, + "trained produce": 57843, + "indistinguishable human": 25561, + "dialogue utterances": 14795, + "cast task": 7823, + "problem jointly": 42589, + "jointly train": 27221, + "train systems": 57642, + "generated ones": 22305, + "number potential": 38028, + "adversarial evaluation": 1968, + "evaluation demonstrate": 18606, + "adversarially trained": 1994, + "baselines introduce": 6273, + "simple general": 51174, + "generate outputs": 22227, + "length model": 30030, + "based token": 6097, + "token generation": 57292, + "summarization machine": 53888, + "bleu rouge": 7211, + "rouge scores": 48355, + "given collection": 22726, + "semantically relevant": 49390, + "annotations model": 2994, + "word categories": 62123, + "aims make": 2204, + "comparable models": 9299, + "english corpora": 17789, + "benchmark corpus": 6439, + "trained sequence": 57868, + "quality proposed": 44567, + "users social": 60482, + "purpose training": 44413, + "hate speech": 23480, + "speech detection": 52259, + "training phase": 58208, + "corpora build": 11181, + "build accurate": 7385, + "candidate selection": 7578, + "measured bleu": 32068, + "increase quality": 25422, + "important various": 24790, + "various fields": 61343, + "fields natural": 20783, + "processing recent": 42930, + "corpora proposed": 11235, + "corpus experiments": 11339, + "apply state": 3353, + "art techniques": 4425, + "techniques different": 56078, + "way combine": 61797, + "produce better": 42975, + "propose different": 43353, + "models examine": 34981, + "novel combination": 37784, + "models various": 35669, + "models experiment": 34992, + "learning ssl": 29891, + "understanding slu": 59400, + "model adapt": 33531, + "utilizing knowledge": 61124, + "graph document": 23128, + "descent sgd": 14210, + "selection techniques": 49155, + "chi square": 8287, + "attempt explore": 4686, + "research method": 47072, + "contextual similarity": 10783, + "importance word": 24695, + "lack resources": 27911, + "research word": 47142, + "generate word": 22263, + "testing different": 56405, + "hong kong": 23999, + "demonstrate importance": 13920, + "media language": 32169, + "popular natural": 41174, + "nlp aims": 37460, + "text task": 56805, + "supervised based": 53964, + "based conditional": 5635, + "order tackle": 38655, + "english bengali": 17779, + "able successfully": 727, + "labels given": 27830, + "given code": 22725, + "sentence experiments": 49557, + "pairs domains": 39182, + "addresses task": 1817, + "generation leveraging": 22484, + "training graph": 58117, + "using heuristic": 60724, + "generate output": 22226, + "sentences evaluated": 49711, + "far paper": 20404, + "multilingual cross": 36074, + "lingual data": 30696, + "based assumption": 5578, + "documents written": 15931, + "new tasks": 37336, + "tasks respectively": 55860, + "main ideas": 31444, + "relations document": 46023, + "leverage multilingual": 30279, + "multilingual resources": 36115, + "target entities": 54816, + "remains difficult": 46331, + "available facilitate": 5293, + "information analysis": 25760, + "provide important": 44088, + "important insights": 24736, + "quality control": 44500, + "based user": 6122, + "interface provides": 26661, + "provides overview": 44218, + "human knowledge": 24185, + "knowledge recent": 27586, + "learning ml": 29739, + "ml natural": 33432, + "learning networks": 29777, + "networks paper": 36888, + "survey aims": 54202, + "texts present": 56912, + "corpus introduce": 11363, + "probabilistic topic": 42468, + "challenges posed": 8068, + "specific example": 52082, + "methods understanding": 33091, + "understanding nature": 59372, + "attention networks": 4798, + "structural dependencies": 53077, + "end training": 17720, + "training work": 58316, + "work experiment": 62654, + "chain conditional": 7957, + "model models": 34105, + "networks outperform": 36887, + "models variety": 35668, + "synthetic real": 54380, + "way learn": 61816, + "attention propose": 4814, + "modal data": 33456, + "model advantage": 33547, + "languages improve": 28691, + "function training": 21760, + "sentence ranking": 49628, + "task additional": 54885, + "strong improvements": 53032, + "temporal characteristics": 56180, + "additional knowledge": 1678, + "used discover": 60153, + "sources work": 51842, + "entities mentioned": 18064, + "articles present": 4474, + "similar text": 51073, + "resources like": 47311, + "trained based": 57679, + "gap language": 21966, + "limitations existing": 30548, + "approach adapting": 3407, + "adapting existing": 1564, + "method work": 32709, + "noisy data": 37614, + "reduce noise": 45675, + "information terms": 26116, + "comparable better": 9290, + "art benchmark": 4226, + "representations encode": 46649, + "task jointly": 55151, + "products services": 43058, + "task joint": 55150, + "user ratings": 60444, + "data compared": 12226, + "presents simple": 42104, + "simple robust": 51205, + "shows competitive": 50769, + "results respect": 47812, + "domain based": 16023, + "available given": 5301, + "student network": 53214, + "better learning": 6909, + "teacher network": 55994, + "sentence neural": 49605, + "smaller model": 51521, + "process demonstrate": 42769, + "data filtering": 12362, + "filtering method": 20812, + "knowledge teacher": 27628, + "training leads": 58153, + "performance given": 40367, + "search strategy": 48986, + "reach better": 45044, + "decoding speed": 13644, + "driven models": 16431, + "models excel": 34984, + "task test": 55433, + "called cross": 7542, + "proposes neural": 43936, + "shows promise": 50794, + "architectures task": 4124, + "model external": 33869, + "study based": 53334, + "cloze task": 8725, + "linear classifier": 30651, + "context addition": 10580, + "model predictions": 34224, + "reaches state": 45057, + "demonstrate different": 13890, + "different task": 15093, + "way people": 61825, + "revolutionized field": 48062, + "widely explored": 61996, + "handle various": 23418, + "invariant features": 26918, + "representative nlp": 46800, + "study popular": 53432, + "weighting schemes": 61936, + "optimal performance": 38530, + "set used": 50273, + "best match": 6778, + "weighting scheme": 61935, + "scheme used": 48731, + "best overall": 6787, + "especially used": 18310, + "performance general": 40361, + "layer recurrent": 29205, + "model temporal": 34448, + "knowledge input": 27527, + "input signal": 26335, + "carry depth": 7775, + "semantic aspects": 49237, + "method obtaining": 32592, + "accurate models": 1081, + "fail fully": 20337, + "large context": 28861, + "run parallel": 48402, + "entire documents": 18024, + "parameter sharing": 39678, + "training procedures": 58216, + "accuracy comparable": 947, + "entire document": 18023, + "important goal": 24729, + "embeddings evaluated": 17129, + "focus word": 21213, + "propose evaluation": 43377, + "focus data": 21151, + "data efficiency": 12312, + "available data": 5276, + "comprehensive evaluation": 9788, + "complete picture": 9600, + "brings new": 7342, + "new insight": 37224, + "unsupervised language": 59703, + "agnostic method": 2093, + "substantial amounts": 53616, + "health related": 23518, + "information social": 26091, + "provides opportunity": 44217, + "opportunity study": 38514, + "particular study": 39862, + "identify potentially": 24437, + "employ simple": 17390, + "simple rule": 51206, + "supervised classifier": 53969, + "hand annotated": 23383, + "using user": 61012, + "available sources": 5368, + "graph representations": 23163, + "end model": 17685, + "learns latent": 29963, + "objective experiments": 38088, + "models performance": 35318, + "ensemble model": 17978, + "standard english": 52489, + "translation dataset": 58596, + "recognition text": 45545, + "constructed large": 10412, + "different dataset": 14888, + "ground truths": 23258, + "make datasets": 31562, + "datasets publicly": 13388, + "advantage neural": 1944, + "weight sharing": 61920, + "model compression": 33686, + "knowledge neural": 27559, + "consistently yields": 10313, + "compared baseline": 9383, + "models proven": 35377, + "result models": 47441, + "black boxes": 7194, + "learned patterns": 29472, + "demonstrate new": 13951, + "text attributes": 56443, + "challenging text": 8160, + "level deep": 30098, + "used compute": 60121, + "end manner": 17682, + "using propagation": 60878, + "maximize likelihood": 31957, + "propose variants": 43699, + "limited understanding": 30629, + "automatically detects": 5160, + "time span": 57218, + "chat logs": 8261, + "different users": 15119, + "benefit future": 6562, + "future exploration": 21874, + "detection methods": 14500, + "finally obtain": 20871, + "overall f1": 39039, + "corpus recent": 11416, + "evidence humans": 18811, + "factors affect": 20305, + "affect human": 2015, + "sense knowledge": 49486, + "model estimates": 33839, + "second study": 49024, + "syntactic representation": 54318, + "applications recent": 3244, + "limited english": 30584, + "dependency graphs": 14122, + "handling complex": 23424, + "forms language": 21376, + "multilingual evaluation": 36084, + "languages datasets": 28634, + "datasets english": 13248, + "ability represent": 641, + "negation scope": 36613, + "investigate possibility": 26974, + "logic representation": 30980, + "using universal": 61008, + "similar texts": 51074, + "representations evaluate": 46656, + "entity semantic": 18147, + "correlation scores": 11529, + "rate compared": 45013, + "entity embedding": 18103, + "representations addition": 46614, + "accuracy scores": 1042, + "methods operate": 32968, + "representation using": 46601, + "performance reported": 40534, + "specific natural": 52115, + "comparison approaches": 9490, + "concepts entities": 9933, + "contains million": 10499, + "person organization": 40751, + "different mentions": 14987, + "evaluated performance": 18542, + "performance based": 40207, + "based concept": 5633, + "approaches performance": 3895, + "higher state": 23846, + "propose hybrid": 43411, + "approach encourages": 3510, + "incorporate linguistic": 25358, + "prior training": 42417, + "developed deep": 14628, + "task question": 55314, + "new question": 37295, + "main task": 31461, + "achieving better": 1396, + "set approach": 50108, + "produces higher": 43030, + "engineering approaches": 17766, + "approaches state": 3925, + "linear transformation": 30675, + "using dictionaries": 60651, + "pairs improve": 39195, + "set composed": 50121, + "robust noise": 48259, + "languages achieving": 28593, + "set finally": 50158, + "finally extend": 20859, + "extend method": 19824, + "based cross": 5660, + "annotations english": 2989, + "meaning preserving": 32011, + "preserving semantic": 42126, + "consists main": 10322, + "main steps": 31460, + "segmentation text": 49089, + "based discourse": 5681, + "trained semi": 57860, + "new parallel": 37278, + "developing evaluating": 14652, + "language proficiency": 28445, + "original text": 38731, + "systems corpus": 54462, + "identifying specific": 24469, + "need new": 36585, + "features deep": 20554, + "architectures achieve": 4102, + "achieve robust": 1188, + "representations provide": 46745, + "validate hypothesis": 61179, + "features additionally": 20520, + "low medium": 31159, + "medium high": 32217, + "challenging real": 8132, + "relative improvements": 46102, + "proposed features": 43776, + "shown effectiveness": 50703, + "effectiveness using": 16821, + "segmentation models": 49085, + "rely large": 46292, + "data effective": 12309, + "resource datasets": 47220, + "insufficient training": 26494, + "propose transfer": 43679, + "corpora train": 11250, + "model high": 33959, + "use learned": 59931, + "learned knowledge": 29462, + "train student": 57640, + "model low": 34076, + "resource data": 47219, + "data experiment": 12342, + "results work": 47913, + "work significantly": 62824, + "datasets machine": 13322, + "translated sentence": 58557, + "sets compared": 50285, + "set accuracy": 50101, + "accuracy challenging": 941, + "individual data": 25565, + "data points": 12546, + "data point": 12545, + "examine impact": 18864, + "methods human": 32890, + "patterns experiments": 39968, + "experiments natural": 19477, + "inference nli": 25674, + "examples neural": 18918, + "task data": 54991, + "sparsity issue": 51979, + "issue paper": 27070, + "tackle data": 54703, + "baseline neural": 6193, + "extra linguistic": 19963, + "study proposes": 53446, + "containing multiple": 10485, + "model identifying": 33971, + "labeling approach": 27777, + "dataset consists": 12861, + "open access": 38410, + "annotation model": 2957, + "rnn long": 48199, + "representations computed": 46628, + "input layer": 26291, + "feature rich": 20501, + "model furthermore": 33912, + "extraction scientific": 20106, + "corpus recently": 11417, + "address different": 1755, + "paper possible": 39442, + "human designed": 24134, + "able obtain": 708, + "style analysis": 53481, + "fake news": 20371, + "presents large": 42088, + "articles manually": 4469, + "corpus contains": 11308, + "meta learning": 32337, + "news detection": 37400, + "results important": 47668, + "presents systematic": 42108, + "results additional": 47492, + "model addition": 33539, + "requires manual": 46941, + "settings present": 50390, + "efficiency accuracy": 16838, + "responses given": 47405, + "dialogue models": 14780, + "models experiments": 34995, + "diverse outputs": 15711, + "explore simple": 19736, + "solution multi": 51656, + "architecture training": 4092, + "simply concatenate": 51249, + "sentences form": 49725, + "source languages": 51780, + "provide insights": 44093, + "information scenario": 26073, + "train supervised": 57641, + "high reliability": 23789, + "investigate ways": 26996, + "optimize model": 38562, + "achieve desired": 1132, + "value pairs": 61209, + "train recurrent": 57624, + "generate textual": 22256, + "generates sentences": 22356, + "achieves bleu": 1310, + "baseline human": 6175, + "human preference": 24216, + "evaluation suggests": 18731, + "manual analysis": 31730, + "classification ctc": 8448, + "labeling methods": 27786, + "models models": 35229, + "paper train": 39597, + "encoder used": 17548, + "used pretrain": 60268, + "learning joint": 29688, + "specialized domain": 52032, + "present systems": 42035, + "complexity proposed": 9687, + "proposed tasks": 43909, + "worst case": 62977, + "crucial understand": 11916, + "requires considerable": 46920, + "considerable human": 10230, + "human supervision": 24247, + "near real": 36511, + "specifically focus": 52204, + "increased accuracy": 25428, + "baseline method": 6182, + "learning computational": 29567, + "computational approach": 9833, + "approach investigate": 3577, + "learning mechanisms": 29724, + "sensitive different": 49497, + "bias model": 7034, + "model knowledge": 34030, + "learning scenarios": 29858, + "learning examples": 29631, + "examples paper": 18920, + "task word": 55471, + "models despite": 34904, + "able recover": 721, + "information single": 26090, + "vector model": 61457, + "broad set": 7354, + "generate training": 22259, + "generated training": 22328, + "set labels": 50178, + "major drawbacks": 31508, + "use hand": 59904, + "mentions context": 32303, + "micro f1": 33224, + "model dataset": 33736, + "systems approaches": 54433, + "transferring knowledge": 58436, + "knowledge improve": 27521, + "high overall": 23757, + "extend state": 19831, + "yields improvement": 63125, + "propose information": 43416, + "spanish tweets": 51949, + "lexical level": 30370, + "tend use": 56209, + "obtained different": 38207, + "work suggests": 62833, + "languages low": 28717, + "subword level": 53685, + "substantially better": 53632, + "models highly": 35090, + "target training": 54853, + "combining multiple": 9117, + "multiple related": 36274, + "compensate lack": 9520, + "learning proposed": 29825, + "model relatively": 34302, + "network recurrent": 36793, + "recurrent layer": 45616, + "human learning": 24195, + "learning single": 29881, + "single feature": 51303, + "non recurrent": 37678, + "recurrent models": 45619, + "models capturing": 34806, + "requires use": 46957, + "paradigm shift": 39629, + "limited paper": 30601, + "features outperforms": 20636, + "outperforms complex": 38889, + "complex neural": 9643, + "models detecting": 34906, + "models offer": 35274, + "rich contextual": 48095, + "contextual semantics": 10781, + "occurrence patterns": 38276, + "provide effective": 44058, + "open research": 38444, + "research questions": 47108, + "propose solution": 43639, + "models joint": 35149, + "latent space": 29136, + "generated synthetic": 22323, + "inspired observation": 26408, + "selection process": 49149, + "goal work": 22906, + "end design": 17628, + "making process": 31664, + "approaches widely": 3954, + "significant accuracy": 50846, + "accuracy improvement": 990, + "improvement especially": 25003, + "cnns used": 8780, + "used existing": 60176, + "layers deep": 29220, + "capture human": 7679, + "residual connections": 47187, + "combination different": 9040, + "chat data": 8260, + "compared widely": 9473, + "respectively present": 47377, + "robust approach": 48239, + "pairs used": 39227, + "used single": 60304, + "manual evaluations": 31740, + "able consistently": 681, + "consistently outperform": 10298, + "increasingly used": 25478, + "datasets shows": 13426, + "networks achieve": 36828, + "achieve accurate": 1110, + "accurate language": 1079, + "classification performances": 8515, + "text best": 56458, + "popular method": 41170, + "local semantic": 30949, + "accuracy rate": 1033, + "obtained training": 38227, + "investigate using": 26994, + "soft labels": 51623, + "improve generalization": 24859, + "training deep": 58056, + "tuning approach": 58899, + "labels provided": 27847, + "true label": 58821, + "improved generalization": 24949, + "nli task": 37455, + "label training": 27733, + "performance baselines": 40210, + "reasoning understanding": 45231, + "teacher student": 55995, + "machines understand": 31400, + "understand text": 59314, + "sentence text": 49657, + "observed text": 38149, + "learning experimental": 29634, + "input prior": 26318, + "uses pipeline": 60527, + "pipeline method": 40902, + "proposes approach": 43930, + "approach identifies": 3557, + "jointly solve": 27220, + "solve issue": 51679, + "conversations present": 11060, + "knowledge information": 27524, + "forms words": 21378, + "common concepts": 9169, + "explicitly stated": 19648, + "access data": 819, + "knowledge guide": 27510, + "guide model": 23338, + "medical terms": 32209, + "aim work": 2162, + "based importance": 5779, + "new unsupervised": 37356, + "heterogeneous information": 23622, + "topic coherence": 57397, + "levels semantic": 30247, + "semantic types": 49369, + "expert annotated": 19569, + "identification performance": 24394, + "metrics performance": 33188, + "performance relatively": 40530, + "learning robust": 29854, + "readily applied": 45077, + "applied domains": 3270, + "use terms": 60044, + "importance scores": 24690, + "speed training": 52325, + "research mainly": 47070, + "algorithms propose": 2335, + "propose generic": 43403, + "easy implement": 16562, + "learning research": 29844, + "use pre": 59975, + "vocabulary tokens": 61716, + "architectural choices": 4018, + "final performance": 20826, + "systematically explore": 54412, + "provide recommendations": 44120, + "area paper": 4145, + "resources improve": 47305, + "augmented training": 4983, + "corpus vocabulary": 11456, + "detailed error": 14422, + "hindi marathi": 23942, + "improve coverage": 24836, + "works paper": 62900, + "method propose": 32625, + "designed test": 14333, + "approach comparing": 3459, + "target entity": 54817, + "tweet text": 59007, + "stance detection": 52455, + "perform classification": 40074, + "detection accuracy": 14455, + "finally perform": 20874, + "model construct": 33707, + "word selection": 62294, + "language easily": 28039, + "languages having": 28683, + "significant challenges": 50854, + "reasonable results": 45174, + "present solution": 42019, + "using lexical": 60767, + "results achieve": 47485, + "kind data": 27366, + "difficult propose": 15183, + "used augment": 60095, + "explicit memory": 19620, + "apply model": 3336, + "tasks achieve": 55487, + "benchmarks including": 6527, + "including cnn": 25244, + "qa tasks": 44460, + "examples task": 18935, + "model encode": 33817, + "people paper": 40033, + "semantic differences": 49269, + "groups people": 23282, + "apply approach": 3321, + "different regions": 15046, + "systems today": 54655, + "models long": 35201, + "suffer problem": 53777, + "propose lightweight": 43442, + "learn robust": 29417, + "datasets low": 13320, + "environment paper": 18172, + "approaches methods": 3873, + "methods explored": 32856, + "scheme uses": 48732, + "discriminative features": 15443, + "positive samples": 41296, + "samples training": 48492, + "using nearest": 60830, + "using ground": 60718, + "data effectiveness": 12311, + "artificial language": 4495, + "experimental data": 19260, + "complex context": 9618, + "micro level": 33226, + "macro level": 31409, + "vectors language": 61489, + "similarity propose": 51114, + "detection method": 14499, + "using range": 60893, + "contrast state": 10889, + "completely unsupervised": 9607, + "experiments publicly": 19504, + "compared strong": 9460, + "strong supervised": 53053, + "supervised baselines": 53966, + "baselines approach": 6233, + "paper work": 39607, + "systems robust": 54626, + "systems effectively": 54483, + "human robot": 24237, + "method help": 32522, + "work consider": 62608, + "software development": 51637, + "broad coverage": 7352, + "used language": 60222, + "demonstrate effect": 13892, + "model prove": 34253, + "systems deployed": 54474, + "expensive train": 19222, + "prohibitively expensive": 43128, + "scale analysis": 48551, + "report empirical": 46431, + "novel insights": 37842, + "release open": 46162, + "enables researchers": 17447, + "researchers easily": 47153, + "novel techniques": 37937, + "networks task": 36915, + "labels available": 27810, + "propose extensions": 43384, + "dynamic memory": 16487, + "proposed extensions": 43772, + "art end": 4255, + "single task": 51342, + "research previous": 47096, + "based pattern": 5935, + "approach parsing": 3635, + "scale study": 48627, + "structure called": 53092, + "efficient framework": 16875, + "massive corpora": 31881, + "generates high": 22342, + "supervised baseline": 53965, + "using features": 60693, + "encoding strategies": 17577, + "performed better": 40660, + "accuracy 72": 906, + "ranking 3rd": 44963, + "official evaluation": 38307, + "study natural": 53416, + "linguistic competence": 30754, + "fundamental question": 21789, + "article introduce": 4451, + "arguments given": 4180, + "modular framework": 35745, + "sequence attention": 49911, + "significantly accurate": 50932, + "define novel": 13778, + "based formulation": 5739, + "random noise": 44884, + "data considered": 12239, + "self paced": 49202, + "high noise": 23754, + "study contributes": 53349, + "recent development": 45302, + "development large": 14682, + "research end": 47028, + "increasingly complex": 25470, + "neural baseline": 36940, + "development neural": 14691, + "building high": 7446, + "high performing": 23761, + "models argue": 34726, + "categories including": 7844, + "data problem": 12563, + "difficult maintain": 15175, + "sequence generative": 49930, + "generative adversarial": 22584, + "sub models": 53522, + "generator discriminator": 22616, + "discriminator generator": 15452, + "aims generate": 2195, + "generate sentences": 22243, + "human translated": 24248, + "generated sentences": 22316, + "sentences human": 49733, + "generation high": 22472, + "evaluate generated": 18461, + "model consistently": 33702, + "art transformer": 4432, + "german chinese": 22663, + "2017 task": 269, + "types knowledge": 59096, + "order enhance": 38614, + "results entity": 47613, + "entity retrieval": 18146, + "combined using": 9088, + "achieved best": 1219, + "accuracy average": 936, + "code mixing": 8831, + "switching languages": 54261, + "does necessarily": 15961, + "speaker aware": 51996, + "speech technologies": 52307, + "based social": 6043, + "propose context": 43333, + "used score": 60294, + "rank correlation": 44950, + "competitive existing": 9548, + "existing baseline": 19038, + "reported literature": 46451, + "truth data": 58835, + "provide best": 44019, + "narrative texts": 36384, + "corpus 000": 11262, + "10 different": 40, + "different scenarios": 15058, + "respectively additionally": 47360, + "coreference information": 11159, + "corpus shows": 11430, + "rich lexical": 48109, + "knowledge natural": 27556, + "popular research": 41183, + "powerful tools": 41449, + "learning current": 29577, + "chit chat": 8330, + "using end": 60675, + "end architectures": 17615, + "introduce deep": 26797, + "grounded task": 23263, + "oriented dialogues": 38698, + "gradient algorithm": 23002, + "tested dataset": 56394, + "model performed": 34199, + "semeval 2017": 49430, + "lingual semantic": 30725, + "employs attention": 17404, + "similarity paper": 51112, + "spanish arabic": 51937, + "similarity dataset": 51090, + "dataset best": 12828, + "knowledge dataset": 27433, + "recent papers": 45331, + "shown neural": 50730, + "networks obtain": 36885, + "specific feature": 52083, + "tasks large": 55713, + "source task": 51807, + "target task": 54847, + "available annotations": 5262, + "studied tasks": 53237, + "lstm rnns": 31282, + "task like": 55188, + "like speech": 30506, + "perform large": 40118, + "training framework": 58109, + "layer wise": 29213, + "wise training": 62086, + "moving average": 35902, + "successfully trained": 53750, + "outperform deep": 38791, + "shallow model": 50440, + "model recognition": 34283, + "trained proposed": 57844, + "proposed training": 43917, + "compared original": 9428, + "original model": 38720, + "minimum bayes": 33303, + "bayes risk": 6353, + "small dataset": 51470, + "dataset outperform": 13020, + "change detection": 8169, + "dialog modeling": 14758, + "modeling paper": 34609, + "differs existing": 15152, + "audio based": 4925, + "various scenarios": 61387, + "attention experimental": 4746, + "propose supervised": 43654, + "supervised algorithm": 53960, + "type embeddings": 59053, + "embeddings algorithm": 17081, + "achieves near": 1347, + "task outperforming": 55259, + "manually curated": 31773, + "finally use": 20885, + "use embeddings": 59873, + "mechanism used": 32147, + "decoder uses": 13620, + "sentence parts": 49617, + "customer feedback": 12055, + "like social": 30504, + "prediction model": 41719, + "model objective": 34134, + "classification approaches": 8436, + "approaches study": 3928, + "study aims": 53322, + "manual labelling": 31746, + "time propose": 57200, + "context generation": 10647, + "representations attention": 46620, + "performance evidence": 40328, + "dataset bias": 12830, + "common space": 9201, + "using speech": 60958, + "vice versa": 61573, + "use image": 59911, + "words multi": 62461, + "labels train": 27853, + "able predict": 713, + "making effective": 31654, + "dataset allows": 12806, + "allowing researchers": 2448, + "data enables": 12321, + "work hope": 62679, + "certain linguistic": 7942, + "examples text": 18938, + "topics natural": 57455, + "important issue": 24737, + "improvement traditional": 25035, + "expensive time": 19220, + "tend perform": 56205, + "additional linguistic": 1685, + "performance makes": 40432, + "popular metrics": 41172, + "performances different": 40641, + "methods yield": 33105, + "higher performance": 23835, + "finally introduce": 20864, + "performance metrics": 40437, + "workshop shared": 62922, + "robust different": 48244, + "quality estimation": 44516, + "using reference": 60902, + "processing pipeline": 42922, + "written english": 62998, + "analysis dependency": 2649, + "errors automatic": 18234, + "focus learning": 21174, + "similarity function": 51097, + "errors propose": 18249, + "approach task": 3717, + "task lexical": 55187, + "using subset": 60968, + "speech corpus": 52255, + "task methods": 55212, + "defined task": 13786, + "methods new": 32958, + "core tasks": 11157, + "areas natural": 4156, + "different evaluation": 14920, + "advent large": 1959, + "data access": 12107, + "researchers paper": 47162, + "sensitive data": 49496, + "pragmatic reasoning": 41495, + "reasoning framework": 45195, + "framework experiments": 21514, + "classifiers built": 8613, + "cases model": 7808, + "newly collected": 37371, + "collected corpus": 8956, + "corpus human": 11357, + "recognition important": 45508, + "dataset named": 13005, + "collected multiple": 8964, + "dataset used": 13126, + "gender age": 22033, + "largest dataset": 29094, + "networks significantly": 36910, + "significantly faster": 50957, + "perplexity using": 40742, + "using significantly": 60940, + "parameters paper": 39714, + "resource intensive": 47231, + "mixed data": 33401, + "existing monolingual": 19110, + "resources training": 47337, + "produce significantly": 43008, + "baseline present": 6202, + "mixed tweets": 33412, + "multilingual speakers": 36123, + "great promise": 23214, + "output decoder": 38966, + "conditional variational": 10008, + "variational autoencoders": 61246, + "discourse level": 15392, + "encoder model": 17525, + "using greedy": 60717, + "developed novel": 14639, + "novel variant": 37950, + "knowledge better": 27418, + "performance finally": 40347, + "loss proposed": 31103, + "models validated": 35666, + "novel cross": 37794, + "lingual transfer": 30734, + "transfer method": 58403, + "art monolingual": 4302, + "use labeled": 59920, + "shot shot": 50645, + "developed dataset": 14627, + "task called": 54944, + "simultaneously propose": 51275, + "available twitter": 5383, + "big personality": 7093, + "personality traits": 40762, + "task scientific": 55348, + "scientific paper": 48765, + "requires large": 46936, + "corpus automatic": 11282, + "automatic feature": 5092, + "work conducted": 62607, + "using 10": 60545, + "evaluation conducted": 18595, + "set annotated": 50107, + "embeddings effective": 17119, + "better overall": 6924, + "overall classification": 39037, + "works shown": 62907, + "synthetic parallel": 54378, + "generated translation": 22330, + "effective various": 16711, + "various neural": 61370, + "issues study": 27105, + "data efficient": 12313, + "pseudo parallel": 44280, + "synthetic examples": 54375, + "pairs experiments": 39189, + "czech german": 12079, + "german french": 22668, + "surface forms": 54152, + "performed experiments": 40662, + "methods various": 33099, + "translation datasets": 58597, + "previous word": 42301, + "information event": 25842, + "information seeking": 26074, + "empirically test": 17368, + "news texts": 37420, + "motivate development": 35858, + "label large": 27713, + "task train": 55441, + "data compare": 12225, + "performance domain": 40301, + "specific classifiers": 52056, + "classifiers trained": 8625, + "given news": 22765, + "data domains": 12295, + "varies depending": 61255, + "annotators provide": 3016, + "challenge existing": 7980, + "parsing techniques": 39801, + "uses novel": 60526, + "set features": 50156, + "ability handle": 611, + "graph structures": 23172, + "structures languages": 53186, + "models potentially": 35334, + "posterior probabilities": 41362, + "improvements baselines": 25051, + "models applications": 34717, + "like sentiment": 30502, + "representations particular": 46734, + "traditional feature": 57518, + "methods high": 32887, + "particular words": 39874, + "extensive study": 19914, + "embedding size": 17061, + "interesting insights": 26651, + "specifically approach": 52181, + "financial news": 20892, + "news headlines": 37406, + "architecture used": 4095, + "challenge task": 8018, + "performance present": 40493, + "submitted systems": 53589, + "use syntax": 60037, + "unsupervised supervised": 59736, + "supervised way": 54070, + "best run": 6819, + "ranked 1st": 44952, + "annotations paper": 2996, + "using iterative": 60742, + "problem evaluate": 42553, + "language available": 27971, + "models input": 35133, + "model leads": 34049, + "10 relative": 51, + "propose multi": 43473, + "variational encoder": 61247, + "model labeled": 34035, + "learning generative": 29662, + "discrete continuous": 15420, + "features data": 20551, + "framework effectively": 21500, + "benchmark model": 6478, + "outperforms single": 38943, + "languages explore": 28667, + "explore ability": 19685, + "models uses": 35654, + "uses different": 60506, + "evaluating models": 18563, + "useful insights": 60371, + "approach modeling": 3603, + "media using": 32189, + "graph structured": 23171, + "structure experiments": 53104, + "architecture different": 4043, + "benefit model": 6566, + "al 2018": 2240, + "level granularity": 30127, + "document specific": 15834, + "based previous": 5951, + "corpus annotations": 11276, + "al 2017": 2239, + "english corpus": 17790, + "liu et": 30892, + "al 2021": 2245, + "al 2022": 2246, + "terms number": 56302, + "simulated annealing": 51259, + "search algorithms": 48963, + "95 f1": 568, + "research propose": 47101, + "new annotated": 37126, + "hope useful": 24016, + "community present": 9270, + "present contribution": 41877, + "typologically different": 59166, + "level reasoning": 30186, + "expressed natural": 19799, + "expressions like": 19808, + "strategies model": 52910, + "art attention": 4216, + "task additionally": 54886, + "additionally provide": 1732, + "tackle task": 54713, + "task efficiently": 55038, + "makes unsuitable": 31641, + "content ugc": 10566, + "brazilian portuguese": 7308, + "techniques work": 56153, + "propose technique": 43662, + "word relationships": 62280, + "vectors based": 61480, + "features present": 20646, + "embeddings approach": 17083, + "approach obtains": 3616, + "time sequence": 57209, + "algorithm generate": 2277, + "prediction paper": 41726, + "propose perform": 43581, + "later stage": 29151, + "bi lingual": 7011, + "correct incorrect": 11468, + "train set": 57632, + "set containing": 50128, + "performance instead": 40396, + "instead single": 26463, + "neural nets": 36992, + "work aims": 62564, + "aims reduce": 2213, + "practice work": 41487, + "set techniques": 50259, + "reducing dimensionality": 45706, + "network performs": 36783, + "step model": 52815, + "emotional state": 17299, + "analysis models": 2699, + "existing deep": 19056, + "data evaluated": 12330, + "using pretrained": 60871, + "pretrained model": 42168, + "evaluation score": 18709, + "lstm gru": 31264, + "results related": 47803, + "end architecture": 17614, + "process work": 42841, + "empirically evaluate": 17361, + "tasks conduct": 55552, + "thorough investigation": 57063, + "representations data": 46634, + "quantitative evaluation": 44618, + "light important": 30451, + "aspects neural": 4549, + "participation semeval": 39827, + "task multilingual": 55228, + "multilingual knowledge": 36087, + "submission semeval": 53574, + "work builds": 62593, + "lingual language": 30707, + "dependent information": 14147, + "require considerable": 46847, + "data produced": 12567, + "use larger": 59928, + "shows method": 50788, + "method previous": 32621, + "languages aim": 28597, + "representation independent": 46528, + "capture underlying": 7718, + "lingual similarity": 30730, + "measure compare": 32046, + "sentences provide": 49774, + "provide experimental": 44066, + "evidence sentences": 18819, + "sentences close": 49688, + "close embedding": 8684, + "languages chinese": 28614, + "coherence modeling": 8910, + "processing field": 42872, + "need feature": 36564, + "features capture": 20534, + "based current": 5662, + "model specifically": 34402, + "identifying entity": 24457, + "existing strong": 19148, + "achieved promising": 1258, + "number target": 38042, + "propose select": 43613, + "tokens training": 57340, + "japanese chinese": 27145, + "proved effectiveness": 43988, + "translation baseline": 58583, + "represented single": 46807, + "method enables": 32480, + "translate source": 58552, + "sentences proposed": 49773, + "models shot": 35493, + "properties text": 43271, + "using bayesian": 60584, + "known data": 27656, + "dataset question": 13052, + "question pairs": 44742, + "architectures furthermore": 4111, + "model pretrained": 34229, + "noisy dataset": 37615, + "dataset automatically": 12818, + "automatically collected": 5149, + "structure neural": 53122, + "rely graph": 46282, + "graph convolutional": 23123, + "networks gcns": 36860, + "produce representations": 43004, + "paper model": 39427, + "based visual": 6131, + "qualitative analyses": 44470, + "analyses demonstrate": 2593, + "resulting embeddings": 47465, + "gradient updates": 23011, + "method combined": 32420, + "different configurations": 14873, + "grained sentiment": 23046, + "analysis problem": 2726, + "techniques pre": 56122, + "lexical based": 30354, + "embeddings able": 17075, + "score 69": 48804, + "sub task": 53533, + "new large": 37234, + "scale dataset": 48563, + "dataset called": 12835, + "comprehension question": 9773, + "released datasets": 46174, + "generate question": 22232, + "existing question": 19134, + "conduct human": 10053, + "evaluation test": 18738, + "proposed dataset": 43752, + "dataset serve": 13080, + "serve benchmark": 50076, + "years seen": 63074, + "scale evaluation": 48570, + "datasets snli": 13434, + "inference problem": 25683, + "advance research": 1882, + "evaluation end": 18614, + "data major": 12476, + "spanish russian": 51948, + "set baselines": 50113, + "lingual word": 30737, + "best scores": 6821, + "average accuracy": 5402, + "research multilingual": 47076, + "variety text": 61293, + "sentences train": 49795, + "labels based": 27811, + "based manual": 5831, + "manual annotations": 31732, + "learning classifier": 29556, + "using labeled": 60749, + "use trained": 60054, + "trained classifier": 57687, + "baseline task": 6214, + "requires little": 46939, + "syntax morphology": 54350, + "sentence order": 49607, + "present brief": 41860, + "learning focus": 29650, + "extract common": 19970, + "common task": 9204, + "task invariant": 55146, + "features existing": 20574, + "shared features": 50472, + "propose adversarial": 43287, + "adversarial multi": 1976, + "tasks publicly": 55830, + "available url": 5385, + "url http": 59792, + "data knowledge": 12445, + "important resources": 24766, + "variety natural": 61282, + "tasks suffer": 55918, + "model emph": 33808, + "sparse attention": 51964, + "concepts relations": 9942, + "concepts learned": 9936, + "baselines use": 6316, + "media communication": 32162, + "distinct representations": 15594, + "detection challenging": 14465, + "task apply": 54907, + "conventional classification": 11002, + "features particular": 20639, + "experiments real": 19508, + "collected social": 8967, + "demonstrate deep": 13888, + "based rnn": 6001, + "mechanism effectively": 32109, + "relevant parts": 46228, + "investigate neural": 26971, + "tagging problem": 54749, + "including multi": 25275, + "learning natural": 29774, + "relation detection": 45969, + "component nlp": 9712, + "including knowledge": 25264, + "base question": 5550, + "residual learning": 47188, + "results evidence": 47621, + "detection performance": 14510, + "qa benchmarks": 44446, + "benchmarks paper": 6536, + "learning particularly": 29800, + "challenges arise": 8032, + "state representation": 52707, + "world events": 62940, + "processing computational": 42860, + "based existing": 5713, + "existing texts": 19160, + "quality applications": 44491, + "especially large": 18282, + "questions text": 44812, + "experiment using": 19255, + "users particular": 60473, + "particular model": 39854, + "model consider": 33699, + "complex models": 9635, + "models furthermore": 35047, + "instance model": 26428, + "sophisticated models": 51716, + "generates fluent": 22341, + "usually better": 61038, + "promising direction": 43163, + "combine advantages": 9061, + "framework leveraging": 21558, + "points best": 41068, + "best single": 6822, + "single output": 51327, + "text collection": 56495, + "novel form": 37824, + "missing data": 33361, + "dependency graph": 14121, + "using efficient": 60671, + "layer perceptron": 29199, + "art semantic": 4396, + "code open": 8839, + "source available": 51742, + "translation automatic": 58581, + "automatic question": 5118, + "handle long": 23411, + "long documents": 31011, + "difficult use": 15193, + "irrelevant information": 27040, + "underlying model": 59271, + "gradient method": 23009, + "method train": 32684, + "benchmarks different": 6518, + "analysis news": 2708, + "accuracy recent": 1036, + "works explored": 62888, + "speech representation": 52289, + "way investigate": 61814, + "investigate role": 26985, + "using siamese": 60939, + "siamese networks": 50819, + "different information": 14954, + "information particular": 26004, + "setting introduce": 50327, + "different combinations": 14867, + "present qualitative": 41992, + "based audio": 5584, + "systems method": 54561, + "best case": 6755, + "addition introduce": 1622, + "achieve bleu": 1119, + "reducing memory": 45709, + "study new": 53419, + "adversarial networks": 1980, + "networks gans": 36858, + "employ adversarial": 17373, + "training architecture": 57936, + "model human": 33965, + "goal create": 22879, + "language bridge": 27981, + "diverse language": 15704, + "theoretical guarantees": 57022, + "language demonstrate": 28019, + "scheduled sampling": 48719, + "exposure bias": 19789, + "new training": 37349, + "provide informative": 44090, + "points previous": 41078, + "addition using": 1649, + "detection research": 14518, + "research date": 47012, + "performance publicly": 40514, + "pairs various": 39230, + "baseline state": 6211, + "attracted lot": 4884, + "crucial component": 11897, + "task design": 55008, + "task focus": 55091, + "based datasets": 5667, + "right answer": 48137, + "results attained": 47510, + "models human": 35094, + "datasets particular": 13366, + "information question": 26038, + "task inspired": 55140, + "inspired propose": 26411, + "dataset visual": 13135, + "task extensive": 55069, + "models datasets": 34884, + "datasets methods": 13332, + "problem provide": 42638, + "provide fine": 44074, + "lexical constraints": 30359, + "incorporate additional": 25344, + "parameters training": 39725, + "lexically constrained": 30399, + "constrained decoding": 10364, + "conducting experiments": 10101, + "adaptation neural": 1530, + "provide large": 44096, + "user input": 60422, + "significant gains": 50867, + "gains performance": 21940, + "adaptation scenarios": 1537, + "played important": 40983, + "propose sequence": 43624, + "modeling objective": 34606, + "useful improving": 60369, + "accuracy different": 959, + "range datasets": 44911, + "datasets covering": 13200, + "improvements benchmark": 25052, + "requiring additional": 46960, + "additional annotated": 1651, + "unannotated data": 59207, + "data fundamental": 12376, + "learned features": 29460, + "useful knowledge": 60372, + "wikipedia pages": 62052, + "categories according": 7842, + "annotated labels": 2901, + "labels used": 27856, + "used information": 60213, + "recognize important": 45550, + "challenge set": 8015, + "hand designed": 23391, + "present english": 41902, + "use analyze": 59820, + "neural systems": 37101, + "analysis provides": 2733, + "presents attempt": 42073, + "language written": 28586, + "covering various": 11659, + "various areas": 61302, + "topics paper": 57457, + "proposes simple": 43942, + "simple machine": 51188, + "sources data": 51827, + "74 accuracy": 498, + "accuracy classifying": 945, + "styles paper": 53508, + "interesting observations": 26652, + "known facts": 27658, + "experiments support": 19538, + "algorithm implemented": 2279, + "discuss potential": 15479, + "failure modes": 20353, + "suggest directions": 53816, + "directions future": 15291, + "video captioning": 61581, + "promising improvements": 43169, + "improvements recent": 25097, + "models accurately": 34665, + "task remains": 55334, + "given lack": 22754, + "lack sufficient": 27915, + "sufficient annotated": 53800, + "sharing knowledge": 50516, + "encoder representations": 17538, + "representations present": 46740, + "present multi": 41949, + "model shares": 34370, + "encoders decoders": 17554, + "art standard": 4412, + "using diverse": 60664, + "automatic human": 5094, + "joint modeling": 27179, + "based classifiers": 5621, + "methods joint": 32910, + "languages experiments": 28666, + "work release": 62802, + "unsupervised model": 59712, + "modeling inter": 34585, + "knowledge useful": 27644, + "multiple topics": 36304, + "inspired previous": 26410, + "task modeling": 55219, + "approach useful": 3732, + "useful predicting": 60381, + "predicting missing": 41678, + "achieved notable": 1255, + "success machine": 53707, + "summarization dialog": 53881, + "model query": 34265, + "order enable": 38612, + "testing model": 56407, + "model introduce": 34018, + "new query": 37294, + "summarization dataset": 53880, + "dataset building": 12833, + "clearly outperforms": 8658, + "outperforms vanilla": 38958, + "representations effective": 46646, + "models match": 35219, + "predictive accuracy": 41774, + "model access": 33496, + "heavily relies": 23534, + "vectors different": 61484, + "different target": 15092, + "weighted sum": 61931, + "decoder states": 13614, + "new source": 37321, + "way obtain": 61823, + "propose variant": 43698, + "current input": 11980, + "input previous": 26317, + "challenging issue": 8104, + "text feature": 56578, + "extraction techniques": 20121, + "techniques using": 56149, + "techniques proven": 56127, + "useful tools": 60393, + "generation text": 22566, + "knowledge largest": 27545, + "computational study": 9865, + "study performed": 53431, + "extend existing": 19821, + "require linguistic": 46875, + "component neural": 9711, + "trained relatively": 57852, + "little labeled": 30880, + "context embeddings": 10620, + "bidirectional language": 7073, + "model standard": 34407, + "additional labeled": 1679, + "learning supervised": 29900, + "settings introduce": 50378, + "models finding": 35025, + "relations paper": 46049, + "participated task": 39822, + "post evaluation": 41347, + "better random": 6948, + "random baseline": 44869, + "systems cross": 54464, + "lingual model": 30711, + "method predicting": 32618, + "language parallel": 28372, + "corpora provide": 11236, + "limiting applicability": 30636, + "approaches address": 3758, + "improvements competitive": 25062, + "benchmark methods": 6477, + "methods quality": 33002, + "corpora low": 11217, + "resulting poor": 47472, + "poor translation": 41146, + "data augmentation": 12149, + "augmentation approach": 4948, + "words generating": 62426, + "generating new": 22385, + "new sentence": 37311, + "words new": 62467, + "settings method": 50382, + "quality bleu": 44497, + "consider different": 10210, + "learn multiple": 29401, + "dirichlet process": 15346, + "representations able": 46612, + "word models": 62248, + "lexical substitution": 30388, + "task indicating": 55135, + "describes participation": 14230, + "continuous scale": 10852, + "tackled problem": 54716, + "using number": 60846, + "memory blstm": 32246, + "improvement using": 25037, + "model reflect": 34294, + "metrics recent": 33195, + "based predictions": 5946, + "tasks related": 55844, + "years automatic": 63050, + "attention paper": 4804, + "paper particularly": 39439, + "datasets constructed": 13196, + "problem construct": 42523, + "ms coco": 35912, + "learning syntactic": 29901, + "encoder learns": 17524, + "able improve": 700, + "yields best": 63115, + "performance significant": 40560, + "techniques approach": 56061, + "task linking": 55190, + "applicable task": 3157, + "task different": 55020, + "different general": 14941, + "entities including": 18056, + "built models": 7488, + "models outperforms": 35291, + "clean data": 8643, + "tasks despite": 55584, + "trial error": 58785, + "understand role": 59311, + "systematically evaluate": 54411, + "evaluate effect": 18453, + "consists parts": 10326, + "bayes logistic": 6349, + "predictions different": 41757, + "different sub": 15086, + "3rd place": 390, + "applied sequence": 3292, + "features previous": 20647, + "ability propose": 636, + "simple technique": 51218, + "technique called": 56029, + "enhance learning": 17914, + "rnn layer": 48197, + "language main": 28146, + "point processes": 41048, + "distinct languages": 15593, + "software based": 51635, + "state machines": 52703, + "accuracy 99": 931, + "attentional sequence": 4859, + "new standard": 37322, + "challenge models": 7996, + "increase training": 25425, + "efficient baseline": 16864, + "decoder output": 13607, + "second propose": 49019, + "gru lstm": 23314, + "lstm layer": 31267, + "fully connected": 21716, + "connected layers": 10176, + "architecture achieves": 4022, + "achieves similar": 1373, + "similar accuracy": 51028, + "recurrent model": 45618, + "fraction training": 21431, + "combining techniques": 9124, + "100 words": 65, + "best published": 6809, + "accuracy speed": 1050, + "uses convolutional": 60501, + "network consists": 36726, + "convolutional layer": 11104, + "connected layer": 10175, + "inputs different": 26362, + "label given": 27710, + "lingual text": 30732, + "categories paper": 7847, + "model distillation": 33778, + "originally proposed": 38745, + "corpus documents": 11326, + "documents train": 15920, + "train classifiers": 57574, + "technique applied": 56026, + "applied model": 3283, + "training reduce": 58225, + "unlabeled target": 59579, + "attention layer": 4762, + "task reading": 55317, + "candidates generated": 7585, + "results improve": 47669, + "using contrastive": 60624, + "negative samples": 36633, + "data key": 12444, + "learning requires": 29843, + "requires annotated": 46915, + "hard obtain": 23449, + "task limited": 55189, + "stack overflow": 52418, + "difficult humans": 15168, + "mitigation strategies": 33397, + "finally based": 20839, + "kb entities": 27271, + "train proposed": 57623, + "relevant text": 46239, + "corpus texts": 11444, + "entity annotations": 18095, + "evaluated model": 18537, + "important nlp": 24748, + "tasks code": 55540, + "code trained": 8862, + "available academic": 5259, + "academic research": 794, + "results study": 47860, + "study model": 53412, + "seen significant": 49062, + "significant rise": 50920, + "novel dataset": 37799, + "10 000": 28, + "annotated task": 2919, + "trained deep": 57706, + "multimodal model": 36152, + "introduce architecture": 26781, + "networks compared": 36840, + "input length": 26293, + "attention module": 4789, + "wu et": 63022, + "translation order": 58651, + "faster speed": 20441, + "great potential": 23210, + "information largely": 25945, + "largely overlooked": 29061, + "overlooked existing": 39098, + "features produced": 20649, + "called sentence": 7554, + "language aim": 27958, + "aim study": 2160, + "summarize existing": 53905, + "existing works": 19176, + "carried different": 7769, + "build existing": 7399, + "powerful paradigm": 41440, + "paradigm natural": 39624, + "methods building": 32775, + "rely high": 46287, + "applied domain": 3269, + "mechanism encode": 32112, + "encode input": 17465, + "sequence vectors": 50017, + "help reduce": 23586, + "test method": 56356, + "data provides": 12576, + "used enhance": 60165, + "large quantities": 28949, + "design different": 14274, + "data need": 12507, + "text translation": 56825, + "trained generic": 57741, + "recently data": 45413, + "train data": 57575, + "unclear extent": 59236, + "challenging data": 8087, + "support development": 54116, + "development evaluation": 14678, + "evaluation comparison": 18593, + "new exciting": 37198, + "processing computer": 42862, + "techniques present": 56124, + "various datasets": 61321, + "datasets models": 13337, + "types non": 59109, + "attention deep": 4734, + "models fit": 35035, + "provide directions": 44054, + "reduces human": 45690, + "efforts building": 16935, + "tasks promising": 55816, + "promising technique": 43187, + "affect model": 2016, + "paper deep": 39314, + "noise training": 37604, + "thoroughly evaluate": 57068, + "approach wide": 3738, + "consistently improves": 10296, + "extraction results": 20105, + "results outperforms": 47755, + "various evaluation": 61336, + "techniques analyze": 56058, + "method existing": 32493, + "existing tools": 19162, + "good accuracy": 22925, + "dataset experiment": 12915, + "accuracy 94": 926, + "baseline future": 6171, + "future development": 21868, + "success existing": 53701, + "tasks effectiveness": 55602, + "achieved good": 1234, + "performance short": 40557, + "short input": 50557, + "output sequences": 39001, + "attention attends": 4712, + "generated output": 22306, + "supervised word": 54071, + "rl models": 48176, + "step training": 52831, + "standard word": 52541, + "prediction training": 41748, + "shown exhibit": 50706, + "large benchmark": 28851, + "dataset semantic": 13077, + "difficult measure": 15176, + "related problems": 45929, + "analysis design": 2650, + "annotation protocol": 2964, + "spoken conversations": 52352, + "propose evaluate": 43375, + "development process": 14699, + "selection relevant": 49151, + "relevant features": 46218, + "features high": 20596, + "systems struggle": 54641, + "work seek": 62815, + "problem proposing": 42636, + "dialogue agent": 14766, + "key value": 27340, + "retrieval mechanism": 47952, + "model dialogue": 33763, + "underlying knowledge": 59267, + "metrics present": 33189, + "jointly model": 27206, + "representations shared": 46755, + "problem extensive": 42562, + "resulting new": 47470, + "models https": 35092, + "limits performance": 30643, + "performance consistency": 40265, + "exploit structure": 19665, + "performance experiment": 40334, + "effectiveness methods": 16793, + "way training": 61833, + "training character": 57949, + "performing multi": 40684, + "use limited": 59933, + "data effectively": 12310, + "languages recent": 28765, + "high performances": 23760, + "need trained": 36596, + "dataset user": 13128, + "learning address": 29505, + "dataset dataset": 12879, + "number labels": 38013, + "using bi": 60592, + "performance nlp": 40457, + "use computational": 59848, + "computational social": 9862, + "social science": 51602, + "data lack": 12451, + "lack interpretability": 27895, + "modeling global": 34580, + "use representations": 59995, + "training techniques": 58289, + "models experimental": 34993, + "interpretability models": 26716, + "underlying corpus": 59264, + "directly model": 15324, + "human intuition": 24177, + "improve existing": 24851, + "information explicit": 25850, + "human feedback": 24166, + "embeddings instead": 17152, + "leverage semantic": 30289, + "learning evaluation": 29630, + "best answer": 6747, + "quality using": 44597, + "base model": 5546, + "retrieval techniques": 47973, + "appropriate context": 3963, + "extract appropriate": 19968, + "pure text": 44393, + "corpora resulting": 11239, + "ability generalize": 609, + "generalize paper": 22146, + "explore models": 19716, + "comprehensive ablation": 9780, + "ablation studies": 657, + "conceptually simple": 9955, + "architecture outperforms": 4073, + "multimodal approaches": 36143, + "established benchmarks": 18353, + "using basic": 60583, + "encode word": 17473, + "model support": 34431, + "al 2003": 2229, + "given model": 22760, + "trained contextual": 57692, + "contextual word": 10787, + "embeddings input": 17151, + "deep natural": 13731, + "models named": 35242, + "neighbor information": 36656, + "embeddings furthermore": 17140, + "contextual embeddings": 10765, + "information improves": 25915, + "performance cases": 40229, + "random initializations": 44883, + "downstream performance": 16348, + "research including": 47055, + "documents multiple": 15897, + "characteristics different": 8236, + "texts investigate": 56894, + "investigate cross": 26946, + "methods language": 32915, + "representations end": 46651, + "representations open": 46729, + "use tree": 60062, + "fully differentiable": 21722, + "eliminating need": 16991, + "easily trained": 16550, + "compared various": 9472, + "lstm architectures": 31240, + "reverse dictionary": 48022, + "structure aware": 53090, + "additional annotations": 1654, + "drawing inspiration": 16406, + "recent efforts": 45306, + "bias propose": 7040, + "encode document": 17461, + "automatically inducing": 5184, + "rich structural": 48125, + "use attention": 59826, + "evaluation different": 18610, + "results document": 47596, + "problem automatic": 42509, + "detection use": 14538, + "systems study": 54642, + "specific labels": 52098, + "labels using": 27858, + "using recently": 60898, + "biomedical entities": 7174, + "event event": 18783, + "methods current": 32807, + "rely complex": 46276, + "extract higher": 19976, + "present sentence": 42006, + "experiments shown": 19525, + "shown achieve": 50694, + "level event": 30114, + "nlp machine": 37495, + "techniques help": 56093, + "uses supervised": 60538, + "media user": 32187, + "decision process": 13567, + "tool provides": 57365, + "provides efficient": 44196, + "efficient way": 16910, + "specific attention": 52048, + "mechanism improves": 32123, + "improves overall": 25139, + "word list": 62240, + "automatic semi": 5121, + "different periods": 15024, + "fail capture": 20330, + "propose latent": 43434, + "learn underlying": 29441, + "learning goal": 29664, + "enable development": 17422, + "set vectors": 50276, + "simple techniques": 51219, + "techniques like": 56106, + "does provide": 15965, + "topic sentiment": 57430, + "straightforward approach": 52886, + "directly predicts": 15333, + "component end": 9702, + "previous attention": 42243, + "model capability": 33643, + "achieves improvement": 1341, + "score state": 48874, + "art baseline": 4220, + "generate appropriate": 22179, + "planning surface": 40945, + "produce natural": 42993, + "model extensively": 33868, + "extensively evaluated": 19918, + "domains results": 16290, + "achieved better": 1222, + "domains compared": 16239, + "critical applications": 11777, + "define task": 13780, + "task able": 54872, + "makes task": 31638, + "experiments multiple": 19473, + "multiple deep": 36196, + "architectures learn": 4114, + "annotated tweets": 2927, + "f1 points": 20192, + "different techniques": 15096, + "benefit multiple": 6567, + "multiple different": 36198, + "models difficulty": 34920, + "understanding systems": 59406, + "specific rules": 52141, + "far apart": 20395, + "large gains": 28881, + "long tail": 31035, + "sequence architectures": 49910, + "news translation": 37423, + "algorithms automatic": 2321, + "research opportunities": 47085, + "larger set": 29088, + "paper illustrates": 39393, + "challenge automatic": 7969, + "methods analysis": 32747, + "propose multiple": 43483, + "different factors": 14930, + "relational reasoning": 46011, + "central component": 7918, + "relation networks": 45990, + "plug play": 41026, + "challenging dataset": 8088, + "curated dataset": 11949, + "code switched": 8858, + "cnn used": 8777, + "effective nlp": 16681, + "despite great": 14362, + "effective domain": 16646, + "tasks spoken": 55907, + "label embeddings": 27708, + "models increasingly": 35126, + "learning use": 29927, + "unsupervised neural": 59717, + "training unsupervised": 58309, + "ability combine": 599, + "work does": 62641, + "important natural": 24745, + "perform specific": 40144, + "domains study": 16294, + "study introduce": 53395, + "introduce domain": 26799, + "similarity calculation": 51087, + "prove proposed": 43982, + "corpus methods": 11380, + "trained domain": 57712, + "use lexical": 59932, + "approaches ranging": 3905, + "results test": 47880, + "surprisingly good": 54187, + "results challenge": 47530, + "embeddings standard": 17219, + "correct answers": 11466, + "example pairs": 18879, + "dataset evaluating": 12911, + "embeddings demonstrate": 17108, + "pose significant": 41241, + "challenges current": 8036, + "methods common": 32789, + "knowledge required": 27594, + "neural natural": 36989, + "nlu systems": 37569, + "systems knowledge": 54538, + "knowledge acquired": 27388, + "time introduce": 57168, + "nlu models": 37564, + "specific text": 52158, + "text inputs": 56630, + "representations task": 46768, + "representations experiments": 46662, + "experiments document": 19421, + "document question": 15823, + "approach analysis": 3417, + "exploit knowledge": 19657, + "appropriate way": 3969, + "context surrounding": 10727, + "token embeddings": 57287, + "embeddings represent": 17203, + "simple efficient": 51161, + "token embedding": 57286, + "embeddings large": 17160, + "text evaluate": 56560, + "trained smaller": 57875, + "smaller amounts": 51515, + "embeddings consistently": 17100, + "set sizes": 50248, + "models obtaining": 35273, + "obtaining better": 38231, + "parameter count": 39666, + "perform given": 40111, + "networks applied": 36830, + "translation introduce": 58622, + "architecture inspired": 4053, + "architectural changes": 4017, + "removing need": 46380, + "convolution operation": 11099, + "reduces number": 45695, + "neighborhood information": 36661, + "adjacent sentences": 1841, + "sentences evaluate": 49710, + "tasks include": 55676, + "classification benchmarks": 8440, + "quantitative comparison": 44617, + "tasks addition": 55492, + "model perform": 34184, + "accurate classification": 1076, + "confusion matrix": 10161, + "90 f1": 553, + "research suggests": 47126, + "order obtain": 38643, + "child directed": 8289, + "directed speech": 15268, + "negative log": 36623, + "log probability": 30976, + "understanding social": 59402, + "study human": 53385, + "build automatic": 7386, + "social contexts": 51560, + "contexts paper": 10753, + "perform study": 40146, + "corpus freely": 11345, + "magnitude larger": 31417, + "larger previously": 29086, + "use corpus": 59856, + "corpus perform": 11401, + "reveal underlying": 48014, + "gender information": 22036, + "information problem": 26022, + "automatically predicting": 5194, + "number relevant": 38032, + "linguistic complexity": 30755, + "original context": 38706, + "context previous": 10692, + "selection framework": 49140, + "results empirical": 47605, + "learning platform": 29807, + "learning dl": 29599, + "approaches domains": 3802, + "datasets exist": 13259, + "systems applied": 54431, + "work adapt": 62553, + "large open": 28929, + "dataset squad": 13099, + "based state": 6055, + "embeddings novel": 17181, + "rely domain": 46278, + "expensive create": 19206, + "systems achieve": 54420, + "learning low": 29714, + "gained attention": 21914, + "entity representation": 18143, + "different representation": 15050, + "users easily": 60460, + "models implement": 35102, + "used rank": 60283, + "query extract": 44667, + "based complex": 5628, + "performing models": 40683, + "mechanism propose": 32137, + "architecture transformer": 4094, + "transformer based": 58452, + "based solely": 6045, + "tasks models": 55750, + "models superior": 35563, + "superior quality": 53941, + "requiring significantly": 46965, + "time train": 57233, + "task improving": 55128, + "improving existing": 25180, + "existing best": 19043, + "model establishes": 33835, + "establishes new": 18360, + "art bleu": 4230, + "training costs": 57964, + "reporting bias": 46459, + "knowledge does": 27447, + "knowledge acquisition": 27389, + "extract knowledge": 19982, + "performance study": 40581, + "layers model": 29227, + "grained level": 23040, + "vocabulary sizes": 61713, + "words target": 62525, + "new sentences": 37312, + "words address": 62363, + "based source": 6048, + "external word": 19957, + "simple novel": 51201, + "candidate pool": 7575, + "speedup compared": 52329, + "explore challenges": 19690, + "domain mismatch": 16112, + "mismatch training": 33352, + "data rare": 12583, + "improvements quality": 25096, + "advances state": 1927, + "art text": 4428, + "learning extract": 29642, + "classify sentences": 8631, + "sentences express": 49718, + "propose stage": 43644, + "stage neural": 52435, + "model tackle": 34437, + "answers question": 3111, + "model copy": 33722, + "extraction model": 20082, + "entity tagging": 18151, + "approaches demonstrate": 3795, + "documents based": 15860, + "certain properties": 7944, + "language case": 27983, + "tabular data": 54694, + "data structure": 12698, + "wide array": 61960, + "status quo": 52780, + "does fully": 15947, + "despite strong": 14393, + "web applications": 61878, + "good representations": 22942, + "mixture model": 33421, + "weights different": 61938, + "representation ability": 46488, + "acl anthology": 1431, + "reference corpus": 45737, + "produce reasonable": 43002, + "challenge human": 7984, + "human automated": 24109, + "automated methods": 5052, + "methods previous": 32990, + "scale large": 48589, + "networks extract": 36854, + "demonstrate learned": 13931, + "higher precision": 23837, + "retrieval methods": 47954, + "significantly increased": 50983, + "suggest promising": 53828, + "order facilitate": 38619, + "annotated experts": 2897, + "using naive": 60824, + "approaches training": 3944, + "mini batch": 33280, + "processing speed": 42941, + "length based": 30025, + "strategies work": 52922, + "presents analysis": 42071, + "analysis impact": 2677, + "floating point": 21115, + "classification quality": 8525, + "different classifiers": 14865, + "source toolkit": 51813, + "standard attention": 52466, + "visualization tool": 61681, + "english datasets": 17795, + "training significantly": 58259, + "success models": 53711, + "models relies": 35429, + "learning given": 29663, + "discriminative models": 15447, + "match accuracy": 31895, + "art generative": 4265, + "models easy": 34945, + "models require": 35441, + "introduced models": 26886, + "produced model": 43020, + "features computed": 20544, + "makes model": 31628, + "powerful generative": 41434, + "models unable": 35642, + "work aim": 62562, + "aim developing": 2145, + "selection using": 49158, + "representations key": 46696, + "document understanding": 15840, + "demonstrate methods": 13938, + "methods bring": 32773, + "include multiple": 25224, + "analysis confirms": 2637, + "model builds": 33637, + "supervision model": 54086, + "wise relevance": 62084, + "relevance propagation": 46193, + "input space": 26340, + "classification decisions": 8455, + "based bi": 5603, + "task evaluate": 55055, + "evaluate resulting": 18500, + "used previous": 60271, + "work previous": 62767, + "order avoid": 38597, + "maximum posteriori": 31974, + "translation trained": 58693, + "model solve": 34396, + "gumbel softmax": 23361, + "effective generating": 16655, + "result different": 47437, + "russian language": 48415, + "language shown": 28484, + "research various": 47141, + "research recently": 47113, + "automated text": 5062, + "text summarisation": 56796, + "techniques task": 56141, + "resource development": 47221, + "development future": 14679, + "corpora news": 11228, + "challenging current": 8086, + "ignoring fact": 24500, + "corpus manually": 11377, + "important features": 24727, + "features task": 20680, + "local contexts": 30932, + "jointly optimizes": 27212, + "enhance training": 17926, + "training stage": 58268, + "inference stage": 25694, + "number recent": 38031, + "works proposed": 62904, + "proposed techniques": 43912, + "human interpretable": 24175, + "supervision paper": 54087, + "present sequence": 42007, + "negative results": 36631, + "achieve near": 1172, + "near perfect": 36509, + "scale open": 48607, + "domain qa": 16140, + "fine tuned": 20957, + "overall results": 39048, + "performance recently": 40523, + "large proportion": 28945, + "chinese translation": 8325, + "novel memory": 37862, + "inference experiments": 25656, + "vocabulary paper": 61710, + "used datasets": 60138, + "poses new": 41250, + "reference texts": 45749, + "lexical richness": 30382, + "discourse phenomena": 15395, + "learning dataset": 29580, + "establish baseline": 18341, + "difficulties associated": 15196, + "success failure": 53702, + "set diverse": 50136, + "lack reliable": 27910, + "reliable automatic": 46250, + "scarcity high": 48665, + "corpora address": 11173, + "current evaluation": 11975, + "motivating need": 35882, + "second problem": 49018, + "quality corpus": 44502, + "training study": 58276, + "baselines unsupervised": 6315, + "model handle": 33951, + "short length": 50558, + "based objective": 5920, + "used state": 60312, + "state theart": 52710, + "task systems": 55426, + "difficult determine": 15164, + "deployed real": 14172, + "world use": 62965, + "relatively easy": 46114, + "higher bleu": 23815, + "conduct depth": 10035, + "compare relative": 9363, + "relative gains": 46099, + "results develop": 47591, + "learning machine": 29716, + "novel stage": 37926, + "given high": 22746, + "aims answer": 2173, + "squad dataset": 52395, + "dataset challenging": 12837, + "dataset achieve": 12797, + "achieve f1": 1137, + "approaching performance": 3960, + "models f1": 35011, + "approach generate": 3545, + "generate synthetic": 22251, + "generate data": 22191, + "related source": 45938, + "report experimental": 46434, + "generated data": 22282, + "points using": 41081, + "data shows": 12660, + "used provide": 60278, + "data standard": 12691, + "models computationally": 34841, + "expensive requires": 19218, + "longer sequences": 31053, + "attention scores": 4826, + "scores demonstrate": 48899, + "significant potential": 50913, + "suffer poor": 53776, + "especially problematic": 18294, + "problem firstly": 42570, + "sample efficient": 48451, + "models employ": 34955, + "policy learning": 41098, + "learning experience": 29633, + "improve sample": 24922, + "sample efficiency": 48450, + "pre train": 41515, + "models prior": 35356, + "demonstrate practical": 13958, + "rl based": 48174, + "author profiling": 5000, + "language variety": 28576, + "output multiple": 38987, + "svm classifiers": 54234, + "word grams": 62213, + "evaluate using": 18514, + "dataset provided": 13044, + "approach achieved": 3392, + "written languages": 63003, + "accuracy language": 996, + "united kingdom": 59525, + "spatial temporal": 51987, + "finally analyze": 20836, + "hierarchical attention": 23656, + "achieved remarkable": 1261, + "performance document": 40299, + "language multilingual": 28352, + "considered training": 10254, + "language transfer": 28537, + "single multilingual": 51323, + "propose multilingual": 43481, + "document structures": 15836, + "shared encoders": 50470, + "label sets": 27727, + "news documents": 37401, + "word propose": 62272, + "sequence s2s": 49972, + "style model": 53490, + "target model": 54832, + "model possible": 34212, + "generation strategy": 22554, + "approach superior": 3713, + "superior state": 53944, + "accuracy human": 987, + "measuring performance": 32087, + "complexity data": 9675, + "time order": 57186, + "document text": 15838, + "text segment": 56755, + "existing metrics": 19106, + "improvements existing": 25072, + "metrics new": 33183, + "applied sentence": 3291, + "achieved excellent": 1230, + "present context": 41876, + "external corpus": 19931, + "topic level": 57413, + "result deep": 47436, + "hierarchical representations": 23690, + "text sentence": 56761, + "layers network": 29229, + "sentence important": 49568, + "user preferences": 60435, + "current methods": 11985, + "use structured": 60031, + "text known": 56638, + "language explanations": 28062, + "predicting user": 41685, + "features design": 20558, + "term memories": 56243, + "generates text": 22359, + "large real": 28953, + "metrics shows": 33201, + "prediction method": 41717, + "prediction time": 41747, + "systems commonly": 54453, + "trained evaluated": 57724, + "datasets recently": 13396, + "researchers started": 47166, + "text preprocessing": 56703, + "step pipeline": 52822, + "potential impact": 41392, + "impact final": 24596, + "performance despite": 40288, + "investigate impact": 26960, + "simple text": 51220, + "neural text": 37103, + "evaluation standard": 18724, + "benchmarks text": 6546, + "analysis experiments": 2664, + "experiments simple": 19528, + "preprocessing techniques": 41830, + "comparing different": 9479, + "provides insights": 44206, + "mitigate problem": 33389, + "produce final": 42982, + "able select": 724, + "arguments relations": 4182, + "text previous": 56707, + "area focused": 4140, + "analysis real": 2738, + "learning using": 29931, + "paradigms language": 39633, + "interactive learning": 26630, + "role social": 48322, + "algorithms perform": 2333, + "document vector": 15843, + "captures semantic": 7728, + "embeddings word2vec": 17248, + "generating high": 22376, + "classification semantic": 8541, + "enables training": 17449, + "time model": 57179, + "model efficient": 33802, + "classifier attention": 8593, + "analysis classification": 2626, + "result indicates": 47439, + "goal task": 22902, + "task classify": 54951, + "consuming process": 10452, + "process recent": 42823, + "supervision used": 54099, + "larger datasets": 29074, + "order create": 38605, + "domain cross": 16035, + "methods obtained": 32965, + "approach addition": 3409, + "representing text": 46814, + "tasks main": 55736, + "makes data": 31618, + "architecture learns": 4060, + "efficiently model": 16919, + "outperforms comparable": 38885, + "comparable previous": 9306, + "mining task": 33325, + "gained significant": 21922, + "years paper": 63068, + "including text": 25309, + "health care": 23512, + "quantitative methods": 44621, + "exploratory study": 19683, + "related specific": 45939, + "social economic": 51562, + "time frame": 57157, + "model separately": 34358, + "model obtained": 34139, + "shared information": 50473, + "information sentences": 26079, + "using ensemble": 60677, + "progress natural": 43105, + "used standard": 60311, + "standard benchmark": 52470, + "paper revisit": 39570, + "problem end": 42549, + "introduce large": 26817, + "dataset extracted": 12923, + "corpus obtained": 11395, + "retrieval systems": 47970, + "learning procedures": 29820, + "use state": 60027, + "techniques large": 56104, + "potential solution": 41407, + "key contributions": 27305, + "based output": 5931, + "concludes discussion": 9974, + "practical use": 41476, + "use results": 59999, + "automatic tools": 5131, + "text particular": 56692, + "preserving semantics": 42127, + "evaluate popular": 18486, + "interact users": 26592, + "combination various": 9052, + "recognition natural": 45516, + "results demonstrated": 47586, + "procedure used": 42745, + "context effective": 10619, + "way capture": 61795, + "challenge work": 8023, + "example word": 18884, + "analysis approaches": 2616, + "context feature": 10637, + "just using": 27254, + "size language": 51387, + "hundreds thousands": 24298, + "spelling variations": 52337, + "paper challenges": 39288, + "cross genre": 11826, + "target documents": 54811, + "target document": 54810, + "establish strong": 18348, + "shows improvement": 50785, + "study measure": 53409, + "improvement classification": 24994, + "results illustrate": 47667, + "characteristics language": 8238, + "blog posts": 7226, + "benefit downstream": 6560, + "word2vec doc2vec": 62346, + "simultaneously learn": 51271, + "training employ": 58083, + "text instead": 56631, + "newly created": 37374, + "model usually": 34516, + "learning study": 29899, + "study interpretability": 53393, + "available models": 5328, + "automated detection": 5040, + "non relevant": 37680, + "task detection": 55013, + "detection semantic": 14520, + "choice word": 8339, + "models word2vec": 35685, + "glove word2vec": 22862, + "dataset human": 12954, + "task non": 55242, + "make comparison": 31553, + "propose dataset": 43349, + "lexical grammatical": 30365, + "research automatic": 46989, + "asr text": 4563, + "paper step": 39576, + "allows train": 2479, + "knowledge deep": 27435, + "trained labeled": 57756, + "data state": 12692, + "extraction approaches": 20049, + "level labels": 30143, + "consuming costly": 10442, + "life tasks": 30441, + "tasks make": 55737, + "labels usually": 27859, + "desired output": 14349, + "end e2e": 17632, + "based pointer": 5939, + "pointer networks": 41061, + "trained directly": 57710, + "raw input": 45036, + "corpus compare": 11296, + "compare neural": 9351, + "use token": 60048, + "model focuses": 33904, + "global semantic": 22841, + "rare word": 44999, + "word encoding": 62199, + "oov problem": 38405, + "models english": 34967, + "tasks does": 55595, + "non canonical": 37640, + "submission shared": 53575, + "editing task": 16597, + "performs particularly": 40710, + "particularly low": 39885, + "sentence provide": 49627, + "improve results": 24918, + "results training": 47888, + "dataset important": 12958, + "documents belonging": 15861, + "belonging different": 6419, + "propose extract": 43385, + "collection texts": 8987, + "unlabeled datasets": 59569, + "compare methods": 9346, + "dataset scientific": 13074, + "trending topics": 58780, + "time specifically": 57220, + "pairwise ranking": 39239, + "structure prediction": 53128, + "pairwise comparisons": 39238, + "translation framework": 58614, + "loss functions": 31096, + "create model": 11708, + "highest score": 23856, + "limited computational": 30574, + "models establish": 34974, + "establish new": 18344, + "meaningful representations": 32025, + "models making": 35215, + "making possible": 31662, + "representation existing": 46511, + "encoded word": 17486, + "problems introduce": 42703, + "fully leverage": 21736, + "leverage rich": 30287, + "information help": 25900, + "analysis demonstrate": 2645, + "performances benchmark": 40638, + "networks represent": 36905, + "hypothesis space": 24348, + "networks demonstrate": 36842, + "utility approach": 61080, + "spoken dialog": 52353, + "recognition neural": 45521, + "problems large": 42707, + "allows generate": 2466, + "enable effective": 17423, + "generation step": 22551, + "settings furthermore": 50375, + "score sentence": 48872, + "level instead": 30135, + "instead directly": 26448, + "suggest model": 53824, + "operations used": 38495, + "variety models": 61281, + "tasks improved": 55673, + "text models": 56669, + "outperform non": 38806, + "sub character": 53515, + "language method": 28150, + "units called": 59529, + "information difficult": 25815, + "difficult access": 15154, + "improvement strong": 25028, + "naive approach": 36361, + "annotations language": 2993, + "corpus train": 11446, + "classifier identify": 8598, + "trained non": 57834, + "does improve": 15952, + "level rnn": 30197, + "quality compared": 44499, + "aware language": 5454, + "model achieving": 33528, + "data generally": 12382, + "order learn": 38630, + "architecture learn": 4058, + "space test": 51900, + "task study": 55416, + "graphs kgs": 23188, + "novel reinforcement": 37906, + "graph embeddings": 23131, + "approach includes": 3567, + "accuracy diversity": 960, + "ranking based": 44968, + "learning datasets": 29581, + "selecting optimal": 49126, + "performance little": 40420, + "design choices": 14267, + "different network": 15004, + "event detection": 18781, + "different setups": 15069, + "large impact": 28888, + "lstm layers": 31268, + "embedding pre": 17054, + "word contexts": 62132, + "embeddings pre": 17190, + "results error": 47614, + "data additionally": 12122, + "predictions word": 41771, + "systems tend": 54649, + "bridges gap": 7324, + "unsupervised knowledge": 59702, + "providing interpretable": 44249, + "hotel reviews": 24034, + "particular train": 39867, + "train text": 57650, + "analyze large": 2820, + "score using": 48881, + "using established": 60680, + "provides effective": 44195, + "languages best": 28609, + "knowledge study": 27623, + "relational information": 46009, + "information english": 25834, + "classes results": 8417, + "proposed cross": 43748, + "transfer approach": 58351, + "approach sets": 3687, + "explored work": 19769, + "level long": 30154, + "grams features": 23084, + "evaluated models": 18538, + "models dataset": 34883, + "dataset consisting": 12860, + "applying classical": 3359, + "classical machine": 8423, + "close performance": 8690, + "discrete features": 15421, + "networks nns": 36883, + "context modeling": 10675, + "strategy training": 52953, + "algorithm developed": 2269, + "developed work": 14642, + "evaluated terms": 18550, + "lms perform": 30921, + "faster inference": 20437, + "inference efficiency": 25653, + "downstream nlp": 16345, + "applications order": 3226, + "dataset provides": 13045, + "existing dataset": 19053, + "addition conduct": 1602, + "different encoders": 14915, + "mechanism experiments": 32118, + "experiments new": 19481, + "analyses different": 2595, + "able generalize": 695, + "generalize unseen": 22149, + "art unsupervised": 4435, + "arguments appear": 4179, + "investigate effectiveness": 26955, + "features state": 20673, + "terms f1": 56289, + "analysis widely": 2792, + "languages non": 28739, + "non english": 37648, + "designed generate": 14317, + "set tools": 50266, + "learning human": 29673, + "improve current": 24839, + "translation training": 58694, + "expensive human": 19209, + "algorithm improves": 2281, + "large action": 28830, + "level machine": 30155, + "high variance": 23808, + "problem language": 42591, + "field recently": 20768, + "use character": 59840, + "attention capture": 4727, + "capture intra": 7686, + "based newly": 5910, + "newly published": 37380, + "comprehensive benchmark": 9783, + "benchmark contains": 6437, + "compared performance": 9429, + "performance popular": 40486, + "propose methodology": 43457, + "analysis order": 2710, + "knowledge perform": 27568, + "best combination": 6757, + "combination proposed": 9048, + "problem especially": 42552, + "number entities": 38000, + "teams participated": 56008, + "methods employed": 32836, + "constructed dataset": 10409, + "tweets annotated": 59009, + "multilingual representations": 36114, + "languages need": 28737, + "model lexicon": 34061, + "minimize distance": 33298, + "embeddings learning": 17165, + "context evaluate": 10627, + "method compare": 32424, + "classification shown": 8550, + "translation different": 58600, + "increase model": 25417, + "proposed far": 43774, + "evaluate existing": 18457, + "approaches introduce": 3850, + "additionally explore": 1721, + "including deep": 25247, + "attention used": 4844, + "evaluation carried": 18586, + "single gpu": 51305, + "proposed architectures": 43739, + "obtain best": 38161, + "average improvement": 5410, + "release code": 46144, + "joint entity": 27168, + "classification relation": 8532, + "extraction particular": 20091, + "types relations": 59114, + "algorithm search": 2299, + "faster training": 20443, + "retrieve relevant": 47979, + "space existing": 51859, + "model improve": 33977, + "shown significant": 50749, + "work suggest": 62832, + "difficult data": 15162, + "task investigate": 55147, + "investigate effective": 26954, + "particular introduce": 39849, + "analyzing performance": 2845, + "neural generation": 36956, + "generation methods": 22492, + "methods experiments": 32852, + "produce fluent": 42983, + "fluent text": 21134, + "exceed performance": 18945, + "models metrics": 35225, + "systems focused": 54507, + "user needs": 60432, + "retrieving relevant": 47994, + "relationship model": 46070, + "algorithmic framework": 2316, + "framework task": 21612, + "application task": 3180, + "neural representations": 37091, + "2017 shared": 267, + "kernel learning": 27290, + "speech transcripts": 52310, + "dimensional representation": 15233, + "audio recordings": 4928, + "task organizers": 55252, + "ridge regression": 48135, + "development set": 14701, + "art nli": 4315, + "results goal": 47650, + "based string": 6062, + "task despite": 55010, + "transcripts speech": 58342, + "models reached": 35399, + "macro f1": 31406, + "score 87": 48822, + "score 93": 48828, + "speech input": 52266, + "unsupervised speech": 59733, + "corpus demonstrated": 11320, + "spearman correlation": 52011, + "methods perform": 32978, + "performing baseline": 40670, + "language tags": 28517, + "improvement automatic": 24984, + "automatic language": 5100, + "analysis human": 2673, + "models commonly": 34832, + "communication platforms": 9251, + "detection entity": 14477, + "order provide": 38650, + "additional text": 1705, + "text state": 56788, + "mentioned entities": 32299, + "tweets propose": 59020, + "contextual knowledge": 10771, + "use models": 59951, + "truth dataset": 58836, + "addition standard": 1643, + "dataset publicly": 13048, + "research new": 47081, + "domain research": 16148, + "based joint": 5793, + "features textual": 20683, + "released research": 46182, + "demonstrate strong": 13979, + "using multimodal": 60821, + "new self": 37308, + "using predefined": 60868, + "approach capable": 3441, + "process provide": 42821, + "benchmarks language": 6528, + "tasks approach": 55503, + "fast efficient": 20423, + "achieving comparable": 1398, + "performance relative": 40529, + "complex problems": 9648, + "problems require": 42729, + "constraints training": 10379, + "based inference": 5782, + "model weights": 34533, + "inference procedure": 25684, + "study efficacy": 53367, + "constraints semantic": 10378, + "network state": 36806, + "capture deeper": 7660, + "italian language": 27111, + "hyper parameter": 24325, + "accurate word": 1090, + "provide accurate": 44004, + "provide automated": 44010, + "scientific community": 48756, + "languages dialects": 28640, + "corpus construct": 11304, + "analysis introduce": 2684, + "results novel": 47745, + "task organized": 55251, + "task encourage": 55046, + "encourage research": 17599, + "human references": 24233, + "propose translation": 43685, + "estimate quality": 18374, + "evaluation setup": 18716, + "german data": 22664, + "automatically predicted": 5193, + "model acoustic": 33529, + "studies paper": 53287, + "apply data": 3323, + "quickly identify": 44822, + "contains 10": 10490, + "dataset explore": 12919, + "effective general": 16654, + "general features": 22060, + "level evaluation": 30113, + "semantic indexing": 49283, + "alignment task": 2383, + "pairs training": 39223, + "string similarity": 52994, + "errors training": 18252, + "data argue": 12145, + "language provide": 28452, + "models know": 35153, + "subject matter": 53553, + "learn syntax": 29433, + "languages knowledge": 28701, + "knowledge extracted": 27479, + "knowledge existing": 27470, + "languages build": 28611, + "information missing": 25973, + "access information": 824, + "studied decades": 53221, + "recently studies": 45469, + "usually considered": 61040, + "studies applied": 53246, + "particular target": 39864, + "explicitly implicitly": 19636, + "study investigate": 53397, + "investigate possible": 26975, + "report evaluation": 46433, + "results ner": 47738, + "detection experiments": 14484, + "using named": 60826, + "set tweets": 50271, + "tweets results": 59021, + "tweets paper": 59018, + "non deterministic": 37646, + "performance scores": 40546, + "based multiple": 5892, + "using fixed": 60699, + "control model": 10968, + "systems important": 54525, + "methods aim": 32744, + "solely rely": 51644, + "properties words": 43273, + "properties language": 43263, + "used pre": 60262, + "present alternative": 41843, + "input language": 26288, + "output method": 38985, + "method obtains": 32593, + "languages investigate": 28698, + "investigate techniques": 26990, + "translation existing": 58609, + "small domain": 51472, + "number techniques": 38044, + "techniques reduce": 56130, + "reduce overfitting": 45677, + "regularization techniques": 45842, + "l2 regularization": 27685, + "novel regularization": 37905, + "regularization technique": 45841, + "techniques combination": 56068, + "combination neural": 9046, + "amounts domain": 2547, + "data needed": 12508, + "data gain": 12378, + "pretrained large": 42161, + "large supervised": 29020, + "models pretrained": 35351, + "vectors paper": 61493, + "trained machine": 57780, + "adding context": 1594, + "application deep": 3162, + "facilitate effective": 20267, + "domain given": 16079, + "based rl": 6000, + "module model": 35766, + "model free": 33909, + "use entity": 59880, + "transfer multi": 58407, + "11 absolute": 83, + "users express": 60464, + "using short": 60937, + "user experience": 60411, + "models integrate": 35136, + "user reviews": 60445, + "prediction existing": 41704, + "works consider": 62880, + "framework named": 21569, + "terms learning": 56297, + "studied extensively": 53224, + "work exploring": 62662, + "nlp perspective": 37508, + "introduce notion": 26841, + "lack available": 27876, + "use test": 60045, + "techniques automatic": 56063, + "proved useful": 43990, + "specific corpora": 52063, + "attracted attention": 4874, + "high demand": 23726, + "beginning end": 6382, + "design makes": 14288, + "training efficiency": 58079, + "art data": 4244, + "substantially lower": 53642, + "introduce dynamic": 26800, + "original data": 38707, + "systems english": 54487, + "layer normalization": 29196, + "architectures different": 4107, + "big models": 7092, + "terms training": 56319, + "corpus size": 11431, + "neural dependency": 36947, + "model initialization": 34004, + "competitive current": 9547, + "combination text": 9051, + "yielded better": 63104, + "better performances": 6938, + "work multi": 62728, + "introduce additional": 26777, + "knowledge end": 27460, + "training natural": 58186, + "tasks able": 55486, + "able leverage": 705, + "performance individual": 40392, + "task analyze": 54902, + "analyze impact": 2818, + "training training": 58303, + "sharing tasks": 50520, + "tasks defined": 55574, + "information named": 25982, + "improved using": 24971, + "task task": 55429, + "english sentence": 17872, + "techniques learn": 56105, + "learn effectively": 29365, + "adaptation using": 1547, + "task aims": 54894, + "aims evaluate": 2191, + "quality representation": 44571, + "inference task": 25696, + "model equipped": 33831, + "helps achieve": 23602, + "performance addition": 40183, + "popularity social": 41205, + "mining social": 33323, + "political news": 41111, + "perspective paper": 40776, + "used online": 60256, + "semantic gap": 49280, + "accurate robust": 1087, + "available social": 5365, + "sample sizes": 48456, + "privacy concerns": 42439, + "crucial natural": 11903, + "require language": 46867, + "process resulting": 42827, + "task argument": 54913, + "solution task": 51662, + "models reveal": 35460, + "fundamental component": 21778, + "words directly": 62399, + "directly applicable": 15304, + "vectors model": 61492, + "words compared": 62382, + "models evaluate": 34976, + "lm based": 30905, + "work terms": 62839, + "success recent": 53723, + "infrequent words": 26189, + "tasks demonstrated": 55580, + "nmt baseline": 37574, + "compared competitive": 9395, + "competitive methods": 9549, + "highly subjective": 23920, + "challenges proposing": 8073, + "method outperform": 32595, + "level cross": 30094, + "loss training": 31106, + "level task": 30221, + "achieving significant": 1422, + "baseline based": 6157, + "based automatic": 5588, + "metrics human": 33169, + "multiple datasets": 36193, + "reward model": 48069, + "model overall": 34170, + "sequential sentence": 50050, + "encoder multi": 17527, + "encoder based": 17492, + "uses encoder": 60508, + "relationship sentences": 46072, + "sentence encoders": 49550, + "encoders achieve": 17552, + "achieve strong": 1205, + "model result": 34319, + "classification different": 8456, + "classification datasets": 8452, + "byte level": 7513, + "provides best": 44183, + "methods employ": 32835, + "results domains": 47598, + "domains recently": 16288, + "recently variety": 45473, + "model designs": 33754, + "learning related": 29838, + "related models": 45918, + "numerous nlp": 38069, + "compare contrast": 9333, + "detailed understanding": 14430, + "past present": 39933, + "learning nlp": 29782, + "traditional natural": 57535, + "model extract": 33871, + "corpus compiled": 11298, + "enhanced using": 17941, + "performance final": 40346, + "using f1": 60691, + "accuracy improved": 989, + "features sentence": 20662, + "task make": 55201, + "dataset model": 12996, + "attentive recurrent": 4865, + "evaluations model": 18761, + "discuss different": 15464, + "use meta": 59945, + "information proposed": 26032, + "non textual": 37687, + "methods tackle": 33065, + "quality improvements": 44533, + "vocabulary large": 61705, + "respectively results": 47382, + "cost effective": 11579, + "explore state": 19737, + "method improved": 32533, + "user embeddings": 60409, + "main approaches": 31424, + "approaches automatically": 3772, + "work argue": 62573, + "graphs propose": 23191, + "lexicon expansion": 30409, + "tasks order": 55777, + "auxiliary tasks": 5242, + "tasks deep": 55571, + "various cross": 61319, + "cross corpus": 11809, + "gains using": 21947, + "networks popular": 36891, + "outperforms deep": 38893, + "publish code": 44365, + "dimensional dense": 15229, + "present word": 42053, + "tweet data": 59003, + "data combination": 12221, + "data general": 12380, + "data consist": 12241, + "experiments demonstrating": 19411, + "performing natural": 40685, + "embeddings learn": 17163, + "representations large": 46700, + "fashion paper": 20416, + "respect word": 47354, + "summary generation": 53914, + "information generate": 25891, + "novel challenging": 37781, + "challenging research": 8137, + "largely unexplored": 29065, + "topics discussed": 57448, + "generate summaries": 22250, + "analysis identifying": 2676, + "propose models": 43468, + "models enhance": 34968, + "features directly": 20561, + "character embeddings": 8201, + "paper created": 39311, + "temporal dynamics": 56185, + "term long": 56240, + "proposed state": 43902, + "corpora large": 11214, + "large diverse": 28870, + "compared methods": 9419, + "nlp recently": 37519, + "applications various": 3258, + "components natural": 9720, + "current trends": 12024, + "entities events": 18048, + "media paper": 32174, + "surprisingly simple": 54191, + "approach open": 3618, + "novel set": 37920, + "problem specific": 42664, + "features significantly": 20668, + "significantly boost": 50944, + "classifier accuracy": 8591, + "extraction language": 20076, + "level embedding": 30106, + "parameters proposed": 39720, + "prediction methods": 41718, + "learning efficient": 29612, + "propose types": 43687, + "tasks time": 55935, + "lstm approach": 31238, + "approach shows": 3689, + "studies based": 53249, + "text contain": 56509, + "syntactic constructions": 54295, + "time data": 57141, + "corpus release": 11418, + "release data": 46149, + "task trained": 55442, + "order assess": 38596, + "meaning preservation": 32010, + "task evaluating": 55057, + "arabic word": 4009, + "arabic dialects": 3998, + "segmentation results": 49086, + "using limited": 60770, + "context attention": 10586, + "movie subtitles": 35898, + "learn distinguish": 29360, + "pilot study": 40884, + "study observe": 53423, + "attention patterns": 4808, + "paper problem": 39487, + "context prediction": 10690, + "deep bidirectional": 13684, + "features classification": 20537, + "classification network": 8508, + "corpus achieves": 11267, + "task improve": 55126, + "incorporated model": 25369, + "typically involves": 59146, + "softmax function": 51631, + "approach alleviate": 3415, + "words play": 62481, + "play role": 40977, + "using optimal": 60850, + "train different": 57579, + "reduction training": 45722, + "different standard": 15077, + "recommendation systems": 45565, + "text social": 56776, + "objective task": 38106, + "users tweets": 60483, + "information user": 26145, + "using support": 60971, + "task process": 55296, + "cast problem": 7822, + "introduce semi": 26858, + "methods neural": 32956, + "builds recent": 7479, + "domain introduce": 16087, + "introduce graph": 26810, + "extraction performance": 20092, + "semeval task": 49444, + "task sequence": 55362, + "improved leveraging": 24951, + "leveraging unlabeled": 30342, + "form language": 21325, + "model work": 34543, + "fusion method": 21856, + "leverages pre": 30309, + "able better": 678, + "better utilize": 6992, + "better generalization": 6894, + "transfer new": 58410, + "extracting knowledge": 20033, + "literature review": 30862, + "combination bidirectional": 9034, + "lstm convolutional": 31256, + "attention natural": 4793, + "processing community": 42858, + "traditional models": 57533, + "models main": 35208, + "main advantages": 31423, + "large raw": 28952, + "addressing issue": 1819, + "models second": 35472, + "used automatically": 60098, + "achieved competitive": 1227, + "models range": 35395, + "tasks hand": 55661, + "information complementary": 25784, + "compared models": 9421, + "paper systematically": 39589, + "systematically investigate": 54413, + "fundamental nlp": 21783, + "tasks based": 55516, + "benchmarks state": 6543, + "comparable best": 9289, + "structure source": 53138, + "translation multi": 58639, + "investigate data": 26947, + "tuning model": 58928, + "level fine": 30121, + "tuning data": 58904, + "model ensemble": 33827, + "data fine": 12364, + "works best": 62878, + "best training": 6833, + "model incrementally": 33996, + "ensemble different": 17974, + "models performed": 35320, + "better data": 6874, + "tedious time": 56162, + "representation modeling": 46555, + "implement evaluate": 24633, + "potential application": 41380, + "generated target": 22324, + "words especially": 62410, + "dependency relationship": 14137, + "words design": 62396, + "systems crucial": 54465, + "tasks summarization": 55920, + "vary significantly": 61423, + "systems end": 54485, + "architecture enables": 4045, + "structure input": 53110, + "approach discuss": 3491, + "content social": 10558, + "develop test": 14617, + "learning fine": 29645, + "understanding information": 59353, + "answering text": 3101, + "test compare": 56337, + "specific datasets": 52067, + "enables learning": 17442, + "finer grained": 21038, + "grained knowledge": 23038, + "improvement baselines": 24987, + "relations events": 46028, + "learn topic": 29439, + "mainly driven": 31470, + "causal relations": 7876, + "source learning": 51781, + "time demonstrate": 57142, + "datasets extracted": 13269, + "narrative understanding": 36385, + "chronological order": 8354, + "discourse relation": 15396, + "collecting human": 8976, + "accuracy best": 940, + "data work": 12777, + "study task": 53467, + "annotate data": 2871, + "grained domain": 23030, + "cover different": 11644, + "different properties": 15039, + "context learning": 10667, + "supervised models": 54021, + "effectiveness data": 16774, + "need improve": 36572, + "humans easily": 24276, + "information useful": 26144, + "useful language": 60373, + "datasets generated": 13284, + "evaluation resources": 18699, + "present language": 41935, + "benchmarks used": 6548, + "best approaches": 6749, + "including large": 25266, + "crowdsourcing study": 11890, + "study involving": 53400, + "years researchers": 63073, + "methods question": 33003, + "approaches achieved": 3753, + "closed domain": 8697, + "domain settings": 16157, + "2016 dataset": 256, + "pre selected": 41513, + "passage answer": 39919, + "corpus wikipedia": 11458, + "model reads": 34274, + "generate answer": 22178, + "domain performance": 16133, + "new pipeline": 37283, + "learns rank": 29969, + "jointly trains": 27224, + "based reinforcement": 5987, + "art multiple": 4305, + "datasets given": 13285, + "modeling problem": 34613, + "problem chinese": 42518, + "propose explicitly": 43378, + "explicitly incorporate": 19637, + "chinese nlp": 8315, + "modeling word": 34637, + "segmentation model": 49084, + "task relevant": 55332, + "relevant semantic": 46233, + "framework aims": 21455, + "aims learn": 2202, + "logical inference": 30985, + "close gap": 8686, + "models evaluating": 34978, + "demonstrate promising": 13962, + "open problem": 38439, + "paper explain": 39358, + "probabilistic graphical": 42461, + "version model": 61553, + "techniques improve": 56096, + "growth online": 23309, + "query model": 44673, + "multiple hops": 36224, + "unsupervised setting": 59731, + "task real": 55318, + "recommender systems": 45572, + "information usually": 26148, + "longer texts": 31056, + "end solution": 17709, + "task deep": 54994, + "outperforms classical": 38880, + "sets present": 50301, + "model adapts": 33536, + "little effect": 30874, + "words natural": 62463, + "field model": 20761, + "construct new": 10394, + "allow better": 2434, + "representation task": 46590, + "plays pivotal": 41003, + "pivotal role": 40920, + "nlp application": 37462, + "tagging methods": 54741, + "surpass state": 54166, + "performance analysis": 40192, + "target tokens": 54852, + "performance learning": 40415, + "rely manually": 46295, + "relevant contextual": 46206, + "avoid explicit": 5431, + "characteristics data": 8235, + "data known": 12446, + "comes cost": 9131, + "datasets experiments": 13265, + "experiments focus": 19438, + "focus important": 21170, + "important contextual": 24715, + "features easily": 20567, + "analyze various": 2833, + "investigate automatic": 26943, + "automatic quality": 5117, + "errors using": 18253, + "good bad": 22926, + "methods non": 32961, + "translation current": 58594, + "approaches focus": 3829, + "large documents": 28873, + "documents like": 15893, + "task framework": 55098, + "aim automatically": 2138, + "identify classify": 24416, + "consistent human": 10276, + "documents key": 15889, + "key contribution": 27304, + "contribution research": 10946, + "apply machine": 3331, + "scholarly articles": 48737, + "hope dataset": 24006, + "nlp communities": 37473, + "modeling introduce": 34587, + "introduce cross": 26793, + "metric word": 33130, + "effective means": 16669, + "different vector": 15123, + "linguistic similarity": 30794, + "languages machine": 28720, + "translation demonstrate": 58598, + "limited word": 30633, + "observed using": 38151, + "using 50": 60548, + "track progress": 57493, + "paragraph level": 39636, + "model detects": 33758, + "identifying relations": 24462, + "approach suffers": 3711, + "words study": 62523, + "using benchmark": 60587, + "different relation": 15048, + "optimization process": 38555, + "resulting performance": 47471, + "written natural": 63004, + "effect different": 16613, + "study addresses": 53319, + "embedding approaches": 17012, + "output layers": 38982, + "layers neural": 29230, + "mechanism proposed": 32138, + "addition construct": 1603, + "model outperformed": 34154, + "furthermore experiments": 21821, + "demonstrate dynamic": 13891, + "help model": 23578, + "entities input": 18057, + "embeddings training": 17235, + "clustering word": 8748, + "varying lengths": 61432, + "using common": 60612, + "prediction evaluate": 41703, + "performance building": 40227, + "building models": 7455, + "level datasets": 30097, + "highly specific": 23918, + "information guide": 25899, + "translation translate": 58696, + "financial domain": 20891, + "statistical neural": 52758, + "significant advantage": 50848, + "selected sentences": 49119, + "domain model": 16113, + "optimal number": 38529, + "efficient information": 16878, + "does involve": 15955, + "rapid progress": 44991, + "systems extract": 54501, + "text existing": 56563, + "approaches make": 3871, + "answer spans": 3057, + "propose instead": 43419, + "achieves second": 1363, + "second highest": 49006, + "stanford question": 52558, + "answering dataset": 3067, + "attention flow": 4749, + "augmentation method": 4961, + "semantically valid": 49397, + "present joint": 41933, + "simultaneously train": 51277, + "learn latent": 29388, + "clinical notes": 8671, + "single layer": 51313, + "limited success": 30620, + "success method": 53708, + "modern day": 35703, + "systems information": 54533, + "benchmark models": 6479, + "types models": 59102, + "extraction framework": 20068, + "framework automatically": 21460, + "methods datasets": 32812, + "achieves high": 1331, + "research despite": 47015, + "successful applications": 53734, + "fields crfs": 20779, + "model local": 34070, + "terms inference": 56295, + "integrating external": 26522, + "incorporate information": 25356, + "improvements strong": 25102, + "models lstm": 35205, + "experimented using": 19337, + "using monolingual": 60811, + "models according": 34661, + "introduce open": 26852, + "research model": 47074, + "variety problems": 61287, + "based content": 5640, + "embedding multi": 17049, + "multi relational": 36001, + "task empirical": 55039, + "generally applicable": 22162, + "tasks furthermore": 55650, + "embeddings embeddings": 17124, + "types paper": 59110, + "response selection": 47402, + "party conversations": 39914, + "understanding multi": 59367, + "conversations challenging": 11057, + "multiple speakers": 36287, + "problem experimental": 42557, + "build reliable": 7423, + "models handcrafted": 35075, + "models study": 35544, + "neural framework": 36955, + "framework extract": 21520, + "knowledge hidden": 27515, + "level knowledge": 30139, + "knowledge contained": 27426, + "embeddings character": 17092, + "aware neural": 5464, + "guide language": 23336, + "model key": 34029, + "comparing previous": 9484, + "model conduct": 33696, + "training different": 58065, + "framework does": 21496, + "knowledge self": 27605, + "information training": 26130, + "effectiveness leveraging": 16787, + "efficiency training": 16856, + "score 91": 48826, + "using extra": 60690, + "knowledge grounded": 27508, + "contain multiple": 10466, + "entities propose": 18073, + "propose fully": 43394, + "fully data": 21720, + "capable generating": 7620, + "entities appear": 18037, + "dynamic knowledge": 16486, + "entities different": 18043, + "according different": 857, + "enabling model": 17455, + "collect human": 8944, + "human conversation": 24126, + "annotations proposed": 2998, + "translation corpus": 58591, + "use output": 59969, + "carry extensive": 7777, + "extensive feature": 19906, + "speak different": 51994, + "information communication": 25782, + "human translations": 24250, + "previous researches": 42273, + "future researchers": 21894, + "tasks capture": 55533, + "local dependencies": 30935, + "recently attracted": 45408, + "modeling dependencies": 34570, + "mechanism attention": 32100, + "multi dimensional": 35951, + "self attention": 49175, + "proposed learn": 43798, + "attention rnn": 4824, + "temporal order": 56190, + "prediction quality": 41733, + "test accuracy": 56331, + "accuracy sentence": 1044, + "encoding methods": 17570, + "shows state": 50805, + "multi genre": 35959, + "shot approach": 50598, + "language generator": 28091, + "generator learn": 22619, + "score highly": 48851, + "linguistic categories": 30752, + "tweets contain": 59011, + "knowledge external": 27478, + "texts improve": 56890, + "framework achieves": 21449, + "human annotator": 24104, + "area computational": 4137, + "models useful": 35652, + "models consider": 34847, + "models translating": 35632, + "approaches face": 3824, + "face problem": 20243, + "problem data": 42528, + "generate unseen": 22261, + "original training": 38733, + "effective solution": 16696, + "data social": 12671, + "information essential": 25840, + "difficult process": 15182, + "fast evolving": 20424, + "potential using": 41411, + "news reports": 37412, + "chinese words": 8328, + "new perspectives": 37282, + "received relatively": 45264, + "attention field": 4748, + "model accurately": 33501, + "paper makes": 39423, + "skip grams": 51422, + "words approach": 62366, + "model operate": 34145, + "model classification": 33659, + "witnessed rapid": 62094, + "discrepancy training": 15417, + "problems model": 42712, + "training address": 57928, + "process guided": 42787, + "receiving increasing": 45273, + "step fine": 52807, + "text difficult": 56537, + "process domain": 42774, + "integrate information": 26506, + "labeling using": 27798, + "performance specifically": 40571, + "performance differences": 40291, + "information directly": 25816, + "especially case": 18264, + "data insufficient": 12437, + "information jointly": 25933, + "model algorithm": 33557, + "information extensive": 25853, + "using meta": 60798, + "despite ubiquity": 14400, + "resource constraints": 47215, + "makes challenging": 31616, + "dnn models": 15757, + "traditional language": 57523, + "magnitude smaller": 31418, + "baseline performance": 6200, + "accuracy 90": 922, + "art non": 4320, + "non neural": 37670, + "analogical reasoning": 2580, + "reasoning knowledge": 45198, + "embeddings despite": 17111, + "remains unclear": 46351, + "embeddings conduct": 17099, + "pairs word": 39233, + "empirically verify": 17371, + "general applicability": 22043, + "user profiles": 60437, + "certain topics": 7948, + "relation based": 45966, + "concepts paper": 9939, + "study machine": 53408, + "work applies": 62568, + "problem large": 42592, + "slightly lower": 51436, + "lower accuracy": 31206, + "method terms": 32682, + "inference speed": 25692, + "room future": 48338, + "future improvement": 21875, + "incorporating pre": 25391, + "model introduced": 34019, + "jointly optimizing": 27213, + "generative discriminative": 22589, + "learned latent": 29466, + "latent codes": 29119, + "generalization model": 22121, + "unsupervised manner": 59709, + "predictive performance": 41777, + "text sequence": 56765, + "baselines especially": 6255, + "speech transcription": 52309, + "set 10": 50099, + "arabic dialect": 3997, + "features vector": 20695, + "features shared": 20666, + "teams submitted": 56012, + "methods fall": 32862, + "fall categories": 20373, + "benchmark results": 6490, + "representations compared": 46627, + "hand labeled": 23394, + "datasets work": 13486, + "obtained large": 38214, + "dataset constructed": 12863, + "method use": 32693, + "approach directly": 3490, + "dataset second": 13075, + "approach learns": 3587, + "evaluating automatic": 18557, + "fashion experiments": 20414, + "missing words": 33366, + "optimization methods": 38550, + "methods largely": 32920, + "largely improve": 29057, + "improve efficiency": 24848, + "widely known": 61998, + "end asr": 17616, + "models successful": 35552, + "hybrid deep": 24314, + "model process": 34237, + "features natural": 20627, + "question arises": 44718, + "corpus called": 11292, + "ai research": 2121, + "explore performance": 19724, + "art retrieval": 4394, + "semantic accuracy": 49231, + "linguistically informed": 30815, + "building block": 7438, + "models employed": 34956, + "convolutional filters": 11103, + "document set": 15831, + "generation mechanism": 22489, + "mechanism introduced": 32125, + "outperforms standard": 38945, + "standard cnn": 52476, + "cnn attention": 8759, + "methodology results": 32720, + "computational processing": 9855, + "use low": 59939, + "based strategies": 6060, + "achieve best": 1112, + "set best": 50116, + "best f1": 6763, + "score overall": 48863, + "team ranked": 56003, + "sentiment tendency": 49862, + "svm models": 54237, + "sentence training": 49660, + "sentence vector": 49668, + "improves perplexity": 25148, + "quality outputs": 44559, + "according human": 863, + "extended new": 19837, + "automatically process": 5195, + "especially important": 18280, + "generate novel": 22225, + "knowledge structured": 27621, + "rely simple": 46299, + "spurious associations": 52385, + "does capture": 15936, + "capture different": 7662, + "increase f1": 25414, + "building large": 7451, + "dataset includes": 12963, + "15 years": 154, + "unlike prior": 59607, + "prior studies": 42415, + "studies focus": 53266, + "propose dual": 43358, + "dual encoder": 16459, + "encoder approach": 17488, + "approach word": 3740, + "level encoder": 30108, + "learns representation": 29971, + "representation context": 46499, + "encoder learn": 17523, + "gru based": 23313, + "learning remains": 29840, + "amounts textual": 2559, + "data large": 12454, + "nlp algorithms": 37461, + "datasets order": 13356, + "algorithms nlp": 2331, + "tasks discuss": 55592, + "framework current": 21485, + "domain dependency": 16044, + "domain sentiment": 16155, + "similarity prediction": 51113, + "useful task": 60389, + "challenging large": 8107, + "investigate models": 26968, + "identifying sentences": 24467, + "results furthermore": 47642, + "process allows": 42757, + "suggests future": 53846, + "directions improvement": 15295, + "effective neural": 16680, + "art works": 4442, + "works focus": 62890, + "tasks identifying": 55667, + "tackle tasks": 54714, + "pipeline methods": 40903, + "tree propose": 58754, + "attention methods": 4786, + "encourage model": 17595, + "experimentally demonstrate": 19332, + "attentive neural": 4863, + "architectures proposed": 4121, + "studies demonstrated": 53258, + "tasks empirical": 55606, + "different document": 14902, + "task addressing": 54888, + "applications propose": 3237, + "end deep": 17625, + "approach detect": 3483, + "typically rely": 59152, + "results strong": 47858, + "sentences translation": 49799, + "investigate deep": 26949, + "demonstrate consistent": 13884, + "knowledge sentence": 27607, + "level classification": 30075, + "intent classification": 26564, + "classification slot": 8552, + "order utilize": 38660, + "potential benefits": 41384, + "propose jointly": 43427, + "level label": 30141, + "semantic relevance": 49331, + "classification proposed": 8524, + "introduce evaluation": 26804, + "evaluation scheme": 18708, + "collect annotate": 8938, + "includes tasks": 25235, + "user intent": 60425, + "task divided": 55028, + "divided sub": 15747, + "paper publish": 39561, + "data study": 12702, + "information develop": 25810, + "contributes better": 10936, + "present initial": 41929, + "grained information": 23037, + "trained real": 57848, + "agent trained": 2060, + "train evaluate": 57587, + "human dialogue": 24135, + "better trade": 6980, + "learning agent": 29508, + "ability process": 634, + "dialogue turns": 14793, + "findings paper": 20910, + "dataset text": 13117, + "collected large": 8963, + "better parameter": 6926, + "tuning paper": 58936, + "objective functions": 38090, + "problem alleviated": 42502, + "selected based": 49117, + "particular using": 39872, + "sentences achieve": 49677, + "language inputs": 28117, + "reasoning language": 45199, + "attention existing": 4745, + "existing data": 19051, + "simple task": 51216, + "language visual": 28579, + "visual reasoning": 61665, + "data nlp": 12513, + "model higher": 33960, + "context input": 10659, + "propose attentive": 43303, + "convolution network": 11094, + "features word": 20696, + "contexts experiments": 10752, + "context particular": 10686, + "qa data": 44448, + "data extract": 12353, + "texts train": 56937, + "models efficient": 34949, + "selection strategy": 49153, + "strategy based": 52928, + "extrinsic task": 20173, + "questions similar": 44809, + "similar questions": 51060, + "processing large": 42882, + "corpora corpora": 11187, + "corpora contain": 11185, + "report presents": 46444, + "set source": 50251, + "language groups": 28094, + "associated target": 4624, + "standing challenge": 52550, + "systems performing": 54589, + "humans paper": 24283, + "art english": 4257, + "using logistic": 60776, + "combines domain": 9094, + "models substantially": 35548, + "models knowledge": 35154, + "improve human": 24863, + "pipeline used": 40907, + "used multi": 60243, + "results shows": 47842, + "nlp studies": 37527, + "related task": 45942, + "models implicitly": 35105, + "input paper": 26312, + "paper implement": 39394, + "different hypotheses": 14953, + "relative performance": 46104, + "shown using": 50759, + "additional languages": 1681, + "help improve": 23570, + "tasks multilingual": 55754, + "primarily used": 42367, + "difficult compare": 15160, + "test splits": 56380, + "hope help": 24009, + "research purposes": 47106, + "present manually": 41940, + "provides evidence": 44198, + "solutions problem": 51669, + "words neural": 62466, + "inner product": 26244, + "module jointly": 35763, + "rest model": 47410, + "achieve improvements": 1165, + "level discourse": 30102, + "discourse aware": 15386, + "various existing": 61338, + "positively correlated": 41303, + "common crawl": 9170, + "sentences linguistic": 49748, + "corpus yields": 11462, + "smaller corpora": 51516, + "models suggest": 35559, + "related distinct": 45898, + "task making": 55204, + "making accurate": 31644, + "research study": 47124, + "speech model": 52269, + "semantics use": 49418, + "text labels": 56640, + "labels introduce": 27835, + "text query": 56723, + "matches human": 31905, + "extensive analysis": 19856, + "model resulting": 34320, + "representations neural": 46724, + "remain open": 46316, + "open questions": 38443, + "performance pretrained": 40494, + "empirically using": 17369, + "specifically compare": 52185, + "pretrained embeddings": 42154, + "outperform random": 38816, + "ones large": 38339, + "embeddings useful": 17239, + "information reference": 26047, + "experiment benchmark": 19231, + "metrics shared": 33199, + "analyze understand": 2831, + "network evaluate": 36740, + "including fine": 25257, + "metric correlates": 33113, + "par state": 39617, + "fewer resources": 20740, + "documents automatic": 15857, + "documents provided": 15907, + "data particularly": 12539, + "label document": 27706, + "approaches document": 3800, + "label information": 27712, + "approaches lexical": 3861, + "models efficiently": 34950, + "efficiently learn": 16918, + "results predicting": 47772, + "develop systems": 14615, + "previous efforts": 42255, + "valuable resource": 61205, + "presidential election": 42131, + "source news": 51788, + "resource setting": 47274, + "low resources": 31200, + "representations like": 46709, + "graph graph": 23141, + "using translation": 61003, + "additional input": 1676, + "applications introduce": 3213, + "fraction model": 21429, + "submissions shared": 53578, + "task multimodal": 55229, + "image captions": 24532, + "shared different": 50465, + "performance english": 40319, + "datasets achieves": 13143, + "previously available": 42329, + "public domain": 44318, + "domain new": 16121, + "corpus pre": 11404, + "pre processed": 41508, + "workshop asian": 62920, + "available non": 5333, + "non commercial": 37641, + "available english": 5288, + "web content": 61882, + "features identify": 20599, + "given tweet": 22799, + "methods method": 32944, + "method simple": 32660, + "simple fast": 51168, + "fast train": 20430, + "require extensive": 46853, + "sentences present": 49769, + "challenging setting": 8140, + "evaluate strong": 18509, + "strong neural": 53038, + "task translating": 55449, + "generalizes different": 22155, + "benchmarks english": 6520, + "english dutch": 17798, + "different attention": 14846, + "attention distributions": 4739, + "product attention": 43042, + "output tokens": 39006, + "tokens work": 57345, + "present strategies": 42025, + "strategies using": 52920, + "different translation": 15107, + "weak correlation": 61848, + "confidence score": 10117, + "score human": 48852, + "set synthetic": 50255, + "languages simple": 28788, + "single source": 51338, + "parser trained": 39761, + "language including": 28104, + "results target": 47876, + "corpus speech": 11436, + "problem usually": 42684, + "data supervised": 12710, + "baseline large": 6178, + "features global": 20592, + "models designed": 34903, + "models tend": 35589, + "local model": 30945, + "methods feature": 32864, + "indicative words": 25547, + "word identification": 62215, + "use ensemble": 59879, + "words furthermore": 62423, + "furthermore analyze": 21803, + "lexical complexity": 30358, + "analysis machine": 2693, + "learn non": 29404, + "general word": 22097, + "framework generate": 21528, + "results framework": 47639, + "datasets provide": 13386, + "representations information": 46690, + "classification dataset": 8451, + "sentences sentence": 49784, + "objective method": 38095, + "releasing dataset": 46188, + "dataset help": 12946, + "algorithms task": 2341, + "need better": 36549, + "network classifier": 36717, + "dev set": 14567, + "set pre": 50221, + "sentences experiment": 49714, + "segmentation errors": 49082, + "result significant": 47452, + "known problem": 27664, + "approach second": 3679, + "understanding semantics": 59398, + "despite potential": 14375, + "algorithm state": 2303, + "art classification": 4233, + "model create": 33728, + "information downstream": 25823, + "detection datasets": 14472, + "datasets particularly": 13367, + "fine granularity": 20946, + "research focus": 47040, + "code public": 8849, + "computational understanding": 9869, + "understanding human": 59349, + "especially high": 18279, + "short stories": 50566, + "approach enables": 3508, + "humans learn": 24278, + "languages jointly": 28699, + "task ability": 54871, + "ability understand": 648, + "visual modality": 61661, + "based natural": 5897, + "systems provide": 54607, + "large general": 28883, + "general corpus": 22049, + "results question": 47795, + "adapted domain": 1552, + "describes systems": 14236, + "performance certain": 40230, + "cnn bi": 8762, + "micro average": 33218, + "f1 metrics": 20189, + "tags using": 54759, + "parsing natural": 39788, + "end using": 17726, + "framework includes": 21541, + "efficient scalable": 16896, + "fast inference": 20426, + "text uses": 56835, + "tune parameters": 58859, + "corpora perform": 11231, + "model showing": 34373, + "evidence method": 18814, + "model case": 33650, + "scores test": 48925, + "important semantic": 24770, + "description length": 14245, + "high effectiveness": 23732, + "accuracy classifier": 944, + "corpus 17": 11265, + "medical information": 32207, + "intent detection": 26567, + "learning introduced": 29687, + "structured semantic": 53175, + "queries model": 44654, + "model extracts": 33873, + "23 relative": 324, + "best baseline": 6751, + "methods different": 32824, + "human emotion": 24139, + "human rated": 24221, + "predict fine": 41639, + "contribution work": 10950, + "mutual benefit": 36343, + "emotion prediction": 17292, + "text available": 56451, + "obtain representation": 38187, + "representation similarity": 46580, + "similarity matrix": 51101, + "achieved model": 1251, + "trained simultaneously": 57871, + "shared representation": 50485, + "results preliminary": 47773, + "direction future": 15271, + "research recent": 47111, + "recent advancements": 45279, + "information technology": 26115, + "information generation": 25894, + "systems exist": 54494, + "users search": 60479, + "create novel": 11714, + "document pair": 15815, + "model retrieval": 34325, + "spread information": 52377, + "media news": 32172, + "diverse sources": 15719, + "content news": 10541, + "languages leveraging": 28711, + "new findings": 37205, + "findings reported": 20914, + "comparison present": 9502, + "model multilingual": 34111, + "based traditional": 6102, + "end trained": 17719, + "systems compared": 54455, + "technique significantly": 56046, + "lack clear": 27877, + "clear understanding": 8654, + "understanding problem": 59384, + "human biases": 24118, + "contrast humans": 10878, + "ability shot": 642, + "little data": 30872, + "shown powerful": 50736, + "identify patterns": 24435, + "representations shown": 46756, + "application tasks": 3181, + "differently different": 15149, + "trained huge": 57746, + "framework train": 21614, + "individual users": 25586, + "use application": 59823, + "direct assessment": 15254, + "quality automatically": 44494, + "metrics comparing": 33152, + "used evaluating": 60172, + "human assessment": 24106, + "rate quality": 45014, + "video text": 61586, + "generation techniques": 22565, + "provides good": 44201, + "consuming laborious": 10449, + "user intents": 60426, + "user utterances": 60456, + "produced using": 43023, + "dataset observe": 13014, + "scores experiments": 48901, + "experiments synthetic": 19539, + "synthetic dataset": 54373, + "english parallel": 17855, + "corpus covering": 11311, + "domain conversational": 16032, + "conversational dialogue": 11045, + "dataset kind": 12973, + "number novel": 38023, + "techniques model": 56112, + "improve training": 24934, + "estimate model": 18372, + "sequential features": 50041, + "lstm lm": 31270, + "tasks achieved": 55488, + "methods lack": 32914, + "lack understanding": 27925, + "clustering results": 8745, + "aggregate information": 2072, + "information analyze": 25761, + "method demonstrated": 32454, + "extract important": 19977, + "automatically generates": 5178, + "method novel": 32590, + "learning traditional": 29915, + "features proposed": 20651, + "compared unsupervised": 9469, + "best supervised": 6829, + "methods achieved": 32732, + "achieved overall": 1256, + "work design": 62630, + "swiss german": 54252, + "widely spoken": 62002, + "input order": 26307, + "best solution": 6824, + "testing data": 56402, + "practical problem": 41467, + "techniques require": 56132, + "strong cross": 53024, + "corpora model": 11223, + "trained monolingual": 57813, + "based reading": 5974, + "significant advances": 50847, + "robust machine": 48253, + "generation network": 22505, + "architectures including": 4112, + "including long": 25269, + "ability generate": 610, + "generate sentence": 22242, + "manner experimental": 31715, + "understanding complex": 59332, + "propose treat": 43686, + "problem develop": 42536, + "data experimental": 12344, + "scale parallel": 48610, + "tens thousands": 56218, + "model widely": 34536, + "datasets language": 13310, + "data consisting": 12243, + "models aimed": 34704, + "aimed identifying": 2164, + "dialog context": 14752, + "models question": 35391, + "using cross": 60635, + "propose mixed": 43461, + "self critical": 49194, + "derived word": 14206, + "word overlap": 62257, + "objective improve": 38091, + "performance question": 40516, + "question types": 44754, + "types input": 59094, + "requires ability": 46911, + "exact match": 18850, + "accuracy 83": 915, + "accuracy 86": 918, + "86 f1": 537, + "f1 paper": 20190, + "language recent": 28461, + "complete sentence": 9601, + "based query": 5969, + "yield different": 63094, + "optimization model": 38551, + "model answer": 33567, + "new questions": 37297, + "robust evaluation": 48247, + "final answer": 20817, + "good balance": 22927, + "datasets framework": 13280, + "framework significantly": 21599, + "multiple strong": 36293, + "virtual assistants": 61625, + "given short": 22784, + "nature text": 36490, + "make prediction": 31586, + "text language": 56642, + "processing pipelines": 42923, + "pipelines paper": 40911, + "classifier distinguish": 8596, + "detection error": 14478, + "reproducible research": 46829, + "testing datasets": 56404, + "datasets code": 13173, + "african languages": 2040, + "humans understand": 24290, + "set simple": 50246, + "shot generalization": 50619, + "solve task": 51691, + "humans use": 24291, + "learned classifiers": 29453, + "new concept": 37153, + "concepts language": 9935, + "used pretraining": 60270, + "learning results": 29849, + "text editing": 56547, + "settings models": 50384, + "models linguistic": 35191, + "models access": 34659, + "linguistic context": 30760, + "context recent": 10702, + "standard automatic": 52467, + "proposed multi": 43865, + "previous sentence": 42275, + "encoder models": 17526, + "coherence cohesion": 8905, + "compared non": 9426, + "non contextual": 37644, + "performance novel": 40461, + "strategy multi": 52943, + "leads best": 29305, + "similar structures": 51068, + "current document": 11972, + "large database": 28867, + "support wide": 54134, + "tasks domain": 55596, + "domain examples": 16061, + "examples provided": 18926, + "better quality": 6947, + "scarce paper": 48658, + "proposed extract": 43773, + "english persian": 17856, + "systems shown": 54631, + "corpus consists": 11303, + "art feature": 4262, + "scarcity labeled": 48668, + "issue using": 27081, + "different multi": 15000, + "labeling srl": 27793, + "mtl models": 35933, + "makes predictions": 31632, + "predictions using": 41770, + "deeper analysis": 13757, + "require massive": 46879, + "propose construct": 43332, + "adopt multi": 1864, + "limited fixed": 30586, + "propose directly": 43354, + "tasks performance": 55796, + "performance loss": 40422, + "architecture learning": 4059, + "lexical overlap": 30374, + "learning multiple": 29773, + "tasks learning": 55719, + "study computational": 53344, + "task single": 55380, + "language time": 28530, + "tasks languages": 55712, + "languages simultaneously": 28789, + "languages benefit": 28608, + "advances nlp": 1919, + "large extent": 28877, + "minority languages": 33333, + "specific research": 52139, + "questions posed": 44798, + "task aimed": 54892, + "task provide": 55307, + "lessons learned": 30047, + "model leverages": 34058, + "standard recurrent": 52520, + "cost human": 11584, + "expert annotation": 19571, + "annotation paper": 2958, + "model loss": 34074, + "network experiments": 36741, + "models synthetic": 35573, + "explore approaches": 19688, + "model robustness": 34331, + "robust training": 48267, + "approaches neural": 3883, + "generated outputs": 22307, + "lower latency": 31212, + "autoregressive transformer": 5226, + "transformer network": 58503, + "demonstrate substantial": 13981, + "validate approach": 61173, + "non autoregressive": 37633, + "scale human": 48578, + "human created": 24129, + "test dataset": 56342, + "high school": 23798, + "requires deeper": 46924, + "attention span": 4829, + "including language": 25265, + "performance gap": 40359, + "limited ability": 30561, + "term context": 56232, + "methods compute": 32796, + "semantics given": 49404, + "vision based": 61634, + "present end": 41900, + "problem code": 42521, + "active area": 1471, + "acoustic linguistic": 1436, + "developing effective": 14651, + "based applications": 5569, + "ability existing": 607, + "language technologies": 28522, + "inter sentential": 26588, + "intra sentential": 26761, + "work studied": 62828, + "problem context": 42524, + "features effective": 20568, + "monolingual language": 35802, + "lm trained": 30913, + "code switch": 8857, + "larger number": 29083, + "parameters evaluate": 39693, + "models speech": 35528, + "systems achieved": 54422, + "performance systems": 40590, + "transcribed speech": 58334, + "key problem": 27329, + "method particular": 32608, + "existing training": 19163, + "unsupervised adaptation": 59679, + "different topics": 15102, + "thorough understanding": 57066, + "applying models": 3369, + "subjective information": 53564, + "modeling methods": 34598, + "networks proposed": 36898, + "supervised trained": 54060, + "trained contrastive": 57695, + "model layer": 34046, + "model compared": 33673, + "evaluation analysis": 18574, + "low efficiency": 31149, + "command line": 9136, + "annotation quality": 2965, + "multiple annotators": 36165, + "proposed reduce": 43887, + "reduce annotation": 45649, + "annotation time": 2975, + "existing annotation": 19024, + "successful natural": 53736, + "relationships different": 46077, + "pieces text": 40881, + "parameters perform": 39715, + "locality sensitive": 30955, + "sensitive hashing": 49499, + "significantly reduce": 51010, + "explicit representation": 19623, + "labels evaluate": 27818, + "extraction datasets": 20056, + "datasets observe": 13350, + "observe significant": 38141, + "art classifiers": 4234, + "mechanism address": 32096, + "address mismatch": 1780, + "area curve": 4138, + "encoding method": 17569, + "method attention": 32389, + "proposed deep": 43755, + "model code": 33663, + "paper summarize": 39585, + "automatically extracts": 5172, + "model identifies": 33969, + "topics different": 57446, + "people opinions": 40032, + "furthermore use": 21841, + "use evaluation": 59882, + "models recent": 35406, + "perform par": 40127, + "task require": 55338, + "memory time": 32285, + "time training": 57234, + "unsupervised document": 59692, + "approaches require": 3915, + "require complex": 46845, + "difficult parallelize": 15180, + "enables train": 17448, + "structure document": 53100, + "results public": 47791, + "public benchmarks": 44308, + "fraction computational": 21428, + "queries natural": 44655, + "training sequence": 58249, + "problem existing": 42556, + "learning limited": 29710, + "sequence set": 50000, + "set model": 50194, + "prior art": 42394, + "task major": 55200, + "data complex": 12227, + "data cleaning": 12209, + "potential future": 41390, + "digital technologies": 15214, + "systematic review": 54402, + "gap paper": 21970, + "outperform conventional": 38788, + "difficult tasks": 15188, + "paper conduct": 39297, + "furthermore discuss": 21817, + "generates new": 22351, + "demonstrate advantage": 13861, + "regularization method": 45839, + "input perturbations": 26315, + "tagging performance": 54747, + "task dependency": 55004, + "helps model": 23611, + "model generally": 33928, + "generally effective": 22165, + "model dynamic": 33794, + "dynamic fusion": 16484, + "fusion network": 21861, + "comprehension mrc": 9769, + "questions answer": 44770, + "multi step": 36011, + "step reasoning": 52827, + "reasoning module": 45206, + "generating answers": 22365, + "reasoning steps": 45225, + "detailed empirical": 14421, + "analysis demonstrates": 2646, + "mrc models": 35906, + "models explicit": 34996, + "models reason": 35403, + "models build": 34797, + "tasks sequential": 55880, + "evaluation language": 18631, + "models producing": 35364, + "domain chinese": 16028, + "dataset designed": 12892, + "designed address": 14307, + "datasets data": 13206, + "manually generated": 31780, + "provides rich": 44224, + "dataset far": 12927, + "experiments human": 19443, + "performance current": 40273, + "community make": 9266, + "posted online": 41356, + "encourage exploration": 17592, + "models release": 35424, + "baselines popular": 6285, + "popular recent": 41181, + "recent approach": 45291, + "models extract": 35007, + "usually extract": 61049, + "questions require": 44805, + "generated existing": 22287, + "ensemble techniques": 17982, + "approaches combine": 3783, + "tuned training": 58890, + "evaluated text": 18552, + "comparison methods": 9498, + "method directly": 32464, + "directly learns": 15322, + "learns relation": 29970, + "language requires": 28472, + "causal effects": 7871, + "text neural": 56674, + "complements existing": 9597, + "transformers model": 58526, + "model updates": 34506, + "model reason": 34277, + "information understanding": 26136, + "representations existing": 46658, + "learning shown": 29874, + "successful tasks": 53738, + "experiments investigate": 19447, + "learned source": 29482, + "dataset target": 13112, + "qa models": 44452, + "target datasets": 54807, + "examples available": 18889, + "lstm units": 31285, + "languages semantic": 28779, + "encoder obtain": 17530, + "obtain final": 38172, + "task dependent": 55005, + "performance conventional": 40266, + "increasingly popular": 25476, + "shown language": 50722, + "language representations": 28471, + "type language": 59059, + "various stages": 61395, + "multimodal representations": 36155, + "outperform single": 38820, + "single modality": 51316, + "input modalities": 26298, + "motivated human": 35868, + "concept representations": 9926, + "based semantics": 6017, + "interpretation methods": 26735, + "model handles": 33952, + "improved word": 24974, + "small sized": 51503, + "seven languages": 50420, + "data scarcity": 12624, + "similar language": 51049, + "language evaluate": 28054, + "improves current": 25122, + "baseline score": 6207, + "models solving": 35520, + "hierarchically structured": 23700, + "resources form": 47303, + "manually annotating": 31763, + "explore techniques": 19741, + "techniques incorporate": 56101, + "dataset knowledge": 12974, + "bases kb": 6324, + "existing information": 19077, + "new relations": 37299, + "methods traditionally": 33078, + "traditionally used": 57559, + "practical task": 41475, + "datasets significantly": 13430, + "sentence set": 49646, + "dataset 000": 12787, + "datasets including": 13300, + "work best": 62588, + "model complex": 33680, + "applications research": 3247, + "method better": 32405, + "future works": 21901, + "task encoder": 55045, + "encoder network": 17528, + "model respectively": 34317, + "findings indicate": 20908, + "limited knowledge": 30591, + "knowledge intensive": 27530, + "ai tasks": 2123, + "tasks open": 55775, + "existing end": 19063, + "entire text": 18029, + "linear text": 30673, + "synthetic text": 54384, + "latent structure": 29139, + "qa pairs": 44455, + "journal articles": 27227, + "text order": 56681, + "extracting structured": 20040, + "structured representations": 53174, + "materials science": 31928, + "approaches extracting": 3823, + "models extracting": 35009, + "nature data": 36477, + "exciting new": 18971, + "recognition relation": 45530, + "difficult problem": 15181, + "sets paper": 50300, + "level dataset": 30096, + "quality dataset": 44506, + "methods solve": 33046, + "models conduct": 34845, + "provide baselines": 44014, + "com lancopku": 9018, + "ner dataset": 36677, + "dataset work": 13137, + "building automatic": 7437, + "represent real": 46478, + "alignment approaches": 2365, + "iteratively improve": 27130, + "improve data": 24840, + "understanding performance": 59381, + "tasks analysis": 55499, + "understanding recently": 59391, + "effective representations": 16690, + "event level": 18786, + "semantic interactions": 49289, + "generation method": 22490, + "method produces": 32623, + "produces better": 43027, + "subtle differences": 53676, + "focus evaluating": 21161, + "evaluating quality": 18568, + "systems introduce": 54534, + "perform evaluation": 40100, + "evaluation state": 18725, + "english neural": 17849, + "evaluation confirms": 18596, + "effective identifying": 16659, + "role modern": 48315, + "grained control": 23029, + "control information": 10966, + "information retained": 26060, + "multiple benchmarks": 36174, + "community based": 9260, + "potential applications": 41381, + "based textual": 6092, + "achieve satisfactory": 1189, + "satisfactory results": 48525, + "sufficient information": 53803, + "modality data": 33475, + "data inspired": 12432, + "strong semantic": 53050, + "semantic correlation": 49262, + "representations training": 46774, + "pooling layer": 41125, + "promising result": 43178, + "clearly demonstrate": 8657, + "use train": 60053, + "user satisfaction": 60446, + "knowledge make": 27550, + "make difficult": 31566, + "use available": 59832, + "available knowledge": 5314, + "user preference": 60434, + "new tools": 37346, + "use topic": 60051, + "complex model": 9634, + "called multi": 7549, + "order models": 38642, + "effectively efficiently": 16730, + "poor target": 41145, + "domain specifically": 16194, + "specifically existing": 52199, + "target domains": 54814, + "learn shared": 29422, + "unified framework": 59472, + "combining sentence": 9123, + "interaction based": 26596, + "model extensive": 33866, + "competing models": 9531, + "networks work": 36925, + "random initialization": 44882, + "especially considering": 18268, + "significant difference": 50862, + "learn perform": 29408, + "perform reasonably": 40132, + "translation approaches": 58579, + "better automatic": 6851, + "automatic translations": 5134, + "promising way": 43188, + "results systematic": 47874, + "enhance quality": 17919, + "introduces additional": 26891, + "collection data": 8980, + "data specific": 12686, + "downstream classification": 16335, + "community detection": 9261, + "collecting data": 8973, + "annotation experiments": 2950, + "data drawn": 12297, + "task does": 55031, + "providing high": 44246, + "existing classification": 19046, + "approaches improving": 3845, + "detection investigate": 14494, + "task processing": 55297, + "step investigate": 52813, + "performs task": 40720, + "develop approach": 14572, + "test multiple": 56360, + "multiple hypotheses": 36225, + "predictions results": 41767, + "box neural": 7293, + "does scale": 15978, + "contribute better": 10927, + "words form": 62420, + "vocabulary knowledge": 61704, + "knowledge order": 27563, + "interaction model": 26606, + "methods effective": 32832, + "learning introduce": 29686, + "method embedding": 32477, + "context specific": 10724, + "posterior distributions": 41360, + "applications example": 3204, + "based variational": 6126, + "generated large": 22297, + "want know": 61768, + "traditional supervised": 57548, + "work attempted": 62578, + "number models": 38020, + "patterns human": 39969, + "perform thorough": 40155, + "analyses showing": 2606, + "text current": 56521, + "target text": 54849, + "order handle": 38624, + "handle issue": 23409, + "issue propose": 27075, + "set target": 50256, + "words semantically": 62505, + "build new": 7417, + "new layer": 37236, + "model estimated": 33838, + "manner experiments": 31717, + "analysis presented": 2724, + "crucial tasks": 11915, + "current studies": 12014, + "surface level": 54153, + "multiple information": 36226, + "present compositional": 41869, + "post processed": 41350, + "model adapted": 33533, + "automatically detected": 5158, + "compare analyze": 9328, + "machine interaction": 31304, + "data models": 12493, + "models combined": 34827, + "sentiment classifiers": 49837, + "vis vis": 61630, + "improvements paper": 25090, + "identify strengths": 24446, + "broad categories": 7350, + "comments given": 9145, + "extremely large": 20161, + "datasets tend": 13454, + "expert domain": 19578, + "knowledge embeddings": 27457, + "building previous": 7463, + "analysis challenging": 2625, + "proposed automatic": 43741, + "content context": 10515, + "microblog posts": 33229, + "trained separate": 57863, + "testing phase": 56409, + "embedding similarity": 17060, + "outperforming best": 38848, + "life applications": 30437, + "european language": 18428, + "languages set": 28781, + "results known": 47688, + "present challenges": 41864, + "best worst": 6839, + "worst scaling": 62978, + "required number": 46903, + "significantly affect": 50935, + "simple heuristics": 51178, + "created dataset": 11725, + "include various": 25226, + "dataset analyze": 12808, + "impact individual": 24598, + "solutions based": 51667, + "quantitative results": 44628, + "results terms": 47879, + "utmost importance": 61131, + "product paper": 43043, + "considered special": 10251, + "corpus date": 11318, + "automatic discovery": 5080, + "dual attention": 16458, + "qa pair": 44454, + "challenges addressed": 8029, + "vaswani et": 61445, + "solely using": 51647, + "encoder proposed": 17535, + "understand context": 59289, + "distance based": 15542, + "based self": 6011, + "order model": 38641, + "shows good": 50779, + "nli data": 37450, + "additionally model": 1726, + "paper identify": 39392, + "novel qa": 37902, + "url https": 59793, + "https www": 24062, + "design neural": 14291, + "network called": 36714, + "supervised attention": 53963, + "network san": 36800, + "supervised sequence": 54042, + "baselines neural": 6282, + "model applies": 33569, + "document represented": 15827, + "node edge": 37584, + "document graph": 15798, + "topic text": 57433, + "level supervision": 30218, + "combines advantages": 9091, + "document document": 15786, + "approach various": 3737, + "datasets compare": 13182, + "compare state": 9367, + "approach relying": 3674, + "output language": 38980, + "architecture takes": 4088, + "roman urdu": 48330, + "make following": 31572, + "following contributions": 21264, + "correctly predict": 11494, + "predict sentences": 41654, + "achieving bleu": 1397, + "serve baseline": 50073, + "work domain": 62642, + "outperformed previous": 38839, + "art benchmarks": 4227, + "code released": 8852, + "openly available": 38478, + "models freely": 35043, + "available propose": 5349, + "fully end": 21723, + "text encoder": 56552, + "batch normalization": 6341, + "new attention": 37138, + "analysis pipeline": 2716, + "kg embeddings": 27359, + "embeddings specifically": 17218, + "specifically explore": 52200, + "using entity": 60679, + "maintaining comparable": 31487, + "reasoning machine": 45201, + "unique feature": 59513, + "improves robustness": 25157, + "comprehension dataset": 9763, + "used convert": 60129, + "setting work": 50356, + "algorithm proposed": 2295, + "use efficient": 59872, + "rl framework": 48175, + "framework recent": 21591, + "model available": 33593, + "metric task": 33125, + "reducing computational": 45705, + "framework training": 21616, + "self attentional": 49189, + "different model": 14993, + "incorporate new": 25360, + "new ideas": 37221, + "overall best": 39035, + "used experiments": 60178, + "apache license": 3128, + "capable predicting": 7626, + "propose zero": 43707, + "method involves": 32553, + "embedding sentence": 17059, + "model generalize": 33924, + "classifiers learn": 8617, + "accuracy test": 1059, + "models generalize": 35054, + "new unseen": 37354, + "cases models": 7809, + "given growing": 22745, + "ability provide": 638, + "methods generally": 32876, + "social data": 51561, + "published works": 44376, + "10 languages": 45, + "leverage external": 30268, + "models requires": 35444, + "single reference": 51331, + "blind test": 7217, + "performance 17": 40168, + "baselines task": 6307, + "released dataset": 46173, + "dataset annotated": 12809, + "uses character": 60496, + "78 accuracy": 512, + "accuracy identifying": 988, + "challenge lies": 7993, + "discussed text": 15487, + "methods given": 32879, + "text research": 56742, + "white box": 61954, + "adversarial examples": 1970, + "neural classifier": 36943, + "decrease accuracy": 13667, + "accuracy method": 1005, + "method perform": 32610, + "training makes": 58167, + "proposes novel": 43940, + "training text": 58297, + "good trade": 22947, + "set available": 50110, + "span multiple": 51927, + "decoding methods": 13634, + "using separate": 60928, + "specific design": 52069, + "monolingual word": 35814, + "method comparable": 32423, + "classification algorithm": 8430, + "achieved accuracy": 1217, + "use approach": 59825, + "using best": 60591, + "best worlds": 6838, + "gradient boosting": 23005, + "learning state": 29894, + "art machine": 4278, + "experiments report": 19510, + "parameter settings": 39677, + "settings recent": 50393, + "answer based": 3031, + "leveraging external": 30323, + "additional source": 1699, + "task automated": 54921, + "questions answered": 44771, + "20 datasets": 223, + "datasets commonly": 13180, + "learning applications": 29517, + "improves prediction": 25149, + "approach applicable": 3418, + "specific case": 52052, + "legal scientific": 30008, + "model interpretability": 34015, + "proposed existing": 43771, + "methods suffer": 33059, + "leverages knowledge": 30305, + "knowledge entity": 27466, + "existing baselines": 19039, + "answering forums": 3073, + "forums social": 21400, + "language form": 28076, + "language focus": 28075, + "largely neglected": 29059, + "conduct large": 10055, + "world online": 62951, + "prevalent social": 42227, + "social biases": 51555, + "model suffers": 34425, + "problems propose": 42722, + "source token": 51810, + "token prediction": 57301, + "prediction module": 41721, + "headline generation": 23506, + "token wise": 57314, + "present best": 41858, + "important implications": 24732, + "models derive": 34900, + "context meaning": 10672, + "distance measure": 15546, + "model applying": 33571, + "current literature": 11984, + "results joint": 47684, + "attracted considerable": 4876, + "structure long": 53117, + "empirical experiments": 17328, + "topic analysis": 57389, + "languages hand": 28682, + "content work": 10574, + "include new": 25225, + "pose challenges": 41239, + "challenges using": 8080, + "study language": 53402, + "improve language": 24866, + "motivates research": 35878, + "methods approaches": 32755, + "15 000": 146, + "domain sentences": 16154, + "sentences labeled": 49744, + "baseline experiments": 6165, + "experiments experiments": 19434, + "data include": 12423, + "related phenomena": 45924, + "input contexts": 26261, + "need annotated": 36546, + "quality high": 44528, + "annotation costs": 2940, + "costs work": 11608, + "design experiments": 14282, + "terms text": 56317, + "consider multiple": 10215, + "multiple text": 36301, + "data applications": 12139, + "embeddings corpus": 17105, + "corpus generation": 11350, + "corpora demonstrate": 11190, + "models lead": 35171, + "rely pre": 46297, + "text learning": 56647, + "leading performance": 29294, + "performance bottleneck": 40226, + "sequential order": 50049, + "position information": 41267, + "information encoder": 25831, + "decoder experiments": 13593, + "experiments shows": 19526, + "learning significantly": 29879, + "fact based": 20287, + "text contribute": 56514, + "applications goal": 3207, + "domain propose": 16138, + "input training": 26352, + "understanding knowledge": 59356, + "word matching": 62244, + "systems utilize": 54666, + "generation propose": 22531, + "set questions": 50230, + "evaluated domain": 18530, + "end proposed": 17704, + "models led": 35181, + "enormous data": 17958, + "based components": 5630, + "augment data": 4939, + "cover diverse": 11645, + "continual learning": 10822, + "neural conversational": 36946, + "tasks data": 55568, + "efficacy method": 16832, + "support domain": 54117, + "text preserving": 56706, + "semantics using": 49419, + "semantic preservation": 49320, + "model unlike": 34502, + "languages multi": 28734, + "upper layers": 59775, + "representations design": 46638, + "experiments popular": 19488, + "parameter size": 39679, + "performance additionally": 40185, + "highly interpretable": 23903, + "recent researches": 45344, + "textual knowledge": 56972, + "knowledge concepts": 27424, + "learn knowledge": 29386, + "knowledge wikipedia": 27647, + "performance 91": 40172, + "datasets achieving": 13144, + "study evaluate": 53371, + "demonstrate competitive": 13882, + "highly challenging": 23882, + "challenging identify": 8101, + "systems incorporate": 54530, + "low accuracy": 31132, + "multiple ways": 36311, + "syntactically correct": 54340, + "architecture called": 4031, + "95 accuracy": 567, + "generalization capability": 22119, + "accurate automatic": 1075, + "adversarial attacks": 1965, + "short paper": 50560, + "model parameter": 34179, + "language dependent": 28020, + "new representation": 37300, + "instead words": 26470, + "simple automatic": 51137, + "summarization language": 53887, + "structure different": 53097, + "knowledge knowledge": 27535, + "extracting semantic": 20037, + "like named": 30488, + "context single": 10721, + "art sota": 4404, + "sota results": 51731, + "challenging practical": 8124, + "research problem": 47097, + "extractive method": 20135, + "semantic embedding": 49273, + "tasks attention": 55512, + "extensive set": 19912, + "advantage proposed": 1945, + "essential tasks": 18337, + "processing machine": 42885, + "million sentence": 33256, + "pairs collected": 39173, + "corpus experiment": 11336, + "lifelong learning": 30443, + "past tasks": 39935, + "help future": 23564, + "classification particular": 8512, + "particular proposed": 39860, + "task motivated": 55223, + "need large": 36576, + "answering dialogue": 3070, + "leading significant": 29298, + "automatically annotated": 5141, + "model auxiliary": 33592, + "auxiliary training": 5243, + "training objectives": 58197, + "model guided": 33949, + "performance strong": 40577, + "data annotated": 12135, + "questions dataset": 44781, + "customer service": 12057, + "usually employed": 61047, + "answering datasets": 3069, + "available study": 5372, + "study behavior": 53335, + "models convolutional": 34865, + "cost high": 11583, + "computational requirements": 9856, + "provide summary": 44138, + "learning environments": 29626, + "applied large": 3278, + "modeling used": 34633, + "researchers explore": 47155, + "distant supervised": 15556, + "approach scale": 3678, + "extraction large": 20077, + "relational facts": 46006, + "text recent": 56727, + "progress task": 43116, + "sentences low": 49750, + "syntax information": 54349, + "syntax aware": 54346, + "tree sentence": 58757, + "level entity": 30111, + "finally combine": 20842, + "combine sentence": 9073, + "embedding entity": 17027, + "classification conduct": 8444, + "experiments widely": 19562, + "used real": 60284, + "languages address": 28595, + "method reduce": 32635, + "effective mechanism": 16670, + "work english": 62646, + "processing involves": 42879, + "languages supervised": 28797, + "algorithm leverages": 2284, + "demonstrate advantages": 13862, + "advantages approach": 1948, + "structured inference": 53155, + "features associated": 20525, + "information users": 26146, + "art solutions": 4403, + "art algorithms": 4211, + "online shopping": 38383, + "voice text": 61724, + "incorporate context": 25347, + "multi round": 36003, + "latent random": 29130, + "model translation": 34491, + "complex dependencies": 9623, + "translations different": 58707, + "order deal": 38606, + "models quickly": 35393, + "new labeled": 37231, + "alternative way": 2510, + "lower cost": 31209, + "lower quality": 31221, + "experts paper": 19591, + "approach performing": 3639, + "adversarial learning": 1972, + "create data": 11694, + "ner tasks": 36684, + "tasks domains": 55597, + "domains experimental": 16253, + "results achieves": 47488, + "driving force": 16439, + "linear relationships": 30668, + "individual predictions": 25576, + "changes underlying": 8183, + "prediction using": 41751, + "inductive transfer": 25613, + "approaches nlp": 3885, + "require task": 46892, + "training scratch": 58240, + "propose universal": 43690, + "model fine": 33891, + "effective transfer": 16707, + "applied task": 3300, + "introduce techniques": 26870, + "tuning language": 58919, + "tasks reducing": 55843, + "reducing error": 45707, + "performance training": 40605, + "data open": 12522, + "pretrained models": 42169, + "models code": 34820, + "code paper": 8841, + "crawled web": 11687, + "generally better": 22163, + "models process": 35360, + "novel text": 37938, + "work different": 62637, + "different traditional": 15103, + "reduction methods": 45720, + "input sequences": 26333, + "following text": 21271, + "tasks shown": 55886, + "data achieves": 12113, + "produced data": 43018, + "great importance": 23206, + "promote research": 43192, + "words embedding": 62405, + "space extensive": 51863, + "concept based": 9921, + "based multilingual": 5890, + "multilingual embedding": 36080, + "key concepts": 27302, + "inputs outputs": 26365, + "subject predicate": 53555, + "answer span": 3056, + "interaction scenarios": 26611, + "supervised dataset": 53976, + "generative approach": 22587, + "capture structure": 7713, + "structure output": 53125, + "introduce hierarchical": 26811, + "generates words": 22361, + "designed measure": 14324, + "results approaches": 47508, + "question aware": 44721, + "approaches incorporating": 3849, + "benchmark evaluation": 6466, + "annotated different": 2891, + "ensure quality": 17990, + "trained reinforcement": 57850, + "user questions": 60443, + "generated sequence": 22317, + "quality vector": 44598, + "measure performance": 32058, + "analysis yields": 2796, + "learning semantics": 29867, + "models bilingual": 34788, + "handling long": 23425, + "models fall": 35017, + "results improvements": 47672, + "mt model": 35920, + "model having": 33954, + "relies solely": 46268, + "yield improvements": 63097, + "suffer lack": 53770, + "context models": 10676, + "words specifically": 62520, + "develop neural": 14604, + "neural non": 37082, + "context current": 10604, + "method wide": 32704, + "models demonstrate": 34892, + "systems low": 54553, + "propose leverage": 43439, + "information build": 25771, + "obtain large": 38179, + "model operates": 34146, + "select important": 49106, + "matching word": 31924, + "softmax based": 51629, + "differentiable neural": 15140, + "systems word": 54672, + "trained natural": 57822, + "language corpora": 28008, + "corpora models": 11224, + "popularity recent": 41203, + "discuss key": 15471, + "time text": 57231, + "spoken english": 52358, + "word importance": 62216, + "score word": 48882, + "training automatic": 57938, + "role knowledge": 48309, + "traditional deep": 57515, + "average pooling": 5414, + "learn attention": 29346, + "results relation": 47804, + "new solutions": 37318, + "supervision training": 54098, + "dependencies tokens": 14112, + "global dependencies": 22827, + "hard attention": 23440, + "select subset": 49113, + "paper integrate": 39399, + "soft hard": 51622, + "attention context": 4730, + "reward signals": 48072, + "facilitate training": 20278, + "encoding model": 17571, + "solely based": 51642, + "graphs kg": 23187, + "tasks end": 55611, + "task complex": 54962, + "factual questions": 20323, + "ii use": 24506, + "complex real": 9653, + "world settings": 62959, + "models reported": 35437, + "massive amounts": 31879, + "multilingual machine": 36094, + "shared parameters": 50483, + "model creates": 33730, + "language space": 28493, + "according language": 864, + "open door": 38428, + "driven language": 16425, + "research deep": 47013, + "achieve highly": 1157, + "vision tasks": 61644, + "architectures like": 4116, + "tend suffer": 56208, + "tasks inspired": 55691, + "densely connected": 14088, + "model benchmark": 33614, + "datasets sentence": 13415, + "obtain significant": 38190, + "model promising": 34241, + "unstructured data": 59667, + "different modalities": 14992, + "information captured": 25775, + "process neural": 42810, + "parent child": 39747, + "original problem": 38724, + "propose step": 43645, + "wide margin": 61963, + "including approaches": 25238, + "recent deep": 45300, + "adversarial network": 1978, + "text high": 56615, + "produce diverse": 42979, + "based discriminator": 5683, + "better distinguish": 6877, + "generation dialogue": 22446, + "baselines code": 6243, + "features set": 20665, + "previously selected": 42349, + "texts used": 56940, + "corpus identify": 11358, + "evaluation approach": 18575, + "systems build": 54443, + "differences performance": 14827, + "types different": 59082, + "language compare": 27997, + "different paradigms": 15018, + "use long": 59937, + "especially effective": 18274, + "explore multiple": 19719, + "attention layers": 4763, + "practical application": 41457, + "cognitive modeling": 8893, + "performance evaluated": 40326, + "emotional information": 17298, + "propose automated": 43305, + "automated framework": 5044, + "accuracy 88": 920, + "produce correct": 42977, + "questions using": 44815, + "paper mainly": 39420, + "constantly evolving": 10345, + "applications large": 3216, + "legal documents": 30004, + "consider semantic": 10220, + "work kind": 62700, + "methods automated": 32760, + "inference tasks": 25697, + "tasks lack": 55707, + "serve training": 50083, + "construct corpus": 10383, + "combination domain": 9041, + "knowledge provide": 27581, + "knowledge construct": 27425, + "text training": 56818, + "build end": 7396, + "paper tries": 39599, + "monolingual corpus": 35794, + "corresponding english": 11549, + "alignment quality": 2381, + "nli dataset": 37451, + "performing text": 40692, + "learning labeled": 29692, + "examples tasks": 18936, + "examples language": 18915, + "lingual learning": 30709, + "problem method": 42607, + "training yields": 58319, + "unlabeled document": 59570, + "concept set": 9927, + "stage use": 52446, + "supporting documents": 54137, + "languages tested": 28803, + "using wikipedia": 61026, + "test collections": 56336, + "serve input": 50079, + "dataset improves": 12962, + "performance original": 40468, + "highlights importance": 23876, + "datasets better": 13169, + "ai systems": 2122, + "generates human": 22345, + "capable producing": 7628, + "work tackle": 62836, + "problem sentence": 42647, + "sentence boundary": 49523, + "using trained": 60994, + "results general": 47644, + "accuracy models": 1009, + "f1 metric": 20188, + "encoded embeddings": 17477, + "behave like": 6387, + "years significant": 63075, + "improvements language": 25076, + "challenges field": 8049, + "language phenomenon": 28381, + "challenge lack": 7988, + "creating large": 11742, + "step better": 52801, + "positive examples": 41281, + "approach classification": 3445, + "methods consistently": 32800, + "translated text": 58560, + "language help": 28096, + "deep contextualized": 13686, + "contextualized word": 10812, + "easily added": 16536, + "challenging nlp": 8120, + "analysis showing": 2757, + "downstream models": 16342, + "semi supervision": 49470, + "supervision signals": 54094, + "signals paper": 50835, + "languages target": 28799, + "lexical representation": 30379, + "utilize lexical": 61098, + "higher resource": 23842, + "sentences compared": 49692, + "18 bleu": 179, + "multilingual training": 36127, + "dataset fine": 12930, + "tuning pre": 58943, + "trained multi": 57814, + "lingual zero": 30740, + "shot setting": 50642, + "setting present": 50342, + "architecture address": 4024, + "premise hypothesis": 41811, + "model relationship": 34300, + "inference introduce": 25663, + "final predictions": 20828, + "results improved": 47670, + "results achieving": 47489, + "art scores": 4395, + "sentences long": 49749, + "research related": 47114, + "methodology applied": 32717, + "dataset tested": 13116, + "highest correlation": 23851, + "similar models": 51055, + "learning signal": 29877, + "deep generative": 13692, + "posterior probability": 41363, + "investigate model": 26967, + "tasks achieving": 55490, + "including natural": 25280, + "text similarity": 56771, + "architectures shown": 4123, + "model machine": 34080, + "models end": 34965, + "models investigated": 35147, + "pairs similar": 39217, + "focus end": 21157, + "distant language": 15554, + "data examples": 12335, + "network structures": 36809, + "end encoder": 17634, + "task experiment": 55064, + "approach provide": 3658, + "provide significant": 44129, + "learning automatic": 29526, + "approach small": 3697, + "studied language": 53225, + "data suitable": 12709, + "linguistic diversity": 30765, + "using weighted": 61024, + "texts challenging": 56862, + "better current": 6873, + "techniques perform": 56119, + "model question": 34266, + "generation knowledge": 22480, + "generating questions": 22389, + "mechanism generate": 32120, + "art zero": 4443, + "leading state": 29299, + "performance key": 40401, + "representations train": 46772, + "models downstream": 34940, + "evaluate pre": 18487, + "showing strong": 50690, + "detection techniques": 14534, + "classification framework": 8473, + "gap present": 21974, + "event specific": 18788, + "documents domains": 15874, + "release annotated": 46141, + "attention modules": 4790, + "ability extract": 608, + "scratch using": 48946, + "training speed": 58266, + "various sizes": 61391, + "models implicit": 35104, + "predictions model": 41763, + "task example": 55060, + "performance reducing": 40526, + "models identifying": 35098, + "entities sentence": 18082, + "sentence does": 49545, + "interaction graph": 26599, + "facilitate evaluation": 20268, + "created datasets": 11726, + "covering diverse": 11656, + "evaluations proposed": 18766, + "methods natural": 32952, + "approach capture": 3442, + "demonstrate joint": 13925, + "embeddings compared": 17097, + "using relevant": 60907, + "subset data": 53607, + "data related": 12594, + "related methods": 45917, + "methods furthermore": 32873, + "demonstrate used": 13994, + "used address": 60083, + "natural questions": 36463, + "http github": 24048, + "information valuable": 26152, + "systems create": 54463, + "systems help": 54518, + "explore feasibility": 19707, + "end create": 17624, + "annotated named": 2906, + "lstm word": 31288, + "module learns": 35765, + "informative ones": 26174, + "data provide": 12574, + "provide details": 44052, + "resulting data": 47463, + "data recently": 12590, + "significant attention": 50851, + "approaches generate": 3834, + "work proposed": 62791, + "proposed generate": 43787, + "address research": 1797, + "research gap": 47044, + "gap presenting": 21975, + "generated humans": 22293, + "easily understood": 16552, + "propose ways": 43703, + "aware models": 5462, + "significant reductions": 50917, + "model sizes": 34392, + "model layers": 34047, + "best hypotheses": 6766, + "recognition results": 45533, + "text structured": 56791, + "study end": 53369, + "unified approach": 59466, + "achieve reasonable": 1184, + "demonstrated model": 14013, + "work extends": 62664, + "models fully": 35045, + "fully exploits": 21727, + "competitive models": 9550, + "changes meaning": 8178, + "desired properties": 14350, + "challenge recent": 8012, + "works use": 62914, + "sequence target": 50008, + "target label": 54821, + "ones experiments": 38338, + "experiments effectiveness": 19425, + "generate plausible": 22228, + "diverse sentences": 15716, + "parsing methods": 39785, + "methods problem": 32992, + "goal enable": 22883, + "expert written": 19586, + "millions users": 33265, + "users share": 60481, + "media sites": 32181, + "data public": 12578, + "challenging reasons": 8135, + "figurative language": 20790, + "problem challenging": 42517, + "unlike previously": 59605, + "supervised deep": 53978, + "data makes": 12479, + "detection benchmark": 14462, + "benchmark new": 6483, + "mentions multiple": 32307, + "data small": 12669, + "massively multilingual": 31892, + "language embeddings": 28043, + "differences language": 14823, + "task instance": 55141, + "high accuracies": 23706, + "label spaces": 27730, + "learning transfer": 29919, + "data auxiliary": 12174, + "sequence classification": 49915, + "single multi": 51321, + "task baselines": 54931, + "embeddings document": 17114, + "abstractive text": 774, + "summarization methods": 53891, + "methods adopt": 32740, + "representations fail": 46666, + "information carried": 25777, + "rouge points": 48353, + "used dataset": 60137, + "clean dataset": 8644, + "approach generates": 3546, + "performance new": 40455, + "based structured": 6065, + "entities document": 18044, + "classification regression": 8531, + "work employs": 62645, + "exact inference": 18849, + "propose bidirectional": 43315, + "inference algorithm": 25640, + "absolute accuracy": 738, + "accuracy popular": 1024, + "reading understanding": 45092, + "major research": 31521, + "problem field": 42565, + "nlp work": 37560, + "using knowledge": 60745, + "knowledge large": 27544, + "attention architecture": 4711, + "novel dual": 37808, + "models information": 35131, + "documents experiments": 15879, + "additionally develop": 1717, + "techniques demonstrate": 56075, + "span text": 51934, + "text single": 56773, + "single entity": 51302, + "response propose": 47400, + "efficient self": 16898, + "attention encoder": 4741, + "perform multi": 40122, + "multi instance": 35973, + "instance learning": 26425, + "datasets achieve": 13141, + "dataset order": 13019, + "larger existing": 29077, + "existing human": 19076, + "task difficult": 55021, + "years deep": 63054, + "results sentiment": 47825, + "approach current": 3475, + "arabic corpus": 3995, + "applying different": 3361, + "accuracy sentiment": 1045, + "available arabic": 5263, + "sentiment dataset": 49839, + "prior research": 42411, + "fully utilize": 21749, + "proposed sentence": 43894, + "optimal transport": 38533, + "multi scale": 36004, + "models supervised": 35564, + "supervised semantic": 54038, + "training based": 57941, + "text pair": 56684, + "dataset extensive": 12920, + "proposed hierarchical": 43791, + "cnn long": 8770, + "paper design": 39335, + "com neulab": 9021, + "describes semeval": 14232, + "semeval 2018": 49432, + "2018 task": 277, + "knowledge use": 27642, + "model interactions": 34013, + "question answers": 44717, + "incorporate commonsense": 25345, + "augment input": 4941, + "relation embedding": 45970, + "official test": 38312, + "data code": 12210, + "provide high": 44084, + "different subsets": 15087, + "role understanding": 48324, + "set concepts": 50122, + "grained semantics": 23045, + "built large": 7486, + "academic commercial": 790, + "results end": 47607, + "make model": 31580, + "closes gap": 8714, + "cross lingually": 11860, + "chinese paper": 8316, + "propose build": 43316, + "corpora limited": 11216, + "build sentence": 7426, + "tokens based": 57323, + "corpus propose": 11408, + "local attention": 30929, + "alignment translation": 2387, + "paper bring": 39283, + "representation proposed": 46572, + "similarity used": 51127, + "provide explanations": 44068, + "model increase": 33993, + "mechanism transformer": 32145, + "results machine": 47706, + "does explicitly": 15946, + "inputs work": 26369, + "relative positions": 46108, + "bleu bleu": 7203, + "relation aware": 45965, + "aware self": 5469, + "portion data": 41221, + "possible identify": 41328, + "model correctly": 33725, + "correctly classify": 11491, + "learn model": 29398, + "modeling different": 34571, + "output sentence": 38998, + "representation train": 46595, + "train using": 57657, + "models recognizing": 35415, + "sentences produced": 49771, + "model experiment": 33849, + "results given": 47649, + "quality diversity": 44511, + "diversity generated": 15735, + "facilitate development": 20265, + "geometric properties": 22655, + "end text": 17715, + "search word": 48988, + "embedding dimension": 17024, + "novel class": 37782, + "like neural": 30492, + "answering cqa": 3066, + "explore new": 19720, + "new problem": 37289, + "information external": 25855, + "structured outputs": 53168, + "structured output": 53167, + "develop large": 14593, + "network multi": 36770, + "accuracy sequence": 1046, + "model scores": 34339, + "output label": 38978, + "exploit hierarchical": 19654, + "hierarchical information": 23672, + "model structured": 34416, + "success nlp": 53715, + "work compare": 62602, + "poses significant": 41252, + "features best": 20531, + "setting proposed": 50344, + "work help": 62677, + "automatically evaluating": 5167, + "features semantic": 20661, + "gain better": 21904, + "annotation study": 2972, + "text methods": 56659, + "speakers different": 52004, + "shows possible": 50792, + "sound change": 51735, + "syntactic differences": 54301, + "using newly": 60839, + "learns different": 29957, + "despite having": 14365, + "poor results": 41143, + "encourage researchers": 17600, + "purposes paper": 44417, + "manual work": 31753, + "standard corpus": 52478, + "various metrics": 61363, + "corpus language": 11367, + "incorporate entity": 25353, + "expensive obtain": 19213, + "based contrastive": 5647, + "method alleviates": 32378, + "alleviates need": 2424, + "need data": 36552, + "domain demonstrate": 16043, + "ner model": 36678, + "proposed feature": 43775, + "work work": 62860, + "work systematically": 62834, + "generated word": 22334, + "information generated": 25892, + "generating embeddings": 22373, + "quality embeddings": 44514, + "introduce framework": 26806, + "multiple experimental": 36214, + "models employing": 34957, + "provide greater": 44082, + "sampled data": 48461, + "code https": 8818, + "general public": 22084, + "achieves 30": 1288, + "corpus furthermore": 11347, + "achieve substantial": 1208, + "substantial gain": 53619, + "requires reasoning": 46948, + "reasoning using": 45232, + "similar datasets": 51036, + "datasets focus": 13278, + "knowledge specifically": 27617, + "results substantial": 47863, + "validation set": 61196, + "accuracy task": 1058, + "validation test": 61197, + "fully neural": 21737, + "forward network": 21403, + "yields higher": 63123, + "accuracy approach": 935, + "language barriers": 27973, + "human parity": 24211, + "require significant": 46887, + "significant amounts": 50850, + "furthermore training": 21840, + "evaluate standard": 18506, + "tasks question": 55831, + "called hybrid": 7547, + "human intuitions": 24178, + "multiple aspects": 36169, + "attention text": 4834, + "text question": 56724, + "predictions experimental": 41758, + "accuracy 84": 916, + "languages rich": 28776, + "purpose evaluation": 44401, + "datasets based": 13165, + "corpus russian": 11424, + "substantially outperform": 53643, + "outperform competitive": 38786, + "previous years": 42324, + "years based": 63051, + "studies semantic": 53299, + "similarity analysis": 51082, + "different english": 14916, + "studied languages": 53226, + "successful approaches": 53735, + "approaches english": 3809, + "models directly": 34922, + "score systems": 48876, + "development novel": 14694, + "semantic aware": 49240, + "text complex": 56501, + "studies current": 53255, + "analysis application": 2612, + "approaches semantic": 3918, + "work training": 62846, + "models available": 34747, + "parallel dataset": 39648, + "translating text": 58570, + "text code": 56494, + "input natural": 26302, + "input languages": 26290, + "model facilitate": 33876, + "method benchmark": 32402, + "survey recent": 54216, + "generation introduce": 22478, + "techniques compare": 56070, + "properties models": 43267, + "common problems": 9194, + "generation diversity": 22450, + "finally conduct": 20845, + "known datasets": 27657, + "systems common": 54452, + "systems known": 54539, + "method analyzing": 32381, + "common framework": 9177, + "modern neural": 35715, + "learning speech": 29890, + "research research": 47116, + "group based": 23271, + "models automatic": 34744, + "text usually": 56837, + "post edit": 41345, + "systems addition": 54424, + "limited available": 30572, + "synthetic corpus": 54369, + "translating source": 58569, + "source publicly": 51791, + "text dialogue": 56535, + "level addition": 30057, + "specific discourse": 52070, + "like structures": 30507, + "multiple low": 36242, + "regression task": 45821, + "approach english": 3513, + "english data": 17792, + "competition results": 9534, + "support researchers": 54125, + "researchers want": 47170, + "build novel": 7419, + "framework makes": 21560, + "core semantic": 11155, + "al 2005": 2230, + "intelligence paper": 26538, + "task mining": 55214, + "received significant": 45266, + "task targets": 55428, + "automated classification": 5038, + "models annotated": 34713, + "approach demonstrates": 3482, + "demonstrates superior": 14047, + "classification current": 8449, + "datasets recent": 13394, + "equally important": 18191, + "context introduce": 10662, + "produces accurate": 43025, + "unsupervised sentence": 59730, + "representations classification": 46626, + "unsupervised state": 59734, + "research multi": 47075, + "lingual cross": 30693, + "analysis focused": 2667, + "reach performance": 45052, + "datasets supervised": 13449, + "annual conference": 3020, + "combine models": 9068, + "models conventional": 34863, + "retrieval models": 47956, + "match human": 31897, + "available existing": 5289, + "use annotated": 59821, + "corpora languages": 11213, + "supervised speech": 54050, + "recognition work": 45547, + "unseen languages": 59651, + "combining existing": 9111, + "trained single": 57872, + "additional improvements": 1674, + "language finally": 28070, + "pairs available": 39170, + "challenges adapting": 8027, + "evaluate simple": 18505, + "simple unsupervised": 51223, + "models varying": 35671, + "varying degrees": 61430, + "discuss challenges": 15462, + "classifiers using": 8628, + "detection data": 14470, + "unigrams bigrams": 59496, + "proposed data": 43750, + "endangered language": 17731, + "language documentation": 28033, + "neural multi": 36988, + "source model": 51783, + "datasets multi": 13339, + "terms memory": 56299, + "small memory": 51483, + "autoencoder architecture": 5025, + "ones experimental": 38336, + "using binary": 60598, + "30 times": 358, + "learning provides": 29828, + "methods applications": 32751, + "learning applied": 29518, + "longstanding challenge": 31062, + "challenge language": 7989, + "work formulate": 62673, + "based dataset": 5666, + "generalizes unseen": 22157, + "develop automatic": 14575, + "indo aryan": 25593, + "aryan languages": 4509, + "languages india": 28695, + "accuracy 96": 928, + "used corpora": 60130, + "based study": 6066, + "attention method": 4785, + "method encoding": 32481, + "apply self": 3352, + "mitigate issues": 33387, + "hybrid models": 24320, + "explicit control": 19613, + "model approaches": 33573, + "approaches strong": 3927, + "attention heads": 4757, + "key observation": 27323, + "provides significant": 44226, + "dataset machine": 12987, + "dataset uses": 13129, + "gap performance": 21971, + "performance 20": 40169, + "20 f1": 225, + "performance varies": 40618, + "temporal reasoning": 56191, + "past tense": 39936, + "quantify extent": 44608, + "strong correlations": 53023, + "simple extension": 51167, + "learning better": 29547, + "new end": 37185, + "input addition": 26253, + "addition traditional": 1647, + "small improvements": 51476, + "long document": 31010, + "encoding long": 17568, + "text encoders": 56553, + "lead higher": 29259, + "baselines including": 6271, + "based single": 6038, + "single encoder": 51299, + "temporal information": 56188, + "approach perform": 3638, + "verb phrase": 61511, + "new events": 37195, + "performance single": 40565, + "multi speaker": 36009, + "handle unseen": 23417, + "sample text": 48457, + "text articles": 56441, + "task main": 55198, + "articles paper": 4473, + "different modules": 14998, + "retrieved knowledge": 47986, + "meta learner": 32336, + "continuously update": 10859, + "specifically target": 52228, + "performance diverse": 40298, + "transfer tasks": 58424, + "models allow": 34707, + "transfer task": 58423, + "learning pretrained": 29815, + "use transfer": 60057, + "learning sentence": 29868, + "model bias": 33628, + "bias pre": 7037, + "faceted search": 20257, + "networks domain": 36847, + "models bidirectional": 34786, + "improved overall": 24954, + "scores compared": 48896, + "previous benchmarks": 42247, + "12 f1": 106, + "previous machine": 42259, + "customer experience": 12054, + "scarcity training": 48676, + "features relevant": 20654, + "large speech": 29017, + "dataset 10": 12788, + "lot recent": 31118, + "words trained": 62534, + "used general": 60196, + "problem recent": 42642, + "effective multi": 16677, + "framework sentence": 21596, + "inductive biases": 25611, + "sources multiple": 51837, + "multiple training": 36305, + "sentences extensive": 49720, + "settings using": 50402, + "able use": 731, + "english grammar": 17818, + "reading writing": 45093, + "need automatic": 36548, + "suggesting potential": 53840, + "potential directions": 41386, + "efficient robust": 16895, + "time applications": 57116, + "approaches different": 3798, + "limitations approach": 30543, + "crowdsourced dataset": 11886, + "poorly task": 41151, + "remaining errors": 46324, + "analysis code": 2627, + "mixed text": 33411, + "sentiment positive": 49855, + "contrastive learning": 10898, + "accuracy 10": 893, + "embedding deep": 17023, + "outstanding performance": 39030, + "performance approaches": 40197, + "incorporate word": 25366, + "classification propose": 8523, + "results clearly": 47534, + "proposed scheme": 43888, + "improvements 10": 25043, + "information crucial": 25797, + "text evaluation": 56561, + "evaluation dataset": 18603, + "released pre": 46179, + "datasets source": 13437, + "weighted average": 61924, + "final result": 20829, + "word2vec embeddings": 62347, + "ensemble approaches": 17971, + "approach highly": 3554, + "previous statistical": 42284, + "depend heavily": 14100, + "leveraging transfer": 30340, + "learning train": 29916, + "models multi": 35235, + "title generation": 57270, + "focus low": 21177, + "usage language": 59801, + "external factors": 19935, + "face face": 20242, + "time social": 57215, + "speakers language": 52006, + "considered low": 10249, + "corpora evaluate": 11197, + "existing corpora": 19048, + "used benchmark": 60103, + "benchmark future": 6469, + "subtasks semeval": 53672, + "historical text": 23961, + "proposed evaluation": 43770, + "evaluation practices": 18677, + "provide clear": 44025, + "rigorous evaluation": 48149, + "evaluation including": 18627, + "effective method": 16671, + "previously seen": 42348, + "process specifically": 42830, + "sentence use": 49666, + "pairs source": 39219, + "similar input": 51048, + "based similarities": 6035, + "knowledge propose": 27579, + "annotated gold": 2898, + "case using": 7803, + "work represents": 62807, + "applied languages": 3277, + "training multilingual": 58183, + "performance monolingual": 40443, + "capable performing": 7625, + "speech speech": 52295, + "numerous studies": 38070, + "detection approaches": 14460, + "problem social": 42658, + "input instance": 26286, + "false positive": 20381, + "tweets posted": 59019, + "posted users": 41357, + "better utilization": 6991, + "switchboard corpus": 54255, + "15 times": 152, + "tasks makes": 55738, + "sacrificing quality": 48423, + "media increasingly": 32168, + "needed order": 36602, + "overview research": 39115, + "research data": 47009, + "mining natural": 33319, + "different areas": 14842, + "problem lack": 42590, + "corpus languages": 11368, + "pairs extracted": 39191, + "extracted open": 20017, + "main problem": 31452, + "ability make": 623, + "formal informal": 21347, + "advantage method": 1942, + "make corpus": 31555, + "informal style": 25742, + "generated corpus": 22281, + "million comments": 33251, + "explore relationship": 19731, + "training signal": 58257, + "signal training": 50830, + "shared model": 50479, + "particular style": 39863, + "output distributions": 38969, + "textual input": 56970, + "models consistently": 34851, + "style language": 53489, + "generation capabilities": 22429, + "results publicly": 47793, + "methods tasks": 33069, + "vital task": 61693, + "task better": 54939, + "machine understanding": 31394, + "texts difficult": 56874, + "novel effective": 37810, + "expert human": 19582, + "task attempt": 54919, + "require prior": 46884, + "methods word2vec": 33103, + "user based": 60404, + "user profile": 60436, + "results user": 47897, + "capsule network": 7644, + "model way": 34532, + "strong search": 53049, + "notoriously difficult": 37737, + "german language": 22674, + "work progress": 62770, + "output quality": 38995, + "time possible": 57194, + "costs paper": 11607, + "non negligible": 37669, + "study examines": 53373, + "specifically use": 52232, + "student learning": 53211, + "field using": 20773, + "data boost": 12192, + "boost model": 7253, + "data reach": 12584, + "data respectively": 12612, + "approach predict": 3646, + "using sentiment": 60927, + "forum posts": 21398, + "art deep": 4246, + "learning time": 29912, + "series models": 50066, + "special focus": 52019, + "random sample": 44885, + "term pairs": 56253, + "false negatives": 20380, + "similar images": 51046, + "previous attempts": 42242, + "knowledge inference": 27523, + "called semantic": 7553, + "use commonsense": 59845, + "developed neural": 14636, + "systems outperform": 54577, + "existing multi": 19111, + "modal fusion": 33457, + "fusion methods": 21857, + "video understanding": 61588, + "multiple modalities": 36249, + "modalities different": 33468, + "rarely explored": 45005, + "modal representations": 33465, + "task finally": 55083, + "results widely": 47909, + "representations additional": 46615, + "systematically compare": 54410, + "compare popular": 9357, + "popular neural": 41176, + "integrating word": 26526, + "features outperform": 20635, + "second best": 48999, + "provides additional": 44180, + "10 f1": 41, + "summarization models": 53893, + "short documents": 50554, + "new hierarchical": 37217, + "models discourse": 34923, + "linguistic processes": 30783, + "samples non": 48484, + "possible achieve": 41314, + "provide useful": 44148, + "learning self": 29864, + "learning leveraging": 29708, + "enhancing model": 17948, + "model performances": 34198, + "model confidence": 33698, + "instance selection": 26429, + "automatically based": 5144, + "terms better": 56271, + "evaluation phase": 18672, + "score 75": 48810, + "frequently occurring": 21686, + "use recently": 59989, + "serve starting": 50080, + "aggregating information": 2076, + "multiple mentions": 36246, + "mentions entity": 32305, + "approaches automated": 3770, + "results conll": 47558, + "fluent sentences": 21133, + "present real": 41996, + "world application": 62926, + "methods improving": 32897, + "improving neural": 25187, + "commerce platform": 9151, + "simulation experiments": 51264, + "experiments paper": 19485, + "work real": 62800, + "thorough analysis": 57055, + "improve task": 24932, + "datasets target": 13451, + "different underlying": 15115, + "building state": 7471, + "languages challenging": 28613, + "data extremely": 12357, + "settings propose": 50391, + "approaches need": 3881, + "augmentation methods": 4963, + "finally explore": 20858, + "explore cross": 19693, + "train single": 57634, + "model related": 34297, + "tree learning": 58747, + "sentence syntactic": 49653, + "work models": 62726, + "like sentence": 30501, + "ability models": 626, + "single correct": 51290, + "needs learn": 36609, + "task current": 54989, + "framework utilize": 21624, + "data addition": 12120, + "methods exploit": 32854, + "predicted labels": 41667, + "labels unlabeled": 27855, + "samples based": 48465, + "based prediction": 5945, + "prediction confidence": 41698, + "augment training": 4944, + "sampling bias": 48500, + "explore data": 19695, + "select high": 49104, + "unlabeled samples": 59577, + "suffers low": 53792, + "resource scenarios": 47273, + "obtained pre": 38217, + "work perform": 62747, + "embeddings help": 17145, + "tasks embeddings": 55605, + "piece information": 40876, + "learning objectives": 29786, + "integrating domain": 26521, + "auto completion": 5013, + "task specifically": 55405, + "specifically address": 52179, + "queries work": 44659, + "work improve": 62683, + "goal building": 22878, + "focus using": 21212, + "time information": 57166, + "information study": 26108, + "datasets previous": 13375, + "methods accuracy": 32727, + "approach novel": 3614, + "novel reward": 37911, + "reward functions": 48068, + "including human": 25261, + "dataset strong": 13102, + "lack robustness": 27912, + "robustness propose": 48293, + "significantly increases": 50984, + "model making": 34089, + "learning capabilities": 29550, + "based adversarial": 5559, + "types adversarial": 59077, + "task recently": 55324, + "samples target": 48489, + "better predictions": 6943, + "german russian": 22675, + "nature neural": 36485, + "theoretically sound": 57033, + "constraints present": 10377, + "present algorithm": 41842, + "model bleu": 33632, + "implementation available": 24638, + "classification systems": 8560, + "domain agnostic": 16016, + "monolingual cross": 35796, + "lingual multilingual": 30715, + "languages german": 28680, + "results monolingual": 47730, + "combining machine": 9113, + "approaches discuss": 3799, + "corpus target": 11441, + "easier learn": 16526, + "algorithm automatically": 2263, + "images text": 24554, + "failure cases": 20352, + "facilitate future": 20269, + "research introduce": 47057, + "new benchmark": 37142, + "gender bias": 22035, + "winograd schema": 62073, + "approach combination": 3449, + "affecting performance": 2021, + "dataset code": 12839, + "word attention": 62116, + "better sentence": 6964, + "firstly propose": 21065, + "directional gated": 15277, + "bi gru": 7009, + "entity centric": 18098, + "combination model": 9044, + "combines multiple": 9098, + "datasets making": 13325, + "multiple real": 36269, + "framework contains": 21481, + "content similarity": 10557, + "enable better": 17421, + "capability generating": 7608, + "producing high": 43039, + "document recent": 15824, + "coherence model": 8909, + "fashion using": 20418, + "data empirical": 12317, + "efficiently capture": 16912, + "qualitative evaluation": 44476, + "complementary aspects": 9588, + "embeddings propose": 17196, + "approach generalizes": 3543, + "correlate strongly": 11508, + "effective using": 16710, + "new evidence": 37196, + "mixture experts": 33419, + "approaches yield": 3957, + "image based": 24529, + "answer queries": 3048, + "sentence paragraph": 49615, + "text addition": 56424, + "wikipedia entity": 62048, + "address aforementioned": 1741, + "aforementioned issues": 2037, + "improve models": 24875, + "network predicts": 36787, + "models adapt": 34689, + "leads state": 29329, + "art single": 4401, + "languages aligned": 28598, + "works typically": 62913, + "use retrieval": 60000, + "inference paper": 25678, + "propose unified": 43688, + "directly optimizes": 15327, + "improvements observed": 25088, + "near human": 36505, + "performance languages": 40410, + "amounts parallel": 2555, + "pairs work": 39235, + "having access": 23485, + "effect language": 16614, + "iterative translation": 27128, + "english benchmarks": 17778, + "benchmarks models": 6532, + "supervised supervised": 54053, + "important input": 24735, + "understand limitations": 59302, + "limitations methods": 30550, + "lack information": 27894, + "prediction label": 41713, + "trained maximum": 57787, + "tune models": 58858, + "high entropy": 23734, + "examples fine": 18904, + "tuned models": 58880, + "models interpretable": 35141, + "reduction accuracy": 45717, + "accuracy loss": 1000, + "maximizing mutual": 31964, + "focus training": 21207, + "simple architecture": 51136, + "parsing recent": 39794, + "model structure": 34415, + "neural methods": 36970, + "methods end": 32838, + "implicitly learns": 24670, + "information explicitly": 25851, + "entities involved": 18059, + "baselines significantly": 6300, + "processing previous": 42925, + "work demonstrated": 62626, + "inference data": 25650, + "conducting extensive": 10102, + "using auxiliary": 60578, + "compared single": 9453, + "learning addition": 29503, + "performance transfer": 40607, + "simple greedy": 51176, + "improvement comes": 24997, + "propose flexible": 43391, + "additional computational": 1657, + "corpus built": 11291, + "using output": 60851, + "work problem": 62769, + "learning trained": 29917, + "range models": 44923, + "yields substantial": 63134, + "created different": 11727, + "different versions": 15124, + "achieved near": 1252, + "techniques address": 56056, + "exhibit poor": 19003, + "performance f1": 40341, + "score 37": 48791, + "research avenues": 46991, + "understanding propose": 59386, + "processing method": 42889, + "method enriching": 32485, + "representation vector": 46603, + "consists steps": 10330, + "word2vec fasttext": 62348, + "based target": 6082, + "structural complexity": 53075, + "metric measure": 33120, + "dataset experiments": 12918, + "unsupervised semantic": 59727, + "context query": 10700, + "approaches usually": 3951, + "address limitations": 1778, + "better zero": 6994, + "shot performance": 50635, + "performance robust": 40542, + "specific training": 52164, + "need model": 36584, + "multilingual encoder": 36082, + "architecture demonstrate": 4039, + "learns language": 29962, + "shot translation": 50656, + "using smaller": 60950, + "tasks method": 55745, + "paper specifically": 39575, + "standard rnn": 52522, + "model known": 34033, + "including novel": 25286, + "work establish": 62647, + "establish state": 18346, + "transfer sentence": 58419, + "models given": 35063, + "size dataset": 51380, + "uses pre": 60528, + "matching model": 31915, + "achieves mean": 1345, + "accuracy 64": 901, + "significant gain": 50866, + "describes submitted": 14235, + "submitted semeval": 53586, + "texts large": 56899, + "labeled corpora": 27736, + "models feature": 35021, + "feature extractors": 20490, + "models support": 35565, + "embeddings train": 17232, + "information raw": 26039, + "end way": 17727, + "textual modalities": 56973, + "complete task": 9603, + "language grounding": 28093, + "results attention": 47511, + "task terms": 55432, + "attention approach": 4709, + "embeddings learnt": 17166, + "context different": 10614, + "wikidata knowledge": 62038, + "different entity": 14918, + "modeling lexical": 34589, + "reliable evaluation": 46251, + "method models": 32579, + "models researchers": 35448, + "store information": 52873, + "information human": 25906, + "work test": 62840, + "sentence corpus": 49537, + "originally written": 38746, + "increased use": 25432, + "used benchmarks": 60106, + "second introduce": 49007, + "performance level": 40416, + "multi choice": 35943, + "task makes": 55203, + "end task": 17713, + "important words": 24793, + "representations ii": 46684, + "position aware": 41260, + "improvement prior": 25019, + "art pretrained": 4359, + "applied successfully": 3298, + "learn relation": 29411, + "linear classifiers": 30652, + "performance methods": 40436, + "methods integrating": 32906, + "evaluation setups": 18717, + "suitable evaluation": 53857, + "models primarily": 35355, + "approach evaluating": 3521, + "evaluating language": 18559, + "proposed using": 43923, + "models exhibit": 34985, + "models discussed": 34928, + "aims train": 2217, + "shared encoder": 50468, + "pairs sentences": 39216, + "shared latent": 50477, + "issue introduce": 27063, + "proposed enhance": 43767, + "tasks report": 55853, + "task featured": 55080, + "set tasks": 50258, + "tasks binary": 55527, + "description papers": 14247, + "consuming task": 10453, + "evaluate compare": 18445, + "corpus addition": 11270, + "results finally": 47633, + "learn implicit": 29382, + "sota methods": 51727, + "generating human": 22378, + "sota systems": 51732, + "tasks standard": 55908, + "new target": 37331, + "like deep": 30467, + "quite difficult": 44829, + "difficult understand": 15192, + "model stage": 34406, + "aim identify": 2149, + "model errors": 33832, + "world large": 62946, + "com ibm": 9016, + "features instead": 20606, + "features generate": 20589, + "process present": 42817, + "present public": 41990, + "public dataset": 44313, + "peer reviews": 40011, + "accept reject": 809, + "collection process": 8985, + "novel nlp": 37886, + "second task": 49026, + "use online": 59967, + "relational databases": 46005, + "vast data": 61438, + "type information": 59057, + "accuracy 17": 896, + "17 absolute": 173, + "semantic coverage": 49265, + "distance metrics": 15549, + "better learn": 6908, + "representations approach": 46619, + "provides state": 44228, + "past year": 39939, + "advances sequence": 1926, + "modeling machine": 34595, + "recent transformer": 45361, + "transformer model": 58495, + "seq2seq architectures": 49894, + "new architectures": 37137, + "techniques apply": 56060, + "seq2seq architecture": 49893, + "relation sentences": 45996, + "current open": 11994, + "quality sentence": 44579, + "global structural": 22843, + "applied different": 3267, + "effectiveness generality": 16781, + "art open": 4321, + "directly text": 15337, + "models answer": 34715, + "types questions": 59113, + "question context": 44724, + "multiple conditions": 36186, + "greatly outperforms": 23236, + "supervised systems": 54054, + "predict relations": 41652, + "neural entity": 36954, + "helps explain": 23605, + "model contrast": 33715, + "contrast conventional": 10874, + "spaces paper": 51911, + "model sequence": 34360, + "tree search": 58756, + "lstm used": 31286, + "used summarize": 60316, + "parameters work": 39729, + "outperformed state": 38842, + "source semantic": 51794, + "data apply": 12141, + "model pre": 34217, + "method different": 32463, + "datasets second": 13413, + "entity given": 18107, + "way alleviate": 61791, + "problems neural": 42716, + "sentences usually": 49804, + "subword segmentation": 53687, + "trains model": 58324, + "model multiple": 34113, + "training addition": 57926, + "experiment multiple": 19243, + "improvements especially": 25071, + "resource domain": 47222, + "settings work": 50404, + "assignment problem": 4606, + "learning inspired": 29684, + "tools support": 57385, + "data recent": 12587, + "propose cross": 43342, + "suffer low": 53774, + "shed new": 50527, + "participated semeval": 39818, + "achieving f1": 1403, + "bias problem": 7039, + "suffer problems": 53778, + "paper employ": 39346, + "inverse reinforcement": 26929, + "generation specifically": 22549, + "generate higher": 22208, + "million articles": 33250, + "demonstrate high": 13918, + "high diversity": 23731, + "abstractive extractive": 770, + "extraction strategies": 20115, + "strategies used": 52919, + "methods data": 32809, + "techniques yield": 56154, + "essays written": 18318, + "spanish german": 51943, + "constituency parses": 10350, + "dependency parses": 14130, + "applications dataset": 3195, + "relationship extraction": 46069, + "semantic drift": 49272, + "approach multiple": 3607, + "capture inter": 7683, + "annotator disagreement": 3013, + "truth value": 58840, + "building intelligent": 7448, + "supervised language": 53992, + "ability capturing": 598, + "catastrophic forgetting": 7833, + "learning novel": 29784, + "novel knowledge": 37846, + "imitation reinforcement": 24579, + "trained approach": 57674, + "verified effectiveness": 61529, + "crucial understanding": 11917, + "related problem": 45928, + "methods particular": 32976, + "sentiment expressed": 49844, + "information general": 25889, + "aims extract": 2192, + "sentence conditioned": 49531, + "lingual information": 30704, + "language shared": 28483, + "iterative process": 27125, + "adversarial neural": 1983, + "unsupervised cross": 59689, + "lingual embeddings": 30700, + "outperform baselines": 38783, + "demonstrate improvements": 13923, + "models believe": 34764, + "non existent": 37652, + "performance boosted": 40223, + "mechanism called": 32103, + "rnn attention": 48182, + "combines multi": 9097, + "multi head": 35963, + "distributed multiple": 15622, + "multiple heads": 36222, + "sequential information": 50043, + "free model": 21644, + "art competitive": 4238, + "nlp benchmarks": 37469, + "vanilla seq2seq": 61216, + "models reach": 35398, + "scores proposed": 48916, + "data split": 12689, + "models augmented": 34741, + "seq2seq based": 49895, + "sequence language": 49942, + "fully capture": 21715, + "data distribution": 12290, + "different lengths": 14976, + "model ability": 33485, + "structure training": 53144, + "training distribution": 58068, + "highly sensitive": 23915, + "reduce human": 45664, + "reduced using": 45686, + "word predictions": 62268, + "number factors": 38004, + "significant effect": 50865, + "perform translation": 40157, + "size fits": 51384, + "parameter efficient": 39668, + "efficient adaptation": 16859, + "technique requires": 56045, + "particular user": 39871, + "various studies": 61401, + "studies proposed": 53289, + "errors lead": 18243, + "network generates": 36749, + "compositional representations": 9748, + "approach low": 3592, + "setting languages": 50329, + "languages different": 28641, + "news outlets": 37409, + "training learning": 58155, + "results compare": 47544, + "terms performance": 56307, + "set predefined": 50223, + "typically relies": 59151, + "train binary": 57570, + "annotation different": 2943, + "evaluating model": 18562, + "methods able": 32725, + "predict token": 41658, + "task set": 55365, + "relations existing": 46029, + "lack ability": 27870, + "hard time": 23452, + "generalizes better": 22154, + "continuous space": 10853, + "generalizing unseen": 22160, + "model helps": 33956, + "suffer various": 53785, + "sequential nature": 50048, + "investigate alternative": 26939, + "encoding text": 17579, + "text consists": 56508, + "information exchange": 25844, + "various classification": 61314, + "classification sequence": 8545, + "benchmarks proposed": 6538, + "representation power": 46566, + "competitive performances": 9557, + "performances compared": 40639, + "bilstm models": 7135, + "typically focused": 59144, + "systems highly": 54520, + "features study": 20676, + "better transfer": 6984, + "transfer languages": 58372, + "study ability": 53317, + "humans perform": 24284, + "approach fine": 3538, + "processing model": 42891, + "tasks downstream": 55598, + "tasks dialogue": 55588, + "positive effects": 41280, + "high time": 23806, + "time reduction": 57204, + "experiments use": 19551, + "use auxiliary": 59831, + "policy makers": 41099, + "languages existing": 28661, + "approaches assume": 3767, + "learning effective": 29608, + "effective word": 16716, + "available low": 5324, + "model corpus": 33723, + "information design": 25807, + "great challenges": 23201, + "model leverage": 34057, + "tree model": 58752, + "process extensive": 42781, + "conducted large": 10087, + "embeddings knowledge": 17156, + "view contrastive": 61596, + "negative examples": 36619, + "main model": 31447, + "multiple metrics": 36248, + "articles provide": 4476, + "improve user": 24938, + "chinese dataset": 8304, + "annotated subset": 2918, + "reference based": 45735, + "greatly improved": 23231, + "correlations human": 11535, + "effect adding": 16610, + "impact different": 24592, + "information learning": 25952, + "representational power": 46609, + "despite impressive": 14369, + "user provided": 60439, + "text spans": 56781, + "performance framework": 40352, + "new topic": 37347, + "years thanks": 63080, + "documents web": 15929, + "conventional text": 11015, + "embedding approach": 17011, + "necessary information": 36531, + "models preserve": 35350, + "academic paper": 792, + "experiments validate": 19554, + "limited information": 30590, + "provides natural": 44214, + "providing explanations": 44241, + "labels furthermore": 27828, + "perform inference": 40114, + "test ability": 56330, + "paper order": 39433, + "automatically determine": 5161, + "network achieve": 36692, + "highlight major": 23866, + "major limitations": 31516, + "main evaluation": 31435, + "evaluation procedures": 18681, + "provides analysis": 44181, + "intermediate representations": 26677, + "sequences propose": 50024, + "classification label": 8482, + "attention learned": 4765, + "learned training": 29486, + "labeled samples": 27761, + "ability leverage": 621, + "combined different": 9078, + "significantly lower": 50987, + "task received": 55320, + "achieve impressive": 1161, + "set performance": 50217, + "examine robustness": 18868, + "data identify": 12412, + "challenging models": 8113, + "nli models": 37454, + "models benefit": 34769, + "translation requires": 58668, + "text learn": 56646, + "address challenging": 1750, + "problem based": 42511, + "approach specifically": 3700, + "including semantic": 25295, + "syntactic knowledge": 54305, + "effectiveness multi": 16797, + "input embeddings": 26272, + "respect input": 47346, + "investigate extent": 26958, + "usually built": 61039, + "propagation paper": 43245, + "approach encoder": 3509, + "distinct existing": 15591, + "highly confident": 23887, + "tasks knowledge": 55704, + "algorithms rely": 2339, + "suggest proposed": 53829, + "achieve remarkable": 1185, + "terms mean": 56298, + "mean reciprocal": 31993, + "reciprocal rank": 45485, + "baselines automatic": 6234, + "largely focused": 29055, + "individual systems": 25580, + "systems benchmark": 54440, + "systems time": 54654, + "time present": 57197, + "carefully chosen": 7759, + "use dataset": 59860, + "dataset examine": 12913, + "sentiment intensity": 49848, + "race gender": 44844, + "supervised techniques": 54057, + "incorporates information": 25376, + "accuracy 80": 912, + "participated shared": 39819, + "tasks trained": 55938, + "teams participating": 56010, + "task submissions": 55419, + "class imbalance": 8402, + "sampling technique": 48509, + "domain terms": 16203, + "terms general": 56292, + "analysis involves": 2686, + "languages lrls": 28719, + "lingual training": 30733, + "training high": 58120, + "tag sets": 54726, + "aims improve": 2199, + "information sharing": 26083, + "demonstrate superior": 13982, + "existing cross": 19050, + "lingual approaches": 30692, + "method combine": 32419, + "called domain": 7544, + "domain adapted": 16008, + "help achieve": 23549, + "various benchmarks": 61311, + "learning pre": 29810, + "different statistical": 15080, + "analysis proposed": 2730, + "possible improvements": 41330, + "scores used": 48927, + "modeling neural": 34605, + "used estimate": 60168, + "allowing users": 2449, + "interpret model": 26710, + "confidence model": 10115, + "online resources": 38381, + "information address": 25757, + "work build": 62592, + "clarification questions": 8386, + "inspired idea": 26407, + "create dataset": 11695, + "samples dataset": 48469, + "intermediate layers": 26675, + "models latent": 35170, + "major advantage": 31500, + "inference learning": 25667, + "proposed graph": 43789, + "performing par": 40687, + "especially rare": 18295, + "models jointly": 35150, + "methods named": 32951, + "entities usually": 18090, + "model detect": 33756, + "bidirectional gated": 7070, + "mechanism designed": 32106, + "learn entity": 29369, + "introduce benchmark": 26786, + "code generation": 8816, + "sentences existing": 49713, + "share similar": 50461, + "order encourage": 38613, + "appeared training": 3144, + "model chinese": 33655, + "new annotation": 37128, + "broadly applicable": 7366, + "believe dataset": 6410, + "task success": 55421, + "articles collected": 4464, + "process corpus": 42765, + "bilingual multilingual": 7112, + "english turkish": 17895, + "data solve": 12673, + "unpaired data": 59625, + "review datasets": 48030, + "method substantially": 32671, + "substantially improves": 53639, + "content preservation": 10547, + "datasets respectively": 13407, + "systems face": 54502, + "applying framework": 3363, + "setting demonstrate": 50319, + "early warning": 16518, + "supervised scenarios": 54037, + "scenarios paper": 48702, + "contribution semeval": 10947, + "standard model": 52506, + "potentially relevant": 41418, + "unbalanced data": 59219, + "approaches building": 3780, + "theoretical results": 57025, + "vectors linear": 61491, + "requires fewer": 46929, + "fewer examples": 20736, + "tasks analyze": 55500, + "passages text": 39923, + "using notion": 60844, + "ignore important": 24491, + "important question": 24756, + "drop accuracy": 16441, + "model accurate": 33500, + "developing language": 14654, + "language word": 28581, + "hierarchical multi": 23680, + "scale language": 48586, + "lower level": 31214, + "prevent catastrophic": 42229, + "machine intelligence": 31303, + "shows effectiveness": 50775, + "specifically investigate": 52211, + "use significantly": 60015, + "utterances paper": 61150, + "neural representation": 37090, + "data settings": 12654, + "text provides": 56721, + "evaluation corpora": 18598, + "different user": 15118, + "user groups": 60420, + "characteristics training": 8245, + "including domain": 25252, + "setting new": 50334, + "sophisticated neural": 51717, + "single question": 51330, + "question paper": 44743, + "stage procedure": 52441, + "stage process": 52442, + "learning result": 29848, + "improved state": 24966, + "significantly challenging": 50947, + "evaluated results": 18547, + "generate correct": 22189, + "approach automated": 3424, + "important application": 24698, + "applications task": 3251, + "methods good": 32880, + "better encoding": 6882, + "methods leverage": 32925, + "hybrid method": 24317, + "leverage advantages": 30255, + "provided different": 44161, + "entailment question": 18004, + "entities text": 18085, + "models built": 34799, + "binary relations": 7154, + "specific models": 52113, + "improvement average": 24985, + "translation languages": 58626, + "combination machine": 9042, + "results consistently": 47560, + "mental states": 32293, + "challenge introduce": 7986, + "tasks suggesting": 55919, + "recent attention": 45295, + "advances word": 1932, + "adaptation target": 1539, + "syntactically similar": 54343, + "similar source": 51067, + "achieves 90": 1295, + "domains provide": 16287, + "error free": 18220, + "absolute increase": 747, + "outside training": 39027, + "capture global": 7674, + "domain study": 16197, + "external features": 19936, + "model methods": 34100, + "ensemble based": 17972, + "models guide": 35072, + "correlates better": 11512, + "intelligence systems": 26539, + "goal explore": 22885, + "potentially leading": 41415, + "text challenging": 56463, + "span prediction": 51929, + "available community": 5271, + "set given": 50161, + "vectors pre": 61494, + "help mitigate": 23577, + "datasets outperforms": 13359, + "models achieving": 34685, + "systems popular": 54590, + "data low": 12470, + "methods reduce": 33010, + "text explicitly": 56568, + "input work": 26360, + "extent models": 19924, + "classifier performance": 8601, + "models applying": 34720, + "style transfer": 53504, + "demonstrate trade": 13991, + "learning provide": 29827, + "different views": 15125, + "view training": 61603, + "data supervision": 12712, + "success unsupervised": 53728, + "modal alignment": 33452, + "fashion proposed": 20417, + "training followed": 58108, + "refinement procedure": 45768, + "comparable supervised": 9314, + "languages little": 28716, + "audio text": 4931, + "models account": 34662, + "data tasks": 12725, + "tasks improve": 55671, + "translation experimental": 58611, + "proposed adversarial": 43714, + "require labeled": 46865, + "pairs proposed": 39211, + "proposed generative": 43788, + "study shot": 53459, + "language domains": 28037, + "metric based": 33111, + "optimization based": 38545, + "based meta": 5844, + "domain low": 16106, + "inter task": 26589, + "realistic setting": 45152, + "tasks diverse": 55593, + "based algorithms": 5562, + "capture complex": 7654, + "propose adaptive": 43283, + "weighted combination": 61927, + "meta training": 32345, + "training tasks": 58286, + "shot task": 50649, + "extensive quantitative": 19909, + "art shot": 4400, + "corpus different": 11325, + "translation direction": 58601, + "ability distinguish": 604, + "original texts": 38732, + "languages annotated": 28599, + "corpus publicly": 11414, + "available work": 5392, + "text identify": 56618, + "major obstacle": 31517, + "generating large": 22380, + "synthetic training": 54385, + "techniques make": 56109, + "created manually": 11729, + "effort needed": 16929, + "improved versions": 24973, + "analysis previous": 2725, + "novel bidirectional": 37780, + "dependency structure": 14138, + "structure features": 53105, + "sentences key": 49743, + "dependency syntactic": 14140, + "form content": 21316, + "text typically": 56828, + "networks makes": 36874, + "baselines data": 6248, + "shows using": 50810, + "perplexity metric": 40739, + "models combination": 34825, + "strategies improve": 52904, + "model reduces": 34291, + "mean absolute": 31987, + "dataset respectively": 13066, + "models presence": 35345, + "representation learned": 46535, + "learned pre": 29474, + "integrating context": 26520, + "novel strategies": 37929, + "propose training": 43678, + "powerful technique": 41446, + "aware representations": 5468, + "introducing extra": 26900, + "yielding better": 63109, + "tasks paraphrase": 55792, + "similarity natural": 51110, + "tasks rely": 55850, + "level character": 30072, + "tasks single": 55893, + "media datasets": 32166, + "results news": 47742, + "identification paper": 24393, + "terms data": 56282, + "data baseline": 12186, + "lingual resources": 30723, + "dataset methods": 12994, + "unlike english": 59593, + "basic semantic": 6333, + "semantic unit": 49371, + "character information": 8205, + "propose low": 43444, + "feature maps": 20497, + "single models": 51320, + "data independent": 12428, + "relying external": 46307, + "time specific": 57219, + "languages cultures": 28628, + "cost time": 11595, + "corpus new": 11388, + "methods investigated": 32909, + "nlu task": 37570, + "large improvement": 28889, + "house data": 24041, + "processing approaches": 42854, + "task consisting": 54972, + "evaluation real": 18692, + "evaluation best": 18584, + "years lot": 63063, + "outstanding results": 39031, + "tasks unfortunately": 55948, + "solve issues": 51680, + "issues introduce": 27092, + "model retains": 34323, + "reasoning abilities": 45182, + "tested model": 56397, + "tasks joint": 55699, + "present supervised": 42031, + "supervision task": 54097, + "different content": 14874, + "reveal new": 48011, + "used simple": 60303, + "incorporating additional": 25378, + "control degree": 10962, + "new methodology": 37255, + "quality labeled": 44539, + "require substantial": 46890, + "solving task": 51706, + "equipped attention": 18197, + "lack systematic": 27920, + "work cross": 62614, + "classification aims": 8429, + "additional resources": 1697, + "best practice": 6802, + "transfer english": 58362, + "prior distributions": 42398, + "using multilingual": 60819, + "framework evaluate": 21507, + "research important": 47052, + "important area": 24700, + "models surprisingly": 35570, + "comparable superior": 9313, + "majority cases": 31527, + "propose additional": 43284, + "including classification": 25243, + "code datasets": 8808, + "obtained https": 38211, + "describes submissions": 14234, + "translation generation": 58616, + "translation improve": 58619, + "learning high": 29671, + "tasks general": 55653, + "domain embeddings": 16055, + "proposed meta": 43802, + "data past": 12540, + "results domain": 47597, + "process improve": 42792, + "methods generate": 32877, + "methods demonstrate": 32817, + "systems process": 54600, + "sentences isolation": 49741, + "introduce context": 26789, + "experiment english": 19238, + "observe model": 38137, + "consistent gains": 10275, + "languages significantly": 28786, + "inputs experiments": 26363, + "quality state": 44582, + "design paper": 14294, + "models small": 35515, + "data generation": 12387, + "generation procedure": 22528, + "identifying potential": 24461, + "systems identifying": 54524, + "learning exploit": 29638, + "experiments zero": 19566, + "spanish french": 51942, + "setting method": 50330, + "data zero": 12780, + "shot language": 50622, + "directions improve": 15294, + "models features": 35022, + "paper seek": 39571, + "quality questions": 44570, + "questions collected": 44777, + "dataset enables": 12907, + "context demonstrate": 10607, + "efficacy model": 16833, + "model comparing": 33676, + "comparing state": 9486, + "baselines human": 6268, + "remains elusive": 46332, + "outperforms range": 38936, + "scores human": 48903, + "evaluation large": 18632, + "based network": 5899, + "dynamic context": 16483, + "effect quality": 16618, + "intra inter": 26759, + "regression based": 45812, + "way leverage": 61817, + "maximize performance": 31960, + "quality task": 44586, + "specific sentences": 52145, + "sentences context": 49697, + "feature level": 20495, + "needs large": 36608, + "code pre": 8843, + "learns generate": 29959, + "reasoning capability": 45188, + "systems experimental": 54497, + "results validate": 47900, + "validate effectiveness": 61174, + "tasks following": 55648, + "knowledge discovery": 27438, + "texts recently": 56917, + "task gained": 55100, + "problems related": 42727, + "related social": 45936, + "data shared": 12655, + "modalities text": 33472, + "images audio": 24552, + "valuable insights": 61203, + "identification using": 24402, + "using visual": 61018, + "comparison traditional": 9509, + "tasks modeled": 55749, + "trained different": 57709, + "search based": 48965, + "achieves improvements": 1342, + "methods attention": 32759, + "retain original": 47922, + "network propose": 36790, + "preserving original": 42125, + "feature information": 20493, + "increasing size": 25464, + "task compare": 54959, + "agnostic approach": 2087, + "order gain": 38621, + "efficiency modularity": 16847, + "learning frameworks": 29657, + "graph allows": 23094, + "decoding algorithms": 13626, + "present text": 42039, + "approach estimating": 3518, + "works propose": 62903, + "user profiling": 60438, + "unified end": 59469, + "fuse information": 21846, + "knowledge approach": 27397, + "learning baselines": 29545, + "outperforms approaches": 38866, + "approaches significantly": 3920, + "users propose": 60475, + "approach information": 3572, + "combines benefits": 9092, + "limited annotations": 30568, + "embeddings multi": 17177, + "datasets conll": 13190, + "model consisting": 33704, + "given class": 22724, + "performs close": 40701, + "interpretable models": 26727, + "context training": 10735, + "prediction multi": 41722, + "context available": 10588, + "trained state": 57882, + "model bert": 33618, + "bert language": 6667, + "modeling framework": 34577, + "use bert": 59834, + "bert embeddings": 6651, + "success paper": 53716, + "arabic texts": 4006, + "relevant textual": 46241, + "constructed corpus": 10408, + "success variety": 53729, + "general models": 22070, + "investigate properties": 26978, + "data affect": 12126, + "input models": 26301, + "data furthermore": 12377, + "collected using": 8970, + "analysis state": 2766, + "systems reason": 54612, + "surprisingly strong": 54192, + "data constructed": 12246, + "hierarchical architecture": 23655, + "representation transfer": 46596, + "order mitigate": 38640, + "mitigate data": 33381, + "second place": 49016, + "score english": 48842, + "2018 shared": 275, + "training low": 58160, + "low precision": 31165, + "tuning methods": 58927, + "transformer variant": 58516, + "number high": 38007, + "corpus additional": 11271, + "used initialize": 60214, + "parameters fine": 39699, + "tuned using": 58892, + "score 74": 48809, + "enhances model": 17945, + "mechanism helps": 32121, + "informative words": 26178, + "furthermore existing": 21819, + "greatly benefit": 23227, + "perform text": 40154, + "using memory": 60797, + "architecture better": 4030, + "perform data": 40084, + "data sampling": 12621, + "classes demonstrate": 8416, + "performance large": 40411, + "large batch": 28849, + "training larger": 58151, + "yields state": 63131, + "available high": 5302, + "crucial real": 11908, + "world domain": 62938, + "leverages domain": 30303, + "human participants": 24212, + "performance improves": 40387, + "performance suffers": 40584, + "humans better": 24273, + "models utilize": 35663, + "architecture design": 4040, + "including data": 25246, + "open set": 38446, + "recognition previous": 45525, + "baselines based": 6238, + "task determining": 55015, + "understanding existing": 59344, + "stress tests": 52980, + "systems ability": 54418, + "models respect": 35450, + "challenging linguistic": 8109, + "area recently": 4148, + "improve learning": 24868, + "creating new": 11745, + "attention structure": 4832, + "quality experiments": 44522, + "investigate relationship": 26983, + "understand user": 59316, + "depends quality": 14163, + "domain specificity": 16195, + "mitigate effects": 33383, + "data availability": 12175, + "20 relative": 230, + "learn policy": 29409, + "score 78": 48813, + "twitter posts": 59039, + "dataset construction": 12864, + "dataset creation": 12873, + "closed world": 8700, + "world assumption": 62931, + "limited human": 30589, + "attributes work": 4912, + "formalize problem": 21360, + "networks specifically": 36911, + "context semantics": 10710, + "mechanism provide": 32139, + "provide interpretable": 44095, + "sampling strategy": 48508, + "reduce burden": 45651, + "features prior": 20648, + "annotated samples": 2912, + "score 83": 48818, + "reference data": 45738, + "encode text": 17472, + "aggregation mechanism": 2079, + "mechanism obtain": 32132, + "dynamic routing": 16491, + "information need": 25986, + "aggregation methods": 2080, + "tasks largely": 55715, + "analysis effect": 2654, + "effect using": 16622, + "varying amounts": 61426, + "local syntactic": 30951, + "task capturing": 54945, + "open knowledge": 38436, + "formal definition": 21345, + "domain applications": 16020, + "human perception": 24213, + "labeled datasets": 27755, + "annotation procedure": 2960, + "present annotation": 41845, + "observations propose": 38128, + "study provide": 53447, + "available labeled": 5315, + "explore possible": 19726, + "different cross": 14882, + "scores different": 48900, + "use human": 59910, + "method identifying": 32527, + "gaussian process": 22017, + "trained small": 57874, + "bayesian approach": 6357, + "languages does": 28645, + "requires substantial": 46953, + "used humans": 60205, + "challenge sets": 8016, + "set testing": 50263, + "structure news": 53124, + "sets used": 50310, + "build machine": 7410, + "capture multiple": 7699, + "including english": 25255, + "benchmarks measure": 6529, + "achieve multi": 1171, + "document describes": 15783, + "tasks fine": 55643, + "trained representations": 57854, + "technique improving": 56035, + "model particular": 34183, + "specific model": 52112, + "reduce need": 45673, + "quite useful": 44834, + "propose embed": 43363, + "2003 dataset": 240, + "dataset document": 12900, + "natural question": 36462, + "develop evaluation": 14588, + "using translated": 61002, + "asked predict": 4522, + "conduct study": 10064, + "cause performance": 7887, + "differences languages": 14824, + "label prediction": 27720, + "directly capture": 15308, + "art overall": 4322, + "task involves": 55148, + "jointly models": 27208, + "cost data": 11578, + "need additional": 36544, + "number baselines": 37985, + "tasks extracting": 55637, + "limited amounts": 30564, + "text words": 56846, + "potential improve": 41393, + "indigenous languages": 25553, + "highly diverse": 23894, + "challenges research": 8077, + "distant languages": 15555, + "areas like": 4154, + "2017 proposed": 266, + "hierarchical data": 23665, + "data demonstrated": 12273, + "technique allows": 56025, + "allows learn": 2470, + "hyperbolic embeddings": 24329, + "hyperbolic space": 24330, + "embeddings encode": 17127, + "space makes": 51875, + "inference question": 25687, + "lstm does": 31259, + "does help": 15950, + "word interaction": 62219, + "advances cross": 1908, + "adaptation problem": 1535, + "perform domain": 40091, + "adaptation experiments": 1524, + "novel state": 37927, + "pairs including": 39196, + "domains code": 16238, + "challenging problems": 8130, + "architecture trained": 4091, + "learn structure": 29430, + "datasets enable": 13245, + "labeling models": 27788, + "conduct systematic": 10065, + "model comparison": 33677, + "comparison analysis": 9489, + "generation given": 22471, + "user defined": 60408, + "model producing": 34240, + "model implement": 33973, + "studied nlp": 53230, + "goal determine": 22880, + "dependent word": 14153, + "consuming labor": 10447, + "models resource": 35449, + "collect data": 8940, + "following research": 21269, + "build single": 7427, + "results robust": 47818, + "knowledge world": 27651, + "relation paths": 45991, + "benefits modeling": 6585, + "framework models": 21565, + "fundamental building": 21776, + "novel ensemble": 37816, + "student knowledge": 53210, + "knowledge gaps": 27489, + "online educational": 38365, + "achieved highest": 1243, + "score evaluation": 48843, + "metrics datasets": 33156, + "model discuss": 33776, + "mainly rely": 31477, + "provide sufficient": 44137, + "meaning work": 32022, + "approaches enable": 3806, + "strong generalization": 53030, + "work open": 62741, + "provide complementary": 44033, + "words consequently": 62385, + "trained task": 57891, + "task agnostic": 54890, + "agnostic data": 2088, + "evaluated various": 18554, + "evaluation benchmarks": 18583, + "models benchmarks": 34767, + "identify aspects": 24413, + "multiple applications": 36167, + "study aim": 53320, + "aim develop": 2144, + "clinical data": 8668, + "curve auc": 12050, + "auc score": 4921, + "dataset obtained": 13016, + "respectively model": 47373, + "efficiently identify": 16916, + "speech word": 52317, + "work review": 62812, + "approaches domain": 3801, + "approaches including": 3847, + "different amounts": 14835, + "progress recent": 43113, + "uses self": 60531, + "semantics work": 49422, + "path information": 39947, + "coming different": 9134, + "cross attention": 11807, + "performance sequence": 40553, + "source python": 51793, + "industrial applications": 25616, + "mit license": 33378, + "license https": 30429, + "speech used": 52315, + "speech systems": 52297, + "used cross": 60133, + "apply different": 3326, + "transformer models": 58498, + "subword vocabulary": 53689, + "models low": 35202, + "analysis case": 2624, + "study recent": 53451, + "released code": 46171, + "recommend future": 45563, + "experiments consider": 19390, + "variety datasets": 61266, + "aid understanding": 2128, + "provide practical": 44113, + "make results": 31593, + "gpu memory": 22995, + "simple pre": 51203, + "evaluated automatic": 18521, + "linked entities": 30831, + "simple sequence": 51210, + "model significant": 34378, + "document multi": 15814, + "neighboring sentences": 36664, + "marginal improvements": 31824, + "decrease performance": 13669, + "present multiple": 41952, + "multiple sentence": 36280, + "vectors context": 61483, + "detailed overview": 14428, + "overview various": 39120, + "task open": 55248, + "approaches time": 3941, + "specific issues": 52091, + "addition provide": 1640, + "commonly applied": 9217, + "assessing performance": 4587, + "performance open": 40466, + "automatically determining": 5162, + "aspects like": 4544, + "past research": 39934, + "called code": 7541, + "field text": 20771, + "hindi code": 23936, + "datasets social": 13435, + "texts collected": 56863, + "uses various": 60542, + "algorithms identify": 2326, + "task allows": 54899, + "related semantic": 45933, + "effects models": 16827, + "concepts human": 9934, + "usually manually": 61058, + "rely external": 46280, + "extra parameters": 19964, + "linear unit": 30677, + "english multi": 17846, + "showed using": 50674, + "similar word": 51076, + "words distributed": 62400, + "capture syntactic": 7714, + "explicit supervision": 19626, + "original test": 38730, + "gaussian distribution": 22011, + "examples sampled": 18930, + "target distribution": 54809, + "recognition specifically": 45537, + "lower dimensional": 31210, + "potential practical": 41403, + "generated samples": 22314, + "final goal": 20821, + "study state": 53462, + "layer used": 29211, + "based low": 5824, + "frequency distribution": 21671, + "distribution tokens": 15653, + "methods low": 32931, + "entire model": 18025, + "multilingual nmt": 36107, + "learning furthermore": 29658, + "shot inference": 50620, + "despite increasing": 14370, + "motivated work": 35875, + "generation achieved": 22410, + "current context": 11966, + "fed decoder": 20704, + "tuning process": 58950, + "data performance": 12542, + "effective parameter": 16683, + "parameter choices": 39665, + "performance main": 40429, + "training demonstrate": 58059, + "problem domain": 42544, + "training deployment": 58060, + "test effectiveness": 56345, + "used regularization": 60289, + "knowledge finally": 27483, + "papers published": 39609, + "results help": 47653, + "human labelled": 24189, + "step study": 52829, + "ai based": 2115, + "local contextual": 30933, + "information global": 25896, + "benchmarks verify": 6550, + "deal large": 13517, + "algorithms applied": 2320, + "based annotation": 5566, + "presents extension": 42084, + "compared similar": 9451, + "relations used": 46061, + "relations natural": 46045, + "method provide": 32628, + "recognition approach": 45492, + "effectively using": 16761, + "information neural": 25991, + "propose inject": 43417, + "especially fine": 18276, + "automatically learning": 5189, + "entities multiple": 18067, + "crucial aspect": 11895, + "dialogue structure": 14786, + "dialogue datasets": 14772, + "generate meaningful": 22218, + "specifically employ": 52197, + "make training": 31605, + "datasets human": 13292, + "available internet": 5313, + "mining tools": 33328, + "model represent": 34308, + "ability incorporate": 613, + "additionally proposed": 1731, + "general nlp": 22076, + "learns tasks": 29976, + "specific modules": 52114, + "modules parameters": 35773, + "multitask setting": 36326, + "shot capabilities": 50602, + "pointer generator": 41056, + "task setting": 55366, + "good data": 22931, + "type semantic": 59068, + "sequence paper": 49965, + "simple variant": 51224, + "tasks alleviate": 55497, + "alleviate need": 2414, + "human labor": 24190, + "features methods": 20622, + "methods utilize": 33097, + "great results": 23215, + "methods evaluating": 32846, + "performance 96": 40175, + "62 f1": 465, + "73 accuracy": 495, + "tagging pos": 54748, + "13 f1": 124, + "score ner": 48860, + "linguistic unit": 30806, + "models actually": 34688, + "solution paper": 51657, + "augmentation strategies": 4966, + "thorough examination": 57060, + "conducted evaluate": 10081, + "comprehensive performance": 9796, + "performance generalization": 40362, + "ability proposed": 637, + "approach helps": 3553, + "baselines various": 6319, + "various public": 61380, + "addresses challenges": 1809, + "data including": 12425, + "model accuracy": 33499, + "quality annotation": 44489, + "annotation cost": 2939, + "provide online": 44107, + "maintaining high": 31492, + "evaluate framework": 18459, + "drastically reduces": 16395, + "scores respectively": 48919, + "representation embedding": 46506, + "head attention": 23495, + "mean pooling": 31992, + "resulting state": 47477, + "performances datasets": 40640, + "modeling propose": 34615, + "possible solution": 41336, + "method adapt": 32369, + "agent interaction": 2056, + "topic classifier": 57396, + "decoding strategy": 13649, + "visual semantic": 61670, + "adversarial attack": 1964, + "limitation current": 30535, + "model establish": 33834, + "textual semantics": 56979, + "visual concepts": 61651, + "adversarial samples": 1986, + "noticeable improvement": 37728, + "set downstream": 50142, + "answering knowledge": 3077, + "increasing use": 25466, + "generation large": 22483, + "resulting improved": 47466, + "settings text": 50400, + "medical domains": 32203, + "datasets researchers": 13405, + "framework develop": 21493, + "paper compares": 39292, + "vectors compared": 61482, + "context automatic": 10587, + "systems aim": 54428, + "systematic errors": 54395, + "errors machine": 18244, + "empirical observations": 17335, + "domain english": 16057, + "tasks additionally": 55493, + "providing better": 44238, + "performance far": 40344, + "languages study": 28796, + "conduct qualitative": 10058, + "ai nlp": 2120, + "systems models": 54563, + "trained translate": 57904, + "based comparison": 5627, + "nlp practitioners": 37511, + "developed various": 14641, + "various research": 61386, + "representation format": 46521, + "typologically diverse": 59168, + "lingual settings": 30728, + "settings finally": 50374, + "auxiliary losses": 5234, + "results sequence": 47827, + "speed performance": 52324, + "additional prediction": 1694, + "forces model": 21289, + "process training": 42835, + "aims capture": 2180, + "lack human": 27893, + "labeled resources": 27760, + "information existing": 25845, + "terms coverage": 56281, + "approach adapts": 3408, + "discrete nature": 15425, + "suggest approach": 53813, + "models deal": 34885, + "discrete space": 15428, + "space allows": 51849, + "performance widely": 40629, + "model information": 34003, + "learn make": 29395, + "training cost": 57963, + "sentences generated": 49728, + "overfitting problem": 39083, + "problem caused": 42516, + "seq2seq framework": 49897, + "correction model": 11484, + "model correct": 33724, + "correct sentence": 11475, + "annotation dataset": 2942, + "set respectively": 50240, + "evaluate multiple": 18476, + "positive instances": 41283, + "instances learn": 26435, + "set constructed": 50127, + "understand interpret": 59300, + "representations explore": 46663, + "explore best": 19689, + "use pretrained": 59979, + "pretrained representations": 42180, + "using technique": 60981, + "sentiment score": 49857, + "score prediction": 48866, + "sentiment scores": 49858, + "unimodal multimodal": 59499, + "models sentiment": 35481, + "methods fail": 32861, + "kinds information": 27371, + "capturing semantics": 7744, + "language key": 28124, + "dataset train": 13120, + "primary secondary": 42372, + "recent large": 45316, + "analysis different": 2651, + "classifiers applied": 8611, + "results experiment": 47624, + "models fundamental": 35046, + "decision trees": 13571, + "objective measures": 38094, + "forest model": 21302, + "model according": 33497, + "resource indian": 47229, + "types word": 59126, + "efforts develop": 16936, + "according defined": 856, + "systems automatic": 54436, + "correlate poorly": 11506, + "poorly human": 41150, + "model improvements": 33981, + "expensive paper": 19214, + "evaluation practice": 18676, + "accurately reflect": 1100, + "content using": 10568, + "movie scripts": 35897, + "extensive training": 19915, + "address present": 1782, + "individual concepts": 25564, + "time proposed": 57201, + "faster train": 20442, + "advantages compared": 1949, + "designing neural": 14340, + "users using": 60487, + "conducted real": 10091, + "approach effectively": 3501, + "digital age": 15208, + "form news": 21331, + "linking relation": 30836, + "digital assistants": 15209, + "art research": 4367, + "challenges future": 8050, + "acoustic information": 1435, + "novel generative": 37830, + "languages cross": 28626, + "efficient simple": 16900, + "different problems": 15037, + "make effective": 31568, + "improvements experimental": 25074, + "final evaluation": 20820, + "work existing": 62653, + "datasets new": 13345, + "dataset release": 13060, + "information effectively": 25825, + "downstream use": 16372, + "maximization em": 31955, + "improve detection": 24842, + "lstms model": 31291, + "introduce supervised": 26866, + "difficulty levels": 15201, + "text fragment": 56585, + "generated framework": 22289, + "framework better": 21462, + "labels hierarchical": 27831, + "level input": 30134, + "high complexity": 23711, + "complex deep": 9621, + "certain aspects": 7934, + "learned various": 29489, + "model argue": 33579, + "does correlate": 15940, + "classifier predicts": 8603, + "tasks inherently": 55689, + "achieve superior": 1210, + "despite successes": 14397, + "simple tasks": 51217, + "observed training": 38150, + "self attentive": 49191, + "tasks contrast": 55559, + "standard transformer": 52538, + "en dataset": 17413, + "novel annotation": 37754, + "build classifiers": 7390, + "human authored": 24108, + "scenario data": 48684, + "truth labels": 58839, + "visual language": 61659, + "highly efficient": 23898, + "manually compiled": 31767, + "combines state": 9101, + "usually employ": 61046, + "linear transformations": 30676, + "shared space": 50489, + "using bilingual": 60596, + "unsupervised techniques": 59741, + "techniques sentence": 56134, + "task languages": 55162, + "new intrinsic": 37229, + "achieve average": 1111, + "new entity": 37189, + "use new": 59963, + "existing benchmarks": 19042, + "using multitask": 60823, + "head word": 23500, + "train multi": 57613, + "task hierarchical": 55116, + "learning efficiently": 29613, + "datasets containing": 13198, + "hierarchy aware": 23703, + "text collections": 56496, + "40 languages": 396, + "domain adversarial": 16014, + "avoid overfitting": 5433, + "training domain": 58071, + "languages case": 28612, + "languages monolingual": 28732, + "pretrained multilingual": 42172, + "layers encoder": 29223, + "model empirically": 33811, + "using pseudo": 60882, + "results trained": 47887, + "effectively leveraging": 16749, + "core nlp": 11153, + "open corpus": 38415, + "faced task": 20251, + "study participants": 53428, + "contextual dependencies": 10761, + "contextual model": 10776, + "contextual models": 10777, + "independent models": 25502, + "learning auxiliary": 29528, + "deep encoder": 13690, + "work observe": 62738, + "observe performance": 38139, + "ctc model": 11928, + "intermediate layer": 26674, + "performance lower": 40425, + "lower resource": 31222, + "improves standard": 25162, + "multitask training": 36327, + "experiments low": 19458, + "training works": 58318, + "obtained combining": 38205, + "learning pretraining": 29816, + "pretraining improves": 42205, + "analysis reveal": 2744, + "10 improvement": 44, + "set messages": 50191, + "systems incorporating": 54531, + "common data": 9171, + "early stage": 16514, + "texts automatically": 56860, + "corpus collected": 11294, + "data representations": 12603, + "training stages": 58269, + "knowledge unlabeled": 27639, + "requiring training": 46966, + "corpora achieve": 11171, + "domain adaption": 16009, + "increases performance": 25438, + "score gain": 48847, + "metric model": 33121, + "task building": 54943, + "verb noun": 61509, + "does yield": 15984, + "score propose": 48868, + "time based": 57122, + "apply pre": 3345, + "information including": 25918, + "used help": 60203, + "embeddings requires": 17204, + "requires fine": 46930, + "method word": 32708, + "use fine": 59890, + "grained typing": 23047, + "build datasets": 7394, + "datasets large": 13312, + "grained classes": 23025, + "sentence context": 49534, + "draw conclusions": 16400, + "cloud based": 8720, + "main techniques": 31463, + "models minimal": 35226, + "performance impact": 40379, + "generalization novel": 22124, + "requiring models": 46964, + "settings model": 50383, + "analysis large": 2688, + "scale social": 48624, + "emotional status": 17300, + "analysis textual": 2779, + "analysis studied": 2768, + "sentiment classifier": 49836, + "embeddings attention": 17085, + "aware embeddings": 5449, + "scanned documents": 48654, + "recurrent encoder": 45613, + "corresponding text": 11559, + "focal loss": 21140, + "improvement standard": 25025, + "standard cross": 52479, + "imbalance problem": 24564, + "capacity model": 7637, + "joint distribution": 27166, + "version original": 61554, + "recently used": 45472, + "novel document": 37806, + "document context": 15778, + "title abstract": 57269, + "using generative": 60705, + "generate document": 22195, + "user behavior": 60405, + "standard seq2seq": 52526, + "scale propose": 48617, + "models scale": 35469, + "provide comparison": 44031, + "input trained": 26351, + "produce different": 42978, + "different rates": 15043, + "factors influence": 20310, + "level labeled": 30142, + "decoder modules": 13602, + "second apply": 48995, + "significantly compared": 50948, + "high impact": 23741, + "especially non": 18292, + "english speaking": 17882, + "150 000": 156, + "complexity lexical": 9680, + "studies mainly": 53279, + "quantitative metrics": 44622, + "automatically evaluate": 5166, + "evaluate metrics": 18471, + "depth analyses": 14183, + "theory mind": 57037, + "experiments testing": 19544, + "framework provide": 21588, + "set test": 50261, + "set allows": 50106, + "does suffer": 15980, + "perform empirical": 40095, + "data substantially": 12706, + "models fixed": 35036, + "solving complex": 51701, + "modelling language": 34643, + "number common": 37990, + "constructed based": 10406, + "design model": 14289, + "algorithms developed": 2324, + "developed years": 14643, + "datasets report": 13400, + "evaluation study": 18730, + "establishing new": 18365, + "optimized training": 38569, + "making useful": 31674, + "computational research": 9857, + "annotations word": 3007, + "level speech": 30215, + "resource tasks": 47282, + "experiments named": 19475, + "computational techniques": 9866, + "techniques identify": 56094, + "improve nlp": 24879, + "recognition techniques": 45544, + "techniques recent": 56129, + "types human": 59091, + "using quantitative": 60888, + "quantitative measures": 44620, + "metrics demonstrate": 33157, + "data sequence": 12641, + "fundamental problems": 21788, + "framework improve": 21539, + "information additionally": 25756, + "survey provides": 54215, + "applications computational": 3189, + "recommendations future": 45568, + "scientific disciplines": 48757, + "makes easier": 31620, + "science domain": 48746, + "research understanding": 47137, + "task new": 55239, + "forward pass": 21408, + "unstructured information": 59669, + "important topics": 24787, + "specific fine": 52085, + "effective architecture": 16631, + "addition dataset": 1606, + "approach benchmark": 3429, + "increases size": 25440, + "cause significant": 7888, + "quality trade": 44589, + "space proposed": 51887, + "function encourages": 21753, + "generate words": 22264, + "best candidates": 6754, + "french data": 21659, + "sets proposed": 50303, + "improvements standard": 25099, + "datasets especially": 13250, + "dataset freely": 12936, + "youtube comments": 63148, + "creation process": 11750, + "content data": 10517, + "evaluate dataset": 18450, + "public attitudes": 44304, + "work define": 62623, + "provide evaluation": 44062, + "evaluation benchmark": 18582, + "compared number": 9427, + "systems able": 54419, + "domain ood": 16124, + "neural sentence": 37094, + "dimensional continuous": 15226, + "set unlabeled": 50272, + "embedding used": 17068, + "sentence detection": 49540, + "text pairs": 56686, + "task participants": 55268, + "use combination": 59843, + "weighted f1": 61928, + "f1 measures": 20187, + "task ranked": 55316, + "ranked 2nd": 44953, + "biases training": 7060, + "bias models": 7035, + "models view": 35673, + "mitigate bias": 33380, + "trained pre": 57840, + "research human": 47049, + "modeling techniques": 34630, + "varies different": 61256, + "task demands": 54999, + "input feature": 26278, + "wide application": 61958, + "learning powerful": 29809, + "approaches developed": 3797, + "hindi telugu": 23943, + "speakers languages": 52007, + "class distribution": 8400, + "distribution different": 15636, + "size vector": 51401, + "useful building": 60357, + "building nlp": 7462, + "systems including": 54529, + "auto encoding": 5018, + "parameters improve": 39702, + "accuracy finally": 978, + "benchmark suite": 6496, + "look like": 31065, + "data according": 12109, + "different inputs": 14957, + "related events": 45906, + "information possible": 26011, + "dependencies using": 14113, + "residual connection": 47186, + "way construct": 61798, + "capture temporal": 7717, + "information shared": 26082, + "number layers": 38016, + "future information": 21877, + "term information": 56239, + "corpus compared": 11297, + "facebook posts": 20246, + "health informatics": 23515, + "processing long": 42884, + "conversations recent": 11062, + "improve recognition": 24916, + "model explicitly": 33857, + "uses context": 60498, + "information end": 25833, + "manner evaluate": 31714, + "corpus outperforms": 11397, + "role specific": 48323, + "method recent": 32634, + "datasets release": 13398, + "domain limited": 16104, + "effectively utilize": 16762, + "utilize existing": 61091, + "source domains": 51768, + "adaptation paper": 1532, + "procedure model": 42743, + "tuned small": 58885, + "related domain": 45900, + "effective representation": 16689, + "text critical": 56518, + "important understand": 24788, + "multi channel": 35942, + "representation experimental": 46512, + "dataset low": 12985, + "generally improves": 22167, + "studied context": 53220, + "body research": 7240, + "variety topics": 61294, + "topics including": 57451, + "available use": 5388, + "different modeling": 14995, + "propose answer": 43294, + "answer open": 3041, + "information bottleneck": 25770, + "like semantic": 30500, + "languages complex": 28620, + "surpasses state": 54176, + "embeddings widely": 17244, + "gap propose": 21976, + "model adopts": 33546, + "results cross": 47563, + "novel hierarchical": 37837, + "achieves 94": 1297, + "dataset furthermore": 12938, + "vinyals et": 61612, + "alzheimer disease": 2517, + "introduce multilingual": 26827, + "task conversational": 54979, + "transfer methods": 58404, + "new multilingual": 37264, + "experiments dataset": 19397, + "methods practical": 32984, + "evaluated benchmark": 18523, + "dutch spanish": 16479, + "corpus german": 11351, + "inputs model": 26364, + "model quickly": 34267, + "generation address": 22412, + "kullback leibler": 27679, + "leibler divergence": 30013, + "output probabilities": 38993, + "comment generation": 9140, + "users work": 60488, + "work construct": 62610, + "generate human": 22211, + "baselines study": 6304, + "general multi": 22071, + "tasks extensive": 55635, + "benefit tasks": 6569, + "learned jointly": 29461, + "test suite": 56384, + "limit performance": 30533, + "automated metrics": 5054, + "test suites": 56385, + "effectiveness multilingual": 16798, + "multilingual settings": 36120, + "semi markov": 49451, + "markov conditional": 31843, + "examples generated": 18906, + "probing tasks": 42494, + "shows improved": 50784, + "individual tasks": 25582, + "processing neural": 42896, + "perform remarkably": 40134, + "use inter": 59914, + "comparably better": 9316, + "terms efficiency": 56284, + "aware information": 5452, + "relationship different": 46067, + "precision score": 41619, + "work primarily": 62768, + "language invariant": 28123, + "study cross": 53351, + "language adversarial": 27955, + "training cross": 57966, + "agnostic representations": 2097, + "tasks experiment": 55628, + "training consistently": 57957, + "trained baseline": 57680, + "compare multiple": 9350, + "baselines addition": 6231, + "boosts performance": 7264, + "nlp recent": 37517, + "represent state": 46481, + "structure semantics": 53134, + "benchmark automatic": 6426, + "evaluation recent": 18694, + "consistency generated": 10266, + "texts input": 56892, + "data guide": 12397, + "guide training": 23342, + "measure consistency": 32047, + "humans reason": 24287, + "commonsense inference": 9233, + "commonsense reasoning": 9239, + "reasoning present": 45215, + "annotation artifacts": 2935, + "novel procedure": 37899, + "models struggle": 35540, + "solving various": 51709, + "used technique": 60325, + "processing speech": 42940, + "context nlp": 10681, + "nlp specifically": 37526, + "word lexicon": 62239, + "produce similar": 43009, + "methods addition": 32738, + "view problem": 61601, + "problem weakly": 42686, + "novel soft": 37924, + "problem experiments": 42559, + "model beats": 33610, + "set outperforms": 50210, + "set present": 50225, + "learning graph": 29666, + "dense space": 14082, + "distance measures": 15547, + "information graph": 25898, + "results outperforming": 47754, + "outperforming strong": 38862, + "embedding baselines": 17019, + "model computationally": 33689, + "yields consistent": 63122, + "news story": 37418, + "important difficult": 24718, + "seed set": 49044, + "graphs model": 23189, + "task enables": 55044, + "content high": 10528, + "accuracy multiple": 1011, + "time study": 57227, + "range text": 44939, + "text entity": 56558, + "entity graph": 18108, + "techniques automatically": 56064, + "form graph": 21321, + "captured word": 7726, + "abstract level": 759, + "measuring word": 32090, + "model limited": 34065, + "investigate importance": 26961, + "presented results": 42062, + "based test": 6089, + "best neural": 6785, + "points compared": 41071, + "small high": 51475, + "competitive traditional": 9569, + "data applied": 12140, + "challenges existing": 8046, + "models hand": 35074, + "datasets multiple": 13340, + "aware context": 5444, + "agnostic models": 2095, + "auxiliary classifier": 5229, + "approaches finally": 3826, + "translation possible": 58658, + "com google": 9012, + "information task": 26113, + "task event": 55059, + "dataset english": 12909, + "dataset make": 12989, + "systems showing": 54630, + "human agents": 24091, + "poses great": 41247, + "building universal": 7476, + "framework specifically": 21603, + "annotations target": 3002, + "dialog data": 14753, + "train state": 57638, + "knowledge student": 27622, + "italian german": 27110, + "achieve promising": 1180, + "task measuring": 55210, + "understanding recent": 59390, + "different perspectives": 15025, + "aggregation module": 2081, + "problem insufficient": 42585, + "previous strong": 42285, + "dataset date": 12881, + "addition observe": 1629, + "principal components": 42382, + "representations extensive": 46664, + "learning adversarial": 29506, + "demographic information": 13856, + "neural classifiers": 36944, + "trained textual": 57895, + "accuracy training": 1065, + "post hoc": 41348, + "substantially higher": 53635, + "improve effectiveness": 24847, + "training achieve": 57923, + "invariant representation": 26920, + "widely applicable": 61992, + "input graph": 26284, + "hierarchical reinforcement": 23686, + "different sizes": 15070, + "sampling strategies": 48507, + "models applied": 34718, + "capable recognizing": 7630, + "task setup": 55368, + "different auxiliary": 14848, + "provide dataset": 44045, + "identifying speaker": 24468, + "mutually exclusive": 36353, + "experiments automatic": 19358, + "based adaptive": 5558, + "used collect": 60116, + "information ii": 25909, + "copying words": 11139, + "baselines finally": 6261, + "model yielded": 34550, + "distribution based": 15633, + "wasserstein distance": 61781, + "closed form": 8698, + "evaluated paper": 18541, + "gather information": 22001, + "questions introduce": 44793, + "conversational question": 11048, + "systems dataset": 54469, + "present existing": 41908, + "obtains f1": 38248, + "score 65": 48801, + "ample room": 2566, + "growth number": 23308, + "information necessary": 25985, + "aims automatically": 2175, + "online comments": 38353, + "dataset quality": 13051, + "multi target": 36014, + "outperform various": 38832, + "various baselines": 61308, + "accuracy benchmark": 938, + "domain wikipedia": 16227, + "web corpus": 61883, + "applications knowledge": 3215, + "propose semantic": 43617, + "designed handle": 14318, + "multiple domain": 36203, + "context windows": 10745, + "features predicting": 20645, + "predicting sentence": 41681, + "accuracy standard": 1051, + "tasks designed": 55583, + "generalization paper": 22125, + "external resource": 19951, + "effect word": 16624, + "task downstream": 55035, + "model encoder": 33818, + "attention different": 4737, + "types words": 59128, + "words function": 62422, + "called self": 7552, + "model attend": 33584, + "quality work": 44600, + "augmentation text": 4970, + "based tasks": 6084, + "design data": 14271, + "existing augmentation": 19035, + "extremely simple": 20166, + "simple data": 51145, + "augmentation strategy": 4967, + "sentence target": 49654, + "different scales": 15057, + "implement method": 24635, + "convolutional models": 11106, + "text vision": 56840, + "rnn encoders": 48193, + "gains bleu": 21934, + "image generation": 24537, + "focus general": 21166, + "need generate": 36568, + "need consider": 36551, + "generation fully": 22466, + "training mechanism": 58171, + "baselines proposed": 6289, + "neural parser": 37084, + "generates candidate": 22339, + "utterances using": 61153, + "achieving results": 1421, + "despite current": 14357, + "promising performances": 43173, + "vulnerable adversarial": 61754, + "paper tackles": 39592, + "leveraging knowledge": 30328, + "aims transfer": 2218, + "applied answer": 3263, + "used annotate": 60088, + "annotate corpus": 2870, + "variety data": 61265, + "twitter api": 59031, + "collection annotation": 8979, + "annotation efforts": 2946, + "arabic paper": 4004, + "large manually": 28902, + "various social": 61392, + "media sources": 32182, + "popular methods": 41171, + "learn contextual": 29352, + "outperforms popular": 38917, + "compared training": 9467, + "model coupled": 33727, + "graph enhanced": 23134, + "tasks state": 55909, + "results analyze": 47500, + "different sentiment": 15065, + "syntactically complex": 54339, + "random seeds": 44888, + "distillation model": 15575, + "using contextualized": 60622, + "elmo embeddings": 16996, + "yields significantly": 63130, + "sentiment labels": 49850, + "model optimization": 34147, + "proximal policy": 44261, + "policy optimization": 41101, + "models introduced": 35144, + "problems like": 42709, + "mainly based": 31467, + "known suffer": 27667, + "latent distribution": 29122, + "learning learning": 29704, + "corresponding target": 11558, + "unseen data": 59644, + "data pairs": 12528, + "robustness experiments": 48280, + "tasks arabic": 55505, + "trained training": 57896, + "added training": 1591, + "labels predicted": 27844, + "classifier training": 8607, + "data detect": 12276, + "data outperforms": 12526, + "dataset german": 12944, + "annotated examples": 2895, + "indirect supervision": 25555, + "emerged promising": 17263, + "language represent": 28467, + "knowledge relations": 27588, + "supervision using": 54100, + "using variational": 61014, + "rich domain": 48097, + "approach propose": 3656, + "models enables": 34960, + "universal model": 59542, + "adaptation approach": 1519, + "changes model": 8179, + "input generates": 26282, + "parameters encoder": 39692, + "remains unchanged": 46350, + "enables use": 17450, + "perform zero": 40162, + "enhance understanding": 17927, + "used express": 60181, + "paper combine": 39290, + "capture dependencies": 7661, + "attention time": 4835, + "generated context": 22279, + "embeddings effectively": 17120, + "propose apply": 43295, + "better cross": 6871, + "experiments confirm": 19389, + "models monolingual": 35233, + "independently trained": 25509, + "address shortcoming": 1799, + "shortcoming propose": 50577, + "approaches experiments": 3818, + "addition model": 1626, + "recurrent architectures": 45608, + "improves ability": 25112, + "capturing long": 7740, + "semantic feature": 49277, + "results self": 47820, + "cnns outperform": 8779, + "representations derived": 46637, + "derived pre": 14202, + "trained bidirectional": 57684, + "lstm cnn": 31255, + "properties representations": 43270, + "contextual representations": 10779, + "representations outperform": 46731, + "network depth": 36730, + "perplexity ppl": 40740, + "sentences demonstrate": 49703, + "method aims": 32375, + "data synthetic": 12716, + "translating sentences": 58568, + "randomly sampled": 44903, + "prediction loss": 41716, + "different parameter": 15019, + "unrelated languages": 59633, + "sharing parameters": 50518, + "conventional wisdom": 11018, + "features novel": 20632, + "present ablation": 41840, + "representation state": 46585, + "disfluency detection": 15506, + "features representations": 20657, + "model automatic": 33590, + "layer capture": 29182, + "result task": 47454, + "textual structural": 56984, + "dataset 50": 12793, + "capture various": 7721, + "progress neural": 43107, + "architectures models": 4118, + "lack explicit": 27888, + "generation stage": 22550, + "results 10": 47480, + "points higher": 41075, + "approach trained": 3724, + "larger dataset": 29073, + "par best": 39611, + "able reason": 718, + "models memory": 35222, + "keeping track": 27280, + "models accuracy": 34663, + "generating relevant": 22391, + "introducing additional": 26898, + "work needed": 62733, + "query terms": 44677, + "complex queries": 9650, + "performances using": 40650, + "structures like": 53188, + "constituency dependency": 10347, + "structural representation": 53082, + "model naturally": 34119, + "structures experiments": 53184, + "result model": 47440, + "binary trees": 7157, + "models bleu": 34791, + "language generated": 28082, + "encode different": 17460, + "corpus resource": 11421, + "goal generate": 22886, + "generate accurate": 22174, + "novel lightweight": 37856, + "task focuses": 55093, + "models german": 35062, + "gains strong": 21945, + "strong transformer": 53056, + "models social": 35517, + "tasks effectively": 55601, + "effectively integrates": 16745, + "tasks spanning": 55901, + "generalization new": 22123, + "using linguistically": 60773, + "gain performance": 21912, + "effectiveness approaches": 16768, + "recognition languages": 45511, + "hierarchical nature": 23682, + "vector quantization": 61459, + "continuous embeddings": 10846, + "method standard": 32666, + "tasks allowing": 55498, + "achieve substantially": 1209, + "perplexity scores": 40741, + "learn patterns": 29407, + "languages models": 28731, + "generation including": 22476, + "quite successful": 44833, + "multiple references": 36273, + "importantly propose": 24797, + "greedy search": 23244, + "methods address": 32739, + "identifying classifying": 24455, + "tasks develop": 55587, + "develop unified": 14621, + "scientific information": 48762, + "span representations": 51930, + "received considerable": 45256, + "propose incorporate": 43415, + "alignment framework": 2368, + "network embedding": 36735, + "results downstream": 47599, + "based encoders": 5700, + "parsing trees": 39803, + "study effectiveness": 53365, + "effectiveness different": 16776, + "gives better": 22805, + "words closer": 62379, + "additional experiments": 1666, + "design effective": 14275, + "crucial information": 11902, + "text modality": 56666, + "specifically leverage": 52213, + "outperforms text": 38954, + "model baselines": 33609, + "tasks classification": 55538, + "framework building": 21465, + "building unsupervised": 7477, + "unsupervised representations": 59726, + "methods code": 32782, + "investigate effects": 26956, + "introduced task": 26889, + "negative transfer": 36639, + "considerable improvements": 10232, + "shows consistent": 50771, + "data novel": 12516, + "time provide": 57202, + "respect previous": 47350, + "based individual": 5781, + "information spread": 26103, + "problem graph": 42576, + "nodes graph": 37592, + "graph edges": 23129, + "questions requiring": 44807, + "challenging introduce": 8103, + "graph kg": 23143, + "text represented": 56739, + "multiple valid": 36308, + "problem goal": 42575, + "setting different": 50320, + "reach goal": 45046, + "learning reinforcement": 29836, + "new effective": 37178, + "drop performance": 16442, + "accuracy original": 1018, + "label bias": 27694, + "model improvement": 33980, + "tasks goal": 55659, + "generation output": 22512, + "time algorithm": 57114, + "algorithm significantly": 2302, + "success text": 53726, + "models largely": 35168, + "generation using": 22577, + "using hidden": 60726, + "decoder learns": 13599, + "interpretable controllable": 26719, + "achieves strong": 1380, + "systems exhibit": 54493, + "exhibit significant": 19004, + "results non": 47744, + "sentence dependencies": 49539, + "results wide": 47907, + "models integrating": 35138, + "knowledge different": 27437, + "knowledge driven": 27452, + "knowledge explicitly": 27474, + "neural module": 36984, + "greatly improve": 23229, + "generalization abilities": 22113, + "annotation methodology": 2955, + "multiple forms": 36218, + "introduce syntactic": 26867, + "improve strong": 24929, + "dataset crowd": 12876, + "generation perform": 22516, + "generation multi": 22501, + "generation proposed": 22533, + "inner workings": 26245, + "negative polarity": 36629, + "evaluate extent": 18458, + "extent neural": 19925, + "model finds": 33890, + "emotion classification": 17288, + "model additional": 33540, + "model analysis": 33563, + "addition discuss": 1610, + "embeddings target": 17223, + "models leads": 35173, + "leads faster": 29311, + "training better": 57947, + "quality given": 44527, + "propose structure": 43649, + "model generalized": 33926, + "allows learning": 2471, + "allows effective": 2460, + "better leverage": 6910, + "leverage prior": 30285, + "english finnish": 17804, + "method strong": 32669, + "baselines trained": 6313, + "answers multiple": 3110, + "data allow": 12129, + "extract keywords": 19981, + "outside scope": 39026, + "exceeds performance": 18949, + "way allows": 61792, + "potential biases": 41385, + "based transformer": 6106, + "focus improving": 21171, + "model rnn": 34328, + "demonstrates state": 14042, + "strong indicator": 53033, + "informal texts": 25744, + "autoregressive models": 5222, + "accuracy drop": 964, + "sentence better": 49521, + "including transformer": 25313, + "estimation qe": 18386, + "effectively encode": 16731, + "encode local": 17467, + "global contextual": 22825, + "information target": 26112, + "languages second": 28778, + "making predictions": 31663, + "model submitted": 34421, + "results ranking": 47797, + "different embeddings": 14913, + "understanding key": 59355, + "local optima": 30947, + "models shared": 35492, + "filtering noisy": 20813, + "data sentence": 12640, + "predicate object": 41631, + "detection based": 14461, + "apply multiple": 3339, + "methods generalize": 32875, + "results visual": 47906, + "combined approach": 9077, + "achieves superior": 1384, + "quickly learn": 44823, + "document sets": 15832, + "entities various": 18091, + "application machine": 3166, + "sense text": 49489, + "language recently": 28463, + "main findings": 31440, + "findings study": 20916, + "vary widely": 61424, + "paper time": 39596, + "core task": 11156, + "input utterance": 26357, + "different benchmark": 14850, + "conventional method": 11006, + "predict future": 41641, + "text length": 56648, + "content recent": 10551, + "works neural": 62899, + "conversation history": 11033, + "learning proven": 29826, + "resource conditions": 47211, + "baseline trained": 6218, + "targeting different": 54862, + "significantly advances": 50934, + "modern machine": 35710, + "noise robust": 37603, + "noisy inputs": 37620, + "propose benchmark": 43311, + "noisy text": 37625, + "types noise": 59108, + "methods tailored": 33066, + "text mt": 56670, + "cs cmu": 11920, + "cmu edu": 8755, + "learning jointly": 29689, + "entropy minimization": 18164, + "target data": 54805, + "approach better": 3433, + "leverage unlabeled": 30295, + "domain achieve": 15993, + "various experimental": 61339, + "experimental settings": 19325, + "news datasets": 37399, + "simple implement": 51180, + "recently growing": 45432, + "growing developing": 23295, + "human agent": 24090, + "work topic": 62844, + "training paper": 58201, + "method increases": 32542, + "sense reasoning": 49488, + "hypothesis model": 24345, + "allowing direct": 2445, + "space recent": 51890, + "annotate large": 2873, + "representations results": 46749, + "embeddings significantly": 17216, + "deep nlp": 13741, + "structures data": 53182, + "using shelf": 60936, + "latent structures": 29140, + "approach end": 3511, + "medical records": 32208, + "leveraging existing": 30322, + "resulting corpus": 47462, + "learning potential": 29808, + "training baseline": 57942, + "form question": 21333, + "derive new": 14198, + "dialogue response": 14782, + "addition demonstrate": 1607, + "dataset multimodal": 13003, + "capturing temporal": 7746, + "test bed": 56333, + "benchmark evaluating": 6465, + "systems data": 54468, + "idea method": 24370, + "experiments various": 19556, + "generalizes new": 22156, + "dataset audio": 12816, + "second experiment": 49005, + "labels approach": 27809, + "tend generate": 56199, + "incorporating information": 25387, + "generated responses": 22311, + "help generate": 23565, + "obtaining high": 38234, + "high correlation": 23719, + "applicability approach": 3153, + "model update": 34505, + "different families": 14931, + "dataset framework": 12935, + "entire sentence": 18027, + "prediction framework": 41710, + "energy based": 17748, + "model adopt": 33545, + "approaches generally": 3833, + "lattice based": 29164, + "task pre": 55279, + "layer learns": 29188, + "learns high": 29960, + "achieve macro": 1169, + "world question": 62954, + "questions long": 44795, + "performance rule": 40544, + "rely information": 46289, + "based queries": 5968, + "propose reinforcement": 43601, + "model framework": 33908, + "framework able": 21446, + "approach recent": 3667, + "occurrence graph": 38273, + "graph present": 23156, + "increase difficulty": 25411, + "problem address": 42498, + "information especially": 25839, + "especially suitable": 18303, + "aware attention": 5441, + "slot type": 51443, + "inter dependencies": 26580, + "training step": 58272, + "important questions": 24757, + "classification objective": 8510, + "finally experiments": 20857, + "showing effectiveness": 50678, + "decoding neural": 13635, + "systems requires": 54622, + "sequential encoder": 50040, + "models method": 35223, + "translation addition": 58575, + "model sequential": 34362, + "generated models": 22300, + "english resource": 17864, + "integrate multiple": 26507, + "multiple pieces": 36261, + "correctly answer": 11489, + "information encoding": 25832, + "information rich": 26071, + "graph neural": 23152, + "networks graph": 36865, + "information leads": 25948, + "following recent": 21268, + "semantics syntax": 49416, + "addition explore": 1616, + "unsupervised systems": 59738, + "supervised ones": 54030, + "ones propose": 38343, + "owing lack": 39126, + "trained languages": 57763, + "languages written": 28826, + "currently exists": 12036, + "models shows": 35504, + "sub words": 53540, + "capture patterns": 7700, + "embedding layers": 17034, + "model convolutional": 33720, + "diverse target": 15720, + "evidence model": 18815, + "useful features": 60364, + "stacked lstm": 52421, + "analysis understand": 2786, + "approach existing": 3524, + "information derived": 25806, + "inference compared": 25645, + "data pre": 12551, + "input proposed": 26322, + "capture common": 7652, + "structure languages": 53113, + "languages evaluate": 28659, + "model semantics": 34351, + "using dense": 60649, + "knowledge novel": 27561, + "models smaller": 35516, + "features additional": 20519, + "generation dataset": 22442, + "dataset outperforming": 13021, + "systems long": 54552, + "study focuses": 53381, + "demographic groups": 13855, + "enable new": 17427, + "structural constraints": 53076, + "model understanding": 34500, + "human loop": 24203, + "generation human": 22473, + "models aid": 34702, + "models adversarial": 34699, + "hop reasoning": 24004, + "set result": 50241, + "models naturally": 35246, + "manner propose": 31724, + "models effectiveness": 34948, + "method mitigating": 32577, + "problems training": 42735, + "information learned": 25951, + "learned model": 29467, + "indicates model": 25540, + "strategy named": 52944, + "basic building": 6327, + "middle layers": 33237, + "time achieve": 57113, + "best option": 6786, + "content related": 10552, + "provide explicit": 44069, + "models having": 35079, + "architecture paper": 4074, + "instances training": 26437, + "help alleviate": 23552, + "structure dataset": 53096, + "limitations paper": 30553, + "adapted task": 1555, + "systems open": 54574, + "based f1": 5725, + "score 58": 48796, + "performance unseen": 40613, + "theoretical linguistics": 57023, + "grows exponentially": 23306, + "work instead": 62690, + "results depth": 47587, + "significantly effective": 50954, + "model parsing": 34181, + "chinese german": 8308, + "technique able": 56023, + "competitively state": 9571, + "usually trained": 61071, + "loss using": 31108, + "using teacher": 60980, + "teacher forcing": 55990, + "level training": 30225, + "mitigate problems": 33391, + "addition method": 1625, + "improved quality": 24962, + "present generic": 41922, + "directions english": 15289, + "performance remains": 40532, + "based crf": 5658, + "score achieved": 48833, + "obtain embeddings": 38171, + "reflect semantic": 45776, + "approaches achieve": 3752, + "remarkable success": 46362, + "main reason": 31455, + "reason lack": 45168, + "external commonsense": 19928, + "incorporating commonsense": 25379, + "media post": 32179, + "incorporating context": 25381, + "topic extraction": 57405, + "message level": 32321, + "content information": 10530, + "model outputs": 34169, + "representations discourse": 46641, + "topics present": 57458, + "information dataset": 25800, + "dataset comprised": 12854, + "used query": 60282, + "dataset main": 12988, + "attention values": 4847, + "dataset source": 13094, + "current automatic": 11961, + "written sentences": 63009, + "written references": 63008, + "advantage model": 1943, + "model utilize": 34517, + "dependency words": 14144, + "methods applicable": 32750, + "adversarial loss": 1974, + "languages tasks": 28801, + "method zero": 32712, + "techniques deep": 56073, + "generation language": 22482, + "model baseline": 33608, + "generates coherent": 22340, + "additional contextual": 1660, + "23 languages": 323, + "adopted nlp": 1872, + "applications existing": 3205, + "methods result": 33022, + "paper overcome": 39436, + "graph convolution": 23120, + "framework incorporating": 21545, + "make source": 31598, + "available encourage": 5286, + "encourage reproducible": 17598, + "task learns": 55184, + "simultaneously specifically": 51276, + "specifically develop": 52195, + "consists neural": 10325, + "information increasing": 25921, + "data improving": 12422, + "training experimental": 58099, + "absolute improvements": 746, + "practical scenarios": 41470, + "using audio": 60569, + "92 f1": 559, + "lower performance": 31219, + "controllable generation": 10977, + "tasks unsupervised": 55951, + "shelf language": 50536, + "framework text": 21613, + "closer look": 8711, + "dataset requires": 13064, + "fields computer": 20776, + "progress machine": 43103, + "train multilingual": 57614, + "mt nmt": 35922, + "performing zero": 40694, + "amazon reviews": 2523, + "shot classification": 50604, + "understand underlying": 59315, + "shared vocabulary": 50510, + "data type": 12747, + "encoder representation": 17536, + "push forward": 44424, + "present task": 42036, + "label model": 27714, + "features help": 20594, + "ai agents": 2112, + "evaluation protocol": 18686, + "model asked": 33580, + "helpful improving": 23596, + "news websites": 37426, + "train automatic": 57564, + "based retrieval": 5998, + "topics topic": 57462, + "topic representation": 57427, + "obtained neural": 38216, + "model news": 34130, + "tokens text": 57338, + "task distinguishing": 55025, + "model capturing": 33649, + "idf features": 24477, + "work indicates": 62686, + "vs non": 61748, + "nlp technologies": 37554, + "text classifiers": 56492, + "identify problems": 24438, + "develop deep": 14581, + "annotated manually": 2903, + "testing set": 56411, + "classifiers used": 8627, + "learning target": 29903, + "based pre": 5941, + "score 85": 48820, + "relied hand": 46260, + "features provide": 20652, + "strong inductive": 53034, + "structure task": 53139, + "language new": 28357, + "artificial training": 4500, + "sense aware": 49482, + "instead focus": 26451, + "challenging multi": 8114, + "requires model": 46943, + "information context": 25792, + "context generate": 10645, + "requires understanding": 46956, + "present strong": 42026, + "strong generative": 53031, + "multi attention": 35941, + "perform multiple": 40123, + "performs substantially": 40718, + "art span": 4410, + "models introduce": 35142, + "based scoring": 6007, + "reasoning dataset": 45191, + "fact checking": 20288, + "model evidence": 33846, + "presents neural": 42092, + "datasets ablation": 13139, + "method despite": 32457, + "analysis properties": 2727, + "learn universal": 29442, + "reconstruct input": 45577, + "hidden vectors": 23651, + "furthermore compared": 21809, + "traditional recurrent": 57540, + "depth error": 14185, + "parameter initialization": 39671, + "neural classification": 36942, + "applied existing": 3273, + "representations pre": 46737, + "models elmo": 34951, + "results set": 47828, + "task participated": 55269, + "trained transformer": 57897, + "transformer architecture": 58449, + "architecture using": 4097, + "large quantity": 28950, + "generated new": 22304, + "incremental training": 25484, + "simple combination": 51142, + "language leveraging": 28137, + "target corpora": 54803, + "interpretable representations": 26729, + "highlight differences": 23861, + "information models": 25977, + "generating response": 22392, + "fluent responses": 21132, + "clause based": 8638, + "based type": 6115, + "contexts propose": 10754, + "modeling context": 34567, + "clause level": 8639, + "systems consider": 54457, + "style evaluation": 53483, + "grained evaluation": 23034, + "initial step": 26219, + "using public": 60883, + "public benchmark": 44306, + "datasets suggest": 13446, + "affect performance": 2017, + "training improves": 58127, + "benefit training": 6570, + "language enables": 28047, + "performance limited": 40419, + "data alleviate": 12128, + "propose exploit": 43379, + "present input": 41930, + "baseline outperforms": 6198, + "extraction existing": 20064, + "improve generation": 24861, + "generation used": 22575, + "generation performance": 22517, + "human interactions": 24174, + "inference approaches": 25643, + "tasks biomedical": 55528, + "showed promising": 50669, + "scarce resources": 48659, + "dataset covers": 12870, + "major obstacles": 31518, + "results address": 47493, + "address lack": 1775, + "data entity": 12326, + "models reduce": 35418, + "reduce false": 45662, + "performance leveraging": 40418, + "leveraging multiple": 30334, + "datasets annotated": 13152, + "types given": 59090, + "accuracy downstream": 962, + "supervised sentence": 54041, + "additionally experiment": 1720, + "experiment datasets": 19236, + "little understood": 30889, + "using contextual": 60621, + "specific labeled": 52096, + "data main": 12474, + "cross view": 11874, + "encoder using": 17549, + "modules model": 35772, + "learning evaluate": 29629, + "able solve": 726, + "showing models": 50683, + "representations achieve": 46613, + "languages previous": 28756, + "dense embeddings": 14075, + "approaches obtain": 3886, + "sparse representation": 51970, + "dense models": 14076, + "multilingual societies": 36122, + "use code": 59842, + "learning make": 29717, + "release model": 46157, + "loss information": 31097, + "image information": 24538, + "bases generate": 6323, + "media platform": 32175, + "covering different": 11655, + "baselines average": 6237, + "score metric": 48858, + "framework design": 21490, + "training extremely": 58104, + "training algorithms": 57930, + "range potential": 44929, + "help facilitate": 23563, + "memory computational": 32248, + "proposed hybrid": 43792, + "memory consumption": 32250, + "datasets real": 13392, + "users interested": 60469, + "exhibit different": 19002, + "question dataset": 44726, + "use crowdsourcing": 59858, + "models neglect": 35250, + "apply novel": 3344, + "pruning strategy": 44269, + "existing sequence": 19141, + "complementary strengths": 9591, + "models combining": 34828, + "models nlms": 35262, + "computation complexity": 9826, + "life paper": 30439, + "pruning techniques": 44270, + "techniques provide": 56128, + "energy consumption": 17749, + "relative increase": 46103, + "match outperform": 31898, + "f1 performance": 20191, + "datasets question": 13390, + "unanswerable questions": 59209, + "extractive model": 20136, + "datasets improved": 13296, + "similarity models": 51109, + "trained dataset": 57704, + "datasets https": 13291, + "study empirically": 53368, + "specifically study": 52227, + "choosing right": 8347, + "predict human": 41642, + "human accuracy": 24088, + "sentences propose": 49772, + "gap source": 21980, + "sentences current": 49701, + "experiments 10": 19342, + "baselines text": 6312, + "task discuss": 55024, + "research text": 47129, + "output labels": 38979, + "pooling mechanism": 41127, + "models cases": 34808, + "ir models": 27034, + "model existing": 33848, + "inference performance": 25680, + "shows superior": 50808, + "competitive approaches": 9539, + "amounts labelled": 2553, + "models open": 35277, + "generate responses": 22237, + "models expensive": 34991, + "requires extensive": 46928, + "issue existing": 27062, + "approaches leverage": 3860, + "combining pre": 9120, + "learning extensive": 29640, + "experiments analyses": 19347, + "self supervised": 49204, + "better downstream": 6879, + "conditional generative": 9993, + "network gan": 36745, + "unbalanced datasets": 59220, + "datasets limited": 13318, + "limited labelled": 30594, + "framework explicitly": 21515, + "related datasets": 45896, + "schema challenge": 48723, + "reasoning task": 45227, + "uses knowledge": 60515, + "text web": 56843, + "generates relevant": 22354, + "approach competitive": 3460, + "plausible alternatives": 40959, + "sequence lengths": 49948, + "objective evaluation": 38087, + "tasks given": 55657, + "task identification": 55120, + "legal domain": 30005, + "adaptation tasks": 1541, + "chinese corpora": 8301, + "annotations available": 2984, + "learn cross": 29354, + "monolingual settings": 35812, + "settings cross": 50362, + "task translation": 55450, + "terms automatic": 56265, + "bleu metrics": 7206, + "assess models": 4580, + "encoder architectures": 17490, + "scale multimodal": 48601, + "propose multimodal": 43482, + "tv series": 58999, + "emotion sentiment": 17294, + "modalities propose": 33471, + "propose strong": 43647, + "conversations dataset": 11058, + "increase precision": 25421, + "alignment mechanism": 2372, + "mechanism learns": 32127, + "leverages multi": 30307, + "leveraging information": 30327, + "performance conduct": 40264, + "language example": 28057, + "framework suitable": 21608, + "attention enables": 4740, + "query understanding": 44680, + "handle task": 23415, + "dataset suggest": 13106, + "showing potential": 50684, + "like model": 30485, + "intended meaning": 26550, + "highly desirable": 23893, + "robustness paper": 48291, + "current utterance": 12026, + "based incremental": 5780, + "10 percentage": 48, + "dataset additional": 12804, + "related topic": 45947, + "bayes model": 6352, + "datasets cross": 13203, + "corpus high": 11353, + "rich resource": 48116, + "open sourced": 38462, + "context encoder": 10621, + "represent document": 46469, + "usually available": 61036, + "method advantage": 32373, + "corpora experiments": 11200, + "datasets approach": 13155, + "language rich": 28477, + "simplified version": 51239, + "token sentence": 57305, + "monolingual text": 35813, + "systems monolingual": 54564, + "develop unsupervised": 14622, + "language subject": 28513, + "results evaluate": 47617, + "using test": 60987, + "gaining insights": 21927, + "content automatically": 10513, + "recognition challenging": 45496, + "classifiers paper": 8621, + "audio signals": 4930, + "content model": 10538, + "information audio": 25764, + "sequences using": 50029, + "combines information": 9096, + "information data": 25799, + "features extensive": 20578, + "emotion categories": 17287, + "called bert": 7540, + "representations transformers": 46776, + "unlike recent": 59609, + "models bert": 34770, + "representations unlabeled": 46778, + "right context": 48138, + "trained bert": 57681, + "bert model": 6683, + "models wide": 35679, + "simple empirically": 51165, + "obtains new": 38252, + "score 80": 48815, + "squad v2": 52396, + "text like": 56650, + "improvements obtained": 25089, + "understanding experiments": 59345, + "strong assumptions": 53000, + "application scenarios": 3179, + "usually large": 61056, + "identification method": 24390, + "methods apply": 32753, + "text new": 56675, + "large paired": 28931, + "examples work": 18942, + "architecture perform": 4075, + "perform ablation": 40064, + "reference evaluation": 45739, + "conditions paper": 10020, + "systematically study": 54414, + "models broad": 34794, + "empirical insights": 17331, + "2018 proposed": 274, + "improve original": 24880, + "better exploit": 6889, + "structure generation": 53107, + "unsupervised learned": 59704, + "algorithm learn": 2282, + "experiments prove": 19498, + "art fully": 4263, + "using semi": 60921, + "language low": 28142, + "train baseline": 57565, + "model ii": 33972, + "manual labeling": 31745, + "utterances high": 61148, + "level based": 30069, + "corpus labeled": 11366, + "entire training": 18030, + "greatly improves": 23232, + "different state": 15078, + "fashion experimental": 20412, + "score 71": 48806, + "history previous": 23969, + "current question": 12003, + "single turn": 51352, + "flow mechanism": 21120, + "mechanism incorporate": 32124, + "outperforms best": 38878, + "greek language": 23246, + "augmented models": 4981, + "dependencies paper": 14110, + "jointly embedding": 27194, + "corpora annotated": 11175, + "type level": 59060, + "level corpus": 30091, + "built different": 7484, + "large performance": 28936, + "need manual": 36581, + "usually based": 61037, + "linguistic understanding": 30805, + "sequence text": 50011, + "autoencoder vae": 5028, + "hierarchical latent": 23674, + "clean noisy": 8645, + "noise level": 37600, + "automatic understanding": 5135, + "understanding domain": 59340, + "use non": 59965, + "information fixed": 25879, + "easy understand": 16567, + "accurately classify": 1093, + "lack diversity": 27885, + "capturing lexical": 7737, + "diversity quality": 15740, + "models single": 35514, + "model mixture": 34102, + "seq2seq baseline": 49896, + "additional parameters": 1691, + "computation cost": 9827, + "attention distribution": 4738, + "distribute attention": 15620, + "learn align": 29344, + "intent slot": 26568, + "parsing systems": 39798, + "sequence approaches": 49908, + "approaches dataset": 3793, + "models clear": 34817, + "question given": 44731, + "question asked": 44720, + "answer query": 3049, + "query given": 44669, + "existing efforts": 19061, + "uses multiple": 60522, + "performance interpretability": 40397, + "models big": 34787, + "negatively affect": 36642, + "affect quality": 2018, + "systems identify": 54523, + "sentences input": 49738, + "seq seq": 49890, + "framework present": 21584, + "trained generate": 57738, + "strategy achieves": 52925, + "low latency": 31155, + "zh en": 63186, + "informal text": 25743, + "university students": 59556, + "dataset new": 13009, + "jointly encode": 27195, + "difficulty level": 15200, + "outputs model": 39017, + "advantages model": 1953, + "model previous": 34232, + "ones paper": 38341, + "affects model": 2025, + "knowledge using": 27645, + "applications despite": 3199, + "despite remarkable": 14386, + "remarkable results": 46361, + "leverage machine": 30277, + "framework tackle": 21611, + "language spanish": 28494, + "rich annotation": 48093, + "annotation data": 2941, + "shared multilingual": 50480, + "encoder sentence": 17542, + "superiority method": 53951, + "method state": 32667, + "sentence inference": 49569, + "knowledge relationships": 27589, + "languages showing": 28783, + "showing different": 50677, + "different structures": 15083, + "facto standard": 20294, + "impact accuracy": 24588, + "negative positive": 36630, + "problem applied": 42504, + "contribution present": 10944, + "experimental study": 19329, + "task addition": 54884, + "reference future": 45742, + "advanced deep": 1886, + "methods pre": 32985, + "trained 30": 57668, + "30 million": 357, + "model input": 34005, + "performance prediction": 40491, + "time speed": 57221, + "baselines new": 6283, + "experiments available": 19359, + "current solutions": 12008, + "classified using": 8588, + "importantly model": 24796, + "embeddings data": 17106, + "augmentation techniques": 4969, + "model final": 33887, + "objective study": 38103, + "methods automatically": 32763, + "experimented various": 19338, + "trained original": 57836, + "imbalanced data": 24566, + "features achieved": 20516, + "contributions include": 10954, + "improvements using": 25109, + "user level": 60431, + "targeted syntactic": 54859, + "networks state": 36912, + "process texts": 42834, + "methods syntactic": 33063, + "systems generate": 54513, + "shown state": 50752, + "performance recent": 40522, + "compared lstm": 9418, + "models reaching": 35400, + "inference dataset": 25651, + "fail perform": 20342, + "replace original": 46402, + "corpus designed": 11323, + "results argue": 47509, + "argue current": 4162, + "inference using": 25703, + "large pre": 28939, + "models helps": 35084, + "datasets similar": 13431, + "nli datasets": 37452, + "non redundant": 37679, + "modeling lm": 34591, + "methods far": 32863, + "batch size": 6342, + "embedding matrix": 17039, + "scale number": 48606, + "negligible loss": 36654, + "advanced neural": 1891, + "introduce auxiliary": 26785, + "regularization term": 45843, + "enhance ability": 17909, + "short range": 50563, + "unsupervised pretraining": 59723, + "improvements nlp": 25086, + "place task": 40927, + "training final": 58105, + "training code": 57953, + "models challenging": 34810, + "monolingual sentences": 35811, + "using augmented": 60570, + "score 10": 48783, + "accordingly propose": 874, + "dataset derived": 12890, + "scenarios language": 48699, + "incorporate external": 25354, + "learn local": 29391, + "approach outperformed": 3621, + "performances achieved": 40637, + "usually costly": 61044, + "idf based": 24476, + "based cosine": 5654, + "present design": 41888, + "design implementation": 14287, + "general overview": 22078, + "generate sequence": 22244, + "promising solution": 43184, + "art seq2seq": 4398, + "models representing": 35440, + "evaluate efficacy": 18456, + "glove fasttext": 22859, + "python package": 44442, + "improved robustness": 24964, + "model representations": 34310, + "model behaviors": 33612, + "imbalanced dataset": 24567, + "distribution data": 15634, + "label text": 27731, + "performance classifiers": 40236, + "high potential": 23762, + "boosting performance": 7261, + "mechanism allows": 32097, + "ability effectively": 605, + "control generation": 10964, + "remain largely": 46315, + "challenging new": 8119, + "access external": 820, + "address new": 1781, + "challenge learning": 7992, + "multimodal dialogue": 36147, + "introduce knowledge": 26815, + "learning paradigms": 29797, + "datasets specifically": 13441, + "multiple label": 36233, + "approaches evaluate": 3812, + "gains achieved": 21932, + "insights models": 26392, + "tune language": 58855, + "improve interpretability": 24865, + "downstream application": 16331, + "accessed https": 832, + "com thunlp": 9027, + "tasks gap": 55652, + "representations address": 46616, + "conduct detailed": 10036, + "detailed experiments": 14426, + "weight matrices": 61918, + "addition subtraction": 1645, + "modeling long": 34592, + "proposed network": 43867, + "yield competitive": 63093, + "present practical": 41986, + "practical challenges": 41461, + "achieve highest": 1156, + "recent successes": 45357, + "sentences high": 49731, + "facilitate learning": 20271, + "performance adding": 40182, + "knowledge high": 27516, + "way new": 61822, + "art architectures": 4215, + "proposed supervised": 43905, + "english news": 17851, + "identify appropriate": 24412, + "search best": 48966, + "memory bilstm": 32245, + "crf architecture": 11761, + "representation vectors": 46604, + "surrounding sentences": 54197, + "unsupervised pre": 59720, + "tested proposed": 56399, + "score respectively": 48870, + "overall proposed": 39046, + "predictions paper": 41766, + "graph representing": 23164, + "level predictions": 30181, + "systems various": 54667, + "appropriate word": 3970, + "data offers": 12521, + "challenges large": 8057, + "studies highlight": 53268, + "designed language": 14322, + "quality conversational": 44501, + "conversational data": 11043, + "data chinese": 12208, + "attracted increasing": 4881, + "enhanced multi": 17934, + "multi headed": 35967, + "headed attention": 23502, + "attend information": 4702, + "representation subspaces": 46588, + "model interaction": 34012, + "interaction multiple": 26608, + "multiple attention": 36170, + "transformer baseline": 58476, + "new parameters": 37279, + "question introduce": 44734, + "separate encoder": 49874, + "better long": 6911, + "available state": 5370, + "resources used": 47338, + "texts generated": 56883, + "methods semi": 33031, + "minimal human": 33288, + "important facts": 24725, + "space limited": 51874, + "fewer words": 20742, + "programming model": 43088, + "texts texts": 56935, + "information limited": 25955, + "confirm proposed": 10133, + "perceived quality": 40048, + "models fast": 35019, + "unsupervised objective": 59719, + "sentences method": 49753, + "explicitly modeled": 19643, + "generate pseudo": 22230, + "pseudo data": 44273, + "phonetic similarity": 40829, + "orthographic information": 38756, + "model traditional": 34465, + "method superior": 32676, + "superior existing": 53933, + "errors paper": 18246, + "data current": 12265, + "build unified": 7432, + "achieves significantly": 1370, + "empirical theoretical": 17354, + "elastic weight": 16958, + "weight consolidation": 61916, + "experiments current": 19395, + "decoder using": 13621, + "large portion": 28938, + "fully explore": 21728, + "framework obtain": 21573, + "resource translation": 47286, + "text common": 56498, + "context help": 10651, + "global contexts": 22824, + "context encoders": 10622, + "dataset newly": 13010, + "flexible way": 21111, + "capsule networks": 7645, + "stimulate research": 52850, + "world people": 62952, + "people express": 40029, + "learning leverage": 29706, + "necessary step": 36533, + "speaker utterance": 52002, + "utterance paper": 61139, + "architecture capable": 4032, + "multilingual sentence": 36118, + "languages train": 28807, + "used transfer": 60339, + "efficient development": 16868, + "agnostic model": 2094, + "languages test": 28802, + "evaluate transfer": 18513, + "transfer performance": 58413, + "experiments detailed": 19412, + "lingual transferability": 30736, + "effective context": 16638, + "line work": 30647, + "tasks ability": 55485, + "implicit knowledge": 24661, + "approaches explicitly": 3819, + "decision boundary": 13561, + "using layer": 60764, + "light future": 30449, + "future study": 21897, + "demonstrates model": 14036, + "improve consistency": 24834, + "impressive progress": 24814, + "set work": 50279, + "performance outperforms": 40470, + "baseline average": 6156, + "training help": 58118, + "help train": 23591, + "results highly": 47659, + "data hungry": 12411, + "common types": 9208, + "propose bayesian": 43310, + "data named": 12502, + "art approach": 4213, + "better captures": 6860, + "analyze errors": 2815, + "methods mitigate": 32946, + "mitigate issue": 33385, + "dataset report": 13062, + "projection layer": 43140, + "tail distribution": 54763, + "capabilities paper": 7606, + "inductive learning": 25612, + "investigate influence": 26962, + "data regimes": 12593, + "highlight need": 23868, + "queries using": 44658, + "based mechanisms": 5842, + "effectiveness existing": 16779, + "based matching": 5835, + "support future": 54119, + "generated neural": 22303, + "models prone": 35367, + "topic aware": 57392, + "reddit comments": 45642, + "generate diverse": 22194, + "effective language": 16664, + "glue benchmark": 22864, + "training bert": 57945, + "bert devlin": 6640, + "score 81": 48816, + "improvement bert": 24989, + "peters et": 40800, + "radford et": 44847, + "map natural": 31795, + "using beam": 60585, + "difficult work": 15194, + "posterior regularization": 41364, + "provide general": 44077, + "task transferring": 55447, + "task representations": 55337, + "representations form": 46672, + "resources data": 47298, + "learned multiple": 29469, + "space recently": 51891, + "languages possible": 28752, + "paper extends": 39372, + "novel formulation": 37825, + "leading better": 29288, + "consistent improvement": 10277, + "text generates": 56592, + "attempts extract": 4698, + "identifying extracting": 24458, + "manual labor": 31747, + "manner specifically": 31726, + "identify text": 24448, + "better trained": 6982, + "sentences trained": 49796, + "able accurately": 667, + "role language": 48314, + "results overall": 47756, + "fundamental differences": 21779, + "demonstrating usefulness": 14056, + "information dense": 25804, + "generate sensible": 22241, + "potentially provide": 41417, + "provide benefits": 44018, + "train end": 57585, + "end automatic": 17617, + "data end": 12322, + "need expert": 36561, + "paired data": 39162, + "cycle consistency": 12074, + "proposed way": 43925, + "unsupervised data": 59691, + "loss based": 31083, + "instead raw": 26461, + "consistency training": 10271, + "initial model": 26215, + "trained 100": 57667, + "audio data": 4926, + "data mainly": 12475, + "modeling improve": 34583, + "labeled text": 27765, + "setting recently": 50347, + "gap different": 21960, + "understanding properties": 59385, + "art datasets": 4245, + "measure used": 32064, + "datasets use": 13467, + "discover best": 15406, + "developing model": 14656, + "model tailored": 34438, + "factually correct": 20327, + "build language": 7407, + "effectively incorporate": 16743, + "investigate various": 26995, + "transfer improves": 58367, + "reduces performance": 45697, + "systems speech": 54636, + "trained acoustic": 57670, + "speech different": 52260, + "lower word": 31225, + "reasonably good": 45177, + "models purpose": 35389, + "apply models": 3337, + "truly low": 58824, + "effective feature": 16650, + "specifically construct": 52188, + "sentence understanding": 49665, + "embedding framework": 17030, + "improves baselines": 25116, + "classify text": 8632, + "method utilizes": 32700, + "space experimental": 51860, + "words need": 62465, + "edit operations": 16593, + "capture sequence": 7711, + "raises question": 44862, + "sub linear": 53521, + "training input": 58132, + "input learns": 26292, + "does generalize": 15948, + "understand generate": 59294, + "study aimed": 53321, + "study design": 53358, + "analysis revealed": 2745, + "group level": 23272, + "changes time": 8181, + "task zero": 55475, + "require different": 46849, + "great challenge": 23200, + "data similar": 12664, + "propose principled": 43591, + "model zero": 34552, + "unseen ones": 59652, + "method utilizing": 32701, + "multiple instances": 36231, + "participating teams": 39825, + "teams paper": 56007, + "interaction paper": 26610, + "production perception": 43048, + "information focus": 25882, + "focus model": 21181, + "present techniques": 42038, + "techniques train": 56144, + "word relations": 62279, + "information provide": 26034, + "concept hierarchy": 9923, + "learning usually": 29933, + "automatic construction": 5074, + "approaches better": 3778, + "set data": 50133, + "methods according": 32726, + "graph relations": 23160, + "syntactic cues": 54298, + "model implicit": 33975, + "make good": 31574, + "good use": 22948, + "relevant knowledge": 46221, + "able extract": 694, + "related attributes": 45888, + "attributes entities": 4906, + "approach build": 3437, + "conversation corpus": 11031, + "88 f1": 544, + "formally define": 21364, + "advanced models": 1890, + "novel paradigm": 37890, + "relations model": 46042, + "evaluated public": 18545, + "improving results": 25195, + "utilize knowledge": 61096, + "setting data": 50318, + "novel auxiliary": 37775, + "ability work": 651, + "text directly": 56538, + "multiple relations": 36275, + "current works": 12029, + "handle multiple": 23413, + "considerably better": 10238, + "individual sentence": 25578, + "margin loss": 31821, + "training task": 58285, + "history using": 23970, + "dialogue level": 14777, + "need explicit": 36562, + "promote diversity": 43191, + "main feature": 31437, + "second train": 49027, + "auxiliary objective": 5236, + "baselines multi": 6280, + "attention transformer": 4840, + "autoregressive decoding": 5213, + "novel non": 37887, + "significant speedup": 50925, + "model heterogeneous": 33957, + "encoders different": 17555, + "information demonstrate": 25803, + "huge success": 24077, + "useful understanding": 60395, + "propose improved": 43413, + "contains multiple": 10501, + "multiple channels": 36179, + "translation abstractive": 58572, + "modeling experimental": 34573, + "expert users": 19585, + "user utterance": 60455, + "robot interaction": 48232, + "promising solutions": 43185, + "novel multimodal": 37881, + "results relative": 47805, + "based end": 5701, + "information given": 25895, + "local languages": 30944, + "use transformer": 60059, + "nlp fields": 37489, + "text improve": 56625, + "trained weights": 57913, + "results f1": 47630, + "model extracting": 33872, + "perform equally": 40097, + "method latent": 32560, + "token alignment": 57280, + "used encoding": 60161, + "encoding sequence": 17576, + "importance score": 24689, + "improvement various": 25038, + "parsing paper": 39790, + "paper survey": 39587, + "mechanism different": 32107, + "techniques machine": 56107, + "structure discourse": 53099, + "discourse features": 15389, + "process identifying": 42791, + "evaluation performance": 18669, + "systems tasks": 54648, + "detection specifically": 14530, + "evaluated tasks": 18549, + "datasets best": 13168, + "results systems": 47875, + "identification relevant": 24395, + "relevant entities": 46213, + "described text": 14216, + "problem short": 42652, + "achieves improved": 1340, + "ranked 7th": 44956, + "understanding data": 59337, + "requires generating": 46931, + "generating long": 22381, + "despite considerable": 14356, + "considerable efforts": 10229, + "topic generated": 57408, + "paper create": 39310, + "knowledge resource": 27597, + "understanding use": 59412, + "does hold": 15951, + "wikipedia news": 62050, + "focus specific": 21204, + "thorough experimental": 57061, + "outperforming baseline": 38845, + "models improvement": 35114, + "finally release": 20880, + "text investigate": 56633, + "building knowledge": 7449, + "graph text": 23173, + "generate set": 22245, + "higher recall": 23841, + "syntactic relationships": 54317, + "neural seq2seq": 37095, + "pre post": 41506, + "data traditional": 12737, + "models global": 35064, + "global inference": 22830, + "performance lags": 40405, + "approach substantially": 3707, + "english low": 17838, + "challenge neural": 8000, + "usually achieve": 61034, + "performance trained": 40604, + "sets data": 50286, + "translated data": 58554, + "data create": 12258, + "end systems": 17712, + "systems difficult": 54480, + "languages highly": 28686, + "guide models": 23339, + "learning perform": 29801, + "tasks jointly": 55700, + "jointly multiple": 27209, + "approaches fail": 3825, + "fail model": 20341, + "suffer error": 53763, + "classifier detect": 8595, + "require model": 46880, + "model understand": 34499, + "models diverse": 34932, + "current datasets": 11968, + "examples existing": 18899, + "models evaluation": 34979, + "currently existing": 12035, + "work evaluating": 62650, + "settings different": 50367, + "million scale": 33255, + "news headline": 37405, + "body text": 7241, + "text dataset": 56524, + "dataset develop": 12894, + "networks hierarchical": 36866, + "architectures model": 4117, + "input size": 26337, + "experiments qualitative": 19507, + "contribution method": 10942, + "data labeling": 12449, + "step building": 52802, + "artificial intelligent": 4494, + "recognition language": 45510, + "models explicitly": 34997, + "tasks build": 55531, + "domain multi": 16115, + "turn conversation": 58987, + "cue words": 11932, + "paper experiments": 39357, + "evaluation natural": 18660, + "modeling provides": 34617, + "dataset diverse": 12899, + "simple flexible": 51172, + "conceptual framework": 9949, + "kinds questions": 27372, + "dataset including": 12964, + "require reasoning": 46885, + "proposed tackle": 43907, + "overview different": 39111, + "code results": 8856, + "results limitations": 47700, + "massive corpus": 31882, + "present day": 41884, + "component model": 9706, + "estimation mle": 18384, + "designed specifically": 14331, + "outperforms multiple": 38913, + "including standard": 25302, + "training strategies": 58274, + "architectures demonstrate": 4106, + "approach recently": 3668, + "recently large": 45435, + "mechanisms models": 32152, + "proposed sequence": 43895, + "model graph": 33946, + "networks gnns": 36863, + "methods specifically": 33049, + "propose contextualized": 43336, + "text introduce": 56632, + "datasets verify": 13482, + "better handle": 6896, + "model fully": 33911, + "fully extract": 21731, + "representations furthermore": 46674, + "approaches cross": 3789, + "paper seeks": 39572, + "pairs language": 39199, + "corpora exist": 11198, + "knowledge captured": 27421, + "learning resource": 29846, + "scale labeled": 48584, + "dataset facilitate": 12926, + "research future": 47043, + "identify major": 24430, + "useful context": 60358, + "information sequence": 26081, + "generalization power": 22127, + "representations non": 46725, + "context improves": 10655, + "languages experimental": 28664, + "utilize information": 61095, + "specific pre": 52126, + "datasets high": 13289, + "language fine": 28072, + "level using": 30230, + "output existing": 38972, + "broad spectrum": 7355, + "idea approach": 24367, + "nlp domain": 37482, + "architecture deep": 4038, + "dataset significant": 13086, + "significant gap": 50868, + "world tasks": 62963, + "corpus existing": 11335, + "tasks learned": 55718, + "message passing": 32322, + "effective interpretable": 16662, + "present rule": 42001, + "work consists": 62609, + "different challenges": 14858, + "ones using": 38345, + "dataset investigate": 12971, + "tasks explored": 55634, + "method joint": 32554, + "complementary knowledge": 9590, + "lingual knowledge": 30706, + "knowledge attention": 27399, + "attention cross": 4731, + "fact extraction": 20290, + "using evidence": 60682, + "aims identifying": 2198, + "specific aspect": 52046, + "public corpora": 44309, + "scarce data": 48657, + "data largely": 12456, + "aims leverage": 2203, + "resource source": 47278, + "task easily": 55036, + "resource target": 47279, + "domain fine": 16071, + "multi granularity": 35962, + "alignment network": 2377, + "task help": 55114, + "modeling fine": 34576, + "method adopted": 32372, + "data challenging": 12205, + "challenging highly": 8098, + "challenges paper": 8066, + "13 million": 127, + "model meaning": 34095, + "model fact": 33878, + "translations language": 58708, + "apply methods": 3335, + "translation corpora": 58590, + "learn diverse": 29361, + "selecting relevant": 49127, + "proposed trained": 43916, + "scale generative": 48575, + "learn discriminative": 29359, + "supervised fine": 53985, + "domain shift": 16159, + "studies suggest": 53302, + "suggest models": 53825, + "systems adopt": 54427, + "evaluate competitive": 18446, + "models challenge": 34809, + "expensive collect": 19205, + "learning signals": 29878, + "improved significantly": 24965, + "feedback data": 20716, + "essential information": 18327, + "study explore": 53375, + "representations predict": 46739, + "rapidly increasing": 44996, + "modern large": 35709, + "new supervised": 37329, + "approaches benchmark": 3774, + "learn training": 29440, + "source training": 51815, + "context type": 10736, + "task ii": 55123, + "systematic generalization": 54399, + "capable reasoning": 7629, + "end methods": 17684, + "methods prior": 32991, + "require explicit": 46852, + "content detection": 10518, + "dataset data": 12878, + "supervised datasets": 53977, + "datasets showcase": 13422, + "providing explicit": 44242, + "explicit labels": 19618, + "average recall": 5416, + "using annotated": 60558, + "remains major": 46340, + "used generating": 60199, + "question natural": 44739, + "actively studied": 1481, + "complexity model": 9683, + "detect cases": 14435, + "costly time": 11604, + "consuming paper": 10451, + "learning pipeline": 29805, + "conventional supervised": 11014, + "data 50": 12103, + "performance fully": 40353, + "fully supervised": 21738, + "data external": 12352, + "external source": 19953, + "supervised counterparts": 53974, + "model working": 34545, + "token time": 57311, + "available word": 5391, + "python module": 44441, + "framework composed": 21474, + "demonstrate large": 13928, + "scale unsupervised": 48637, + "tuning training": 58970, + "task create": 54983, + "finetuned model": 21044, + "studies investigating": 53275, + "architectures datasets": 4105, + "results real": 47798, + "semantic change": 49244, + "key terms": 27339, + "different research": 15052, + "propose metric": 43459, + "retrieval process": 47963, + "fast text": 20429, + "aims developing": 2188, + "nlp field": 37488, + "low recall": 31171, + "recall rate": 45245, + "optimal combination": 38527, + "features chinese": 20536, + "identification experiments": 24387, + "selected features": 49118, + "precision rate": 41615, + "33 f1": 370, + "based public": 5963, + "study automatic": 53332, + "world problems": 62953, + "texts containing": 56868, + "leads lack": 29319, + "agglomerative clustering": 2068, + "using cnn": 60607, + "method conduct": 32433, + "short sentence": 50564, + "propose robust": 43610, + "learn human": 29381, + "question word": 44758, + "estimate importance": 18371, + "importance words": 24696, + "model agnostic": 33551, + "agnostic explanations": 2089, + "models high": 35086, + "changes input": 8177, + "model sensitivity": 34352, + "accuracy measure": 1004, + "understand models": 59305, + "robustness model": 48286, + "corpus support": 11440, + "context evaluation": 10628, + "prior literature": 42407, + "widely regarded": 62000, + "developed model": 14634, + "text generative": 56606, + "framework trained": 21615, + "level chinese": 30074, + "method good": 32520, + "generate realistic": 22233, + "data aspect": 12146, + "detection important": 14492, + "analysis given": 2671, + "aims detect": 2184, + "implicitly explicitly": 24667, + "depends availability": 14161, + "availability labeled": 5251, + "costly obtain": 11602, + "method address": 32371, + "task need": 55235, + "baselines substantial": 6305, + "substantial margin": 53625, + "contextual understanding": 10785, + "attention self": 4827, + "model empirical": 33809, + "translation human": 58617, + "learning achieved": 29502, + "substantial progress": 53629, + "discuss main": 15473, + "using encoder": 60673, + "conditional text": 10006, + "zhang et": 63188, + "vision domain": 61636, + "approaches mitigate": 3874, + "utilizes latent": 61114, + "used final": 60190, + "generation usually": 22578, + "continuous embedding": 10844, + "distribution vocabulary": 15658, + "capable handling": 7622, + "produce meaningful": 42991, + "user interactions": 60428, + "hand written": 23396, + "evaluated approach": 18520, + "text extract": 56574, + "used supervised": 60317, + "addresses challenge": 1808, + "shelf pretrained": 50540, + "remove spurious": 46376, + "target token": 54851, + "tokens predicted": 57332, + "study non": 53421, + "results low": 47704, + "propose sentence": 43621, + "sentence wise": 49670, + "require high": 46860, + "nouns adjectives": 37745, + "present article": 41850, + "qualitative differences": 44475, + "models systems": 35575, + "bengali language": 6596, + "systems respect": 54624, + "types sentence": 59116, + "researchers use": 47168, + "using temporal": 60984, + "temporal graph": 56187, + "time sensitive": 57208, + "time frames": 57158, + "weights model": 61940, + "neural topic": 37106, + "trained fully": 57734, + "number datasets": 37993, + "datasets demonstrates": 13220, + "process use": 42836, + "nlp technology": 37555, + "train task": 57644, + "cross task": 11870, + "improve understanding": 24937, + "commerce platforms": 9152, + "language description": 28021, + "root leaf": 48342, + "demonstrate machine": 13933, + "graph dag": 23127, + "essential problem": 18332, + "performing downstream": 40675, + "tasks successfully": 55917, + "domain best": 16027, + "approach jointly": 3579, + "corpus context": 11309, + "context event": 10629, + "frequency features": 21672, + "entities large": 18062, + "python based": 44439, + "based open": 5927, + "embeddings efficiently": 17121, + "various recent": 61385, + "task sentences": 55359, + "abstract semantic": 762, + "building neural": 7460, + "data variety": 12769, + "variety real": 61288, + "challenge using": 8022, + "train domain": 57580, + "synthetic datasets": 54374, + "datasets domain": 13238, + "test datasets": 56343, + "improving user": 25200, + "domain relevant": 16146, + "sequential decision": 50037, + "time period": 57191, + "local feature": 30936, + "type feature": 59056, + "methods novel": 32963, + "accuracy coverage": 952, + "results provided": 47790, + "texts annotated": 56858, + "annotated according": 2876, + "obtained text": 38226, + "used far": 60185, + "identification models": 24391, + "task complexity": 54963, + "manually selected": 31787, + "different pre": 15028, + "resource corpus": 47216, + "corpus linguistics": 11374, + "internet people": 26697, + "groups work": 23285, + "overview current": 39109, + "area present": 4146, + "number examples": 38002, + "significantly affects": 50937, + "representations demonstrate": 46636, + "time existing": 57153, + "traditional systems": 57550, + "evaluate neural": 18477, + "novel fine": 37823, + "survey paper": 54210, + "research trends": 47135, + "point potential": 41047, + "factors make": 20312, + "reduce labor": 45667, + "secondly propose": 49032, + "applied low": 3280, + "transfer related": 58417, + "classification present": 8518, + "present cross": 41879, + "predictions based": 41755, + "use joint": 59917, + "generalize knowledge": 22143, + "lingual monolingual": 30713, + "monolingual resources": 35810, + "available experiments": 5292, + "transfer data": 58356, + "languages sentiment": 28780, + "issues existing": 27090, + "datasets terms": 13455, + "inference speedup": 25693, + "compared autoregressive": 9381, + "source tokens": 51811, + "embeddings decoder": 17107, + "method largely": 32559, + "largely outperforms": 29060, + "task previous": 55290, + "model distribution": 33780, + "information wikipedia": 26158, + "languages dataset": 28633, + "articles dataset": 4466, + "domain topic": 16212, + "theoretically empirically": 57029, + "method widely": 32706, + "indicate method": 25526, + "method compared": 32425, + "introduce methodology": 26822, + "stages model": 52450, + "problem time": 42675, + "making classification": 31648, + "score 77": 48812, + "task detect": 55011, + "sentence previous": 49624, + "specific sentence": 52144, + "introduce noise": 26840, + "propose constrained": 43331, + "attention multi": 4791, + "single aspect": 51284, + "fails capture": 20350, + "transition probability": 58543, + "order word": 38662, + "texts text": 56934, + "bilstm crf": 7130, + "latent features": 29127, + "classification specifically": 8556, + "role entity": 48305, + "recognition dataset": 45498, + "benefits pre": 6586, + "elmo bert": 16995, + "bert outperforms": 6696, + "fasttext embeddings": 20447, + "million parameters": 33253, + "parameters make": 39708, + "train separate": 57629, + "separate models": 49877, + "multilingual pre": 36109, + "training fine": 58106, + "languages final": 28672, + "compared fine": 9411, + "91 f1": 557, + "open world": 38467, + "tasks entity": 55615, + "works utilize": 62918, + "context entity": 10626, + "context level": 10669, + "generic domain": 22628, + "improvement terms": 25032, + "method natural": 32585, + "constituency tree": 10352, + "trees encoding": 58768, + "based mechanism": 5841, + "models created": 34872, + "datasets tasks": 13453, + "model showed": 34372, + "study introduces": 53396, + "learning informative": 29683, + "studies models": 53283, + "document work": 15846, + "propose coarse": 43320, + "finds relevant": 20920, + "scores candidate": 48894, + "attention learn": 4764, + "set outperforming": 50209, + "various factors": 61341, + "making task": 31669, + "task document": 55029, + "datasets reveal": 13412, + "couple years": 11631, + "exchange information": 18965, + "regional languages": 45805, + "numerous methods": 38066, + "aim improving": 2152, + "approaches particular": 3893, + "generated reviews": 22313, + "tackles problem": 54718, + "different aspect": 14843, + "demonstrates approach": 14028, + "topic attention": 57391, + "identifying words": 24470, + "related different": 45897, + "topics work": 57465, + "provides empirical": 44197, + "iterative training": 27127, + "applications especially": 3203, + "lesser extent": 30045, + "large labelled": 28896, + "labelled datasets": 27802, + "datasets significant": 13427, + "important applications": 24699, + "applications case": 3187, + "data privacy": 12562, + "hidden information": 23638, + "82 f1": 528, + "benefit transfer": 6571, + "second model": 49012, + "learning binary": 29549, + "documents used": 15924, + "evaluation models": 18657, + "field information": 20756, + "report strong": 46449, + "results transfer": 47889, + "specific dataset": 52066, + "matching network": 31918, + "capable encoding": 7618, + "metrics achieving": 33136, + "performance demonstrating": 40285, + "embeddings glove": 17143, + "glove elmo": 22856, + "matching module": 31917, + "task aiming": 54893, + "interaction information": 26602, + "deeper level": 13759, + "method multiple": 32583, + "multiple perspectives": 36260, + "study tackles": 53466, + "summarization model": 53892, + "model instead": 34008, + "focuses generating": 21239, + "target style": 54844, + "accuracy drops": 965, + "drops dramatically": 16449, + "meaning space": 32018, + "graph reasoning": 23159, + "reasoning problems": 45216, + "path forward": 39946, + "richer representations": 48130, + "transformer networks": 58504, + "pretrained language": 42157, + "preliminary evaluation": 41801, + "related questions": 45931, + "provide answers": 44009, + "questions related": 44803, + "dataset transfer": 13123, + "accuracy experimental": 971, + "experimental setting": 19324, + "trained work": 57919, + "understanding linguistic": 59360, + "general neural": 22074, + "corpus multilingual": 11384, + "languages similar": 28787, + "languages end": 28650, + "translated english": 58555, + "languages just": 28700, + "causal relationships": 7878, + "vocabulary mismatch": 61706, + "mismatch problem": 33351, + "sentence rewriting": 49635, + "gain f1": 21907, + "glove embeddings": 22858, + "bias evaluation": 7026, + "existing biases": 19044, + "baseline text": 6217, + "used communication": 60118, + "making tasks": 31670, + "proposed text": 43914, + "aims generating": 2196, + "generator network": 22621, + "nli model": 37453, + "numerical reasoning": 38061, + "reasoning capabilities": 45187, + "framework support": 21609, + "understanding question": 59387, + "answers generated": 3108, + "questions humans": 44791, + "attempt understand": 4692, + "true meaning": 58822, + "work human": 62680, + "interactive attention": 26627, + "models comparison": 34838, + "models current": 34878, + "generating texts": 22401, + "texts given": 56884, + "researches focus": 47173, + "focus generating": 21167, + "model fit": 33899, + "experiments based": 19361, + "strong language": 53036, + "poor generalization": 41134, + "embedding network": 17050, + "cases neural": 7810, + "recent improvements": 45314, + "struggle generalize": 53200, + "examples shot": 18931, + "complex ways": 9671, + "solving tasks": 51707, + "capabilities language": 7597, + "presenting new": 42065, + "new topics": 37348, + "words particular": 62477, + "pairs generated": 39193, + "form data": 21318, + "user inputs": 60423, + "suggest ways": 53833, + "latin script": 29162, + "language communities": 27996, + "multiple possible": 36264, + "possible use": 41339, + "mle training": 33436, + "supervision based": 54078, + "based optimal": 5928, + "utility proposed": 61083, + "translation benchmarks": 58585, + "samples text": 48490, + "text samples": 56752, + "generating semantically": 22393, + "standard sequence": 52527, + "generate semantically": 22240, + "replication study": 46419, + "data preprocessing": 12555, + "argument identification": 4172, + "augmented dataset": 4977, + "statistical classifiers": 52739, + "finally suggest": 20882, + "algorithms neural": 2330, + "rich input": 48104, + "randomly initialized": 44899, + "embeddings empirically": 17125, + "empirically observed": 17365, + "form model": 21327, + "data following": 12369, + "early stopping": 16516, + "method method": 32575, + "training outperforms": 58199, + "error accumulation": 18211, + "used detect": 60145, + "tends generate": 56215, + "scale unlabeled": 48634, + "challenging traditional": 8161, + "representation module": 46558, + "value memory": 61208, + "capture relevant": 7704, + "module proposed": 35768, + "strong reasoning": 53045, + "overcome issue": 39063, + "achieved new": 1253, + "rouge f1": 48350, + "score 24": 48786, + "argue important": 4163, + "particularly propose": 39888, + "features encode": 20570, + "way humans": 61807, + "representations core": 46633, + "study compared": 53340, + "used traditional": 60331, + "analysis finally": 2666, + "performance tested": 40597, + "tested models": 56398, + "initial set": 26217, + "data computing": 12233, + "approach diverse": 3492, + "addition providing": 1641, + "providing evidence": 44240, + "effectiveness transfer": 16819, + "present open": 41982, + "corpus hindi": 11356, + "corpus news": 11389, + "verify quality": 61543, + "based bert": 5599, + "gap model": 21969, + "original dataset": 38708, + "google com": 22954, + "data pretrained": 12558, + "google research": 22956, + "research language": 47063, + "tree master": 58751, + "independent model": 25501, + "experiments known": 19450, + "obtaining best": 38230, + "seven different": 50418, + "furthermore results": 21838, + "models generated": 35059, + "facilitate reproducibility": 20274, + "framework evaluation": 21510, + "corpora containing": 11186, + "approach ability": 3386, + "models corpus": 34869, + "based research": 5996, + "benefit use": 6573, + "vision models": 61640, + "address gap": 1757, + "processing use": 42963, + "classification single": 8551, + "bert achieves": 6606, + "perfect accuracy": 40061, + "classification code": 8442, + "construction process": 10430, + "task generate": 55103, + "generate complex": 22185, + "simple sentence": 51208, + "generator model": 22620, + "pipeline model": 40904, + "important building": 24703, + "highly specialized": 23917, + "domain ii": 16081, + "overview recent": 39114, + "recent findings": 45311, + "processing various": 42966, + "interactions work": 26625, + "importance context": 24678, + "current time": 12020, + "perform competitively": 40079, + "simpler efficient": 51230, + "scales linearly": 48646, + "scale machine": 48593, + "utterances corresponding": 61145, + "data contain": 12248, + "differences different": 14821, + "trained automatically": 57677, + "corpus evaluated": 11332, + "improved model": 24952, + "benchmark test": 6499, + "introduce span": 26863, + "capturing context": 7731, + "theoretically prove": 57032, + "witnessed dramatic": 62091, + "explored previous": 19762, + "proposed handle": 43790, + "handle problem": 23414, + "search decoding": 48969, + "decoding phase": 13638, + "training memory": 58172, + "various experiments": 61340, + "method multi": 32581, + "relevant target": 46236, + "open challenge": 38412, + "information predict": 26016, + "novel alignment": 37753, + "transformer attention": 58451, + "framework construct": 21480, + "construct dataset": 10384, + "dataset covering": 12869, + "english web": 17902, + "leverages large": 30306, + "bidirectional transformer": 7084, + "transformer language": 58490, + "glue tasks": 22867, + "bert representations": 6710, + "knowledge document": 27446, + "document prior": 15821, + "prior document": 42399, + "systems largely": 54545, + "largely relied": 29063, + "gcn based": 22027, + "accuracy points": 1023, + "algorithms trained": 2343, + "trained learn": 57773, + "utilizing data": 61120, + "language output": 28364, + "people social": 40037, + "quality natural": 44555, + "quality experimental": 44520, + "analysis tweets": 2785, + "song lyrics": 51712, + "indicate models": 25529, + "increasing performance": 25458, + "learning field": 29643, + "models graph": 35069, + "effective approaches": 16630, + "approaches multi": 3877, + "relations multi": 46043, + "task leveraging": 55186, + "answer multi": 3038, + "task aware": 54928, + "aware pre": 5465, + "bert pre": 6703, + "proposed pre": 43879, + "requires data": 46921, + "method combining": 32422, + "representations improve": 46685, + "experimentally approach": 19331, + "performance commonly": 40244, + "based surface": 6074, + "features obtained": 20633, + "systems additionally": 54425, + "additionally study": 1735, + "methods utilizing": 33098, + "especially beneficial": 18263, + "extraction multiple": 20086, + "require multiple": 46881, + "task multiple": 55230, + "trained self": 57857, + "prediction layer": 41715, + "multiple entity": 36211, + "information associated": 25762, + "entity aware": 18096, + "ace 2005": 1102, + "need methods": 36583, + "unseen datasets": 59645, + "research line": 47065, + "identify address": 24411, + "best unsupervised": 6834, + "conventional neural": 11009, + "generation demonstrate": 22444, + "input information": 26285, + "technical challenges": 56018, + "trained low": 57779, + "available benchmarks": 5267, + "benchmarks work": 6552, + "code reproduce": 8855, + "reproduce experiments": 46822, + "problem making": 42605, + "spelling mistakes": 52336, + "dramatically improve": 16387, + "focus translation": 21208, + "noise data": 37597, + "super characters": 53921, + "characters method": 8254, + "asian languages": 4514, + "accuracy gain": 981, + "gain compared": 21905, + "texts compare": 56864, + "language results": 28476, + "text attention": 56442, + "used wide": 60350, + "systematic overview": 54401, + "representations textual": 46771, + "propose taxonomy": 43661, + "output present": 38992, + "examples prior": 18924, + "body literature": 7239, + "approaches low": 3865, + "model offers": 34143, + "extends previous": 19844, + "based counterparts": 5657, + "generic framework": 22629, + "standard self": 52523, + "time different": 57145, + "original self": 38725, + "meeting corpus": 32223, + "rely fixed": 46281, + "autoregressive generation": 5215, + "various settings": 61390, + "settings training": 50401, + "transformer outperforms": 58506, + "original transformer": 38736, + "led impressive": 29990, + "impressive accuracy": 24808, + "improvements low": 25078, + "encoding framework": 17565, + "latent embedding": 29124, + "bert generate": 6664, + "sentences diverse": 49706, + "slightly worse": 51438, + "human reader": 24227, + "challenge present": 8006, + "segment document": 49073, + "distinct domains": 15590, + "f1 compared": 20182, + "effectiveness modeling": 16795, + "representations considering": 46629, + "considering contextual": 10257, + "information proven": 26033, + "query key": 44670, + "layers used": 29237, + "conducted extensive": 10084, + "extensive analyses": 19855, + "development accurate": 14666, + "popular languages": 41168, + "modern methods": 35712, + "ensemble methods": 17977, + "work develops": 62636, + "predicting sentiment": 41682, + "investigate potential": 26976, + "sentiment datasets": 49840, + "datasets domains": 13239, + "serve useful": 50084, + "relevant datasets": 46208, + "fully labeled": 21735, + "networks twitter": 36919, + "perform substantially": 40147, + "practical settings": 41472, + "optimal solution": 38532, + "global optimal": 22836, + "quantitatively analyze": 44630, + "text conduct": 56505, + "higher score": 23844, + "useful machine": 60375, + "study usefulness": 53471, + "building multilingual": 7457, + "languages language": 28704, + "models share": 35491, + "adaptation new": 1531, + "obtain comparable": 38165, + "adapting new": 1567, + "addition design": 1608, + "models robust": 35465, + "best trade": 6832, + "diversity compared": 15731, + "using time": 60990, + "pooling strategy": 41129, + "dev test": 14568, + "learn predict": 29410, + "human assessments": 24107, + "generic task": 22632, + "enhance semantic": 17922, + "knowledge introduce": 27533, + "classification knowledge": 8481, + "text help": 56614, + "information unlike": 26138, + "decisions based": 13573, + "pays attention": 39994, + "attention important": 4758, + "overcome aforementioned": 39057, + "relevant work": 46246, + "task known": 55155, + "paper hypothesize": 39391, + "strategies incorporating": 52907, + "incorporating knowledge": 25388, + "model dubbed": 33793, + "significant consistent": 50858, + "experiments involving": 19448, + "research automatically": 46990, + "showed model": 50667, + "overlap metrics": 39088, + "function proposed": 21759, + "evaluation paper": 18666, + "novel representation": 37908, + "used image": 60207, + "compare approach": 9329, + "considerable improvement": 10231, + "accuracy 93": 925, + "approach code": 3446, + "applications different": 3200, + "different fields": 14934, + "treat problem": 58731, + "prone error": 43226, + "training sentences": 58247, + "annotated based": 2877, + "created training": 11733, + "data easily": 12307, + "data naturally": 12505, + "leads accurate": 29304, + "multinomial naive": 36161, + "bayes mnb": 6351, + "forest rf": 21303, + "demonstrate certain": 13879, + "additionally discuss": 1718, + "propose future": 43395, + "token token": 57312, + "networks able": 36827, + "ones work": 38346, + "outputs using": 39020, + "propose strategies": 43646, + "faces challenges": 20253, + "chinese natural": 8312, + "utilize multi": 61099, + "word lattice": 62222, + "noisy information": 37618, + "document based": 15768, + "models strong": 35537, + "encode knowledge": 17466, + "generalize poorly": 22147, + "effectively integrate": 16744, + "surpass previous": 54165, + "extraction event": 20062, + "aims solve": 2215, + "information transfer": 26132, + "ability automatically": 595, + "specific characteristics": 52054, + "characteristics task": 8244, + "dataset benchmarking": 12826, + "learn domain": 29362, + "scoring model": 48937, + "models considerable": 34848, + "attention devoted": 4736, + "systems fail": 54503, + "domain generalization": 16078, + "method multilingual": 32582, + "multilingual transfer": 36128, + "deep contextual": 13685, + "embeddings pretrained": 17193, + "end construct": 17623, + "embeddings experimental": 17131, + "approach zero": 3746, + "perform consistently": 40082, + "text human": 56616, + "text particularly": 56693, + "paper leverage": 39417, + "enhance robustness": 17921, + "preserve meaning": 42114, + "quality measured": 44549, + "multilingual contextual": 36071, + "training single": 58261, + "text multiple": 56671, + "increasing complexity": 25447, + "individual components": 25563, + "performs sentence": 40712, + "baselines datasets": 6250, + "search generate": 48973, + "devise novel": 14726, + "according relevance": 866, + "dialog model": 14757, + "tracking data": 57497, + "data contains": 12250, + "contains information": 10496, + "corpora manually": 11221, + "entity labels": 18113, + "models ner": 35251, + "models individual": 35128, + "research domain": 47023, + "sota model": 51728, + "framework achieved": 21448, + "compared sota": 9455, + "approaches end": 3807, + "achieve sota": 1200, + "examine performance": 18867, + "specifically evaluate": 52198, + "evaluate systems": 18510, + "metric evaluating": 33115, + "incorrect answer": 25398, + "drastically improve": 16392, + "framework compare": 21472, + "existing novel": 19121, + "novel sentence": 37917, + "especially chinese": 18266, + "focus automatic": 21146, + "scale high": 48576, + "module extract": 35757, + "relations multiple": 46044, + "networks widely": 36923, + "applications deep": 3196, + "networks pre": 36893, + "approach select": 3680, + "different algorithms": 14833, + "new metrics": 37258, + "nlp classification": 37472, + "selection baselines": 49134, + "common challenges": 9168, + "search result": 48982, + "issues work": 27107, + "generative neural": 22600, + "keyword generation": 27351, + "set keywords": 50173, + "adapt unseen": 1514, + "scenarios recent": 48707, + "support set": 54126, + "learning way": 29938, + "better evaluate": 6885, + "dataset chinese": 12838, + "shot text": 50651, + "architecture new": 4069, + "times compared": 57247, + "experiments determine": 19413, + "determine optimal": 14557, + "sentence fusion": 49563, + "information required": 26056, + "respect various": 47353, + "approach addresses": 3411, + "models enabling": 34961, + "mainly designed": 31469, + "fail handle": 20340, + "critical problems": 11787, + "graph constructed": 23116, + "graph propose": 23158, + "graph attention": 23095, + "learn multi": 29400, + "models building": 34798, + "static word": 52726, + "information performance": 26006, + "evaluation suite": 18732, + "including tasks": 25308, + "semeval 2019": 49434, + "domain evaluation": 16059, + "score 68": 48803, + "facebook twitter": 20247, + "demonstrate value": 13999, + "users opinions": 60471, + "using predicted": 60869, + "faster accurate": 20432, + "address following": 1756, + "high error": 23735, + "large label": 28892, + "including new": 25283, + "networks nodes": 36884, + "simultaneously learning": 51272, + "scaled dot": 48642, + "mutual attention": 36342, + "pairs documents": 39181, + "parameters use": 39726, + "learn meaningful": 29397, + "increasingly difficult": 25471, + "recommendation tasks": 45566, + "tasks works": 55973, + "recent natural": 45322, + "methods jointly": 32911, + "systems finally": 54504, + "approaches build": 3779, + "leading poor": 29295, + "sources propose": 51839, + "performance adversarial": 40189, + "training analysis": 57932, + "contextual representation": 10778, + "various downstream": 61331, + "layer output": 29197, + "reduces time": 45698, + "time spent": 57222, + "times speedup": 57255, + "level retrieval": 30196, + "level similarities": 30213, + "adaptation performance": 1533, + "models relatively": 35423, + "despite effectiveness": 14358, + "data enhance": 12324, + "level open": 30171, + "hard understand": 23453, + "knowledge facts": 27482, + "new interesting": 37228, + "number scientific": 38035, + "growing exponentially": 23296, + "actionable insights": 1460, + "techniques natural": 56114, + "variety sources": 61291, + "pipeline uses": 40908, + "embeddings work": 17250, + "capture latent": 7689, + "latent information": 29128, + "module helps": 35761, + "embeddings traditional": 17231, + "embeddings map": 17171, + "knowledge transferred": 27634, + "clinical domains": 8670, + "comparison popular": 9501, + "possible solutions": 41337, + "various challenges": 61313, + "finally conclude": 20844, + "entities method": 18065, + "related user": 45952, + "remains explored": 46333, + "news paper": 37410, + "paper verify": 39606, + "compatible existing": 9517, + "generation considering": 22437, + "variational auto": 61241, + "auto encoders": 5017, + "used retrieve": 60293, + "information incorporated": 25920, + "methods knowledge": 32912, + "requiring external": 46961, + "benchmark method": 6476, + "remains competitive": 46330, + "offers better": 38300, + "extracts text": 20144, + "transformer decoder": 58480, + "users understand": 60485, + "existing entity": 19066, + "label correlations": 27700, + "novel label": 37849, + "graph propagation": 23157, + "relative f1": 46096, + "step natural": 52816, + "processing problems": 42927, + "problems previous": 42721, + "making models": 31661, + "work make": 62720, + "time method": 57178, + "accuracy 92": 924, + "model deployment": 33751, + "2019 shared": 284, + "settings results": 50396, + "selecting correct": 49124, + "level better": 30071, + "better identify": 6901, + "grounded dialogue": 23260, + "models humans": 35095, + "training state": 58270, + "models setting": 35488, + "independent word": 25507, + "translation retrieval": 58671, + "missing information": 33363, + "information pre": 26013, + "perform fine": 40109, + "analysis generated": 2670, + "multiple approaches": 36168, + "study data": 53352, + "systematic approach": 54391, + "task evaluation": 55058, + "approach pre": 3643, + "vietnamese word": 61594, + "trained embedding": 57715, + "task utilize": 55465, + "dataset apply": 12811, + "users quickly": 60478, + "select suitable": 49114, + "embeddings addition": 17079, + "approach facilitate": 3536, + "driven method": 16428, + "utterance based": 61133, + "based fusion": 5742, + "converting natural": 11078, + "exploiting data": 19673, + "domain pre": 16134, + "training auxiliary": 57939, + "tuning target": 58964, + "learning able": 29500, + "domains experiment": 16252, + "including document": 25251, + "level multi": 30164, + "new network": 37268, + "explicitly encourages": 19634, + "translation document": 58603, + "learning lead": 29702, + "tasks observe": 55772, + "consistent significant": 10285, + "data abundant": 12106, + "representations sequence": 46754, + "sequence tagger": 50001, + "level metrics": 30161, + "topological data": 57467, + "combines strengths": 9103, + "information local": 25957, + "syntax neural": 54351, + "historical context": 23956, + "including information": 25263, + "corpora typically": 11252, + "rich metadata": 48111, + "exploit contextual": 19652, + "data long": 12469, + "corpora new": 11227, + "start problem": 52567, + "mining approach": 33311, + "macro averaged": 31404, + "different pretraining": 15032, + "pretraining objectives": 42214, + "adapt pretrained": 1509, + "pretrained weights": 42193, + "directly fine": 15316, + "tuning pretrained": 58945, + "results diverse": 47595, + "diverse nlp": 15709, + "models relative": 35422, + "target tasks": 54848, + "role semantic": 48321, + "ongoing debate": 38348, + "expressions different": 19807, + "methodology used": 32721, + "used fine": 60191, + "highlighting challenges": 23872, + "downstream text": 16370, + "perturbations input": 40793, + "change input": 8170, + "largely ignored": 29056, + "growing body": 23290, + "related literature": 45916, + "framework adversarial": 21454, + "semantic equivalence": 49275, + "additional constraints": 1658, + "adversarial perturbations": 1984, + "adversarial robustness": 1985, + "released https": 46175, + "text specifically": 56783, + "patient doctor": 39956, + "fully investigated": 21734, + "transcript text": 58338, + "benchmark consisting": 6436, + "presents unsupervised": 42110, + "concretely propose": 9984, + "various online": 61376, + "online media": 38374, + "automated data": 5039, + "driven manner": 16427, + "models makes": 35214, + "industry research": 25622, + "provides comparable": 44186, + "information visual": 26155, + "constructed new": 10415, + "transcribed text": 58335, + "87 accuracy": 540, + "furthermore study": 21839, + "focus domain": 21156, + "domains work": 16301, + "qa based": 44445, + "generation quality": 22534, + "model popular": 34211, + "using conversational": 60627, + "conversational context": 11041, + "nlg models": 37445, + "suffer high": 53765, + "multiple distinct": 36200, + "study method": 53410, + "expressions present": 19810, + "detection multi": 14504, + "context example": 10630, + "usage scenarios": 59806, + "resulting representation": 47474, + "task types": 55453, + "datasets created": 13201, + "datasets finally": 13273, + "problem setting": 42651, + "despite progress": 14377, + "focus relevant": 21195, + "attention approaches": 4710, + "context document": 10615, + "decoder transformer": 13617, + "context experiments": 10635, + "experiments evaluation": 19431, + "increase complexity": 25407, + "complexity models": 9685, + "models adding": 34691, + "multiple pre": 36265, + "score results": 48871, + "set nlp": 50201, + "knowledge capture": 27420, + "representations produced": 46742, + "recent pretrained": 45335, + "transformer layers": 58494, + "model pretraining": 34231, + "supervised pretraining": 54034, + "pretraining tasks": 42219, + "pretraining dataset": 42199, + "pretraining data": 42198, + "2019 task": 286, + "identifying categorizing": 24454, + "000 english": 5, + "english tweets": 17896, + "tasks sub": 55915, + "popular tasks": 41191, + "participate task": 39816, + "treat task": 58732, + "steps including": 52842, + "learning clustering": 29559, + "problem multi": 42611, + "second step": 49023, + "clustering model": 8743, + "model verified": 34528, + "dataset large": 12977, + "despite growing": 14364, + "number word": 38055, + "lack standardized": 27914, + "performance design": 40287, + "simple classification": 51141, + "address introduce": 1761, + "tasks case": 55534, + "24 languages": 329, + "multilingual setting": 36119, + "probing task": 42493, + "used explore": 60180, + "setting neural": 50333, + "dataset lack": 12975, + "lack datasets": 27884, + "datasets leads": 13316, + "leads poor": 29323, + "case low": 7791, + "text augmentation": 56446, + "improvements models": 25082, + "augmented data": 4976, + "handful languages": 23404, + "benchmark based": 6428, + "news topic": 37422, + "classification textual": 8574, + "embeddings context": 17102, + "multi tasking": 36032, + "languages far": 28671, + "information image": 25911, + "consistency loss": 10268, + "images texts": 24555, + "comparative experiments": 9321, + "provide concrete": 44041, + "learning complex": 29565, + "conclude discussing": 9968, + "scale empirical": 48567, + "empirical data": 17322, + "gender race": 22039, + "bias sentence": 7043, + "including state": 25303, + "mixed results": 33408, + "conclude proposing": 9971, + "federated learning": 20709, + "trained generative": 57739, + "dataset popular": 13029, + "document pairs": 15816, + "concise summary": 9964, + "abstract information": 758, + "incorporating domain": 25384, + "scale annotated": 48552, + "scientific domain": 48760, + "lack high": 27891, + "pretraining large": 42208, + "datasets variety": 13478, + "bert achieve": 6602, + "code pretrained": 8845, + "com allenai": 9005, + "documents context": 15867, + "based similar": 6034, + "single parameter": 51328, + "features architecture": 20524, + "implications results": 24657, + "tweet classification": 59002, + "used instead": 60216, + "sequence processing": 49971, + "separate tasks": 49878, + "heterogeneous sources": 23624, + "performance specific": 40570, + "improvement current": 25000, + "use prior": 59982, + "knowledge current": 27431, + "benchmarks shows": 6542, + "findings demonstrate": 20906, + "information semantics": 26077, + "graph sequence": 23167, + "explicitly capture": 19631, + "achieves 24": 1287, + "art points": 4356, + "paper builds": 39285, + "particular define": 39840, + "work robust": 62814, + "related corpus": 45892, + "data observe": 12518, + "humans express": 24277, + "discourse markers": 15393, + "play method": 40976, + "highly imbalanced": 23901, + "easily integrated": 16545, + "using self": 60916, + "self labeled": 49199, + "data lead": 12457, + "data performs": 12543, + "performs worse": 40722, + "noisy label": 37621, + "explicitly modelling": 19645, + "performance overall": 40472, + "systematic differences": 54394, + "neural transformer": 37107, + "model includes": 33988, + "bert elmo": 6649, + "propose distill": 43355, + "knowledge bert": 27416, + "bert state": 6721, + "tasks multiple": 55755, + "times inference": 57252, + "popular way": 41197, + "problem make": 42604, + "use pipeline": 59973, + "nlp modules": 37503, + "detection recent": 14517, + "task usually": 55464, + "usually involves": 61054, + "evaluation use": 18745, + "jointly perform": 27214, + "attention topic": 4837, + "number domains": 37998, + "score classification": 48839, + "provide results": 44123, + "network graph": 36751, + "informed decision": 26181, + "generation attracted": 22422, + "methods mainly": 32936, + "mainly use": 31478, + "methods performance": 32981, + "way generate": 61805, + "important factors": 24724, + "baseline various": 6223, + "effectively experimental": 16732, + "baselines recent": 6295, + "shot scenario": 50639, + "information make": 25962, + "score higher": 48850, + "model recent": 34280, + "based online": 5926, + "learn automatically": 29347, + "automatically classify": 5147, + "model classifying": 33662, + "idea model": 24371, + "built domain": 7485, + "unlabeled domain": 59572, + "classification result": 8537, + "way explain": 61803, + "text humans": 56617, + "described natural": 14212, + "acquired knowledge": 1444, + "context representations": 10706, + "assess model": 4579, + "published dataset": 44368, + "learned attention": 29452, + "speed inference": 52322, + "reading model": 45088, + "agents capable": 2063, + "sub sentence": 53530, + "comprehensive experimental": 9790, + "compared vanilla": 9471, + "text study": 56792, + "challenge develop": 7977, + "tasks bert": 55523, + "distillation method": 15573, + "applied train": 3305, + "developed models": 14635, + "language real": 28459, + "results newly": 47741, + "newly annotated": 37370, + "search optimization": 48977, + "testing time": 56415, + "allows researchers": 2477, + "researchers developers": 47152, + "training multiple": 58184, + "multiple gpus": 36220, + "demo video": 13849, + "www youtube": 63025, + "youtube com": 63147, + "scoring functions": 48934, + "computationally challenging": 9871, + "inference methods": 25669, + "accuracy levels": 999, + "syntactic representations": 54319, + "tasks novel": 55770, + "tasks empirically": 55608, + "lda model": 29250, + "model reduce": 34290, + "hard interpret": 23444, + "method interpret": 32549, + "end train": 17716, + "pairs semantically": 39214, + "dataset 20": 12792, + "20 000": 219, + "covers broad": 11663, + "better modeling": 6920, + "task entails": 55050, + "respect given": 47345, + "evidence supporting": 18823, + "models ranging": 35396, + "heuristic rule": 23629, + "evaluation available": 18580, + "diversity present": 15739, + "mainly consider": 31468, + "consider textual": 10223, + "effectively boost": 16724, + "advantages different": 1950, + "prediction word": 41752, + "help task": 23590, + "extra data": 19961, + "heavily human": 23531, + "human crafted": 24128, + "features knowledge": 20608, + "effectively capture": 16725, + "models studies": 35543, + "come different": 9129, + "different conclusions": 14872, + "languages comparing": 28619, + "categories proposed": 7849, + "effect training": 16620, + "data quantity": 12582, + "accurately predicting": 1098, + "leveraging multilingual": 30333, + "multilingual bert": 36064, + "bert self": 6714, + "languages fine": 28675, + "tuning datasets": 58905, + "recurrent language": 45615, + "multilingual learning": 36091, + "training provides": 58222, + "bert trained": 6727, + "trained code": 57690, + "sentence test": 49656, + "highest scoring": 23858, + "explores potential": 19774, + "large source": 29014, + "knowledge exploited": 27475, + "based popular": 5940, + "popular benchmark": 41157, + "analysis explore": 2665, + "post training": 41354, + "approach popular": 3641, + "tuning bert": 58902, + "training applied": 57933, + "propose modeling": 43467, + "model discriminator": 33775, + "showing approach": 50676, + "training regularization": 58228, + "represented word": 46809, + "datasets showed": 13423, + "sequence encoder": 49920, + "encoder architecture": 17489, + "case performance": 7794, + "improves significantly": 25160, + "version task": 61556, + "examples model": 18916, + "model test": 34452, + "neural components": 36945, + "datasets codes": 13176, + "work publicly": 62797, + "trained existing": 57725, + "tuning new": 58934, + "data challenge": 12203, + "datasets lead": 13314, + "features shown": 20667, + "improvement tasks": 25031, + "limitations using": 30558, + "current limitations": 11983, + "tasks human": 55665, + "understanding representation": 59393, + "network convolutional": 36727, + "task retrieving": 55347, + "start end": 52566, + "text video": 56839, + "simple elegant": 51164, + "datasets comparable": 13181, + "labels dataset": 27813, + "dataset different": 12897, + "rely different": 46277, + "create single": 11716, + "introduce generative": 26809, + "outperforms individual": 38905, + "impact nlp": 24601, + "community recent": 9274, + "nlp architectures": 37465, + "embeddings like": 17167, + "directional language": 15279, + "model elmo": 33804, + "al proposed": 2247, + "model order": 34150, + "possible performance": 41334, + "task competition": 54961, + "participated subtasks": 39821, + "encode words": 17474, + "official results": 38310, + "subtasks respectively": 53671, + "sequence problem": 49970, + "specifically given": 52207, + "text decoder": 56528, + "manner present": 31722, + "jointly extract": 27197, + "information aggregation": 25758, + "attention work": 4854, + "existing generation": 19071, + "generation approaches": 22421, + "baselines dataset": 6249, + "large room": 28956, + "relevant facts": 46217, + "generating appropriate": 22366, + "promising directions": 43164, + "better datasets": 6875, + "detection present": 14511, + "collection pipeline": 8984, + "technique automatically": 56027, + "diverse corpora": 15695, + "evaluating multiple": 18565, + "ml models": 33431, + "response quality": 47401, + "semantic correctness": 49261, + "paper existing": 39355, + "nlg systems": 37446, + "entity spans": 18149, + "iteratively refine": 27133, + "score improvement": 48854, + "bias bias": 7020, + "transformer bert": 58478, + "experiments bert": 19366, + "bert performs": 6701, + "combining bert": 9108, + "non bert": 37639, + "bert encoder": 6653, + "achieved score": 1266, + "additional external": 1667, + "global structure": 22845, + "contain relevant": 10468, + "state information": 52699, + "models currently": 34879, + "achieving promising": 1419, + "results unsupervised": 47894, + "datasets neural": 13344, + "models nlm": 35261, + "words challenging": 62376, + "embedding matrices": 17038, + "keeping parameters": 27279, + "16 absolute": 162, + "answer extraction": 3034, + "choose appropriate": 8343, + "languages words": 28821, + "compared languages": 9417, + "documents introduce": 15888, + "order predict": 38647, + "trained detect": 57708, + "unlabeled training": 59585, + "novel loss": 37857, + "article introduces": 4452, + "corpus creation": 11315, + "creation new": 11749, + "bilstm network": 7136, + "conclusions drawn": 9978, + "larger improvements": 29079, + "world low": 62947, + "data relatively": 12595, + "discriminator learn": 15453, + "performance use": 40615, + "labels work": 27861, + "art adversarial": 4210, + "training shown": 58256, + "shown impressive": 50718, + "embeddings shared": 17214, + "performance non": 40460, + "methods challenging": 32780, + "work revisit": 62813, + "stable training": 52413, + "method includes": 32537, + "method robust": 32641, + "modern natural": 35713, + "evaluate ability": 18434, + "perform semantic": 40136, + "work create": 62613, + "datasets datasets": 13208, + "pairs total": 39221, + "evaluation new": 18663, + "late fusion": 29113, + "fusion techniques": 21862, + "sequence modelling": 49957, + "architecture sequence": 4086, + "arbitrarily long": 4011, + "long contexts": 31006, + "performances state": 40647, + "thanks use": 57001, + "different distributions": 14901, + "model stable": 34405, + "model calibration": 33640, + "features downstream": 20565, + "new embeddings": 37181, + "available pretrained": 5347, + "relevant training": 46243, + "building existing": 7445, + "data methods": 12485, + "better systems": 6974, + "takes word": 54785, + "meta embeddings": 32333, + "embeddings classify": 17093, + "baselines results": 6297, + "improves cross": 25121, + "al 2019": 2241, + "al 2019b": 2243, + "lstm transformer": 31284, + "openai gpt": 38469, + "special characters": 52017, + "capability handling": 7609, + "work question": 62799, + "mapping method": 31803, + "comparing models": 9482, + "strong empirical": 53027, + "sentence levels": 49598, + "yield improved": 63096, + "minimum description": 33305, + "corpus given": 11352, + "single representation": 51332, + "relative distances": 46092, + "sub spaces": 53532, + "task contextual": 54977, + "nlp downstream": 37484, + "sequence previous": 49969, + "including pre": 25288, + "set provide": 50228, + "prediction objective": 41725, + "task explore": 55068, + "explore unsupervised": 19746, + "training pre": 58210, + "simple multi": 51197, + "layer convolutional": 29183, + "phrases sentence": 40852, + "language cross": 28012, + "lingual setting": 30727, + "models submitted": 35546, + "carefully selected": 7763, + "baselines future": 6264, + "used multiple": 60245, + "multiple consecutive": 36187, + "task 2018": 54868, + "information removed": 26052, + "output paper": 38990, + "coherence relations": 8911, + "penn discourse": 40022, + "discourse treebank": 15401, + "gained great": 21915, + "labels distant": 27815, + "usually suffer": 61069, + "distribution given": 15640, + "method dynamically": 32469, + "similarity graph": 51098, + "labels noisy": 27842, + "better classification": 6863, + "performance extensive": 40339, + "systems significant": 54632, + "success large": 53704, + "directly applied": 15305, + "passage retrieval": 39921, + "retrieval using": 47975, + "augmentation technique": 4968, + "datasets establish": 13251, + "demonstrate despite": 13889, + "despite trained": 14398, + "method designed": 32456, + "explicitly learn": 19639, + "models order": 35281, + "underlying language": 59268, + "tests ability": 56417, + "ability language": 615, + "tuning using": 58973, + "dataset adding": 12802, + "bert does": 6644, + "availability training": 5256, + "labels user": 27857, + "information instead": 25926, + "instead uses": 26467, + "easier obtain": 16527, + "improves previous": 25151, + "consecutive sentences": 10196, + "method beats": 32401, + "multilabel text": 36055, + "relevant query": 46230, + "terms exact": 56286, + "retrieval performance": 47962, + "techniques generate": 56091, + "representations elmo": 46647, + "architecture results": 4082, + "leads competitive": 29309, + "resources human": 47304, + "human subject": 24245, + "datasets derived": 13222, + "experiments domain": 19422, + "domain transfer": 16217, + "despite vast": 14404, + "information cross": 25796, + "descriptions entities": 14251, + "art alternatives": 4212, + "half million": 23367, + "intra document": 26758, + "annotation training": 2979, + "datasets varying": 13481, + "documents structured": 15915, + "sentences specific": 49789, + "posts social": 41372, + "approach semeval": 3683, + "target paper": 54836, + "spoken question": 52364, + "spoken documents": 52357, + "documents recent": 15908, + "asr hypotheses": 4557, + "adversarial model": 1975, + "systematic comparative": 54392, + "study focusing": 53382, + "crucial components": 11898, + "representations allows": 46617, + "position embeddings": 41265, + "embeddings self": 17207, + "attention using": 4846, + "using unified": 61007, + "outperform ones": 38807, + "ones based": 38335, + "newly developed": 37376, + "contextual embedding": 10763, + "architecture achieved": 4021, + "2nd place": 352, + "model reached": 34272, + "appropriate model": 3964, + "model syntactic": 34434, + "input documents": 26269, + "using bert": 60588, + "bert able": 6601, + "datasets address": 13148, + "bert large": 6670, + "large small": 29013, + "bert base": 6612, + "primary contribution": 42370, + "baselines provide": 6290, + "foundation future": 21416, + "resources results": 47332, + "represents step": 46819, + "controversial topics": 10992, + "change word": 8174, + "analysis includes": 2679, + "includes different": 25230, + "different measures": 14985, + "paper takes": 39594, + "takes step": 54784, + "facilitate progress": 20273, + "dataset small": 13091, + "reasoning text": 45229, + "significantly larger": 50985, + "new automatic": 37139, + "news coverage": 37396, + "diverse natural": 15707, + "synthesized data": 54363, + "train semantic": 57628, + "work introduces": 62695, + "compression technique": 9814, + "method constructs": 32442, + "applications users": 3256, + "models sub": 35545, + "lstm bidirectional": 31252, + "bidirectional gru": 7072, + "task rely": 55333, + "ranked 5th": 44955, + "sentence task": 49655, + "task asks": 54916, + "predict sentence": 41653, + "various pre": 61378, + "techniques training": 56145, + "leveraging user": 30344, + "practical significance": 41473, + "various reasons": 61384, + "model enhanced": 33825, + "knowledge integration": 27529, + "masking strategy": 31875, + "masking strategies": 31874, + "level masking": 30157, + "inference semantic": 25690, + "test machine": 56355, + "masked language": 31862, + "subset words": 53610, + "parallel decoding": 39650, + "unclear best": 59235, + "best utilize": 6836, + "entailment models": 18003, + "pairs introduce": 39197, + "importance weights": 24694, + "important linguistic": 24742, + "current unsupervised": 12025, + "propose weakly": 43704, + "languages better": 28610, + "vary greatly": 61422, + "domain supervised": 16198, + "graph knowledge": 23144, + "extraction semantic": 20107, + "specific parallel": 52122, + "domains end": 16249, + "used unsupervised": 60344, + "scene graphs": 48715, + "impact using": 24607, + "widely employed": 61995, + "code url": 8865, + "com microsoft": 9019, + "https youtu": 24064, + "nlp datasets": 37479, + "aimed providing": 2166, + "useful variety": 60396, + "tasks applied": 55502, + "reduces error": 45689, + "small corpora": 51467, + "embeddings general": 17141, + "work conduct": 62606, + "finding propose": 20901, + "deep pre": 13742, + "tuning models": 58929, + "like bert": 30461, + "works leverage": 62895, + "resolve problem": 47202, + "loss model": 31099, + "teacher models": 55993, + "demonstrates method": 14035, + "incorrect answers": 25399, + "answer different": 3032, + "different related": 15047, + "related question": 45930, + "challenging existing": 8095, + "based pretrained": 5948, + "resource transfer": 47285, + "samples propose": 48487, + "purely data": 44395, + "risk overfitting": 48163, + "particular data": 39838, + "effectively model": 16750, + "build strong": 7428, + "previous method": 42260, + "method ignores": 32528, + "level coherence": 30077, + "english human": 17823, + "accuracy respectively": 1037, + "method various": 32703, + "network output": 36777, + "language empirically": 28045, + "methods multiple": 32950, + "representations evaluated": 46657, + "evaluated based": 18522, + "representational similarity": 46610, + "selected set": 49120, + "linguistic experts": 30768, + "identify semantic": 24442, + "experiments word": 19564, + "languages code": 28615, + "speech representations": 52290, + "particular consider": 39836, + "setting use": 50354, + "previously studied": 42351, + "propose multitask": 43484, + "supervision form": 54081, + "higher average": 23813, + "evaluation neural": 18662, + "textual inputs": 56971, + "asr outputs": 4560, + "systems test": 54651, + "test robustness": 56365, + "similar target": 51069, + "present overview": 41984, + "documents annotated": 15854, + "annotated entities": 2893, + "type entities": 59054, + "document types": 15839, + "introduce set": 26859, + "tasks created": 55563, + "pretraining language": 42206, + "pretraining objective": 42213, + "trained gold": 57743, + "corpus achieving": 11268, + "silver data": 51023, + "success transformer": 53727, + "surge research": 54158, + "key research": 27331, + "scale learning": 48591, + "learning transformer": 29921, + "transformer blocks": 58479, + "relative positional": 46107, + "learning rate": 29832, + "text provided": 56720, + "applied state": 3295, + "task submission": 55418, + "submission achieved": 53572, + "models leading": 35172, + "able perform": 712, + "hop question": 24001, + "doing multi": 15991, + "investigate recently": 26982, + "proposed datasets": 43753, + "tasks design": 55582, + "span based": 51920, + "datasets useful": 13472, + "previously thought": 42352, + "knowledge effort": 27454, + "examples propose": 18925, + "possible automatically": 41316, + "small annotated": 51464, + "examples experiments": 18900, + "work long": 62712, + "better control": 6869, + "matching task": 31922, + "task binary": 54940, + "method employs": 32479, + "suffer issues": 53769, + "number user": 38052, + "responses different": 47404, + "approaches goal": 3837, + "different complementary": 14870, + "recognition paper": 45523, + "broadcast news": 7358, + "task perform": 55272, + "recognition using": 45546, + "models performs": 35322, + "experiments techniques": 19542, + "reach human": 45049, + "users query": 60477, + "query using": 44681, + "architecture models": 4065, + "baselines training": 6314, + "addresses key": 1813, + "model student": 34418, + "use textual": 60047, + "propose time": 43673, + "model constructs": 33709, + "considerably improve": 10239, + "decision makers": 13562, + "severely limited": 50427, + "political events": 41109, + "topics discourse": 57447, + "propose topic": 43676, + "models applicable": 34716, + "task general": 55101, + "detection framework": 14488, + "framework identify": 21536, + "capture relations": 7701, + "relations arguments": 46016, + "average scores": 5418, + "scores 81": 48889, + "underlying reasons": 59275, + "small manually": 51481, + "set investigate": 50172, + "investigate approach": 26941, + "outperforming models": 38853, + "automatically aligned": 5138, + "en es": 17416, + "related works": 45956, + "information regarding": 26048, + "portuguese english": 41225, + "potential source": 41409, + "demonstrate capability": 13878, + "developed corpus": 14626, + "models presented": 35348, + "better bleu": 6856, + "accurate sentence": 1089, + "average 82": 5400, + "uses bert": 60495, + "bert fine": 6660, + "novel augmentation": 37773, + "increasing training": 25465, + "different embedding": 14912, + "create training": 11718, + "according official": 865, + "accuracy obtained": 1017, + "gan generate": 21954, + "aims discover": 2189, + "unseen relations": 59653, + "corpus knowledge": 11365, + "construct set": 10403, + "set constraints": 50126, + "art relation": 4365, + "transformers bert": 58523, + "bert models": 6687, + "different bert": 14853, + "consistently high": 10293, + "model exhibits": 33847, + "exhibits strong": 19012, + "paper conducts": 39301, + "empirical investigation": 17332, + "given complexity": 22727, + "accuracy metrics": 1007, + "based fine": 5734, + "tuning approaches": 58900, + "learning components": 29566, + "particular use": 39870, + "siamese neural": 50820, + "accuracy propose": 1032, + "scale semantic": 48622, + "dataset size": 13089, + "develop better": 14578, + "better robust": 6961, + "extracts relevant": 20143, + "modelling tasks": 34644, + "uses recurrent": 60530, + "makes significant": 31636, + "ability detect": 601, + "sequence sentences": 49974, + "able parse": 711, + "datasets chinese": 13172, + "score improvements": 48855, + "improvements datasets": 25068, + "datasets compared": 13183, + "respectively addition": 47359, + "models vary": 35670, + "synthetic human": 54376, + "shot transfer": 50654, + "studies indicate": 53271, + "indicate neural": 25530, + "based shared": 6031, + "scores using": 48928, + "using manual": 60787, + "considerably improves": 10240, + "performance respect": 40537, + "new unified": 37353, + "shared transformer": 50509, + "generation datasets": 22443, + "explore deep": 19696, + "stack lstm": 52417, + "carry experiments": 7776, + "deep transformer": 13751, + "positional encoding": 41272, + "case language": 7790, + "modeling information": 34584, + "analysis attention": 2618, + "outputs paper": 39018, + "extensively evaluate": 19917, + "performance chinese": 40233, + "accurately estimate": 1094, + "studies focused": 53267, + "studies investigated": 53274, + "binary labels": 7150, + "correct label": 11469, + "demonstrate novel": 13952, + "different instances": 14958, + "performance predicting": 40490, + "explicit reasoning": 19622, + "reasoning paths": 45214, + "reasoning questions": 45221, + "dataset achieving": 12801, + "score 34": 48790, + "prone overfitting": 43228, + "need high": 36570, + "better model": 6919, + "relevance diversity": 46190, + "wide attention": 61961, + "model tends": 34449, + "handle large": 23410, + "student framework": 53209, + "words little": 62450, + "generating process": 22388, + "relations based": 46017, + "depth study": 14189, + "study specific": 53461, + "model variations": 34525, + "important challenges": 24705, + "challenges address": 8028, + "similar content": 51032, + "processing researchers": 42934, + "novel bert": 37777, + "extract entities": 19973, + "people communicate": 40027, + "examples natural": 18917, + "complex human": 9628, + "language nlp": 28360, + "role human": 48306, + "provided natural": 44167, + "platform allows": 40949, + "use analysis": 59819, + "key feature": 27311, + "information public": 26037, + "multiple machine": 36244, + "probability mass": 42480, + "propose sparse": 43643, + "new family": 37201, + "baselines experiments": 6259, + "experiments distinct": 19418, + "time improve": 57163, + "reduce errors": 45661, + "information enhance": 25835, + "generation apply": 22419, + "method dataset": 32450, + "extensive human": 19908, + "accuracy pre": 1025, + "tasks focus": 55646, + "reveals model": 48019, + "lack context": 27880, + "perform human": 40112, + "dataset identify": 12956, + "address second": 1798, + "level introduce": 30138, + "baseline new": 6196, + "new benchmarks": 37144, + "sacrificing performance": 48422, + "grained representations": 23042, + "corresponding textual": 11560, + "image representations": 24545, + "applications code": 3188, + "nlu natural": 37565, + "critical research": 11789, + "utterances natural": 61149, + "learning providing": 29829, + "selection bias": 49135, + "framework alleviate": 21456, + "benchmarks natural": 6534, + "understanding commonsense": 59331, + "large unsupervised": 29043, + "contextualized representation": 10808, + "tasks building": 55532, + "level contextual": 30082, + "representations recent": 46746, + "tasks non": 55768, + "work ask": 62574, + "methods case": 32778, + "features users": 20691, + "using modified": 60810, + "task generates": 55104, + "task construct": 54974, + "demonstrate incorporating": 13924, + "model adding": 33538, + "release pre": 46163, + "popular machine": 41169, + "environment knowledge": 18171, + "knowledge analysis": 27394, + "test training": 56390, + "external world": 19958, + "motivated recent": 35873, + "learning entity": 29624, + "models behave": 34763, + "heavily depend": 23529, + "deal complex": 13513, + "problems low": 42710, + "applied open": 3287, + "makes attempt": 31615, + "information process": 26023, + "node representations": 37587, + "domain classifier": 16030, + "explored task": 19765, + "networks gcn": 36859, + "science literature": 48748, + "methods attempt": 32758, + "propose automatically": 43308, + "knowledge hand": 27512, + "different extents": 14928, + "corpora capture": 11182, + "text fine": 56580, + "existing pre": 19127, + "rarely consider": 45003, + "paper utilize": 39605, + "textual corpora": 56955, + "information simultaneously": 26089, + "various knowledge": 61349, + "paper obtained": 39430, + "language pre": 28382, + "comprehension natural": 9771, + "approaches train": 3943, + "transfer language": 58371, + "model monolingual": 34106, + "monolingual model": 35803, + "time pre": 57195, + "creating high": 11741, + "creative text": 11753, + "data imbalance": 12415, + "certain number": 7943, + "typically assume": 59136, + "viterbi decoding": 61695, + "performance introduce": 40398, + "table structure": 54688, + "datasets end": 13246, + "prior best": 42395, + "text related": 56731, + "comprehension text": 9778, + "text cross": 56519, + "entire corpus": 18021, + "corpus second": 11425, + "effective compared": 16637, + "proposed language": 43797, + "predict text": 41657, + "interaction language": 26603, + "bidirectional attention": 7065, + "studies demonstrate": 53257, + "effectively generate": 16736, + "using sophisticated": 60954, + "cases work": 7817, + "deep representations": 13748, + "quality annotated": 44488, + "specialized domains": 52033, + "reduce costs": 45656, + "noise paper": 37602, + "directly modeling": 15325, + "encoder experiments": 17515, + "data instances": 12434, + "effective strategy": 16699, + "improvements model": 25081, + "annotation schema": 2967, + "evaluate techniques": 18512, + "deep understanding": 13753, + "understanding large": 59359, + "key elements": 27308, + "work finally": 62666, + "written ones": 63007, + "scenario based": 48683, + "people tend": 40039, + "resource neural": 47255, + "strategy improve": 52936, + "brings significant": 7343, + "bert word": 6736, + "annotations automatically": 2983, + "multi feature": 35958, + "challenging especially": 8092, + "features analysis": 20521, + "paper adapt": 39248, + "conduct analysis": 10027, + "pairs wikipedia": 39232, + "develop methods": 14597, + "entities provide": 18075, + "cultural differences": 11936, + "longer text": 31055, + "facilitate better": 20262, + "conduct user": 10068, + "approaches chinese": 3782, + "aims predicting": 2207, + "writing patterns": 62989, + "present thorough": 42041, + "task social": 55382, + "domains achieve": 16232, + "newswire corpus": 37433, + "weights using": 61943, + "prior model": 42409, + "obtain reliable": 38186, + "model feature": 33883, + "task distribution": 55026, + "head self": 23498, + "linguistically interpretable": 30816, + "achieving strong": 1426, + "challenging address": 8082, + "query aware": 44661, + "graph sentences": 23166, + "context size": 10722, + "integration method": 26529, + "achieve consistent": 1130, + "increases accuracy": 25434, + "usually required": 61065, + "required task": 46905, + "specifically perform": 52218, + "better task": 6975, + "times data": 57248, + "just language": 27250, + "modeling pre": 34612, + "data release": 12596, + "supervised task": 54055, + "train embeddings": 57582, + "embeddings new": 17180, + "bias detection": 7023, + "bias word": 7048, + "need addressed": 36545, + "models fact": 35014, + "combine existing": 9066, + "batch sizes": 6343, + "metrics quality": 33193, + "introduce technique": 26869, + "agnostic meta": 2091, + "learning maml": 29719, + "quickly adapt": 44821, + "human evaluated": 24141, + "time writing": 57240, + "train bert": 57566, + "classification remains": 8534, + "approaches text": 3940, + "boost accuracy": 7251, + "tasks limited": 55729, + "replace words": 46403, + "sentence contextual": 49536, + "mixture multiple": 33423, + "model vocabulary": 34530, + "text infilling": 56627, + "conditioned context": 10014, + "based gradient": 5757, + "evaluations different": 18757, + "detection context": 14469, + "context awareness": 10593, + "cosine distance": 11573, + "performance time": 40601, + "time constraints": 57126, + "models binary": 34790, + "infer latent": 25636, + "pose challenge": 41238, + "dataset highly": 12949, + "sourced dataset": 51823, + "task release": 55331, + "code dataset": 8804, + "dataset hope": 12951, + "properties human": 43262, + "ai applications": 2113, + "sources paper": 51838, + "web sources": 61899, + "novel ways": 37953, + "study consider": 53347, + "automatically build": 5145, + "corpus containing": 11307, + "systems assist": 54435, + "focus single": 21200, + "enhances performance": 17946, + "english evaluation": 17802, + "effective cross": 16639, + "process long": 42802, + "categorization task": 7855, + "words irrelevant": 62440, + "model dynamically": 33795, + "classification benchmark": 8439, + "performance efficiency": 40314, + "respectively neural": 47374, + "bert transformer": 6729, + "transformer xl": 58517, + "context length": 10668, + "models transformer": 35629, + "models interaction": 35139, + "better align": 6847, + "bert using": 6732, + "model increased": 33994, + "outperforms original": 38916, + "space discrete": 51854, + "empirically method": 17364, + "text style": 56793, + "measuring degree": 32084, + "component modern": 9708, + "models understanding": 35646, + "challenging work": 8163, + "use explicit": 59884, + "embeddings fine": 17138, + "generative methods": 22596, + "dataset automatic": 12817, + "metrics task": 33202, + "data example": 12334, + "combination techniques": 9050, + "respectively best": 47362, + "representation key": 46532, + "task end": 55047, + "performed extensive": 40663, + "entity relations": 18141, + "encoding scheme": 17574, + "unlike human": 59597, + "far human": 20400, + "lead successful": 29279, + "controllable text": 10979, + "public release": 44326, + "ethical issues": 18418, + "network sentence": 36801, + "approximate nearest": 3977, + "triplet loss": 58808, + "modeling multiple": 34602, + "method bert": 32404, + "bert encode": 6652, + "bert experiments": 6657, + "experiments representative": 19512, + "accordingly paper": 873, + "text applying": 56437, + "applying transfer": 3380, + "novel latent": 37853, + "datasets empirical": 13242, + "powerful pre": 41441, + "adapt bert": 1500, + "bert pretrained": 6706, + "bert based": 6616, + "pretrained bert": 42147, + "furthermore explore": 21822, + "respectively finally": 47371, + "data instead": 12435, + "instead generating": 26453, + "hypothesis using": 24349, + "demonstrate combined": 13881, + "new english": 37187, + "study reveals": 53455, + "success rates": 53722, + "potential research": 41405, + "research cross": 47007, + "steps step": 52843, + "performance drop": 40309, + "poorly understood": 41152, + "experiments single": 19529, + "challenges including": 8054, + "inference experimental": 25654, + "important machine": 24744, + "direct way": 15262, + "way method": 61820, + "information embeddings": 25828, + "task field": 55081, + "pairs single": 39218, + "challenges modeling": 8061, + "series data": 50062, + "multimodal transformer": 36157, + "words better": 62373, + "mono lingual": 35788, + "sentence contexts": 49535, + "semantic constraints": 49258, + "words visual": 62545, + "original domain": 38712, + "dialogue paper": 14781, + "present stage": 42021, + "stage method": 52433, + "use bidirectional": 59836, + "loss feature": 31089, + "deep features": 13691, + "inter class": 26578, + "intra class": 26757, + "final task": 20833, + "task sharing": 55370, + "sharing information": 50515, + "models processing": 35361, + "intermediate step": 26679, + "dataset able": 12794, + "able directly": 688, + "directly generate": 15318, + "aims transform": 2219, + "tasks studied": 55912, + "grammatical gender": 23072, + "words sequence": 62509, + "strong performances": 53042, + "explored end": 19758, + "word position": 62265, + "difficulty learning": 15199, + "position embedding": 41264, + "learns better": 29954, + "demonstrated strong": 14021, + "robustness domain": 48277, + "domain inputs": 16086, + "presents strong": 42106, + "base architecture": 5537, + "propose adaptation": 43282, + "effects performance": 16828, + "modeling performance": 34610, + "time previous": 57198, + "proposed modifications": 43864, + "significant computational": 50856, + "computational overhead": 9852, + "translation widely": 58702, + "model adds": 33544, + "used computer": 60122, + "paper implemented": 39395, + "analysis applications": 2613, + "edge devices": 16586, + "memory compute": 32249, + "subjective evaluation": 53562, + "extracting set": 20039, + "language diversity": 28032, + "advance field": 1881, + "perform far": 40108, + "training zero": 58320, + "parameter setting": 39676, + "setting performance": 50339, + "lags far": 27937, + "decoder pre": 13610, + "challenging multilingual": 8115, + "multilingual datasets": 36078, + "online world": 38394, + "experiment large": 19241, + "twitter reddit": 59040, + "score 61": 48797, + "sequences text": 50026, + "specifically generate": 52206, + "documents usually": 15927, + "collect release": 8952, + "release large": 46154, + "popular chinese": 41160, + "extensive experiment": 19870, + "coherent informative": 8916, + "tasks construct": 55557, + "like generalization": 30471, + "architecture specifically": 4087, + "larger degree": 29075, + "work contribute": 62611, + "explored area": 19756, + "strategies evaluate": 52899, + "automatic scores": 5120, + "certain emotions": 7940, + "step approach": 52798, + "learning conduct": 29568, + "data strong": 12697, + "model findings": 33889, + "training regimes": 58227, + "events propose": 18797, + "framework utilizes": 21625, + "qualitative experiments": 44477, + "implicitly encode": 24666, + "far limited": 20403, + "embedding learned": 17035, + "led significant": 29993, + "model facilitates": 33877, + "integrate proposed": 26510, + "approach state": 3703, + "neural lm": 36965, + "bert bert": 6628, + "shot cross": 50608, + "probing experiments": 42491, + "pairs problem": 39209, + "stark contrast": 52564, + "bert multilingual": 6691, + "combination bert": 9033, + "embeddings low": 17168, + "representations hierarchical": 46681, + "syntactic trees": 54335, + "dialog history": 14756, + "context test": 10731, + "dialog datasets": 14754, + "like recurrent": 30496, + "learning zero": 29948, + "systems wide": 54669, + "tasks need": 55762, + "adaptation work": 1548, + "work attempts": 62579, + "understanding ability": 59321, + "ability interpret": 614, + "help explain": 23561, + "proposed paradigm": 43876, + "lattice structure": 29166, + "method adapting": 32370, + "positional embeddings": 41271, + "inference cross": 25649, + "signal model": 50829, + "model receives": 34279, + "baseline training": 6219, + "lookup table": 31072, + "analysis indicate": 2681, + "focus detecting": 21153, + "extent knowledge": 19922, + "encoded model": 17481, + "novel sentences": 37918, + "addition domain": 1611, + "attention research": 4821, + "information recent": 26043, + "bert perform": 6698, + "studies aim": 53244, + "automatically induce": 5183, + "learning manner": 29720, + "module used": 35770, + "shown model": 50727, + "structural knowledge": 53080, + "task fact": 55078, + "subtasks subtask": 53673, + "submissions subtask": 53580, + "subtask subtask": 53668, + "systems improved": 54527, + "oriented tasks": 38701, + "data limits": 12466, + "networks transformer": 36918, + "transformer uses": 58514, + "3x faster": 393, + "training transformer": 58305, + "making applicable": 31645, + "detailed discussion": 14420, + "existing benchmark": 19040, + "sentences create": 49700, + "important tool": 24785, + "modeling work": 34638, + "focuses learning": 21241, + "aware network": 5463, + "module based": 35752, + "employ state": 17391, + "techniques extract": 56085, + "documents use": 15923, + "use score": 60002, + "potential risks": 41406, + "information structured": 26107, + "typically consists": 59138, + "leibler kl": 30014, + "quality sentences": 44580, + "major issues": 31513, + "levels information": 30242, + "literal meaning": 30852, + "significantly differ": 50952, + "provides insight": 44205, + "capture hierarchical": 7675, + "euclidean space": 18424, + "auto regressive": 5020, + "achieve performance": 1178, + "improved combining": 24946, + "improvements results": 25098, + "contextual encoding": 10767, + "label based": 27693, + "nature tasks": 36489, + "prompt design": 43202, + "commonly observed": 9219, + "observed language": 38145, + "based pseudo": 5962, + "problem human": 42578, + "presence multiple": 41837, + "multiple heterogeneous": 36223, + "components paper": 9722, + "simple strong": 51213, + "modeled using": 34556, + "tasks strong": 55911, + "models inference": 35130, + "models textual": 35599, + "multi grained": 35960, + "hierarchical neural": 23683, + "network effectively": 36734, + "information manually": 25966, + "construct multi": 10392, + "methods statistical": 33053, + "essential applications": 18321, + "information results": 26059, + "effectively extract": 16735, + "pretraining framework": 42204, + "computing resources": 9905, + "resources limited": 47312, + "model variational": 34524, + "domain unlabeled": 16221, + "baselines low": 6277, + "decent performance": 13548, + "recently increasing": 45433, + "model discrete": 33772, + "dataset achieves": 12800, + "art terms": 4427, + "base models": 5547, + "difficult apply": 15157, + "depth knowledge": 14187, + "knowledge embedding": 27456, + "representations specific": 46759, + "emotions play": 17304, + "written humans": 63001, + "non verbal": 37690, + "strongly associated": 53068, + "propose global": 43404, + "context enhanced": 10624, + "global representation": 22840, + "achieves 91": 1296, + "leveraging bert": 30319, + "bert additional": 6607, + "improving robustness": 25196, + "robustness adversarial": 48272, + "adversarial inputs": 1971, + "inputs propose": 26367, + "higher robustness": 23843, + "robustness noisy": 48290, + "recently state": 45467, + "does work": 15983, + "introduce unsupervised": 26876, + "selection experiments": 49139, + "creates new": 11736, + "embeddings identify": 17146, + "relations pairs": 46048, + "methods determining": 32821, + "inherent limitations": 26203, + "information helps": 25902, + "corpora finally": 11202, + "require costly": 46848, + "annotations data": 2987, + "data creation": 12260, + "annotated texts": 2924, + "suggestions future": 53843, + "tasks cross": 55565, + "likelihood objective": 30520, + "source corpus": 51759, + "english method": 17843, + "yields average": 63114, + "average absolute": 5401, + "direct transfer": 15259, + "art discriminative": 4249, + "words topic": 62533, + "adapt pre": 1507, + "adapting pretrained": 1570, + "improving language": 25184, + "students learn": 53217, + "candidate set": 7580, + "given paper": 22767, + "goal study": 22901, + "hindering development": 23929, + "scalability large": 48546, + "instance level": 26427, + "strong competitors": 53021, + "data highly": 12405, + "domain learning": 16102, + "compare use": 9374, + "learning query": 29830, + "phrases using": 40857, + "obtain consistent": 38170, + "datasets state": 13442, + "target embedding": 54815, + "propose shared": 43629, + "embeddings tend": 17226, + "v2 dataset": 61160, + "future models": 21878, + "model arbitrary": 33575, + "based relation": 5989, + "models practical": 35336, + "production setting": 43050, + "evaluation indicate": 18628, + "production quality": 43049, + "heterogeneous domains": 23619, + "domain aware": 16022, + "method particularly": 32609, + "metrics assess": 33137, + "differences way": 14830, + "contextualized features": 10802, + "achieving average": 1393, + "surpasses human": 54173, + "resource cross": 47217, + "retrieved documents": 47983, + "training label": 58142, + "based baselines": 5595, + "method transformer": 32690, + "impact model": 24599, + "generation automatically": 22424, + "abstractive summary": 773, + "compare baselines": 9332, + "systems investigate": 54535, + "propose address": 43285, + "create initial": 11703, + "highlight key": 23864, + "understanding challenges": 59329, + "baselines multiple": 6281, + "multiple subtasks": 36296, + "built state": 7490, + "nlp language": 37493, + "assess ability": 4576, + "tasks learn": 55717, + "content task": 10563, + "model means": 34096, + "critical problem": 11786, + "training embedding": 58081, + "model post": 34213, + "form knowledge": 21323, + "propose online": 43576, + "space pre": 51883, + "domains model": 16275, + "pearson spearman": 40006, + "respectively using": 47385, + "alleviate problems": 2418, + "augmentation low": 4960, + "translation uses": 58700, + "framework extensive": 21518, + "settings data": 50364, + "f1 using": 20232, + "sets english": 50290, + "level transformer": 30226, + "compared different": 9401, + "improvement language": 25007, + "simple methods": 51192, + "model cross": 33731, + "setting using": 50355, + "provides comprehensive": 44188, + "dataset russian": 13072, + "component language": 9705, + "task address": 54887, + "present benchmark": 41856, + "causal effect": 7870, + "overcome challenge": 39058, + "challenge learn": 7991, + "edit based": 16591, + "perform accurate": 40066, + "task difficulty": 55022, + "leverages information": 30304, + "information hidden": 25904, + "spatial information": 51985, + "tasks requires": 55856, + "furthermore considering": 21812, + "learning cl": 29554, + "approach real": 3666, + "cost model": 11589, + "apply bert": 3322, + "captured bert": 7724, + "approach transfer": 3726, + "model response": 34318, + "framework simultaneously": 21602, + "independent training": 25506, + "transfers knowledge": 58438, + "training teacher": 58287, + "student models": 53213, + "approach generally": 3544, + "systems research": 54623, + "approaches commonly": 3785, + "ungrammatical sentences": 59455, + "languages spanish": 28793, + "level tags": 30220, + "languages average": 28605, + "create multi": 11709, + "content structure": 10561, + "model aware": 33596, + "fair comparison": 20356, + "representations including": 46688, + "produce higher": 42987, + "improve computational": 24833, + "available limited": 5321, + "data actually": 12118, + "generate context": 22188, + "cloze questions": 8723, + "questions finally": 44788, + "various unsupervised": 61415, + "including training": 25312, + "64 f1": 469, + "critical step": 11794, + "diverse dataset": 15698, + "require expensive": 46851, + "expensive manual": 19212, + "models representation": 35439, + "allows training": 2480, + "simple lstm": 51187, + "information representation": 26053, + "make better": 31544, + "better use": 6989, + "data applying": 12142, + "select key": 49108, + "step understanding": 52832, + "approach identifying": 3559, + "concepts related": 9941, + "great extent": 23204, + "language trained": 28534, + "using sentences": 60926, + "information representations": 26054, + "order answer": 38595, + "propose interpretable": 43422, + "selects relevant": 49170, + "reasoning chains": 45190, + "human readers": 24228, + "jointly optimize": 27210, + "multiple reasoning": 36271, + "reasoning chain": 45189, + "sentence source": 49648, + "attention head": 4756, + "architecture search": 4083, + "previously learned": 42337, + "learning explore": 29639, + "structure knowledge": 53111, + "task empirically": 55041, + "tasks glue": 55658, + "jointly extracts": 27198, + "systems new": 54571, + "baseline addition": 6153, + "experiments https": 19441, + "seq2seq neural": 49903, + "solve new": 51683, + "pipeline based": 40893, + "augmentation based": 4950, + "results average": 47517, + "encoder neural": 17529, + "instead word": 26469, + "network capable": 36715, + "generator produces": 22622, + "challenge nlp": 8001, + "thesis presents": 57044, + "search recommendation": 48981, + "evaluations human": 18759, + "high volume": 23809, + "features experimental": 20575, + "languages apply": 28601, + "test languages": 56354, + "problems arise": 42696, + "addressing problems": 1823, + "languages providing": 28763, + "research aims": 46980, + "learning obtain": 29787, + "query paper": 44674, + "dense sparse": 14083, + "effectively captures": 16726, + "optimization strategies": 38556, + "analysis make": 2694, + "lingual models": 30712, + "language combinations": 27993, + "employ data": 17376, + "language bias": 27980, + "widely investigated": 61997, + "debiasing methods": 13535, + "cost sensitive": 11593, + "term training": 56254, + "label pairs": 27719, + "based instance": 5787, + "models leveraging": 35184, + "consistently better": 10291, + "absolute f1": 740, + "additionally use": 1736, + "use self": 60003, + "model inputs": 34006, + "early fusion": 16512, + "tasks computational": 55551, + "focused data": 21218, + "experimental design": 19261, + "larger amounts": 29068, + "efficient knowledge": 16880, + "distillation kd": 15571, + "biases models": 7057, + "leverages state": 30315, + "method applicable": 32382, + "risk factors": 48162, + "using weakly": 61021, + "advantages using": 1955, + "easily interpretable": 16547, + "internal knowledge": 26687, + "set domains": 50141, + "using query": 60889, + "life cycle": 30438, + "label distributions": 27705, + "perform systematic": 40150, + "representation question": 46574, + "performance known": 40403, + "strong supervision": 53054, + "patterns language": 39970, + "effectively train": 16758, + "language related": 28465, + "furthermore develop": 21815, + "based experiments": 5717, + "strategies able": 52891, + "level present": 30182, + "detection aims": 14456, + "style given": 53487, + "svm random": 54238, + "detection paper": 14509, + "important parts": 24752, + "detection text": 14535, + "learns dense": 29956, + "graphs paper": 23190, + "approach incorporate": 3569, + "supporting evidence": 54138, + "accuracy datasets": 957, + "answering requires": 3094, + "long context": 31005, + "yang et": 63044, + "2018 dataset": 273, + "models drops": 34943, + "drops significantly": 16450, + "compositional reasoning": 9747, + "remain unclear": 46317, + "framework quantify": 21590, + "framework conduct": 21476, + "enable model": 17426, + "dataset corpus": 12868, + "benchmark experiments": 6468, + "model additionally": 33541, + "additionally compare": 1715, + "complete sentences": 9602, + "combines word": 9105, + "graph extracted": 23137, + "realistic task": 45154, + "label smoothing": 27728, + "modern text": 35722, + "trained publicly": 57846, + "based decoder": 5668, + "aspects model": 4546, + "consuming manual": 10450, + "new ones": 37273, + "information ignoring": 25908, + "information dependency": 25805, + "question existing": 44728, + "strategies selecting": 52917, + "guided graph": 23345, + "proves effectiveness": 44000, + "reasoning systems": 45226, + "used improving": 60210, + "improving model": 25185, + "rnn transformer": 48204, + "technique propose": 56043, + "propose structural": 43648, + "applied transformer": 3307, + "people usually": 40043, + "share thoughts": 50462, + "possible data": 41321, + "discussions work": 15495, + "evaluate accuracy": 18435, + "contain noise": 10467, + "data useful": 12763, + "bert shown": 6717, + "performance pre": 40487, + "chinese pre": 8317, + "models baselines": 34762, + "including bert": 25241, + "bert roberta": 6711, + "roberta electra": 48221, + "performances nlp": 40643, + "research open": 47083, + "based pretraining": 5950, + "autoregressive language": 5216, + "pretrain finetune": 42143, + "pretraining method": 42209, + "outperforms bert": 38877, + "explore using": 19750, + "document encoding": 15790, + "document encoder": 15789, + "embeddings derived": 17110, + "using document": 60666, + "improving ability": 25171, + "providing alternative": 44237, + "respect source": 47351, + "encoders learn": 17556, + "multilingual dataset": 36077, + "dataset achieved": 12799, + "methods shot": 33035, + "unique challenge": 59509, + "label dependency": 27703, + "specially designed": 52038, + "shot scenarios": 50640, + "makes hard": 31624, + "embedding technique": 17066, + "outperforms strongest": 38951, + "learning baseline": 29544, + "information fine": 25877, + "grained labels": 23039, + "paper submission": 39581, + "agnostic sentence": 2098, + "corpus obtain": 11394, + "filter noisy": 20808, + "yields strong": 63133, + "additional gains": 1672, + "respectively compared": 47364, + "practical scenario": 41469, + "pairs context": 39174, + "negative instances": 36622, + "negatively affects": 36643, + "contain useful": 10475, + "information utilized": 26149, + "distinguish positive": 15604, + "models goal": 35065, + "text despite": 56534, + "label consistency": 27699, + "built model": 7487, + "process new": 42811, + "generated machine": 22298, + "user interfaces": 60430, + "problem exacerbated": 42555, + "analyses revealed": 2605, + "models generative": 35061, + "lstm gated": 31262, + "scale long": 48592, + "neural ranking": 37088, + "efficiently effectively": 16913, + "improved efficiency": 24948, + "efficiency effectiveness": 16841, + "sources including": 51830, + "event driven": 18782, + "systems understand": 54659, + "auto complete": 5012, + "novel algorithms": 37752, + "algorithms use": 2344, + "embedding parameters": 17052, + "overall model": 39044, + "model discriminative": 33774, + "used learning": 60227, + "information latent": 25946, + "datasets improve": 13295, + "generative approaches": 22588, + "methods heavily": 32884, + "query generation": 44668, + "uses adversarial": 60490, + "effectively improves": 16741, + "improves diversity": 25124, + "describes novel": 14228, + "given content": 22728, + "digital information": 15211, + "higher scores": 23845, + "use user": 60068, + "based insights": 5786, + "decoding procedure": 13639, + "guidance future": 23327, + "context crucial": 10603, + "collected datasets": 8960, + "dataset discuss": 12898, + "challenge building": 7970, + "propose research": 43607, + "question regarding": 44748, + "resources building": 47295, + "utilize available": 61086, + "novel self": 37912, + "verify proposed": 61542, + "transformer transformer": 58513, + "function layer": 21756, + "existing pretrained": 19130, + "tasks pretrained": 55808, + "reducing training": 45714, + "yielding significant": 63110, + "explicit knowledge": 19617, + "powerful neural": 41438, + "help disambiguate": 23557, + "need paper": 36587, + "western languages": 61949, + "models exist": 34986, + "code provided": 8848, + "newly released": 37381, + "models teacher": 35587, + "model multitask": 34115, + "surprising results": 54184, + "counterparts paper": 11624, + "use generated": 59898, + "model manually": 34090, + "does affect": 15934, + "model estimate": 33837, + "require use": 46896, + "using discriminative": 60658, + "require careful": 46843, + "errors best": 18236, + "tasks varying": 55960, + "propose contextual": 43335, + "utterance dialogue": 61135, + "consider contextual": 10209, + "domain adaptability": 15994, + "2019 challenge": 280, + "data utilized": 12768, + "investigate questions": 26980, + "bert use": 6730, + "work showing": 62821, + "data helps": 12400, + "biased data": 7051, + "features layer": 20612, + "limits model": 30642, + "baseline transformer": 6220, + "furthermore evaluate": 21818, + "ablation experiments": 656, + "effect proposed": 16617, + "quantify quality": 44612, + "alignments paper": 2392, + "quality use": 44595, + "improve best": 24828, + "end investigate": 17678, + "respectively experiments": 47368, + "codes used": 8882, + "problems specifically": 42730, + "problem providing": 42639, + "help guide": 23566, + "build recent": 7421, + "dialogue modeling": 14779, + "impact various": 24608, + "scarcity problem": 48675, + "model finetuning": 33897, + "train robust": 57627, + "dataset state": 13100, + "usually rely": 61062, + "transformer encoder": 58482, + "spans input": 51957, + "use feature": 59889, + "attain state": 4668, + "documents given": 15883, + "systems tested": 54652, + "particularly significant": 39889, + "corresponding context": 11547, + "questions training": 44813, + "addition report": 1642, + "tagging framework": 54740, + "language downstream": 28038, + "layer representations": 29207, + "sentence token": 49659, + "topics document": 57449, + "models conducted": 34846, + "reddit dataset": 45643, + "linear support": 30670, + "results accuracy": 47483, + "accuracy 82": 914, + "performance observed": 40464, + "methods second": 33027, + "believe work": 6413, + "qa challenging": 44447, + "understanding evaluation": 59342, + "present collection": 41866, + "small collection": 51466, + "study context": 53348, + "containing 10": 10482, + "utterances based": 61144, + "slot values": 51444, + "classification applications": 8434, + "problem directly": 42541, + "stage approach": 52427, + "models result": 35453, + "transformer baselines": 58477, + "focus fine": 21165, + "demonstrated impressive": 14010, + "claim generation": 8381, + "sampling approach": 48498, + "gpt model": 22983, + "nlp computer": 37475, + "study existing": 53374, + "enhance model": 17915, + "based various": 6127, + "ml techniques": 33434, + "previous utterances": 42300, + "machines svm": 31399, + "dataset topic": 13119, + "approaches models": 3876, + "datasets ii": 13293, + "250 million": 337, + "modeling choices": 34564, + "work believe": 62586, + "believe useful": 6412, + "predict corresponding": 41638, + "corresponding label": 11554, + "models future": 35048, + "dominant approaches": 16307, + "data intensive": 12438, + "models reflect": 35420, + "produce semantic": 43006, + "previous data": 42253, + "analysis leads": 2689, + "new directions": 37173, + "expert demonstrations": 19577, + "vectors encode": 61485, + "scale manually": 48594, + "building task": 7474, + "human conversations": 24127, + "existing annotated": 19023, + "train universal": 57656, + "investigate multiple": 26970, + "supervised setup": 54046, + "labeled target": 27764, + "novel multilingual": 37880, + "japanese russian": 27148, + "exploit domain": 19653, + "model followed": 33905, + "scenario paper": 48688, + "dataset built": 12834, + "used named": 60246, + "resources low": 47313, + "linear projection": 30666, + "domain evaluate": 16058, + "survey based": 54203, + "comparing results": 9485, + "twitter based": 59032, + "model scratch": 34340, + "github repository": 22719, + "datasets lack": 13309, + "transfer text": 58426, + "types using": 59125, + "standard fine": 52493, + "grained coarse": 23027, + "outperforms domain": 38895, + "outperforms zero": 38962, + "shot fine": 50616, + "systems state": 54639, + "par human": 39615, + "allows human": 2468, + "sentences addition": 49678, + "settings demonstrate": 50366, + "does guarantee": 15949, + "based time": 6096, + "unseen entity": 59649, + "method discover": 32465, + "tasks supervised": 55921, + "100 billion": 60, + "methods develop": 32822, + "task mainly": 55199, + "mainly focuses": 31474, + "tasks news": 55766, + "mechanism significantly": 32142, + "previous baseline": 42245, + "descriptive statistics": 14256, + "relatively higher": 46117, + "pairs corresponding": 39176, + "aspects human": 4540, + "information concerning": 25785, + "attract attention": 4872, + "political social": 41113, + "analysis sentiment": 2752, + "suggesting possible": 53839, + "based arabic": 5574, + "accuracy increased": 993, + "introduce pre": 26854, + "embed text": 17005, + "16 languages": 164, + "provide performance": 44111, + "generated users": 22331, + "opening new": 38475, + "complex linguistic": 9632, + "knowledge models": 27553, + "huge corpus": 24071, + "answer retrieval": 3053, + "evaluating large": 18561, + "establish baselines": 18342, + "evaluation code": 18590, + "generative language": 22591, + "way use": 61835, + "use general": 59897, + "encoder produce": 17534, + "modifying model": 35739, + "demo https": 13848, + "leading improved": 29291, + "translation self": 58673, + "demonstrate robustness": 13971, + "promising alternative": 43158, + "extraction aims": 20045, + "aims extracting": 2193, + "suffer insufficient": 53767, + "massive unlabeled": 31890, + "preserve semantic": 42116, + "relations entity": 46027, + "types existing": 59087, + "promising improvement": 43168, + "improvement task": 25030, + "performance cnn": 40238, + "models gained": 35050, + "high efficiency": 23733, + "using relation": 60905, + "upper bounds": 59774, + "process instead": 42795, + "understanding pipeline": 59382, + "model component": 33683, + "information title": 26125, + "learn informative": 29383, + "regarding different": 45793, + "mechanism select": 32141, + "performance news": 40456, + "study address": 53318, + "bidirectional rnn": 7083, + "efficient using": 16908, + "account word": 881, + "translated training": 58562, + "nlp area": 37466, + "usually contain": 61042, + "thesis propose": 57045, + "model considering": 33700, + "tasks keyword": 55703, + "extraction used": 20129, + "content document": 10522, + "theoretic measure": 57014, + "used efficiently": 60159, + "media like": 32170, + "informal language": 25740, + "bleu human": 7204, + "present solutions": 42020, + "result paper": 47445, + "demonstrates potential": 14037, + "propose retrieval": 43608, + "socio political": 51614, + "data ensemble": 12325, + "learning methodology": 29729, + "large textual": 29027, + "time improving": 57164, + "improving cross": 25173, + "linguistic studies": 30798, + "seminal work": 49472, + "problem current": 42527, + "data unseen": 12755, + "seen unseen": 49068, + "performance seen": 40548, + "provides robust": 44225, + "word2vec bert": 62345, + "bert deep": 6638, + "success achieved": 53696, + "effective achieving": 16627, + "imitate human": 24573, + "human based": 24112, + "study perform": 53429, + "data parameter": 12536, + "similar information": 51047, + "best way": 6837, + "selected according": 49116, + "task directly": 55023, + "directly predict": 15331, + "network overall": 36778, + "currently deployed": 12033, + "way making": 61819, + "language especially": 28052, + "promising approaches": 43161, + "100 accuracy": 59, + "extend idea": 19823, + "classifiers predict": 8624, + "perform manual": 40120, + "correspond different": 11541, + "current benchmarks": 11962, + "make case": 31546, + "data multi": 12497, + "using evaluation": 60681, + "pretraining task": 42218, + "universal representations": 59544, + "challenge multi": 7997, + "conversations task": 11065, + "existing dialogue": 19058, + "novel transformer": 37942, + "process design": 42770, + "biases paper": 7058, + "contextualized embedding": 10799, + "using bilstm": 60597, + "using relatively": 60906, + "bilstm model": 7134, + "achieves higher": 1334, + "training separate": 58248, + "separate model": 49876, + "using coarse": 60608, + "finally train": 20884, + "properties model": 43266, + "meaningful sentences": 32028, + "sentences used": 49802, + "topic work": 57437, + "long form": 31012, + "dataset comprises": 12855, + "conventional seq2seq": 11011, + "seq2seq language": 49898, + "faithfulness generated": 20369, + "metric achieves": 33110, + "intuitive way": 26911, + "attracted significant": 4888, + "making informed": 31658, + "informed decisions": 26182, + "typically generate": 59145, + "table data": 54687, + "style generated": 53486, + "tasks generating": 55655, + "generating language": 22379, + "huge performance": 24075, + "control semantic": 10973, + "test different": 56344, + "15 points": 150, + "key point": 27327, + "textual contents": 56953, + "text joint": 56634, + "relative position": 46105, + "method public": 32630, + "datasets conduct": 13187, + "focused developing": 21219, + "paper outline": 39434, + "recognition module": 45515, + "based event": 5711, + "token sequence": 57307, + "challenging train": 8162, + "training regime": 58226, + "textual inference": 56968, + "based prior": 5952, + "models specialized": 35524, + "task quality": 55313, + "framework combine": 21470, + "bert xlm": 6737, + "compare systems": 9371, + "considerable margin": 10233, + "comments social": 9147, + "terms content": 56279, + "nlg tasks": 37447, + "media based": 32159, + "taken consideration": 54774, + "highly flexible": 23900, + "theoretically motivated": 57031, + "better represent": 6952, + "masked span": 31868, + "span selection": 51931, + "challenges opportunities": 8065, + "text rewriting": 56749, + "information redundancy": 26046, + "extractive abstractive": 20134, + "model development": 33762, + "better unseen": 6988, + "language employ": 28046, + "simple linguistic": 51186, + "detection new": 14507, + "propose encoder": 43367, + "multiple instance": 36229, + "learning mil": 29738, + "minimal supervision": 33292, + "fully understand": 21746, + "bert significantly": 6719, + "previously overlooked": 42338, + "important develop": 24716, + "study compare": 53339, + "models predictions": 35344, + "impact paper": 24603, + "medium low": 32218, + "results suggested": 47868, + "impact data": 24591, + "extract topics": 19999, + "codes publicly": 8879, + "recently pre": 45448, + "current pre": 11999, + "usually focus": 61050, + "bert xlnet": 6739, + "models released": 35425, + "automated solutions": 5057, + "semantics domain": 49403, + "baseline research": 6205, + "compute time": 9881, + "focus mainly": 21180, + "model compatible": 33678, + "bert gpt": 6665, + "result new": 47442, + "code base": 8796, + "lingual embedding": 30699, + "data followed": 12368, + "trained synthetic": 57888, + "embedding mapping": 17037, + "task developing": 55018, + "based general": 5746, + "framework incorporates": 21544, + "provide feedback": 44073, + "method comparison": 32427, + "comparison various": 9511, + "models lexical": 35185, + "identify relations": 24439, + "systems lack": 54540, + "data far": 12360, + "000 labeled": 8, + "relations including": 46036, + "comparison future": 9496, + "art work": 4441, + "guidelines future": 23352, + "human interaction": 24173, + "proposed question": 43883, + "automatic natural": 5112, + "generation evaluation": 22454, + "metrics propose": 33190, + "approach employs": 3506, + "specific target": 52152, + "summarization data": 53879, + "study apply": 53326, + "approaches effectively": 3804, + "simply treat": 51253, + "effective graph": 16656, + "context graph": 10649, + "sequence context": 49917, + "present fast": 41915, + "model par": 34177, + "faster compared": 20434, + "achieves remarkable": 1358, + "model type": 34495, + "entity span": 18148, + "trained fine": 57731, + "task code": 54953, + "language skills": 28489, + "fluency semantic": 21127, + "offline online": 38317, + "baselines real": 6294, + "networks effective": 36848, + "text deep": 56529, + "based interaction": 5789, + "speech patterns": 52276, + "incorporating contextual": 25382, + "representing meaning": 46813, + "correct prediction": 11472, + "volume information": 61729, + "nlp deep": 37480, + "highly technical": 23921, + "developing general": 14653, + "models optimized": 35280, + "number downstream": 37999, + "different output": 15015, + "tuned pre": 58882, + "parameters demonstrate": 39690, + "specialized language": 52035, + "million speakers": 33258, + "knowledge lack": 27539, + "existing structured": 19149, + "related issues": 45912, + "effective structured": 16700, + "texts help": 56885, + "level natural": 30165, + "speech models": 52270, + "academia industry": 788, + "way reduce": 61829, + "dropout method": 16445, + "tasks simple": 55891, + "effectively solve": 16757, + "provide model": 44102, + "processing generation": 42873, + "won place": 62107, + "model firstly": 33898, + "accuracy chinese": 942, + "incorporate domain": 25351, + "significant manual": 50897, + "contextualized representations": 10809, + "bert effectively": 6647, + "content results": 10554, + "results bert": 47524, + "performance biomedical": 40220, + "model exposed": 33862, + "bert cite": 6634, + "task semi": 55356, + "utilize self": 61102, + "task manually": 55205, + "relevant document": 46210, + "presents significant": 42103, + "extremely important": 20160, + "aspect natural": 4532, + "text better": 56459, + "data insufficiency": 12436, + "study presented": 53437, + "sequence deep": 49919, + "comprehensive comparison": 9784, + "based available": 5591, + "attempt generate": 4687, + "challenging nature": 8118, + "accuracy various": 1069, + "training specifically": 58265, + "specifically pre": 52219, + "leverage language": 30272, + "adapted different": 1551, + "training gives": 58115, + "world situations": 62960, + "scenarios present": 48705, + "crafted linguistic": 11681, + "representations bert": 46623, + "topological structure": 57469, + "remains unknown": 46355, + "exhibit strong": 19005, + "future efforts": 21872, + "fundamental challenge": 21777, + "challenge ai": 7966, + "challenge current": 7972, + "main parts": 31450, + "abductive reasoning": 588, + "irrelevant redundant": 27042, + "redundant information": 45729, + "create datasets": 11696, + "bert task": 6724, + "model efficiency": 33801, + "codes available": 8874, + "multimodal context": 36144, + "text transformer": 56823, + "visual commonsense": 61650, + "25 relative": 334, + "reduction error": 45719, + "performance date": 40278, + "public leaderboard": 44324, + "detailed ablation": 14408, + "ablation analysis": 655, + "effectiveness new": 16800, + "provided https": 44162, + "existing automatic": 19036, + "implicitly model": 24671, + "information gain": 25888, + "performance conversational": 40267, + "conversational qa": 11047, + "generalization different": 22120, + "requires additional": 46913, + "effectively modeling": 16751, + "explicit syntactic": 19627, + "syntactic constraints": 54294, + "representations self": 46750, + "syntax guided": 54348, + "pseudo training": 44282, + "supervised pre": 54032, + "words ii": 62431, + "network pre": 36784, + "results commonly": 47540, + "aims test": 2216, + "order study": 38653, + "lingual machine": 30710, + "task respectively": 55343, + "baseline tasks": 6215, + "tasks common": 55543, + "obtain reasonable": 38185, + "challenges ahead": 8030, + "method domain": 32468, + "degrades significantly": 13811, + "realistic scenarios": 45151, + "multi type": 36038, + "multi span": 36008, + "support various": 54130, + "span extraction": 51923, + "achieves 79": 1291, + "hidden test": 23649, + "footnote url": 21281, + "existing self": 19139, + "predict masked": 41645, + "source models": 51784, + "joint representations": 27189, + "art multi": 4303, + "capability model": 7610, + "conditional generation": 9992, + "68 accuracy": 479, + "performance related": 40527, + "issue model": 27069, + "model optimize": 34148, + "dataset performs": 13028, + "different demographic": 14896, + "using metrics": 60803, + "bias dataset": 7022, + "low memory": 31161, + "demonstrated state": 14019, + "tuned bert": 58867, + "generation long": 22486, + "novel step": 37928, + "addition investigate": 1623, + "challenging benchmark": 8083, + "causes effects": 7891, + "new situations": 37317, + "model combine": 33665, + "morphological richness": 35843, + "major limitation": 31515, + "summarization code": 53878, + "despite prevalence": 14376, + "tweet level": 59005, + "majority voting": 31535, + "voting based": 61741, + "level f1": 30117, + "tasks directly": 55591, + "directly applying": 15307, + "patterns model": 39971, + "prototypical networks": 43976, + "based computer": 5632, + "pairs previous": 39208, + "works usually": 62917, + "solve low": 51681, + "boosts accuracy": 7263, + "accuracy low": 1001, + "points terms": 41080, + "sensitive input": 49501, + "input noise": 26306, + "projection method": 43142, + "generate generic": 22204, + "like responses": 30498, + "task shared": 55369, + "task increasing": 55132, + "model reasoning": 34278, + "ability paper": 629, + "capture useful": 7719, + "standard encoder": 52487, + "big model": 7091, + "obtains competitive": 38245, + "long sentence": 31023, + "clinical research": 8673, + "lack dataset": 27883, + "answering based": 3064, + "make dataset": 31560, + "effective improve": 16660, + "spaces propose": 51912, + "languages question": 28764, + "syntactically related": 54341, + "human linguistic": 24202, + "addressed problem": 1805, + "novel interactive": 37843, + "series questions": 50068, + "analysis evaluate": 2659, + "compared rule": 9448, + "systems furthermore": 54509, + "propose advanced": 43286, + "attacks using": 4665, + "generation focus": 22462, + "performance generated": 40364, + "proposed baseline": 43743, + "scheme proposed": 48730, + "using hard": 60723, + "embeddings elmo": 17122, + "information plays": 26008, + "information findings": 25876, + "deepen understanding": 13755, + "uses features": 60510, + "random guessing": 44881, + "models provides": 35381, + "potential improving": 41396, + "improving understanding": 25199, + "highlight model": 23867, + "input agnostic": 26254, + "gpt language": 22981, + "optimized using": 38570, + "transfer models": 58406, + "dataset biases": 12831, + "networks gan": 36857, + "data generator": 12389, + "knowledge crucial": 27430, + "corpora existing": 11199, + "entities work": 18092, + "model commonsense": 33670, + "requires careful": 46917, + "based graph": 5760, + "scope paper": 48779, + "13 languages": 126, + "evaluation recently": 18695, + "contextualized embeddings": 10800, + "methods comparison": 32795, + "approach incorporates": 3570, + "graph information": 23142, + "relations context": 46021, + "usually need": 61059, + "need hand": 36569, + "dialogue text": 14791, + "generate possible": 22229, + "employ multi": 17385, + "constructed datasets": 10410, + "parameters new": 39712, + "techniques experiments": 56084, + "differences paper": 14826, + "respect different": 47344, + "increasing research": 25461, + "years existing": 63059, + "design enables": 14277, + "benefit large": 6564, + "common evaluation": 9174, + "convert words": 11074, + "different pairs": 15017, + "high similarity": 23802, + "score 62": 48798, + "today world": 57278, + "help learn": 23575, + "long tailed": 31037, + "raises concerns": 44858, + "data diversity": 12292, + "features models": 20624, + "classification especially": 8463, + "time performance": 57190, + "examples results": 18929, + "2016 presidential": 257, + "focuses task": 21244, + "independent content": 25495, + "building new": 7461, + "bert propose": 6707, + "style content": 53482, + "scale experiment": 48571, + "knowledge common": 27423, + "denoising autoencoders": 14066, + "encoder bert": 17493, + "jointly pre": 27216, + "pre trains": 41603, + "tuning stage": 58959, + "model transformer": 34489, + "specific techniques": 52156, + "techniques significantly": 56136, + "theoretically grounded": 57030, + "models systematically": 35574, + "popular sequence": 41185, + "areas improvement": 4152, + "leads models": 29320, + "propose extended": 43382, + "applying proposed": 3376, + "scenarios experiments": 48696, + "training helps": 58119, + "comparison strong": 9508, + "models strongly": 35538, + "quality input": 44535, + "input dialogue": 26264, + "utterance representations": 61140, + "using transformer": 60999, + "models automated": 34743, + "model statistical": 34410, + "transition model": 58541, + "explicitly leverages": 19640, + "parameters significantly": 39722, + "representation major": 46547, + "speaker role": 52001, + "understand users": 59317, + "fail account": 20329, + "currently state": 12038, + "augment model": 4943, + "model like": 34063, + "discuss ways": 15484, + "contrast recent": 10888, + "proposed end": 43764, + "end approaches": 17613, + "pipeline end": 40898, + "text approaches": 56439, + "analysis suggest": 2771, + "intermediate steps": 26680, + "various attributes": 61304, + "view model": 61599, + "lot time": 31120, + "fusion layer": 21854, + "new strategy": 37326, + "informative prior": 26175, + "experiment proposed": 19245, + "new sota": 37319, + "anomaly detection": 3024, + "analyses model": 2599, + "common patterns": 9190, + "methods train": 33079, + "different generation": 14942, + "hinders application": 23931, + "scale model": 48596, + "distillation approach": 15567, + "model teacher": 34447, + "previous knowledge": 42257, + "distillation methods": 15574, + "distillation student": 15578, + "partially annotated": 39808, + "methods machine": 32933, + "specifically trained": 52231, + "better fine": 6892, + "tuning technique": 58968, + "domain work": 16228, + "adapt model": 1504, + "time test": 57230, + "data achieving": 12114, + "generic model": 22631, + "propose modified": 43471, + "usually involve": 61053, + "rules paper": 48395, + "statistical method": 52752, + "clean corpus": 8642, + "dataset performed": 13027, + "extracted model": 20016, + "focused english": 21220, + "sizable improvements": 51375, + "developments neural": 14715, + "findings highlight": 20907, + "novel training": 37939, + "multiple granularities": 36221, + "transfer downstream": 58361, + "user using": 60454, + "model https": 33963, + "effective improving": 16661, + "performance real": 40519, + "procedure paper": 42744, + "language distribution": 28031, + "detect correct": 14437, + "70 accuracy": 487, + "propose syntax": 43655, + "represent syntactic": 46483, + "variables experiments": 61228, + "achieve lower": 1168, + "reconstruction loss": 45583, + "informed model": 26183, + "recognition based": 45494, + "focused learning": 21226, + "input using": 26356, + "simultaneously using": 51278, + "different granularity": 14946, + "inspired cognitive": 26405, + "dependent specific": 14150, + "models assessing": 34729, + "pose problem": 41240, + "main tasks": 31462, + "datasets current": 13204, + "vanilla transformer": 61217, + "long complex": 31004, + "commonly adopted": 9216, + "focus lexical": 21175, + "identify sentences": 24443, + "methods comparing": 32794, + "art summarization": 4415, + "data https": 12408, + "performance used": 40616, + "input does": 26270, + "knowledge chinese": 27422, + "applications downstream": 3201, + "levels experimental": 30238, + "recent line": 45317, + "gains state": 21943, + "aware semantic": 5471, + "relations long": 46041, + "use discourse": 59868, + "make progress": 31589, + "progress goal": 43099, + "classification analyze": 8432, + "focuses improving": 21240, + "domain perform": 16132, + "automatically constructing": 5153, + "domain large": 16101, + "gate mechanism": 21992, + "mechanism control": 32105, + "reduce bias": 45650, + "core module": 11150, + "modeling multi": 34601, + "remedy problem": 46367, + "multi token": 36033, + "identification multi": 24392, + "computationally cheap": 9872, + "baselines analysis": 6232, + "proposed objective": 43872, + "encoder vae": 17550, + "present meta": 41942, + "similarity experiments": 51095, + "moderate sized": 35696, + "tasks focused": 55647, + "need identify": 36571, + "observation present": 38122, + "trained auxiliary": 57678, + "66 f1": 474, + "model rank": 34268, + "shot semantic": 50641, + "opinion based": 38500, + "development social": 14703, + "works rely": 62906, + "fine coarse": 20922, + "provide competitive": 44032, + "task level": 55185, + "popular transformer": 41196, + "required training": 46907, + "effectiveness learning": 16786, + "sample training": 48458, + "finally highlight": 20862, + "modelling techniques": 34645, + "tasks finally": 55640, + "sampling techniques": 48510, + "reliable way": 46254, + "deep multi": 13729, + "motivated intuition": 35869, + "performance dialogue": 40289, + "robust handle": 48249, + "predicting multiple": 41679, + "enhanced graph": 17932, + "designed based": 14309, + "capturing sequential": 7745, + "series state": 50069, + "models measure": 35220, + "video transcripts": 61587, + "topics related": 57461, + "detection essential": 14480, + "aspect information": 4529, + "spaces different": 51909, + "heterogeneous graph": 23620, + "information heterogeneous": 25903, + "shows different": 50774, + "developing new": 14661, + "level control": 30088, + "sentences paragraph": 49764, + "model discourse": 33770, + "codes data": 8876, + "data help": 12399, + "propose practical": 43586, + "model especially": 33833, + "explicitly trained": 19649, + "languages neural": 28738, + "accuracy natural": 1012, + "nli tasks": 37456, + "propose make": 43447, + "attention fusion": 4752, + "information state": 26104, + "augmentation language": 4959, + "datasets confirm": 13189, + "labels model": 27838, + "covering multiple": 11658, + "involving entities": 27024, + "quality entity": 44515, + "diverse tasks": 15721, + "prediction entity": 41700, + "addition develop": 1609, + "better entity": 6883, + "gpt bert": 22973, + "work multilingual": 62729, + "version bert": 61551, + "shot zero": 50657, + "finetune model": 21041, + "improve multilingual": 24876, + "trained limited": 57775, + "uncertainty based": 59228, + "based confidence": 5637, + "given dataset": 22734, + "labels test": 27851, + "challenging realistic": 8134, + "realistic evaluation": 45149, + "effective unsupervised": 16708, + "lingual scenarios": 30724, + "information obtained": 25994, + "lingual pre": 30717, + "training signals": 58258, + "new pre": 37285, + "models taking": 35582, + "models encoder": 34963, + "hinders improvement": 23932, + "great successes": 23220, + "contextualized information": 10803, + "chinese tasks": 8322, + "standard accuracy": 52461, + "achieved strong": 1275, + "regularize model": 45847, + "model simply": 34388, + "extraction complex": 20053, + "nodes edges": 37591, + "relations better": 46018, + "types nodes": 59107, + "level graph": 30128, + "enables learn": 17441, + "diverse collection": 15693, + "baseline performances": 6201, + "gap machine": 21968, + "dependencies different": 14105, + "aim capture": 2142, + "model dependencies": 33748, + "task defined": 54997, + "dataset analysis": 12807, + "advantages existing": 1952, + "language encoders": 28048, + "previously unknown": 42354, + "furthermore analysis": 21802, + "developed automatically": 14624, + "trained combination": 57691, + "stance prediction": 52456, + "dataset combined": 12847, + "setting human": 50326, + "employ hierarchical": 17383, + "accuracy baseline": 937, + "models extensive": 35004, + "analyses confirm": 2591, + "texts short": 56924, + "various information": 61347, + "huge challenge": 24070, + "models tackle": 35577, + "task conduct": 54966, + "conduct comprehensive": 10031, + "experiments case": 19369, + "tasks covering": 55562, + "compare strong": 9369, + "bert mbert": 6681, + "number labeled": 38011, + "annotation experiment": 2949, + "tasks just": 55701, + "static embedding": 52724, + "years studies": 63078, + "useful source": 60388, + "problem instead": 42584, + "structure entity": 53103, + "model employing": 33813, + "introduce self": 26856, + "related given": 45909, + "used shelf": 60301, + "improve downstream": 24845, + "deal issues": 13516, + "model develop": 33760, + "generated results": 22312, + "existing medical": 19088, + "novel automatic": 37774, + "structure embedding": 53102, + "features describing": 20557, + "size diversity": 51381, + "outcomes using": 38767, + "based version": 6129, + "tasks help": 55663, + "tuning multiple": 58932, + "15 languages": 149, + "word substitutions": 62318, + "models robustness": 35466, + "test examples": 56348, + "possible leverage": 41332, + "accuracy gains": 982, + "leading improvements": 29292, + "automatically produce": 5196, + "samples model": 48482, + "key advantage": 27293, + "studies verify": 53311, + "present depth": 41887, + "present fine": 41917, + "models fine": 35028, + "knowledge fine": 27484, + "systems code": 54451, + "training additional": 57927, + "achieves absolute": 1300, + "techniques shown": 56135, + "fully trained": 21745, + "trained gpt": 57745, + "gpt models": 22984, + "number data": 37992, + "applying pre": 3374, + "16 times": 166, + "data fully": 12375, + "achieved improvement": 1247, + "model gpt": 33944, + "tagging approach": 54736, + "task target": 55427, + "target texts": 54850, + "evaluated english": 18531, + "systems produce": 54601, + "produce plausible": 42997, + "round trip": 48359, + "sentence approach": 49517, + "approach successfully": 3710, + "development sets": 14702, + "following questions": 21267, + "resource nlp": 47257, + "experimental setups": 19327, + "limitation existing": 30536, + "training means": 58170, + "ways propose": 61844, + "training second": 58241, + "challenging previous": 8125, + "read write": 45068, + "available source": 5367, + "context provides": 10699, + "work recent": 62801, + "modeling hierarchical": 34581, + "benchmark machine": 6475, + "text rich": 56750, + "networks text": 36916, + "benefits downstream": 6581, + "leverage textual": 30293, + "terms work": 56325, + "multi granular": 35961, + "signals text": 50836, + "based high": 5771, + "unsupervised technique": 59740, + "generating diverse": 22372, + "target sequences": 54841, + "method explicitly": 32498, + "using general": 60702, + "diverse content": 15694, + "content source": 10560, + "diversity training": 15741, + "scores 90": 48890, + "solely text": 51645, + "grounding model": 23265, + "metric scores": 33124, + "results single": 47850, + "novel entity": 37818, + "informative coherent": 26169, + "approach entity": 3514, + "salient sentences": 48442, + "generate final": 22201, + "generally trained": 22172, + "data spanning": 12679, + "distribution propose": 15649, + "overlap training": 39089, + "reviews news": 48049, + "accuracy statistical": 1054, + "models remain": 35433, + "train transformer": 57651, + "produce accurate": 42974, + "leverage multi": 30278, + "better alignment": 6849, + "costly process": 11603, + "stream tasks": 52960, + "tasks key": 55702, + "detection benchmarks": 14463, + "benchmarks empirically": 6519, + "applied corpus": 3265, + "effectively utilizes": 16763, + "sub graph": 53519, + "knowledge aware": 27403, + "aware graph": 5450, + "graph network": 23150, + "models relation": 35421, + "propose annotate": 43292, + "automatically labeled": 5186, + "5x speedup": 455, + "video games": 61584, + "approach demonstrate": 3480, + "semantics pre": 49411, + "trained semantic": 57859, + "improved language": 24950, + "aware bert": 5443, + "compared bert": 9389, + "comprehension language": 9766, + "promising method": 43170, + "according word": 871, + "building robust": 7468, + "simple highly": 51179, + "scale pretrained": 48614, + "techniques achieve": 56053, + "recently transformer": 45470, + "syntax structure": 54354, + "module trained": 35769, + "objective model": 38096, + "generate better": 22181, + "systematically analyze": 54408, + "based propose": 5958, + "help knowledge": 23573, + "pretraining models": 42212, + "range downstream": 44917, + "modeling objectives": 34607, + "objectives work": 38114, + "standard bert": 52474, + "bert masked": 6679, + "word relation": 62278, + "vanilla bert": 61215, + "knowledge triples": 27636, + "art using": 4436, + "generate token": 22257, + "generated tokens": 22327, + "generate tokens": 22258, + "autoregressive sequence": 5225, + "knowledge grammar": 27495, + "grammatical knowledge": 23074, + "generated dataset": 22283, + "methods conclude": 32797, + "human writers": 24257, + "developments natural": 14713, + "interactions self": 26621, + "encoders like": 17557, + "like gpt": 30473, + "test standard": 56381, + "trained transformers": 57903, + "stronger results": 53064, + "second assess": 48997, + "networks capture": 36836, + "context clues": 10597, + "form complex": 21315, + "articles different": 4467, + "media outlets": 32173, + "questions different": 44784, + "knowledge generate": 27491, + "knowledge enriched": 27465, + "text relevant": 56734, + "study public": 53449, + "problem dialogue": 42538, + "shows great": 50780, + "evaluate state": 18507, + "bert led": 6674, + "focal point": 21141, + "dutch language": 16477, + "order overcome": 38644, + "method applies": 32384, + "reasoning skills": 45223, + "work largely": 62705, + "student teacher": 53215, + "produce reliable": 43003, + "single deep": 51295, + "mechanism utilizes": 32149, + "alternative propose": 2508, + "sentences results": 49779, + "information compared": 25783, + "contextualised word": 10795, + "consider linguistic": 10212, + "question neural": 44741, + "information automatically": 25765, + "incorporate syntactic": 25365, + "knowledge transformer": 27635, + "novel parameter": 37891, + "textual sources": 56983, + "results automatic": 47513, + "established baselines": 18352, + "baselines significant": 6299, + "achieves macro": 1343, + "challenging testbed": 8159, + "words rare": 62492, + "languages achieves": 28592, + "non overlapping": 37673, + "properties like": 43265, + "does mean": 15959, + "accuracy second": 1043, + "semantic reasoning": 49323, + "essential natural": 18330, + "prediction nsp": 41724, + "bert learns": 6673, + "learns contextual": 29955, + "great significance": 23216, + "remedy propose": 46368, + "task includes": 55130, + "encourages model": 17603, + "broader range": 7364, + "training recent": 58223, + "research hotspot": 47048, + "comprehensive review": 9797, + "discuss open": 15476, + "open issues": 38435, + "type entity": 59055, + "depending context": 14155, + "context recently": 10703, + "embeddings bert": 17087, + "synthetic sentences": 54382, + "reveal models": 48010, + "data created": 12259, + "dataset tweets": 13124, + "author text": 5001, + "models compute": 34842, + "texts demonstrate": 56870, + "correlation score": 11528, + "metrics text": 33203, + "language collected": 27992, + "called neural": 7551, + "module generate": 35759, + "bert bidirectional": 6630, + "related pre": 45926, + "tasks masked": 55741, + "base bert": 5538, + "best pre": 6804, + "approach particularly": 3637, + "help make": 23576, + "types pre": 59111, + "neural lms": 36966, + "domain classification": 16029, + "multiple knowledge": 36232, + "scale models": 48597, + "representations structured": 46762, + "curated knowledge": 11951, + "enhanced bert": 17930, + "extraction entity": 20060, + "problems existing": 42700, + "propose meta": 43450, + "detection shared": 14524, + "tune pre": 58861, + "data augment": 12148, + "data conditions": 12235, + "models generalization": 35052, + "cross dataset": 11811, + "alleviate negative": 2415, + "speech datasets": 52258, + "introduce fully": 26807, + "transfer results": 58418, + "important characteristic": 24709, + "different elements": 14911, + "propose fine": 43388, + "known priori": 27663, + "truth label": 58838, + "models contrast": 34861, + "original task": 38729, + "trained biomedical": 57685, + "discourse structures": 15400, + "detection work": 14541, + "models promising": 35365, + "training existing": 58096, + "pretrained cross": 42150, + "models underperform": 35644, + "train fine": 57590, + "language addition": 27953, + "datasets outperform": 13357, + "data compute": 12232, + "model hand": 33950, + "model prediction": 34223, + "fusion mechanism": 21855, + "mechanism improve": 32122, + "evaluation scores": 18710, + "analysis focuses": 2668, + "focuses models": 21242, + "information stored": 26106, + "traditional pipeline": 57539, + "information token": 26126, + "prediction errors": 41702, + "layers language": 29226, + "numerous works": 38071, + "models aim": 34703, + "approaches latent": 3857, + "systems explicitly": 54500, + "global attention": 22821, + "types based": 59078, + "overwhelming majority": 39124, + "increasingly large": 25474, + "learning bert": 29546, + "method applying": 32386, + "identify set": 24444, + "quality demonstrate": 44508, + "increases model": 25435, + "detection low": 14497, + "resource text": 47283, + "prototypical network": 43975, + "methods zero": 33106, + "control style": 10974, + "directly related": 15335, + "related textual": 45946, + "quality evaluation": 44518, + "task f1": 55075, + "generated sentence": 22315, + "generate structured": 22249, + "process called": 42763, + "learn optimal": 29406, + "dialog generation": 14755, + "introduced dataset": 26883, + "suffer limited": 53772, + "annotators label": 3015, + "especially dealing": 18270, + "helps generate": 23606, + "al 2019a": 2242, + "supervised data": 53975, + "data goal": 12392, + "goal learn": 22890, + "learn neural": 29402, + "model comparable": 33671, + "corpora human": 11206, + "corresponding labels": 11555, + "framework combines": 21471, + "specifically apply": 52180, + "educational applications": 16606, + "model checkpoints": 33654, + "model iteratively": 34022, + "score test": 48878, + "set low": 50187, + "cost effectively": 11580, + "unlabeled dataset": 59568, + "dataset domain": 12902, + "number real": 38030, + "analysis multi": 2700, + "2017 2018": 264, + "language reasoning": 28460, + "single hop": 51307, + "interpretability model": 26715, + "sub questions": 53529, + "designed human": 14320, + "supporting facts": 54139, + "modern deep": 35704, + "processing present": 42924, + "diagnostic tests": 14740, + "distribution learning": 15644, + "learning heuristic": 29669, + "similar human": 51045, + "suggests new": 53850, + "strategies building": 52894, + "understanding study": 59405, + "instead training": 26464, + "training augmented": 57937, + "introduce multiple": 26828, + "methods approach": 32754, + "22 respectively": 319, + "question use": 44756, + "dbpedia wikidata": 13508, + "scene graph": 48714, + "attributes relations": 4910, + "attention graph": 4755, + "achieve score": 1190, + "translation st": 58682, + "task including": 55131, + "knowledge important": 27520, + "language sources": 28492, + "direction propose": 15274, + "need taken": 36591, + "experiment use": 19254, + "general world": 22098, + "lingual multi": 30714, + "quality low": 44546, + "new low": 37243, + "aware text": 5473, + "rewriting text": 48081, + "generation pre": 22519, + "scale raw": 48619, + "texts fine": 56881, + "desired task": 14352, + "structures better": 53181, + "need research": 36589, + "systems directly": 54481, + "improve final": 24855, + "recently nlp": 45446, + "nlp domains": 37483, + "billion parameters": 7119, + "benefits model": 6584, + "model parallel": 34178, + "experiments transformer": 19548, + "accuracy code": 946, + "lingual representations": 30722, + "based publicly": 5964, + "static embeddings": 52725, + "embeddings compare": 17096, + "trained massive": 57785, + "remain challenge": 46312, + "german german": 22671, + "previously released": 42345, + "performance long": 40421, + "embedding feature": 17028, + "tasks public": 55829, + "datasets comparing": 13184, + "mrc task": 35907, + "performance difficult": 40296, + "pressing need": 42135, + "carlo dropout": 7766, + "shared semantic": 50487, + "direction present": 15273, + "present multilingual": 41951, + "transformer self": 58509, + "data multilingual": 12499, + "understanding important": 59352, + "bert capture": 6633, + "capture general": 7673, + "corpora lack": 11211, + "lack domain": 27886, + "called knowledge": 7548, + "models equipped": 34972, + "training self": 58242, + "parameters pre": 39716, + "especially domain": 18273, + "automatically provide": 5197, + "statistics dataset": 52778, + "model explain": 33854, + "discuss current": 15463, + "amounts high": 2548, + "face challenges": 20241, + "challenges related": 8075, + "providing insights": 44248, + "surprisingly little": 54190, + "based explanations": 5719, + "text way": 56842, + "methods bert": 32770, + "detection document": 14475, + "systems leveraging": 54548, + "uses contextual": 60499, + "datasets text": 13458, + "datasets diverse": 13234, + "perform worse": 40161, + "solving downstream": 51702, + "experiments variety": 19555, + "texts social": 56927, + "datasets illustrate": 13294, + "multiple downstream": 36205, + "introduce embedding": 26803, + "relation recognition": 45993, + "multiple monolingual": 36252, + "create language": 11704, + "switched language": 54257, + "difficult lack": 15171, + "generate artificial": 22180, + "require external": 46854, + "constituency parsers": 10349, + "using copy": 60630, + "data leveraging": 12462, + "based experimental": 5715, + "acceptability judgments": 811, + "surpasses previous": 54174, + "contains valuable": 10507, + "model comprises": 33688, + "retrieve similar": 47980, + "model suggesting": 34426, + "finding relevant": 20902, + "features various": 20694, + "model fusion": 33915, + "faster decoding": 20436, + "emerging topics": 17273, + "findings provide": 20912, + "integrated representation": 26517, + "problems need": 42715, + "novel context": 37788, + "units model": 59534, + "use supervision": 60035, + "set propose": 50226, + "centric approach": 7927, + "study assess": 53328, + "feasibility proposed": 20469, + "quality requirements": 44573, + "language queries": 28455, + "scenarios different": 48694, + "domains experiments": 16255, + "information domain": 25821, + "specific document": 52071, + "method evaluating": 32491, + "datasets single": 13432, + "significance testing": 50843, + "methods strong": 33054, + "learning important": 29676, + "techniques transfer": 56146, + "languages learning": 28709, + "embeddings contextual": 17103, + "learning contextual": 29570, + "languages facilitate": 28668, + "facilitate cross": 20263, + "evaluate language": 18465, + "information shown": 26084, + "compositional manner": 9743, + "structure trees": 53145, + "model tuning": 34494, + "features demonstrate": 20555, + "output length": 38984, + "model stronger": 34413, + "accuracy generated": 984, + "sentence corresponding": 49538, + "connected neural": 10177, + "network task": 36812, + "task combined": 54955, + "margin achieving": 31818, + "great impact": 23205, + "impact downstream": 24594, + "various sentence": 61389, + "challenging high": 8097, + "improvement benchmark": 24988, + "research computational": 47004, + "argument spans": 4173, + "used encode": 60160, + "users twitter": 60484, + "explore hypothesis": 19708, + "task produce": 55298, + "modeling semantic": 34621, + "use translation": 60061, + "better f1": 6890, + "authors propose": 5008, + "specific problems": 52131, + "capture language": 7688, + "summary various": 53916, + "improve future": 24857, + "addition performance": 1632, + "model improving": 33986, + "embeddings jointly": 17154, + "layer training": 29209, + "make robust": 31594, + "expert evaluations": 19581, + "produces high": 43028, + "rouge bleu": 48349, + "bias data": 7021, + "did provide": 14810, + "li et": 30419, + "code used": 8867, + "entities natural": 18068, + "domain named": 16116, + "remains significant": 46349, + "provide context": 44043, + "final classification": 20818, + "advances language": 1912, + "employ bert": 17375, + "transfer capabilities": 58354, + "express emotions": 19792, + "knowledge dynamically": 27453, + "datasets f1": 13270, + "suggests models": 53847, + "diverse text": 15722, + "future progress": 21882, + "com miulab": 9020, + "training knowledge": 58140, + "containing text": 10486, + "difficult expensive": 15166, + "signals using": 50838, + "level human": 30130, + "unsupervised feature": 59697, + "representation documents": 46505, + "carbon footprint": 7749, + "additional cost": 1663, + "modifications model": 35730, + "times training": 57256, + "predicting human": 41676, + "ignored previous": 24496, + "investigated work": 27001, + "entities using": 18089, + "extract key": 19980, + "conducted datasets": 10079, + "capture multi": 7698, + "generated state": 22321, + "highest f1": 23852, + "classification critical": 8447, + "areas machine": 4155, + "applications conversational": 3191, + "performance depends": 40286, + "specifically build": 52183, + "data prohibitively": 12568, + "scale pretraining": 48616, + "datasets indicate": 13301, + "set achieved": 50102, + "keyword search": 27352, + "explores possibility": 19773, + "adding extra": 1595, + "information mi": 25972, + "10 fold": 42, + "low data": 31139, + "resource ones": 47258, + "largely depends": 29053, + "type task": 59072, + "contextual language": 10772, + "minimal additional": 33283, + "architecture encode": 4046, + "single linear": 51314, + "linear layer": 30658, + "outperforms random": 38935, + "spurious patterns": 52390, + "methods resources": 33021, + "models sensitive": 35479, + "given documents": 22738, + "labels task": 27850, + "task humans": 55119, + "data fail": 12359, + "datasets perform": 13368, + "spurious features": 52389, + "available recently": 5358, + "training monolingual": 58180, + "models independently": 35127, + "hundreds languages": 24296, + "understanding essential": 59341, + "paper inspired": 39397, + "trained multilingual": 57816, + "using mixture": 60805, + "annotations corpus": 2986, + "multilingual code": 36068, + "cs data": 11922, + "perform annotation": 40069, + "various perspectives": 61377, + "different cultures": 14884, + "day life": 13503, + "parts world": 39912, + "task extremely": 55074, + "label distribution": 27704, + "aim improve": 2150, + "framework generates": 21529, + "generates pseudo": 22352, + "framework improves": 21540, + "performance shot": 40558, + "related research": 45932, + "features statistical": 20675, + "demonstrate cross": 13885, + "controlled generation": 10985, + "multiple diverse": 36201, + "generating sentence": 22394, + "attributes including": 4907, + "match target": 31901, + "measured automatic": 32067, + "contain large": 10464, + "novel encoder": 37812, + "successfully generates": 53745, + "achieving highest": 1411, + "graph aware": 23100, + "prediction test": 41745, + "understanding multilingual": 59368, + "diverse multi": 15706, + "space evaluate": 51858, + "developing better": 14649, + "levels lexical": 30243, + "information resulting": 26058, + "variational information": 61249, + "sentences recent": 49777, + "model general": 33919, + "input existing": 26276, + "controlled text": 10986, + "generation training": 22569, + "knowledge pretrained": 27576, + "offer potential": 38295, + "generative text": 22612, + "marginal likelihood": 31825, + "designed specific": 14330, + "including news": 25284, + "previously considered": 42331, + "make challenging": 31547, + "written summaries": 63011, + "particular explore": 39845, + "ways integrating": 61843, + "relation arguments": 45964, + "understand impact": 59297, + "documents available": 15858, + "model prior": 34235, + "efficient use": 16907, + "based tools": 6100, + "image models": 24540, + "training bidirectional": 57948, + "better domain": 6878, + "uncertainty estimates": 59229, + "success downstream": 53699, + "multi stream": 36013, + "corpus best": 11288, + "results simple": 47849, + "classification layer": 8485, + "layer bert": 29180, + "validation dataset": 61194, + "based benchmark": 5597, + "performance higher": 40373, + "group people": 23273, + "word token": 62322, + "large pretrained": 28941, + "annotation work": 2981, + "used label": 60221, + "task human": 55118, + "raises new": 44861, + "trained general": 57735, + "achieve improved": 1163, + "correctness generated": 11497, + "limitations current": 30544, + "current approach": 11959, + "build word": 7433, + "extract word": 20003, + "models assume": 34732, + "defined set": 13785, + "explicitly models": 19646, + "quantitatively evaluate": 44631, + "models aiming": 34705, + "develop models": 14599, + "use similarity": 60016, + "target different": 54808, + "commonly available": 9218, + "explored literature": 19759, + "expensive annotation": 19203, + "complex structures": 9665, + "samples labeled": 48479, + "improving data": 25176, + "representations limited": 46710, + "long sequences": 31026, + "techniques improving": 56099, + "set english": 50143, + "supplementary materials": 54107, + "requires models": 46945, + "parts text": 39911, + "process propose": 42819, + "consists series": 10327, + "performance particular": 40481, + "compositional generalization": 9742, + "making hard": 31655, + "attention maps": 4767, + "task hope": 55117, + "develop robust": 14611, + "model french": 33910, + "leveraging visual": 30345, + "simple short": 51211, + "evaluate novel": 18480, + "datasets growing": 13287, + "features multi": 20625, + "architectures evaluate": 4109, + "task helps": 55115, + "user experiences": 60412, + "annotation methods": 2956, + "results higher": 47655, + "noise input": 37599, + "form domain": 21319, + "form speech": 21336, + "predictions work": 41772, + "prone generating": 43227, + "framework state": 21604, + "framework shows": 21598, + "standard way": 52540, + "expert based": 19574, + "pairs dataset": 39177, + "applied training": 3306, + "provide additional": 44005, + "embeddings existing": 17130, + "enables better": 17436, + "models spoken": 35530, + "language token": 28531, + "desired target": 14351, + "nature human": 36479, + "original language": 38718, + "able increase": 702, + "propose algorithms": 43289, + "training performed": 58206, + "reasons propose": 45237, + "word time": 62321, + "performance generative": 40366, + "tasks corresponding": 55561, + "single topic": 51349, + "scientific documents": 48759, + "represent text": 46484, + "represent documents": 46470, + "bert widely": 6734, + "including self": 25294, + "art generation": 4264, + "accomplish tasks": 847, + "techniques evaluate": 56083, + "corpus nlp": 11391, + "explore model": 19715, + "task prediction": 55284, + "used measuring": 60234, + "benchmarks evaluating": 6521, + "provide complete": 44035, + "corpus named": 11386, + "proposed corpus": 43747, + "baseline classifiers": 6161, + "data good": 12394, + "data cases": 12200, + "data build": 12194, + "models susceptible": 35571, + "general linguistic": 22067, + "model biases": 33629, + "sentiment model": 49852, + "performance largely": 40414, + "knowledge context": 27427, + "entities existing": 18049, + "entities semantic": 18081, + "fusion module": 21860, + "target class": 54802, + "unlabeled documents": 59571, + "heuristic methods": 23628, + "pseudo labeling": 44277, + "models linear": 35190, + "build multilingual": 7415, + "language explore": 28063, + "strategies training": 52918, + "use multilingual": 59955, + "impacts performance": 24614, + "network text": 36813, + "applied token": 3304, + "use adversarial": 59816, + "generate adversarial": 22176, + "performance complex": 40262, + "complex nlp": 9644, + "training available": 57940, + "trained unlabeled": 57906, + "capable zero": 7632, + "extractive question": 20137, + "provides results": 44223, + "languages directly": 28642, + "size models": 51391, + "downstream fine": 16338, + "conversational models": 11046, + "models leveraged": 35183, + "context conversation": 10600, + "conversation based": 11029, + "datasets improvement": 13297, + "performance robustness": 40543, + "task subtasks": 55420, + "following issues": 21265, + "affect downstream": 2012, + "tackle issues": 54706, + "detection model": 14501, + "entities proposed": 18074, + "investigate language": 26964, + "learned self": 29480, + "help humans": 23568, + "interactive tool": 26634, + "languages pre": 28753, + "systems demonstrate": 54473, + "representations given": 46678, + "parsing problem": 39792, + "text sql": 56787, + "approach treats": 3728, + "historical corpora": 23957, + "recognition methods": 45512, + "resources including": 47306, + "f1 macro": 20185, + "macro score": 31410, + "modal information": 33458, + "surprisingly high": 54189, + "remain competitive": 46314, + "extract contextual": 19971, + "text shown": 56768, + "tasks leveraging": 55721, + "provided pre": 44171, + "trained masked": 57783, + "models semi": 35477, + "model run": 34332, + "deployment models": 14176, + "production environments": 43047, + "recent trend": 45363, + "roberta based": 48216, + "transformer layer": 58493, + "answer accuracy": 3030, + "learnable parameters": 29449, + "architectures training": 4127, + "success recently": 53724, + "tuned large": 58878, + "models run": 35467, + "machine human": 31302, + "75 accuracy": 502, + "accuracy higher": 986, + "method extracts": 32505, + "model wide": 34534, + "single training": 51350, + "training run": 58235, + "course training": 11637, + "lot memory": 31116, + "applying deep": 3360, + "researchers using": 47169, + "directly comparable": 15309, + "labelled dataset": 27801, + "set benchmarks": 50115, + "mechanism experimental": 32116, + "external tools": 19956, + "efficient solution": 16901, + "task baseline": 54930, + "trained additional": 57671, + "used techniques": 60326, + "techniques domain": 56079, + "dataset baselines": 12824, + "dl models": 15754, + "achieving human": 1412, + "bert classifier": 6635, + "architecture integrating": 4054, + "large synthetic": 29021, + "architectures bert": 4104, + "learn nuances": 29405, + "knowledge obtained": 27562, + "test items": 56352, + "contains 000": 10489, + "automatic way": 5136, + "capture document": 7664, + "information transformer": 26133, + "baselines achieved": 6227, + "context proposed": 10697, + "encoder encoder": 17511, + "classification baselines": 8438, + "embeddings introduce": 17153, + "accuracy bert": 939, + "speaker identification": 51997, + "proposed different": 43758, + "different contributions": 14877, + "comparison baselines": 9492, + "apply multi": 3338, + "language obtained": 28362, + "models target": 35583, + "written author": 62995, + "approaches built": 3781, + "techniques context": 56071, + "relational graph": 46007, + "learning open": 29789, + "specific queries": 52134, + "adversarial domain": 1967, + "small gold": 51474, + "performance par": 40480, + "provide example": 44064, + "supervised objective": 54028, + "methods transfer": 33082, + "previous model": 42263, + "teacher knowledge": 55991, + "effectively reduces": 16755, + "identify word": 24451, + "explore compare": 19692, + "compare ways": 9377, + "control output": 10970, + "require information": 46863, + "local graph": 30941, + "encode graph": 17463, + "generative tasks": 22611, + "input long": 26295, + "used explain": 60179, + "generate similar": 22247, + "predicted label": 41666, + "learning key": 29690, + "semantics context": 49400, + "parts document": 39904, + "highly non": 23907, + "document content": 15777, + "characteristics human": 8237, + "focused exclusively": 21222, + "gold annotations": 22911, + "multimodal dataset": 36146, + "dataset developed": 12895, + "use acoustic": 59814, + "dataset research": 13065, + "help advance": 23551, + "trained encoders": 57719, + "deal issue": 13515, + "trained encoder": 57717, + "specific layers": 52103, + "labeled instances": 27759, + "space large": 51871, + "using label": 60748, + "newly constructed": 37373, + "important low": 24743, + "use internet": 59916, + "expert curated": 19576, + "learning tl": 29913, + "91 accuracy": 556, + "shot baselines": 50601, + "baselines furthermore": 6263, + "transfer techniques": 58425, + "performance achieving": 40181, + "model lastly": 34044, + "utilized training": 61110, + "drawn different": 16409, + "having different": 23486, + "level work": 30234, + "datasets creating": 13202, + "architecture generate": 4050, + "experiments discuss": 19417, + "based scores": 6006, + "task relation": 55329, + "respectively recent": 47381, + "including various": 25319, + "internal dataset": 26685, + "recent breakthroughs": 45298, + "learning demonstrate": 29585, + "application pre": 3175, + "encoder pre": 17532, + "datasets notably": 13349, + "applications pre": 3232, + "methods comparable": 32791, + "task state": 55410, + "fixed number": 21078, + "easy hard": 16561, + "output predictions": 38991, + "transformer using": 58515, + "decoder layers": 13598, + "approach estimate": 3517, + "regression models": 45815, + "generated automatic": 22269, + "experts propose": 19592, + "noisy input": 37619, + "relation graph": 45983, + "higher f1": 23823, + "pretraining methods": 42210, + "insertion deletion": 26379, + "based estimated": 5707, + "validates effectiveness": 61189, + "furthermore extend": 21823, + "applications computer": 3190, + "investigated paper": 26999, + "systems particular": 54585, + "construct benchmark": 10382, + "variety applications": 61261, + "automatically summarize": 5202, + "specific keywords": 52092, + "steps taken": 52844, + "subword representations": 53686, + "representations context": 46632, + "enriched word": 17964, + "problems use": 42736, + "method handle": 32521, + "efficiency problem": 16851, + "transformers recently": 58530, + "tuning procedure": 58949, + "method conceptually": 32431, + "layer transformer": 29210, + "classification decision": 8454, + "texts natural": 56906, + "generation translation": 22570, + "systems represent": 54618, + "represent knowledge": 46475, + "nlu research": 37568, + "independent representations": 25503, + "algorithms learning": 2328, + "traditional evaluation": 57517, + "models utility": 35662, + "experiments limited": 19457, + "learning simple": 29880, + "attention matrices": 4768, + "recently generative": 45429, + "common issue": 9180, + "reward signal": 48071, + "does appear": 15935, + "choice language": 8332, + "language prior": 28389, + "metrics demonstrating": 33158, + "importance language": 24684, + "new chinese": 37149, + "instructions based": 26484, + "language zero": 28587, + "empirical findings": 17330, + "studies limited": 53278, + "study differences": 53360, + "differences speech": 14829, + "available text": 5377, + "determinantal point": 14550, + "rise deep": 48152, + "lack thereof": 27921, + "contributing factor": 10939, + "develop transformer": 14620, + "small model": 51484, + "larger models": 29082, + "train small": 57636, + "ner based": 36676, + "shown benefit": 50697, + "languages mainly": 28722, + "far explored": 20398, + "study methods": 53411, + "corpus augmented": 11281, + "learn syntactic": 29432, + "resulting better": 47461, + "annotated sentence": 2914, + "applying methods": 3367, + "datasets evaluating": 13254, + "examine different": 18861, + "unsupervised multilingual": 59715, + "giving rise": 22814, + "based masked": 5833, + "new embedding": 37180, + "languages release": 28768, + "fail effectively": 20335, + "understanding multiple": 59369, + "information online": 25995, + "news classification": 37391, + "accuracy existing": 970, + "scenarios code": 48692, + "increasing rapidly": 25460, + "users generate": 60466, + "performance changes": 40232, + "input contains": 26259, + "multiple sub": 36295, + "apply deep": 3324, + "provides competitive": 44187, + "setting source": 50349, + "code experiments": 8812, + "results available": 47515, + "extraction natural": 20087, + "search optimal": 48976, + "models number": 35269, + "generated different": 22285, + "errors present": 18247, + "sequence graph": 49931, + "87 f1": 541, + "respectively second": 47383, + "exhibits state": 19010, + "tailed distribution": 54766, + "lead high": 29258, + "according experiments": 861, + "greater accuracy": 23223, + "learned data": 29454, + "labelled examples": 27803, + "intended use": 26551, + "language variations": 28574, + "aim predict": 2156, + "reduction using": 45723, + "adaptation framework": 1525, + "time aware": 57121, + "corpora fine": 11203, + "lottery ticket": 31127, + "ticket hypothesis": 57101, + "better test": 6978, + "question work": 44759, + "domain addition": 16012, + "particular domains": 39844, + "processing approach": 42853, + "finally make": 20867, + "19 english": 186, + "multiple tokens": 36303, + "prevent model": 42231, + "produce multiple": 42992, + "based modeling": 5864, + "models follow": 35040, + "explore large": 19712, + "task order": 55249, + "models bart": 34751, + "model reconstruct": 34285, + "effective fine": 16651, + "performance roberta": 40541, + "training resources": 58230, + "glue squad": 22866, + "end use": 17723, + "significant advantages": 50849, + "encoding module": 17573, + "augmented transformer": 4984, + "transformer paper": 58507, + "single end": 51300, + "best end": 6759, + "score 72": 48807, + "deal domain": 13514, + "models extremely": 35010, + "model unseen": 34503, + "settings zero": 50405, + "data 13": 12102, + "model potentially": 34215, + "difficulty training": 15203, + "systems context": 54459, + "addition improving": 1620, + "systems better": 54442, + "models vulnerable": 35676, + "retraining model": 47936, + "model integrated": 34010, + "building natural": 7458, + "language perform": 28378, + "user intended": 60424, + "accuracy 78": 911, + "behavior using": 6398, + "task bert": 54934, + "provides large": 44210, + "data bert": 12187, + "power large": 41425, + "bert sentence": 6715, + "mining machine": 33316, + "information work": 26164, + "related studies": 45940, + "studies recently": 53294, + "data remains": 12600, + "context open": 10682, + "leverage data": 30262, + "tasks transfer": 55940, + "tuning smaller": 58956, + "human model": 24208, + "model loop": 34073, + "dataset leads": 12982, + "shortcomings current": 50579, + "pretrained masked": 42166, + "models mlms": 35227, + "tasks instead": 55693, + "gains domain": 21937, + "linguistic acceptability": 30745, + "greatly improving": 23233, + "quite common": 44827, + "number errors": 38001, + "analysis common": 2631, + "result training": 47455, + "resource domains": 47223, + "various benchmark": 61309, + "languages ii": 28689, + "effectively handle": 16737, + "architecture capture": 4033, + "previous sota": 42279, + "monolingual datasets": 35799, + "datasets common": 13179, + "generative pre": 22602, + "transformer trained": 58512, + "hugging face": 24083, + "generate relevant": 22235, + "systems pre": 54592, + "training pipeline": 58209, + "personal experiences": 40756, + "set proposed": 50227, + "tasks identify": 55666, + "similarity candidate": 51088, + "candidate reference": 7577, + "gram overlap": 23059, + "corpus natural": 11387, + "traditional metrics": 57530, + "level pre": 30178, + "easily adapt": 16533, + "training result": 58231, + "scenarios limited": 48700, + "limitations evaluation": 30547, + "thorough error": 57057, + "guide future": 23331, + "future direction": 21870, + "evaluated new": 18540, + "new examples": 37197, + "complex documents": 9624, + "gap training": 21982, + "optimization algorithm": 38543, + "number pre": 38029, + "adding noise": 1597, + "detailed annotations": 14416, + "limited single": 30615, + "evidence extraction": 18810, + "aid future": 2127, + "corpus construction": 11306, + "experiments corpus": 19392, + "corpus number": 11393, + "defined data": 13784, + "models providing": 35382, + "retrieve information": 47978, + "suffers lack": 53791, + "test instance": 56350, + "hungry models": 24301, + "models heavily": 35080, + "rely labeled": 46290, + "scenarios data": 48693, + "knowledge generating": 27492, + "limited generalization": 30588, + "labeling data": 27782, + "extraction sentiment": 20108, + "study factors": 53378, + "designed assess": 14308, + "fail exploit": 20336, + "works attempt": 62876, + "terms corresponding": 56280, + "texts task": 56932, + "performs tasks": 40721, + "framework employs": 21501, + "representations terms": 46769, + "contextualized language": 10804, + "tuning downstream": 58908, + "power pre": 41429, + "output final": 38973, + "benchmark introduce": 6473, + "require extra": 46855, + "ensembles models": 17985, + "effective knowledge": 16663, + "released source": 46183, + "bias training": 7045, + "language coverage": 28011, + "cover small": 11647, + "corresponding answers": 11546, + "bert downstream": 6646, + "neural question": 37086, + "questions additionally": 44767, + "knowledge including": 27522, + "label aware": 27692, + "construct knowledge": 10389, + "domains overcome": 16280, + "limitation propose": 30538, + "using individual": 60735, + "domains based": 16236, + "achieve effective": 1133, + "knowledge sharing": 27609, + "capture fine": 7671, + "semantic rules": 49341, + "need fine": 36566, + "method chinese": 32413, + "furthermore method": 21827, + "main points": 31451, + "datasets effectiveness": 13241, + "pipeline approaches": 40892, + "specifically devise": 52196, + "imbalance issue": 24562, + "entropy ce": 18159, + "training instance": 58134, + "entropy objective": 18166, + "data imbalanced": 12416, + "examples training": 18939, + "range data": 44910, + "tasks notably": 55769, + "inference instead": 25662, + "optimization approach": 38544, + "additional loss": 1687, + "loss terms": 31105, + "paper contribute": 39307, + "empirically validate": 17370, + "tasks showing": 55885, + "performance prior": 40500, + "serve strong": 50082, + "results seven": 47829, + "multiple times": 36302, + "bert multi": 6690, + "does entail": 15944, + "model contextualized": 33713, + "overcome shortcomings": 39074, + "image pairs": 24541, + "texts images": 56888, + "effective attention": 16632, + "tasks manually": 55740, + "results verify": 47905, + "consumption training": 10457, + "datasets various": 13479, + "model testing": 34455, + "model roberta": 34329, + "questions generated": 44789, + "models masked": 35217, + "enhanced performance": 17935, + "relations addition": 46015, + "mlm based": 33438, + "represent entities": 46472, + "edges represent": 16589, + "recently graph": 45430, + "experiments qa": 19506, + "performance computational": 40263, + "space finally": 51865, + "using annotations": 60560, + "text low": 56651, + "easily combined": 16539, + "learning improve": 29677, + "stopping criterion": 52868, + "models 12": 34649, + "training convergence": 57960, + "answer driven": 3033, + "research evaluation": 47030, + "learns semantic": 29974, + "demonstrate need": 13949, + "lingual tasks": 30731, + "using zero": 61030, + "presents interesting": 42087, + "build better": 7388, + "pairs addition": 39167, + "limitations models": 30552, + "new probing": 37288, + "knowledge stored": 27620, + "models plms": 35325, + "tackling problem": 54721, + "evaluate machine": 18468, + "conversational settings": 11052, + "experiment new": 19244, + "metrics measure": 33179, + "measure different": 32049, + "fail generate": 20339, + "annotating large": 2932, + "available benchmark": 5265, + "making easy": 31653, + "progress pre": 43110, + "works investigated": 62894, + "models unclear": 35643, + "decoding strategies": 13648, + "nucleus sampling": 37978, + "method proved": 32627, + "effectiveness various": 16822, + "virtual adversarial": 61623, + "aims correct": 2183, + "hierarchical graph": 23671, + "despite significant": 14388, + "investigate issue": 26963, + "generating adversarial": 22364, + "examples new": 18919, + "examples present": 18923, + "performance order": 40467, + "evaluate bert": 18444, + "bert baselines": 6627, + "original bert": 38704, + "respectively proposed": 47379, + "sub modules": 53523, + "encoder output": 17531, + "vocabulary space": 61714, + "near sota": 36513, + "sota performance": 51730, + "using seq2seq": 60929, + "sentences inference": 49737, + "step framework": 52809, + "learns map": 29965, + "techniques aim": 56057, + "multimodal features": 36148, + "data original": 12524, + "techniques especially": 56082, + "tend rely": 56207, + "rely spurious": 46301, + "distribution generalization": 15639, + "efficiency compared": 16839, + "methods self": 33029, + "shown tremendous": 50757, + "tremendous progress": 58774, + "number classes": 37988, + "shows better": 50765, + "generalization tasks": 22131, + "examples label": 18914, + "relative gain": 46098, + "knowledge guided": 27511, + "different knowledge": 14960, + "text train": 56816, + "having higher": 23488, + "performance reduces": 40525, + "knowledge helps": 27514, + "problem formulation": 42571, + "allowing models": 2447, + "learn relations": 29412, + "paper contains": 39306, + "example given": 18878, + "inference corpus": 25646, + "80 f1": 522, + "models ubiquitous": 35641, + "ubiquitous natural": 59176, + "english limited": 17836, + "web crawled": 61884, + "results good": 47651, + "using larger": 60761, + "semantic sentence": 49343, + "latent vectors": 29148, + "data generating": 12386, + "sequence task": 50009, + "gain insight": 21909, + "focus attention": 21145, + "performance indicating": 40391, + "fixed sized": 21083, + "generated according": 22266, + "effectiveness training": 16818, + "gaps current": 21987, + "bert sequence": 6716, + "cls token": 8728, + "specific classification": 52055, + "tuning phase": 58941, + "improvements text": 25108, + "variety settings": 61290, + "introduce effective": 26801, + "methods control": 32803, + "method experimental": 32495, + "best prior": 6808, + "identifying correct": 24456, + "particular context": 39837, + "context address": 10581, + "methods suggest": 33060, + "outperformed existing": 38838, + "showed performance": 50668, + "involving multiple": 27027, + "relations challenging": 46019, + "decomposing complex": 13657, + "questions existing": 44787, + "combined existing": 9079, + "majority existing": 31530, + "utterance information": 61136, + "learning capture": 29553, + "design multi": 14290, + "transfer pre": 58414, + "general task": 22093, + "tuning large": 58922, + "approach establishes": 3516, + "establishes state": 18362, + "94 respectively": 565, + "models reducing": 35419, + "noisy datasets": 37616, + "datasets fine": 13276, + "tuning finally": 58914, + "industrial setting": 25618, + "set hand": 50163, + "idea proposed": 24373, + "world information": 62942, + "just small": 27253, + "improving precision": 25192, + "systems machine": 54555, + "review previous": 48035, + "previous literature": 42258, + "bert popular": 6702, + "model generalizability": 33920, + "datasets trained": 13460, + "approaches benefit": 3776, + "lead sub": 29274, + "solutions work": 51673, + "domain twitter": 16219, + "dataset gathered": 12940, + "user question": 60442, + "reduce time": 45681, + "type question": 59066, + "speaker identity": 51998, + "training learn": 58154, + "varying data": 61428, + "style question": 53494, + "model did": 33764, + "relative contributions": 46090, + "bring new": 7332, + "requires commonsense": 46918, + "work pretrained": 62766, + "corpus provide": 11412, + "embeddings set": 17213, + "models handling": 35077, + "data especially": 12327, + "widely utilized": 62024, + "utilized various": 61111, + "showing promising": 50685, + "visualization results": 61679, + "instance based": 26424, + "positive transfer": 41299, + "tasks tested": 55930, + "better integrate": 6903, + "knowledge plms": 27571, + "objectives experimental": 38112, + "benchmark state": 6493, + "research large": 47064, + "com thu": 9025, + "thu keg": 57097, + "capability models": 7611, + "investigate unsupervised": 26992, + "generate reliable": 22236, + "pseudo labeled": 44275, + "domains extensive": 16256, + "multiple large": 36239, + "scale benchmark": 48555, + "generation novel": 22510, + "existing nlp": 19120, + "retrieval module": 47957, + "gaining increasing": 21926, + "effectively exploit": 16734, + "architecture pre": 4077, + "knowledge research": 27595, + "recent embedding": 45307, + "certain scenarios": 7945, + "scenarios finally": 48697, + "relatively smaller": 46133, + "explain model": 19594, + "identify non": 24433, + "prior efforts": 42401, + "quantify importance": 44610, + "human metrics": 24207, + "project page": 43135, + "enhancing performance": 17949, + "bag sentences": 5501, + "ignoring potential": 24501, + "novel dynamic": 37809, + "dynamically generates": 16498, + "shift problem": 50543, + "study demonstrate": 53355, + "generation classification": 22434, + "unable capture": 59199, + "documents document": 15871, + "source documents": 51764, + "cases study": 7814, + "keywords extracted": 27354, + "accuracy comparison": 950, + "use sub": 60033, + "domains addition": 16233, + "ml based": 33429, + "problem given": 42574, + "connected graph": 10174, + "problem addition": 42497, + "according experimental": 859, + "process creating": 42766, + "fundamental aspect": 21775, + "corpus multiple": 11385, + "highly reliable": 23911, + "quantitative analyses": 44615, + "representation transformer": 46597, + "efficient representation": 16894, + "documents important": 15885, + "simple weighted": 51226, + "weighted averaging": 61926, + "shortcomings propose": 50582, + "space complexity": 51852, + "critical component": 11780, + "output generated": 38974, + "models gpt": 35067, + "knowledge embedded": 27455, + "large models": 28911, + "work methods": 62724, + "methods presented": 32989, + "model goal": 33942, + "labeling effort": 27784, + "classification head": 8478, + "class class": 8394, + "expert evaluation": 19580, + "aims produce": 2209, + "better match": 6916, + "dataset semeval": 13078, + "importance using": 24693, + "model embedding": 33806, + "applied real": 3289, + "layout information": 29243, + "based pipeline": 5937, + "documents proposed": 15906, + "methods performed": 32982, + "sample datasets": 48449, + "key findings": 27314, + "slight improvement": 51433, + "systems support": 54645, + "shot adaptation": 50597, + "instead manually": 26456, + "scores computed": 48897, + "improvements cross": 25064, + "understanding intent": 59354, + "compared current": 9399, + "approaches utilize": 3952, + "classification experimental": 8467, + "terms score": 56313, + "tasks obtained": 55774, + "effective achieves": 16626, + "tasks self": 55868, + "single token": 51348, + "use local": 59936, + "propose parallel": 43580, + "terms different": 56283, + "attention learning": 4766, + "research practice": 47094, + "task estimating": 55054, + "annotated large": 2902, + "language findings": 28071, + "consistent different": 10273, + "economic news": 16578, + "short period": 50561, + "large news": 28920, + "learning discrete": 29593, + "choice training": 8338, + "performing submission": 40688, + "finally model": 20869, + "layer word": 29215, + "used proxy": 60279, + "use task": 60040, + "steps using": 52845, + "models established": 34975, + "single input": 51308, + "answering document": 3071, + "query context": 44664, + "efficient data": 16866, + "trained features": 57730, + "visualization attention": 61678, + "significant importance": 50872, + "domain used": 16224, + "perform entity": 40096, + "scale public": 48618, + "systems increasingly": 54532, + "recognition framework": 45507, + "level sequential": 30212, + "output given": 38975, + "network conduct": 36725, + "framework results": 21592, + "studies reveal": 53297, + "helps better": 23604, + "individual neurons": 25575, + "intuition propose": 26907, + "rich feature": 48099, + "communication propose": 9252, + "time evaluate": 57150, + "extremely challenging": 20154, + "real scenario": 45109, + "term dependency": 56234, + "including task": 25307, + "serve effective": 50077, + "recent results": 45345, + "existing publicly": 19132, + "embeddings larger": 17161, + "introduced novel": 26888, + "neuro symbolic": 37116, + "sentences certain": 49687, + "models exploited": 34999, + "context topic": 10734, + "analysis combine": 2629, + "text ranking": 56726, + "ranking approach": 44967, + "selects best": 49168, + "attempt learn": 4689, + "learning improves": 29679, + "efficiency inference": 16844, + "lms text": 30923, + "scale general": 48574, + "tuning strategy": 58963, + "studies different": 53259, + "way classification": 61796, + "task designed": 55009, + "creation datasets": 11748, + "classification corpus": 8446, + "driven analysis": 16418, + "85 f1": 535, + "attention potential": 4810, + "uses sequence": 60534, + "entities experiments": 18050, + "score code": 48840, + "similar examples": 51041, + "examples target": 18934, + "models noise": 35265, + "transformer experiments": 58486, + "model good": 33943, + "encoder transformer": 17546, + "approach leveraging": 3590, + "events related": 18798, + "introduce meta": 26820, + "rich cross": 48096, + "modal representation": 33464, + "incorporates external": 25375, + "internal external": 26686, + "seen rapid": 49061, + "explicitly defined": 19633, + "loss term": 31104, + "chinese datasets": 8305, + "models enhanced": 34969, + "used small": 60305, + "guided learning": 23346, + "vietnamese text": 61593, + "users input": 60467, + "domains text": 16296, + "physical world": 40861, + "accuracy large": 997, + "biomedical publications": 7176, + "having similar": 23491, + "sharing similar": 50519, + "accuracy latency": 998, + "manner proposed": 31725, + "method introduces": 32551, + "special token": 52021, + "output token": 39005, + "improve reasoning": 24914, + "reasoning ability": 45183, + "design evaluation": 14281, + "class label": 8405, + "pretrained transformer": 42186, + "better baselines": 6853, + "bert style": 6723, + "style models": 53491, + "automatically discovering": 5164, + "quality diverse": 44510, + "answers different": 3107, + "com jzbjyb": 9017, + "tree representation": 58755, + "evaluation toolkit": 18742, + "bert achieved": 6603, + "knowledge pre": 27572, + "finally fine": 20860, + "results fully": 47641, + "extra supervision": 19965, + "trained weak": 57911, + "examples improve": 18910, + "approach aims": 3414, + "lexical normalization": 30373, + "text external": 56573, + "accuracy classification": 943, + "positional encodings": 41273, + "german text": 22677, + "corpus time": 11445, + "training requires": 58229, + "truth word": 58841, + "meaning source": 32017, + "multimodal language": 36151, + "framework pre": 21583, + "verify efficacy": 61539, + "brings improvement": 7341, + "target response": 54837, + "low entropy": 31150, + "improve diversity": 24843, + "improves semantic": 25159, + "quite effective": 44830, + "10 datasets": 39, + "data formats": 12372, + "propose explore": 43380, + "tackling challenge": 54720, + "systems address": 54426, + "different subtasks": 15088, + "help build": 23555, + "emotion labels": 17291, + "community study": 9276, + "models self": 35474, + "sentence evaluate": 49553, + "explored topic": 19766, + "focused generating": 21223, + "various granularities": 61346, + "supervision available": 54077, + "different emotions": 14914, + "manually creating": 31772, + "methods systematically": 33064, + "best use": 6835, + "challenge automated": 7968, + "semantics important": 49405, + "using web": 61023, + "kinds features": 27370, + "features applied": 20522, + "traditional way": 57555, + "works relied": 62905, + "models induced": 35129, + "potential large": 41397, + "experiments pre": 19489, + "based non": 5917, + "mentions propose": 32309, + "propose span": 43642, + "span level": 51925, + "programming algorithm": 43083, + "domain medical": 16109, + "annotations code": 2985, + "data downstream": 12296, + "different monolingual": 14999, + "prior language": 42404, + "process proposed": 42820, + "baselines fine": 6262, + "build dataset": 7393, + "work obtained": 62739, + "identifying semantic": 24465, + "sentential contexts": 49812, + "aware approach": 5440, + "approach relation": 3672, + "sentences better": 49686, + "information entire": 25836, + "experiments semeval": 19518, + "generating complex": 22368, + "study leverage": 53406, + "scenarios using": 48711, + "augmentation approaches": 4949, + "baseline terms": 6216, + "automatic creation": 5075, + "sets evaluate": 50291, + "representations high": 46682, + "collect high": 8942, + "embeddings test": 17227, + "analyzing text": 2846, + "fully exploited": 21726, + "model enhance": 33824, + "measure model": 32057, + "model reaches": 34273, + "annotate new": 2874, + "differ terms": 14817, + "annotations provide": 2999, + "rivals state": 48170, + "art recent": 4362, + "furthermore best": 21805, + "paper end": 39347, + "benchmark set": 6492, + "cnn layers": 8769, + "lstm bilstm": 31253, + "feature map": 20496, + "datasets shown": 13425, + "cause models": 7886, + "datasets fail": 13271, + "extra training": 19966, + "perform similarly": 40143, + "analysis comparison": 2633, + "performance provide": 40509, + "people speak": 40038, + "greatly reduces": 23238, + "future nlp": 21879, + "present various": 42052, + "rely text": 46304, + "specifically extract": 52201, + "systems evaluated": 54490, + "ai community": 2116, + "spanish catalan": 51938, + "words cbow": 62375, + "methods compared": 32793, + "multiple steps": 36291, + "executable programs": 18980, + "models non": 35267, + "range reasoning": 44930, + "help extract": 23562, + "supervision provides": 54091, + "challenges make": 8059, + "particular large": 39851, + "translation main": 58627, + "data specifically": 12687, + "tuning multilingual": 58931, + "baselines achieving": 6230, + "68 f1": 480, + "tags dependency": 54757, + "critical sequence": 11792, + "sequence training": 50013, + "general principles": 22082, + "based processing": 5955, + "focus exclusively": 21162, + "fast learning": 20427, + "multiple encoders": 36209, + "training iterations": 58138, + "better encode": 6881, + "usually ignore": 61052, + "attributes paper": 4909, + "examine role": 18869, + "graph context": 23119, + "fed neural": 20706, + "create release": 11715, + "articles neural": 4471, + "past works": 39938, + "approach advantage": 3413, + "growing need": 23298, + "task typically": 55454, + "incorporates context": 25373, + "high inter": 23742, + "lastly present": 29109, + "approaches analysis": 3762, + "mandarin english": 31698, + "advancing state": 1935, + "models focused": 35038, + "understanding capabilities": 59327, + "capabilities models": 7603, + "lower resourced": 31224, + "trained scratch": 57856, + "scratch new": 48945, + "model largely": 34043, + "study open": 53425, + "dependencies sentences": 14111, + "sentences instead": 49739, + "datasets demonstrating": 13221, + "10 minutes": 47, + "supervised state": 54051, + "grained annotations": 23023, + "dataset generate": 12942, + "using popular": 60863, + "representations hand": 46680, + "representation crucial": 46501, + "conversation humans": 11034, + "understand given": 59295, + "novel powerful": 37892, + "process humans": 42789, + "model mitigate": 34101, + "evaluation strategies": 18727, + "written human": 63000, + "coherent fluent": 8915, + "text extracting": 56576, + "model discriminate": 33773, + "aware representation": 5467, + "additional inputs": 1677, + "experiments framework": 19439, + "analysis pre": 2720, + "effectiveness self": 16812, + "incorporate knowledge": 25357, + "knowledge real": 27584, + "applied downstream": 3271, + "f1 improvements": 20184, + "experiments benchmarks": 19365, + "structures learn": 53187, + "especially complex": 18267, + "reveal interesting": 48009, + "information unstructured": 26139, + "source unsupervised": 51818, + "disease detection": 15497, + "detection f1": 14485, + "task studied": 55414, + "case multiple": 7792, + "scarcity low": 48672, + "state ofthe": 52705, + "ofthe art": 38323, + "transfer settings": 58420, + "better knowledge": 6905, + "semantics text": 49417, + "hierarchical semantic": 23691, + "model guide": 33948, + "chain monte": 7959, + "models learns": 35180, + "correct semantically": 11474, + "combination deep": 9039, + "understood paper": 59423, + "understanding underlying": 59411, + "google assistant": 22952, + "understanding user": 59413, + "effectively identify": 16738, + "current techniques": 12018, + "generalization work": 22134, + "framework detect": 21492, + "box models": 7291, + "analysis predictions": 2722, + "importance research": 24687, + "research tasks": 47128, + "computational biology": 9836, + "based discovery": 5682, + "leverage state": 30290, + "thoroughly evaluated": 57069, + "main source": 31459, + "neighbor knn": 36657, + "lack publicly": 27908, + "sentence alignments": 49516, + "use resulting": 59998, + "tuning based": 58901, + "40 000": 395, + "performance demonstrate": 40284, + "paper suggests": 39584, + "word positions": 62266, + "general solution": 22091, + "computer based": 9888, + "medical entity": 32205, + "crucial process": 11907, + "model map": 34091, + "codes models": 8878, + "63 accuracy": 467, + "research approach": 46982, + "create highly": 11700, + "training paradigms": 58204, + "suitable datasets": 53856, + "datasets testing": 13457, + "testing models": 56408, + "multi paragraph": 35995, + "tasks believe": 55518, + "unique structure": 59517, + "annotations task": 3003, + "attention experiments": 4747, + "success pre": 53717, + "shot evaluation": 50612, + "correct predictions": 11473, + "evaluation automatically": 18579, + "encoder self": 17541, + "using pairwise": 60852, + "powerful language": 41435, + "models implemented": 35103, + "bidirectional transformers": 7085, + "embedding representation": 17057, + "better robustness": 6962, + "knowledge addition": 27390, + "contains important": 10495, + "convolutional attention": 11102, + "pairs sentence": 39215, + "features attention": 20526, + "mechanism employed": 32110, + "important roles": 24769, + "lot traction": 31121, + "open new": 38438, + "lack comprehensive": 27878, + "exploration paper": 19679, + "issues challenges": 27086, + "researchers engineers": 47154, + "findings propose": 20911, + "dataset sizes": 13090, + "human capabilities": 24120, + "task domains": 55034, + "steps building": 52841, + "data building": 12195, + "data process": 12564, + "experiments given": 19440, + "method source": 32664, + "resources propose": 47328, + "method reaches": 32632, + "learning various": 29935, + "various model": 61365, + "component systems": 9714, + "systems deep": 54472, + "single dataset": 51293, + "native non": 36404, + "research conducted": 47005, + "correction models": 11485, + "consider real": 10219, + "focuses detecting": 21236, + "tweets dataset": 59013, + "used experiment": 60177, + "data generalize": 12381, + "representation finally": 46519, + "set identify": 50166, + "guidelines human": 23353, + "size makes": 51389, + "small models": 51485, + "bert different": 6642, + "different downstream": 14909, + "tasks motivated": 55751, + "task adaptive": 54882, + "tasks incorporate": 55684, + "distillation loss": 15572, + "demonstrate task": 13990, + "terms parameter": 56305, + "mainstream methods": 31480, + "representations encoding": 46650, + "information method": 25970, + "interactive information": 26628, + "form natural": 21329, + "sub field": 53517, + "progress domain": 43096, + "train work": 57661, + "need train": 36595, + "models statistical": 35535, + "employ novel": 17387, + "classification low": 8488, + "language performance": 28379, + "spread social": 52379, + "bias mitigation": 7033, + "methods leveraging": 32927, + "experiments existing": 19433, + "semantic correspondence": 49264, + "english multilingual": 17847, + "bert used": 6731, + "measured performance": 32069, + "smaller datasets": 51517, + "support downstream": 54118, + "content text": 10564, + "study information": 53392, + "based maximum": 5837, + "user interaction": 60427, + "baseline accuracy": 6150, + "online platforms": 38379, + "new style": 37328, + "approach obtain": 3615, + "rl training": 48177, + "encouraging model": 17605, + "online users": 38393, + "leverage transformer": 30294, + "cross modality": 11866, + "dataset cross": 12874, + "pre computed": 41498, + "location information": 30967, + "task type": 55452, + "numerical vectors": 38062, + "approaches like": 3862, + "produce single": 43010, + "meanings paper": 32034, + "embeddings present": 17192, + "tasks input": 55690, + "especially text": 18305, + "original version": 38738, + "analyzing model": 2844, + "datasets approaches": 13158, + "learning code": 29560, + "mentions paper": 32308, + "detection especially": 14479, + "variety machine": 61279, + "denoising auto": 14063, + "using bart": 60582, + "complete model": 9599, + "enables new": 17446, + "effective pre": 16685, + "tasks low": 55732, + "settings large": 50380, + "leveraging large": 30329, + "corpora respectively": 11238, + "propose simplified": 43638, + "influence model": 25727, + "performance 67": 40171, + "samples train": 48491, + "scale transformer": 48633, + "tuning gpt": 58917, + "gpt using": 22991, + "technique data": 56032, + "sets using": 50311, + "automated human": 5046, + "able reduce": 722, + "semi autoregressive": 49450, + "conditional masked": 9998, + "twitter social": 59041, + "comprehensive evaluations": 9789, + "achieved tremendous": 1279, + "tremendous success": 58775, + "performance bert": 40216, + "layers bert": 29218, + "tuning multi": 58930, + "capture relationships": 7703, + "inspired human": 26406, + "reach new": 45050, + "challenge requires": 8013, + "according given": 862, + "significance tests": 50844, + "analysis conducted": 2636, + "south asian": 51845, + "related target": 45941, + "set user": 50274, + "set related": 50236, + "extraction main": 20079, + "model conversational": 33718, + "post level": 41349, + "variants model": 61236, + "learns sentence": 29975, + "level self": 30203, + "knowledge evaluate": 27469, + "trained parameters": 57838, + "multimodal approach": 36142, + "does account": 15933, + "aware sentence": 5472, + "verifies effectiveness": 61532, + "yields comparable": 63120, + "model averaging": 33595, + "queries contain": 44650, + "suffer issue": 53768, + "answer work": 3059, + "finetuning pretrained": 21051, + "approaches various": 3953, + "bert bilstm": 6632, + "bilstm based": 7127, + "domain existing": 16062, + "models involve": 35148, + "domain improve": 16083, + "improve domain": 24844, + "sentiment target": 49861, + "mutual learning": 36351, + "multiple public": 36267, + "learning recent": 29834, + "fundamental step": 21791, + "step developing": 52804, + "dataset deep": 12882, + "errors work": 18254, + "modeling complex": 34566, + "relationship documents": 46068, + "pretrain large": 42144, + "model serve": 34364, + "documents including": 15887, + "including use": 25316, + "dense representations": 14079, + "extensive automatic": 19857, + "iterative approach": 27123, + "attempt explain": 4685, + "trained lms": 57778, + "approach previous": 3651, + "approaches method": 3872, + "issue present": 27073, + "framework incorporate": 21543, + "investigate types": 26991, + "support task": 54128, + "study best": 53336, + "data distributed": 12289, + "efficiently generate": 16915, + "text perturbations": 56699, + "scoring method": 48936, + "setting results": 50348, + "popular real": 41180, + "used guide": 60202, + "documents utilize": 15928, + "number general": 38006, + "efficiency experiments": 16842, + "artificial agents": 4488, + "methods target": 33067, + "agent human": 2055, + "topics keywords": 57452, + "highlights effectiveness": 23875, + "provided new": 44169, + "highlight challenges": 23860, + "describes task": 14237, + "results submitted": 47862, + "represent complex": 46467, + "neural agent": 36928, + "usually lack": 61055, + "formal analysis": 21344, + "interdisciplinary research": 26645, + "text capture": 56461, + "searching large": 48992, + "time language": 57171, + "computational memory": 9847, + "metrics model": 33181, + "input audio": 26256, + "method creates": 32445, + "processing based": 42856, + "make new": 31584, + "enhance accuracy": 17910, + "knowledge injected": 27525, + "model fixed": 33902, + "backbone model": 5485, + "wikipedia wikidata": 62056, + "classification entity": 8460, + "approach consider": 3463, + "models discriminative": 34926, + "trained joint": 57752, + "outperform unsupervised": 38830, + "just single": 27252, + "based technique": 6085, + "overcome limitations": 39068, + "provide real": 44118, + "transformer framework": 58488, + "model publicly": 34261, + "information generally": 25890, + "embedded representation": 17007, + "led remarkable": 29992, + "indic languages": 25522, + "novel setup": 37921, + "auxiliary data": 5230, + "scores state": 48922, + "particular obtain": 39857, + "data scientists": 12630, + "answer qa": 3046, + "hierarchical bilstm": 23661, + "dataset compare": 12849, + "representation transformers": 46598, + "text popular": 56700, + "matrix based": 31940, + "information utilizing": 26150, + "data differ": 12280, + "data synthesis": 12715, + "tuned model": 58879, + "achieved higher": 1242, + "trained just": 57754, + "synthesize new": 54361, + "combining data": 9109, + "generate long": 22217, + "models class": 34814, + "embedding alignment": 17010, + "transfer using": 58430, + "models solely": 35518, + "reference results": 45744, + "tweets collected": 59010, + "information candidate": 25773, + "approach efficiently": 3504, + "video recordings": 61585, + "video data": 61583, + "nlp based": 37467, + "short form": 50555, + "roc auc": 48298, + "aims use": 2221, + "images using": 24556, + "key factor": 27309, + "demonstrates advantages": 14027, + "problem domains": 42546, + "data embeddings": 12315, + "provide meaningful": 44101, + "important technique": 24781, + "voice assistant": 61722, + "generated language": 22294, + "joint pre": 27183, + "pairs fine": 39192, + "better using": 6990, + "systems current": 54466, + "context result": 10707, + "masked word": 31871, + "benchmarks glue": 6526, + "efficiency different": 16840, + "different pretrained": 15031, + "models differ": 34913, + "better efficiency": 6880, + "lms trained": 30924, + "datasets evaluated": 13253, + "features respectively": 20658, + "number experiments": 38003, + "corpora performance": 11232, + "performance embeddings": 40315, + "substantially different": 53633, + "different results": 15053, + "quantify performance": 44611, + "initialization training": 26224, + "observe fine": 38132, + "training validation": 58312, + "training dynamics": 58076, + "partially observed": 39810, + "achieving 90": 1390, + "text consider": 56506, + "mapping task": 31806, + "extensive ablations": 19854, + "code large": 8821, + "efficient attention": 16863, + "tasks bleu": 55529, + "model scaling": 34337, + "scaling approach": 48648, + "propose bert": 43312, + "pretraining using": 42221, + "text transfer": 56821, + "pretraining transformer": 42220, + "based image": 5778, + "loss prediction": 31102, + "particular design": 39842, + "number available": 37984, + "heavy reliance": 23539, + "effectiveness downstream": 16777, + "transfer nlp": 58411, + "computational budget": 9837, + "shot tasks": 50650, + "process essential": 42775, + "create corpus": 11693, + "attention academia": 4707, + "models recognize": 35414, + "effect model": 16615, + "model predicted": 34221, + "relationships sentence": 46082, + "modeling proposed": 34616, + "future researches": 21895, + "specifically design": 52191, + "able deal": 686, + "including single": 25299, + "outperforms transformer": 38956, + "provide analyses": 44006, + "datasets does": 13237, + "encoder text": 17543, + "matrix representation": 31943, + "words additionally": 62362, + "behavior different": 6391, + "neural modules": 36985, + "model displays": 33777, + "task extract": 55071, + "crucial nlp": 11905, + "latent knowledge": 29129, + "train knowledge": 57596, + "using masked": 60789, + "documents demonstrate": 15869, + "retrieval augmented": 47940, + "augmented language": 4980, + "aim bridge": 2139, + "resource low": 47250, + "allowing better": 2444, + "gained momentum": 21920, + "driven applications": 16419, + "alignment different": 2367, + "challenging paper": 8123, + "methods embedding": 32834, + "text extracted": 56575, + "introduce language": 26816, + "work given": 62675, + "systems real": 54610, + "time work": 57238, + "tasks outperform": 55778, + "instance specific": 26430, + "decisions using": 13576, + "contextual bandit": 10759, + "relations target": 46058, + "benchmarks method": 6530, + "approaches aim": 3759, + "systems capable": 54446, + "algorithm unsupervised": 2308, + "makes prediction": 31631, + "divide conquer": 15744, + "hop questions": 24003, + "like humans": 30476, + "adopt neural": 1865, + "questions corresponding": 44780, + "content quality": 10550, + "systems mainly": 54557, + "simple fine": 51170, + "models little": 35193, + "models brought": 34796, + "propose replace": 43604, + "encoder layer": 17521, + "scientific field": 48761, + "self distillation": 49195, + "distillation experiments": 15569, + "investigate question": 26979, + "sparse matrix": 51969, + "hard negative": 23446, + "examples using": 18941, + "required answer": 46899, + "second existing": 49004, + "required generate": 46901, + "modalities including": 33469, + "text visual": 56841, + "visual audio": 61649, + "text non": 56677, + "non text": 37686, + "generation capability": 22430, + "recently multi": 45440, + "task great": 55110, + "introduce bert": 26787, + "generation able": 22408, + "generation data": 22441, + "obtains substantial": 38260, + "empirically compare": 17357, + "task diverse": 55027, + "covering languages": 11657, + "size large": 51388, + "model required": 34312, + "kgs based": 27363, + "perform joint": 40116, + "model bi": 33627, + "large transformer": 29033, + "models inspired": 35134, + "randomly generated": 44898, + "fully trainable": 21744, + "empirical methods": 17333, + "methods transformer": 33084, + "review current": 48027, + "state knowledge": 52700, + "main advantage": 31422, + "worse performance": 62973, + "tasks studies": 55913, + "data considering": 12240, + "small perturbations": 51493, + "substantially improved": 53638, + "propose pre": 43587, + "train unified": 57655, + "masked tokens": 31870, + "tokens context": 57324, + "provide global": 44079, + "model bidirectional": 33630, + "tasks widely": 55964, + "affect final": 2014, + "arabic natural": 4002, + "transformers based": 58521, + "specific bert": 52049, + "corpus models": 11383, + "research applications": 46981, + "model brings": 33634, + "learning efficiency": 29611, + "models far": 35018, + "adaptive multi": 1577, + "large public": 28946, + "public large": 44322, + "model xlm": 34548, + "al 2020": 2244, + "tasks applications": 55501, + "extraction challenging": 20051, + "task important": 55124, + "model self": 34348, + "direct application": 15252, + "tuning small": 58955, + "manually tagged": 31788, + "various deep": 61322, + "based automated": 5587, + "learning hybrid": 29675, + "presented dataset": 42059, + "corpus conduct": 11301, + "corpus achieve": 11266, + "used chinese": 60113, + "meta embedding": 32331, + "modal pre": 33462, + "method shot": 32649, + "based gpt": 5756, + "dataset robust": 13071, + "investigate applicability": 26940, + "conventional gram": 11003, + "translation bt": 58587, + "study languages": 53403, + "online https": 38369, + "encoder language": 17520, + "data trained": 12740, + "tasks topic": 55936, + "setting training": 50353, + "supervised zero": 54072, + "decrease number": 13668, + "mbert model": 31979, + "results zero": 47914, + "domains tasks": 16295, + "tasks aim": 55495, + "greatly advanced": 23226, + "success methods": 53709, + "available human": 5311, + "effectiveness cross": 16773, + "academic writing": 795, + "achieves score": 1362, + "describes work": 14238, + "work developing": 62635, + "texts domain": 56875, + "social groups": 51563, + "studied large": 53227, + "research create": 47006, + "high context": 23718, + "annotated expert": 2896, + "industrial research": 25617, + "depending type": 14159, + "training documents": 58069, + "years brought": 63052, + "trained millions": 57789, + "leads higher": 29314, + "structured text": 53178, + "challenges automatic": 8034, + "construction methods": 10428, + "translation sentiment": 58675, + "according results": 867, + "topic relevance": 57425, + "large sized": 29012, + "sets available": 50282, + "best choice": 6756, + "new technique": 37338, + "accurate semantic": 1088, + "relations input": 46037, + "graph using": 23178, + "present baseline": 41853, + "systems hope": 54521, + "world environment": 62939, + "explore relative": 19732, + "common languages": 9183, + "incorporate multi": 25359, + "performance simpler": 40564, + "end sentence": 17706, + "search paper": 48978, + "representation spaces": 46583, + "differ substantially": 14816, + "approach combining": 3453, + "datasets extensive": 13267, + "lingual representation": 30720, + "fasttext bert": 20446, + "unsupervised weakly": 59746, + "community effort": 9264, + "data unfortunately": 12752, + "unclear models": 59237, + "words make": 62452, + "curated data": 11948, + "different use": 15116, + "present baselines": 41855, + "novel contribution": 37792, + "conversational ai": 11040, + "share code": 50455, + "models design": 34902, + "regularization loss": 45838, + "labels addition": 27808, + "addition original": 1630, + "information furthermore": 25886, + "feature word": 20512, + "level interpretability": 30137, + "better recall": 6950, + "rules neural": 48394, + "process document": 42772, + "higher human": 23827, + "given contexts": 22730, + "takes raw": 54783, + "issue especially": 27061, + "propose original": 43578, + "addition corpus": 1605, + "fairly compare": 20361, + "methods dataset": 32811, + "help address": 23550, + "tackling task": 54722, + "problem explore": 42560, + "applications traditional": 3254, + "networks improve": 36867, + "strategy used": 52954, + "improve representation": 24917, + "representation texts": 46593, + "significantly advanced": 50933, + "limited low": 30598, + "parsing tree": 39802, + "major types": 31524, + "view features": 61597, + "method enhance": 32484, + "enhance state": 17923, + "networks potential": 36892, + "consists subtasks": 10333, + "data costly": 12255, + "expressed different": 19797, + "development dataset": 14673, + "sheer scale": 50534, + "cohen kappa": 8902, + "existing widely": 19168, + "analysis including": 2680, + "code documentation": 8811, + "global word": 22848, + "ground breaking": 23250, + "breaking performance": 7313, + "survey review": 54219, + "common ground": 9178, + "word count": 62135, + "assessment data": 4591, + "dataset future": 12939, + "despite high": 14366, + "challenging domain": 8091, + "temperature scaling": 56172, + "effective reducing": 16688, + "lingual alignment": 30691, + "alignment human": 2369, + "fluency relevance": 21126, + "semantic changes": 49245, + "techniques finally": 56088, + "general natural": 22072, + "languages indian": 28696, + "utilized improve": 61109, + "sufficient labeled": 53804, + "different platforms": 15026, + "boost training": 7257, + "type annotation": 59048, + "datasets validate": 13476, + "focus arabic": 21144, + "explanation generation": 19604, + "language train": 28533, + "propose generating": 43399, + "performance advantage": 40188, + "events news": 18795, + "work automatically": 62582, + "novel datasets": 37800, + "approach datasets": 3478, + "speech natural": 52273, + "texts web": 56944, + "modern society": 35719, + "nigerian pidgin": 37441, + "datasets open": 13354, + "english despite": 17796, + "evaluation multilingual": 18658, + "multilingual encoders": 36083, + "lingual generalization": 30703, + "capabilities multilingual": 7605, + "release benchmark": 46142, + "performance gaps": 40360, + "leverage domain": 30266, + "general applied": 22044, + "automatically annotate": 5140, + "hybrid attention": 24312, + "sentences mainly": 49751, + "complex domain": 9625, + "core natural": 11151, + "approach tasks": 3718, + "small portion": 51494, + "multiple factors": 36215, + "sufficient diversity": 53802, + "important open": 24751, + "training paradigm": 58203, + "significant margins": 50899, + "platforms provide": 40955, + "model proved": 34254, + "labels existing": 27820, + "tend ignore": 56200, + "assign different": 4598, + "token classification": 57282, + "methods construct": 32801, + "domain finally": 16070, + "finally construct": 20848, + "models modern": 35231, + "studies natural": 53285, + "manual qualitative": 31750, + "research use": 47138, + "process multiple": 42807, + "training nlp": 58193, + "burden manual": 7497, + "tweets used": 59025, + "despite widespread": 14405, + "predict labels": 41644, + "synthetic code": 54368, + "understand sentiment": 59313, + "making important": 31656, + "aggregating multiple": 2077, + "inference information": 25661, + "abundant information": 781, + "information finally": 25875, + "representations substantially": 46763, + "tasks benchmark": 55519, + "bert performed": 6700, + "commonly occurring": 9220, + "significant degradation": 50861, + "degradation performance": 13802, + "bert performance": 6699, + "shortcomings existing": 50580, + "performance findings": 40348, + "presence noise": 41838, + "bert solve": 6720, + "available cc": 5269, + "information flows": 25881, + "single stream": 51340, + "interaction module": 26607, + "single modal": 51315, + "corpus fine": 11343, + "gained traction": 21924, + "accuracy 69": 903, + "alignment results": 2382, + "dialogue based": 14767, + "additional layer": 1683, + "solutions paper": 51668, + "assist researchers": 4611, + "use methods": 59947, + "ai including": 2117, + "attribute aware": 4900, + "need learn": 36578, + "lead improvement": 29262, + "shown performance": 50733, + "pairs low": 39201, + "languages mt": 28733, + "benchmark evaluate": 6464, + "work benchmark": 62587, + "evaluate current": 18448, + "multilingual modeling": 36097, + "demonstrate generalization": 13916, + "resourced settings": 47290, + "languages included": 28693, + "fasttext word": 20448, + "roman script": 48329, + "task standard": 55409, + "processing understanding": 42962, + "online text": 38390, + "based gram": 5758, + "tools natural": 57382, + "result better": 47435, + "proposes deep": 43931, + "translation benchmark": 58584, + "shelf models": 50539, + "data ii": 12414, + "words representing": 62499, + "paper addition": 39249, + "datasets goal": 13286, + "datasets representative": 13401, + "study influence": 53391, + "context results": 10708, + "bidirectional models": 7080, + "plays fundamental": 40997, + "fundamental role": 21790, + "results offer": 47750, + "human automatic": 24110, + "scale cross": 48561, + "task provides": 55309, + "extend recent": 19829, + "tasks evaluated": 55620, + "improved models": 24953, + "information makes": 25963, + "approach non": 3613, + "factors model": 20313, + "shifts word": 50547, + "monotonic alignment": 35821, + "tokens model": 57329, + "model unlabeled": 34501, + "text main": 56653, + "original document": 38710, + "objectives improve": 38113, + "training achieves": 57925, + "new categories": 37145, + "propose introduce": 43423, + "relation network": 45989, + "metrics experiments": 33164, + "synonym replacement": 54285, + "addition generating": 1618, + "shown success": 50755, + "model maintaining": 34082, + "content method": 10537, + "research studying": 47125, + "generation non": 22509, + "typically small": 59157, + "evaluate data": 18449, + "key content": 27303, + "variety baselines": 61263, + "quality synthetic": 44585, + "learning dynamics": 29607, + "entire dataset": 18022, + "brute force": 7378, + "discrete tokens": 15430, + "decoding steps": 13647, + "allows direct": 2458, + "method hierarchical": 32524, + "according type": 870, + "class distributions": 8401, + "broad applications": 7349, + "early prediction": 16513, + "making better": 31646, + "supervised signals": 54049, + "set candidates": 50118, + "benchmarks based": 6512, + "attention past": 4806, + "challenge problem": 8008, + "paper measure": 39424, + "metrics open": 33184, + "extremely imbalanced": 20159, + "score model": 48859, + "improvement 13": 24979, + "systems despite": 54476, + "binary classifiers": 7149, + "proposed self": 43890, + "000 unique": 14, + "results manual": 47713, + "issues associated": 27085, + "generated pseudo": 22309, + "corpus annotation": 11275, + "associated specific": 4623, + "specific entities": 52079, + "additional pre": 1692, + "online services": 38382, + "knowledge enhance": 27462, + "baseline bert": 6158, + "specific question": 52135, + "work describes": 62629, + "structure form": 53106, + "transformer architectures": 58450, + "task network": 55236, + "identify linguistic": 24428, + "space high": 51869, + "recently bert": 45411, + "studies showing": 53300, + "model smaller": 34394, + "distillation framework": 15570, + "tasks consistent": 55556, + "distilled model": 15583, + "benchmark approach": 6425, + "outperforms task": 38953, + "spread multiple": 52378, + "datasets knowledge": 13307, + "introduce textit": 26872, + "words original": 62473, + "multilingual unsupervised": 36133, + "require changes": 46844, + "component human": 9704, + "utilize data": 61089, + "challenges facing": 8048, + "speed compared": 52321, + "efficient terms": 16902, + "data stream": 12695, + "setting evaluate": 50321, + "models ptlms": 35383, + "covid 19": 11668, + "19 pandemic": 187, + "comparing existing": 9480, + "usually requires": 61066, + "models crucial": 34877, + "f1 respectively": 20193, + "train transformers": 57653, + "applied fine": 3274, + "respectively propose": 47378, + "ability use": 650, + "models finetuned": 35033, + "model finetuned": 33896, + "learning rich": 29850, + "success pretrained": 53720, + "segment text": 49076, + "analyze differences": 2810, + "method matches": 32572, + "pretrained lms": 42165, + "role recent": 48320, + "field present": 20766, + "plms achieved": 41015, + "extraction experiments": 20067, + "shown method": 50726, + "approaches explore": 3821, + "improvement proposed": 25020, + "trained nlp": 57831, + "dataset challenge": 12836, + "finetuned bert": 21043, + "approach combine": 3450, + "order dependency": 38607, + "tasks transformer": 55942, + "explicitly consider": 19632, + "performance empirical": 40316, + "input target": 26344, + "dataset substantially": 13105, + "model benefit": 33616, + "reduce gap": 45663, + "unified multilingual": 59475, + "models approaches": 34722, + "world task": 62962, + "millions parameters": 33263, + "performances paper": 40645, + "sub networks": 53525, + "roberta base": 48215, + "recently research": 45463, + "content moderation": 10539, + "limitations present": 30554, + "strategies neural": 52911, + "offers advantages": 38299, + "followed text": 21259, + "propose metrics": 43460, + "finetuned models": 21045, + "dataset reddit": 13057, + "generate short": 22246, + "results 20": 47481, + "goal experiments": 22884, + "limited flexibility": 30587, + "comparable size": 9310, + "math word": 31930, + "data development": 12279, + "performance case": 40228, + "learning achieve": 29501, + "novel pre": 37893, + "yields superior": 63137, + "30 000": 355, + "language construct": 28005, + "set word": 50277, + "evaluate recent": 18498, + "models domain": 34936, + "domain high": 16080, + "understanding different": 59338, + "tasks number": 55771, + "metrics provide": 33192, + "data hindi": 12406, + "crowd source": 11880, + "mbert based": 31978, + "corpus including": 11361, + "current strong": 12013, + "67 f1": 476, + "weights training": 61941, + "end perform": 17694, + "datasets nlp": 13347, + "provide theoretical": 44143, + "empirical observation": 17334, + "dataset curated": 12877, + "global pandemic": 22838, + "xlm roberta": 63030, + "additional unlabeled": 1708, + "trained contextualized": 57694, + "self trained": 49221, + "text requires": 56741, + "trained annotated": 57672, + "corpus social": 11432, + "detailed annotation": 14415, + "annotation using": 2980, + "task active": 54879, + "framework successfully": 21607, + "93 f1": 562, + "000 annotated": 3, + "annotated news": 2909, + "resulting dataset": 47464, + "general topic": 22095, + "errors model": 18245, + "models correlated": 34870, + "perform surprisingly": 40149, + "difficult explain": 15167, + "make correct": 31556, + "generator generates": 22618, + "generates textual": 22360, + "constructed graph": 10411, + "multiple rounds": 36277, + "scores large": 48907, + "scale pre": 48611, + "push state": 44426, + "strategies propose": 52914, + "inference latency": 25665, + "terms parameters": 56306, + "based predefined": 5944, + "make sentence": 31596, + "baseline 17": 6148, + "models direct": 34921, + "context understanding": 10737, + "aims recognize": 2212, + "extracting important": 20031, + "turn conversational": 58988, + "nlu benchmarks": 37563, + "range research": 44932, + "community driven": 9263, + "trained chinese": 57686, + "datasets additional": 13146, + "addition previous": 1635, + "benchmarks experimental": 6523, + "models overfit": 35297, + "generalization models": 22122, + "robust adversarial": 48238, + "training lead": 58152, + "pretrained transformers": 42191, + "distribution ood": 15646, + "distribution shifts": 15651, + "models necessarily": 35247, + "studied task": 53236, + "variant bert": 61233, + "integrated model": 26516, + "previous study": 42293, + "constructing high": 10419, + "annotations experimental": 2990, + "improve target": 24931, + "set latent": 50183, + "tend focus": 56198, + "generate candidate": 22182, + "based external": 5722, + "transfer ability": 58350, + "text baselines": 56457, + "statistical features": 52742, + "later layers": 29150, + "results hold": 47660, + "generalizing new": 22159, + "stage generation": 52432, + "tuned language": 58875, + "results automated": 47512, + "text significantly": 56769, + "outbreak covid": 38762, + "study automatically": 53333, + "models variants": 35667, + "finally consider": 20847, + "useful practice": 60380, + "modeling better": 34562, + "propose contrastive": 43337, + "contrastive objective": 10914, + "recognition dialogue": 45500, + "prediction response": 41736, + "forms including": 21375, + "fast development": 20421, + "systematic studies": 54403, + "analysis investigate": 2685, + "learning combined": 29562, + "technique natural": 56039, + "text extensive": 56571, + "generation benchmarks": 22427, + "benchmarks covering": 6513, + "information based": 25767, + "knowledge distributed": 27445, + "lead severe": 29269, + "information lack": 25938, + "benchmark study": 6495, + "models larger": 35169, + "propose modification": 43469, + "hard instances": 23443, + "calibrated confidence": 7531, + "test proposed": 56362, + "resources time": 47334, + "parameters compared": 39688, + "users control": 60458, + "dialog models": 14759, + "domain requires": 16147, + "human explanations": 24165, + "using 20": 60547, + "data label": 12447, + "based label": 5799, + "tasks easy": 55599, + "unlabeled instances": 59574, + "data popular": 12547, + "models surpass": 35568, + "baseline f1": 6166, + "tasks providing": 55828, + "document different": 15784, + "based bilstm": 5611, + "number vocabulary": 38053, + "input example": 26274, + "methods understand": 33090, + "additionally analyze": 1712, + "benchmark includes": 6471, + "impact word": 24609, + "applying bert": 3358, + "bert tasks": 6725, + "cross platform": 11868, + "complexity measures": 9682, + "state machine": 52702, + "rnn variants": 48205, + "data relying": 12599, + "relying parallel": 46309, + "task wide": 55469, + "motivated propose": 35872, + "process achieve": 42754, + "features latent": 20611, + "heavily relied": 23533, + "human input": 24171, + "propose suite": 43653, + "amortized variational": 2541, + "vae model": 61166, + "sharing decoder": 50514, + "optimization strategy": 38557, + "dataset recently": 13056, + "non contextualized": 37645, + "learning meta": 29725, + "modeling mlm": 34599, + "mlm pre": 33440, + "margin achieves": 31817, + "roberta model": 48226, + "sub graphs": 53520, + "framework inspired": 21546, + "ml model": 33430, + "generates highly": 22344, + "people read": 40035, + "use future": 59896, + "context perform": 10687, + "report significant": 46446, + "understanding benchmarks": 59326, + "outperforms fine": 38902, + "given specific": 22788, + "used incorporate": 60211, + "embedding context": 17021, + "representation graph": 46526, + "generation extensive": 22460, + "understanding research": 59395, + "effectiveness language": 16785, + "regressive language": 45826, + "models display": 34929, + "models reliably": 35428, + "real text": 45113, + "tasks detecting": 55586, + "specific sub": 52149, + "introduced bert": 26880, + "sentences identified": 49734, + "fluency coherence": 21125, + "tasked generating": 55481, + "metrics evaluate": 33161, + "task poses": 55278, + "survey different": 54205, + "trained lm": 57777, + "language fluency": 28074, + "tokens sequence": 57336, + "like knowledge": 30479, + "learning automatically": 29527, + "resolve ambiguity": 47200, + "suggesting new": 53838, + "data support": 12713, + "effort data": 16925, + "focused supervised": 21230, + "transfer experiments": 58363, + "approaches general": 3832, + "recent trends": 45364, + "pretraining fine": 42202, + "information specific": 26100, + "framework dynamically": 21498, + "using massive": 60791, + "evaluation setting": 18714, + "thorough empirical": 57056, + "domain bert": 16026, + "brought significant": 7369, + "legal medical": 30006, + "robustness models": 48287, + "examples evaluate": 18898, + "models increased": 35123, + "learning weak": 29939, + "present user": 42051, + "explore variety": 19752, + "learners english": 29496, + "data avoid": 12182, + "pretraining stage": 42216, + "obtained experiments": 38208, + "tasks typically": 55945, + "performed task": 40666, + "tuning performance": 58940, + "tuned roberta": 58884, + "ranks 1st": 44981, + "semeval 2020": 49435, + "2020 shared": 291, + "leverage pretrained": 30284, + "modeling datasets": 34568, + "incorporating word": 25395, + "models hard": 35078, + "flat structure": 21097, + "position encoding": 41266, + "event centric": 18779, + "major source": 31522, + "dataset 100": 12789, + "models settings": 35489, + "improves zero": 25168, + "performance 10": 40165, + "increasing demand": 25450, + "tune pretrained": 58863, + "improves downstream": 25126, + "qa performance": 44457, + "evaluations english": 18758, + "pretrained neural": 42175, + "conventional models": 11008, + "embedding proposed": 17055, + "art encoder": 4253, + "achieved comparable": 1224, + "model relationships": 34301, + "entity relationship": 18142, + "score 88": 48823, + "longer term": 31054, + "research develop": 47016, + "nlp existing": 37487, + "addition incorporate": 1621, + "information final": 25874, + "turkish language": 58985, + "framework exploits": 21517, + "unlabeled sentences": 59578, + "framework exploit": 21516, + "extensive ablation": 19852, + "bias work": 7049, + "datasets currently": 13205, + "specific biases": 52051, + "emergence large": 17265, + "unexplored work": 59442, + "scenarios results": 48708, + "results gpt": 47652, + "aligned human": 2357, + "policy network": 41100, + "methods main": 32935, + "work required": 62808, + "input passage": 26313, + "graph level": 23147, + "performance questions": 40517, + "performance code": 40239, + "make task": 31603, + "years increasing": 63062, + "potential nlp": 41402, + "gap exists": 21962, + "extreme multi": 20152, + "people rely": 40036, + "long sequence": 31025, + "dataset result": 13067, + "words attention": 62369, + "outperforms leading": 38906, + "absolute gains": 743, + "school students": 48741, + "linguistic expertise": 30767, + "covering wide": 11660, + "benchmark available": 6427, + "paradigm nlp": 39626, + "paper bridge": 39280, + "specifically utilize": 52233, + "baselines extensive": 6260, + "data making": 12480, + "english sub": 17884, + "domains biomedical": 16237, + "conventional sequence": 11012, + "low coverage": 31138, + "detection dataset": 14471, + "alleviates data": 2422, + "empirical analyses": 17318, + "comprehensive knowledge": 9793, + "simulate human": 51256, + "dataset scale": 13073, + "novel tasks": 37934, + "samples experimental": 48472, + "demonstrate challenges": 13880, + "large human": 28886, + "process address": 42756, + "natural utterances": 36469, + "practical framework": 41463, + "diverse applications": 15692, + "information kgs": 25935, + "model respect": 34316, + "processing automatic": 42855, + "texts introduce": 56893, + "web science": 61893, + "word ranking": 62275, + "verify approach": 61534, + "transfer finally": 58364, + "translation effective": 58604, + "english malayalam": 17842, + "languages translation": 28811, + "results survey": 47873, + "difficult define": 15163, + "proposed years": 43928, + "setting specifically": 50350, + "problem particularly": 42623, + "highly ambiguous": 23879, + "related natural": 45919, + "recently focus": 45426, + "medical entities": 32204, + "requiring human": 46962, + "advances artificial": 1906, + "score recent": 48869, + "task creating": 54985, + "creating datasets": 11740, + "datasets study": 13445, + "tasks challenging": 55536, + "imbalance issues": 24563, + "method augment": 32390, + "gpt generate": 22977, + "score points": 48865, + "strong base": 53001, + "expertise required": 19588, + "japanese language": 27147, + "second challenge": 49000, + "processing technique": 42955, + "apply real": 3350, + "aware knowledge": 5453, + "organized hierarchical": 38689, + "novel iterative": 37844, + "class names": 8409, + "candidate entity": 7571, + "entity based": 18097, + "based selected": 6009, + "task span": 55386, + "syntactic morphological": 54308, + "class text": 8412, + "task primarily": 55293, + "changing data": 8185, + "requiring access": 46959, + "original human": 38715, + "shot model": 50633, + "documents average": 15859, + "models choose": 34813, + "utilizing external": 61122, + "enhance representation": 17920, + "encoded pre": 17482, + "achieves great": 1330, + "great improvements": 23208, + "progress area": 43092, + "sota models": 51729, + "measure progress": 32059, + "based findings": 5732, + "create additional": 11691, + "method fine": 32509, + "approaches fine": 3827, + "performances recent": 40646, + "aim investigate": 2153, + "13 different": 123, + "present adversarial": 41841, + "manually built": 31766, + "techniques order": 56116, + "effective performance": 16684, + "performance enhancement": 40320, + "generating additional": 22363, + "aware data": 5447, + "predictions used": 41769, + "performance alleviate": 40191, + "ability pre": 631, + "better preserve": 6944, + "models structured": 35539, + "present self": 42003, + "masking scheme": 31873, + "knowledge learning": 27548, + "individual languages": 25571, + "preserving meaning": 42124, + "source content": 51756, + "surpasses existing": 54172, + "years work": 63083, + "motivated fact": 35865, + "novel idea": 37840, + "goal use": 22905, + "significant step": 50926, + "natural disasters": 36410, + "scale paper": 48609, + "task guided": 55111, + "guided pre": 23348, + "data tested": 12728, + "lack coverage": 27881, + "provide annotations": 44008, + "task adaptation": 54880, + "adapter modules": 1559, + "pretraining model": 42211, + "results representations": 47808, + "learning increasingly": 29680, + "recent explosion": 45310, + "viability approach": 61568, + "diversity language": 15737, + "model reliably": 34304, + "specifically augment": 52182, + "trained distinguish": 57711, + "irrelevant words": 27044, + "fact verification": 20292, + "allows fine": 2464, + "lower levels": 31216, + "accuracy fine": 979, + "instead fine": 26449, + "improvement model": 25008, + "poorly calibrated": 41149, + "training directly": 58067, + "performance 13": 40167, + "experiments strong": 19534, + "pretrained encoders": 42156, + "using integer": 60738, + "mbert xlm": 31980, + "settings pre": 50389, + "generation candidate": 22428, + "published result": 44371, + "multiple reference": 36272, + "distribution possible": 15648, + "core challenges": 11144, + "prohibitively large": 43129, + "space potential": 51882, + "questions use": 44814, + "learning optimize": 29790, + "specific questions": 52136, + "questions demonstrate": 44782, + "generated baseline": 22271, + "metrics humans": 33173, + "components work": 9729, + "work specifically": 62826, + "challenging natural": 8116, + "leverage text": 30292, + "experiments domains": 19423, + "required information": 46902, + "information critical": 25795, + "propose integrate": 43420, + "results adversarial": 47494, + "training effectively": 58078, + "generation automatic": 22423, + "probabilistic approach": 42454, + "candidates using": 7588, + "generated natural": 22301, + "score generated": 48848, + "experiments twitter": 19549, + "samples generated": 48476, + "factual accuracy": 20318, + "retrieval generation": 47945, + "learn spurious": 29428, + "position bias": 41262, + "thoroughly examine": 57070, + "self attentions": 49190, + "generalize distribution": 22139, + "allows explore": 2463, + "seek understand": 49051, + "aspects input": 4542, + "lead model": 29264, + "model choose": 33656, + "essential role": 18333, + "remains key": 46335, + "systems prior": 54599, + "model split": 34404, + "model computes": 33692, + "local models": 30946, + "given texts": 22795, + "nlu nlg": 37567, + "set additional": 50105, + "bert method": 6682, + "framework introduces": 21549, + "alignment input": 2371, + "pair set": 39159, + "knowledge various": 27646, + "structure able": 53088, + "able represent": 723, + "original graph": 38714, + "performance resource": 40536, + "rich domains": 48098, + "employ domain": 17378, + "given pre": 22771, + "domain source": 16164, + "interpretation method": 26734, + "method induces": 32543, + "source natural": 51786, + "way detect": 61799, + "conduct ablation": 10024, + "ablation tests": 662, + "pretrained nlp": 42177, + "converge faster": 11022, + "mt task": 35926, + "ranking candidates": 44969, + "languages expensive": 28662, + "ranking models": 44974, + "signals used": 50837, + "paper offer": 39432, + "large english": 28875, + "bert layers": 6671, + "pretrained parameters": 42179, + "benefits data": 6580, + "highlighting limitations": 23873, + "single data": 51292, + "individual model": 25573, + "control generated": 10963, + "baseline classification": 6160, + "high results": 23796, + "attribution methods": 4916, + "challenges introduce": 8055, + "makes approach": 31614, + "approach efficient": 3503, + "linguistics based": 30819, + "information bert": 25768, + "performance zero": 40634, + "shot accuracy": 50596, + "reproducible results": 46830, + "accuracy zero": 1072, + "different fine": 14935, + "shot results": 50637, + "semantic overlap": 49304, + "earth mover": 16520, + "identification classification": 24384, + "previous tasks": 42296, + "generating training": 22403, + "target models": 54833, + "systems high": 54519, + "modality learning": 33476, + "strong existing": 53029, + "data suggesting": 12708, + "trained corpora": 57698, + "multiple evaluation": 36212, + "language observed": 28361, + "training supervision": 58280, + "end sequence": 17707, + "falls short": 20378, + "significantly boosts": 50945, + "healthcare domain": 23522, + "english models": 17845, + "comparable human": 9297, + "models suggesting": 35560, + "entity alignment": 18094, + "improve entity": 24850, + "learning processes": 29822, + "metrics fail": 33165, + "metrics code": 33148, + "roberta large": 48225, + "match score": 31900, + "approaches ignore": 3841, + "datasets analyze": 13151, + "labels different": 27814, + "combine deep": 9064, + "document levels": 15810, + "scenarios including": 48698, + "silver labels": 51024, + "task predictions": 55285, + "input prompts": 26320, + "relative effectiveness": 46093, + "proves effective": 43999, + "cases language": 7807, + "pertinent information": 40787, + "italian spanish": 27112, + "direction building": 15270, + "multilingual pretrained": 36112, + "able transfer": 729, + "transfer unseen": 58429, + "commercial applications": 9154, + "effective zero": 16717, + "properties languages": 43264, + "text correct": 56517, + "demonstrate pre": 13960, + "performance make": 40431, + "datasets introduced": 13304, + "box model": 7290, + "quantitative experiments": 44619, + "model following": 33906, + "evaluations models": 18762, + "used investigate": 60219, + "investigate multi": 26969, + "half number": 23368, + "current end": 11973, + "models inherently": 35132, + "framework decomposes": 21487, + "bias text": 7044, + "collect novel": 8950, + "language terms": 28525, + "models attracted": 34738, + "context plays": 10688, + "shown large": 50724, + "improved training": 24968, + "intermediate task": 26681, + "intermediate tasks": 26682, + "inference reasoning": 25688, + "performance strongly": 40580, + "strongly correlated": 53069, + "methods benchmarks": 32769, + "benchmarks available": 6511, + "recognition question": 45528, + "additionally release": 1733, + "trained specifically": 57880, + "standard baselines": 52469, + "multilingual transformer": 36129, + "datasets existing": 13260, + "fail cover": 20332, + "simulate real": 51257, + "generalize test": 22148, + "multiple random": 36268, + "source entity": 51770, + "typically consider": 59137, + "prediction unseen": 41749, + "datasets requiring": 13403, + "corpus demonstrates": 11321, + "data linguistic": 12467, + "linguistically diverse": 30814, + "prior datasets": 42396, + "wise contrastive": 62080, + "performance plms": 40485, + "works pre": 62901, + "predicting masked": 41677, + "roberta perform": 48228, + "confident predictions": 10121, + "english knowledge": 17828, + "memory intensive": 32256, + "similar original": 51057, + "distillation based": 15568, + "evidence use": 18824, + "latest advances": 29154, + "datasets spanning": 13439, + "spanning diverse": 51953, + "simply fine": 51251, + "style paper": 53492, + "methods adversarial": 32743, + "existing tasks": 19155, + "interpretable structured": 26731, + "mechanisms use": 32154, + "embeddings play": 17189, + "datasets require": 13402, + "composing multiple": 9737, + "provide faithful": 44072, + "model behaviour": 33613, + "auxiliary supervision": 5239, + "propose recursive": 43598, + "use nearest": 59959, + "improve recall": 24915, + "contributions follows": 10953, + "bert training": 6728, + "analysis received": 2739, + "domain gap": 16076, + "text automatic": 56449, + "results generalize": 47645, + "demonstrate neural": 13950, + "step nlp": 52818, + "method demonstrate": 32453, + "traditional model": 57532, + "improve zero": 24940, + "consists stages": 10329, + "understand improve": 59298, + "smoothing method": 51539, + "analysis recent": 2740, + "1st place": 217, + "nature task": 36488, + "text makes": 56655, + "sourced data": 51822, + "data capture": 12197, + "given speech": 22789, + "english annotated": 17774, + "addressing task": 1826, + "performance suggesting": 40586, + "models come": 34829, + "information deep": 25802, + "models facilitate": 35013, + "thorough experiments": 57062, + "approach reaches": 3665, + "shows strong": 50807, + "methods evaluation": 32847, + "evaluation cross": 18601, + "usually performed": 61060, + "reference free": 45740, + "directly compare": 15310, + "free evaluation": 21639, + "art cross": 4242, + "embedding distance": 17026, + "useful downstream": 60361, + "improve situation": 24925, + "aspects including": 4541, + "consistent predictions": 10284, + "absolute performance": 748, + "performance perform": 40483, + "better non": 6922, + "large task": 29022, + "novel design": 37805, + "better make": 6915, + "test instances": 56351, + "datasets empirically": 13244, + "performance established": 40324, + "increase robustness": 25423, + "leverage deep": 30264, + "information types": 26134, + "encoded contextual": 17476, + "information tokens": 26127, + "specifically task": 52229, + "performance modern": 40442, + "require hand": 46858, + "features low": 20620, + "method soft": 32661, + "improvement points": 25015, + "problem extractive": 42564, + "documents answer": 15855, + "multi objective": 35993, + "best individual": 6767, + "bert baseline": 6626, + "baseline 10": 6146, + "scarcity available": 48662, + "special characteristics": 52016, + "scarcity data": 48663, + "data includes": 12424, + "approach dynamically": 3498, + "study online": 53424, + "crucial importance": 11901, + "target groups": 54818, + "platforms like": 40952, + "difficult researchers": 15185, + "dataset codes": 12844, + "majority vote": 31534, + "experiments study": 19535, + "problem shot": 42654, + "accuracy 81": 913, + "https aka": 24052, + "aka ms": 2225, + "understanding various": 59416, + "years previous": 63071, + "information fail": 25872, + "classified different": 8587, + "people public": 40034, + "work making": 62721, + "reported state": 46455, + "social political": 51600, + "described single": 14215, + "automated systems": 5060, + "memory module": 32271, + "relative previous": 46109, + "expensive training": 19223, + "model classifier": 33660, + "analyzed results": 2835, + "works study": 62909, + "bias results": 7042, + "representations contain": 46630, + "informative diverse": 26171, + "understanding based": 59323, + "comprehensive understanding": 9803, + "local coherence": 30930, + "video clips": 61582, + "improvements come": 25057, + "especially training": 18306, + "computed based": 9883, + "metrics reflect": 33196, + "provide diverse": 44055, + "corpus covers": 11312, + "predominant approach": 41783, + "learn effective": 29364, + "modality specific": 33478, + "predictions experiments": 41760, + "ranking algorithms": 44966, + "achieves relative": 1357, + "model gains": 33918, + "proposed mitigate": 43841, + "focus unsupervised": 21211, + "knowledge furthermore": 27487, + "achieves 70": 1290, + "accuracy predicting": 1028, + "create human": 11701, + "qa multi": 44453, + "medical experts": 32206, + "question conduct": 44723, + "sourced code": 51821, + "provide stronger": 44136, + "stronger baseline": 53061, + "code new": 8838, + "addressing specific": 1825, + "inputs training": 26368, + "diversity paper": 15738, + "mechanism capture": 32104, + "present structured": 42027, + "prediction algorithm": 41693, + "approaches successfully": 3929, + "purely unsupervised": 44398, + "models way": 35677, + "different segment": 15060, + "time generating": 57160, + "agent reinforcement": 2058, + "learning difficult": 29590, + "improvement models": 25009, + "domains evaluate": 16250, + "proposed systems": 43906, + "performance outperforming": 40469, + "handcrafted feature": 23399, + "engineering based": 17767, + "domain analysis": 16017, + "successfully capture": 53742, + "evaluation introduce": 18630, + "results larger": 47696, + "module generates": 35760, + "existing multilingual": 19112, + "negative effects": 36618, + "help pre": 23584, + "process pre": 42816, + "enhanced pre": 17936, + "representation multiple": 46560, + "strong pre": 53043, + "better adapt": 6845, + "effectiveness component": 16772, + "long answer": 31003, + "methods benchmark": 32767, + "ignoring dependencies": 24499, + "utilize graph": 61094, + "provided additional": 44158, + "extract integrate": 19979, + "features common": 20540, + "document datasets": 15782, + "non informative": 37657, + "document previous": 15820, + "context generating": 10646, + "memory paper": 32278, + "extremely noisy": 20165, + "systems sensitive": 54629, + "pairs web": 39231, + "corpus make": 11376, + "design stage": 14302, + "leveraging recent": 30338, + "perform new": 40125, + "dataset compared": 12850, + "develop automated": 14574, + "input sources": 26339, + "model challenging": 33651, + "parameters experiments": 39697, + "used update": 60345, + "differences observed": 14825, + "2017 dataset": 265, + "using intrinsic": 60741, + "build effective": 7395, + "work report": 62805, + "divided categories": 15746, + "based explicitly": 5720, + "corpora shows": 11242, + "transformer proposed": 58508, + "sentences combined": 49690, + "led state": 29994, + "framework addresses": 21452, + "method real": 32633, + "challenging given": 8096, + "network gat": 36746, + "2020 task": 293, + "approach bert": 3431, + "overcome issues": 39065, + "elements including": 16978, + "uses graph": 60512, + "architecture provides": 4081, + "domain focused": 16073, + "contextual vector": 10786, + "insufficient data": 26492, + "robust domain": 48245, + "contains wealth": 10508, + "connections words": 10186, + "analyze types": 2830, + "public figures": 44320, + "task extracts": 55073, + "crucial downstream": 11899, + "time analysis": 57115, + "yielded state": 63106, + "formulating task": 21391, + "bilstm networks": 7137, + "propose transformer": 43681, + "superiority model": 53952, + "encoder layers": 17522, + "training encoder": 58084, + "propose layer": 43436, + "systematic experiments": 54397, + "including low": 25270, + "constituent labels": 10355, + "used effectively": 60158, + "conditioned previously": 10016, + "variable number": 61225, + "monolingual bert": 35790, + "results challenging": 47531, + "challenging implement": 8102, + "quality prediction": 44563, + "classification limited": 8486, + "seen ones": 49060, + "models architectures": 34725, + "models performing": 35321, + "benchmark work": 6505, + "outperforms similar": 38942, + "task research": 55342, + "paper adopts": 39259, + "dataset demonstrating": 12889, + "classification multi": 8500, + "classification use": 8577, + "performance 95": 40174, + "classical chinese": 8422, + "addressing limitations": 1821, + "labels natural": 27840, + "enhance existing": 17912, + "automatic labeling": 5099, + "learn represent": 29413, + "minority class": 33332, + "generalization recent": 22129, + "news corpora": 37394, + "domain recent": 16143, + "potential make": 41399, + "results baseline": 47519, + "models demonstrating": 34897, + "languages shown": 28784, + "transfer high": 58366, + "languages covered": 28625, + "random word": 44894, + "accuracy absolute": 932, + "complex structure": 9664, + "approaches generating": 3835, + "approach experimental": 3525, + "provide human": 44087, + "data subset": 12705, + "corpus does": 11327, + "research low": 47068, + "common topic": 9206, + "topic discussion": 57401, + "embeddings related": 17202, + "effectively combine": 16728, + "multitask model": 36325, + "compare transformer": 9373, + "task generally": 55102, + "lack appropriate": 27875, + "effective combination": 16635, + "evaluation present": 18679, + "dataset larger": 12979, + "previous datasets": 42254, + "architecture bert": 4029, + "using roberta": 60912, + "level nlp": 30168, + "performance transformer": 40609, + "address key": 1774, + "role context": 48302, + "used current": 60134, + "current dialogue": 11971, + "tasks explicitly": 55632, + "76 f1": 507, + "study pre": 53434, + "strategy data": 52930, + "end transformer": 17722, + "augmentation generate": 4955, + "nlp including": 37490, + "corpora extracted": 11201, + "experiments applying": 19354, + "analysis better": 2623, + "follow standard": 21255, + "information key": 25934, + "extraction single": 20112, + "work general": 62674, + "contain various": 10476, + "designed natural": 14326, + "texts human": 56887, + "textual explanations": 56964, + "understudied problem": 59426, + "exhibits better": 19009, + "modeling specifically": 34624, + "comparison previous": 9503, + "16 teams": 165, + "directly indirectly": 15320, + "presents baseline": 42074, + "2020 challenge": 290, + "proposed transformer": 43918, + "textual form": 56966, + "distribution information": 15641, + "lms bert": 30917, + "resources models": 47318, + "objectives based": 38111, + "adapter based": 1557, + "outperform bert": 38784, + "sourced https": 51824, + "text allowing": 56429, + "analysis research": 2742, + "survey methods": 54209, + "analysis potential": 2719, + "solving nlp": 51703, + "time money": 57181, + "time using": 57236, + "experiments demonstrates": 19410, + "source library": 51782, + "new era": 37190, + "massive dataset": 31884, + "obtains higher": 38250, + "scores datasets": 48898, + "outperforming multilingual": 38854, + "publicly https": 44357, + "58 accuracy": 449, + "evaluation novel": 18664, + "transformer neural": 58505, + "rule induction": 48388, + "induction method": 25606, + "fact triples": 20291, + "form structured": 21337, + "continues grow": 10835, + "amounts labeled": 2551, + "problem multiple": 42612, + "reaching state": 45060, + "growing complexity": 23292, + "safety critical": 48429, + "code implementation": 8820, + "human social": 24240, + "substantial effort": 53618, + "gpt shown": 22988, + "contrastive self": 10922, + "important issues": 24738, + "intermediate training": 26683, + "data consistently": 12242, + "end pipeline": 17696, + "models starting": 35532, + "work reveals": 62811, + "include explicit": 25223, + "propose transition": 43683, + "approach works": 3742, + "video audio": 61578, + "increased performance": 25431, + "goal produce": 22896, + "ill suited": 24511, + "models reasoning": 35404, + "generation challenging": 22432, + "design simple": 14299, + "greatly facilitate": 23228, + "use https": 59909, + "various purposes": 61381, + "shown models": 50728, + "models possess": 35330, + "force model": 21286, + "little studied": 30885, + "dataset improvements": 12961, + "efficient models": 16887, + "cost performance": 11591, + "input generate": 26281, + "components jointly": 9718, + "context compare": 10598, + "dataset problem": 13036, + "tested different": 56395, + "words depending": 62395, + "context context": 10599, + "real conversations": 45100, + "detecting various": 14451, + "score average": 48834, + "results indicated": 47680, + "english addition": 17773, + "generated given": 22290, + "information significant": 26085, + "language ability": 27949, + "language presented": 28386, + "setting explore": 50323, + "directly optimizing": 15328, + "web sites": 61898, + "increasing data": 25449, + "data internet": 12439, + "representation capture": 46497, + "consists sub": 10331, + "sub network": 53524, + "bbc news": 6363, + "questions question": 44801, + "question corresponding": 44725, + "domains data": 16243, + "consuming work": 10455, + "models allows": 34709, + "created data": 11724, + "various transformer": 61411, + "learning unlabeled": 29924, + "neural unsupervised": 37110, + "embeddings provides": 17200, + "conversations social": 11063, + "mining research": 33321, + "largest human": 29096, + "context predict": 10689, + "words making": 62454, + "variety word": 61296, + "method focuses": 32512, + "highly depends": 23892, + "access source": 828, + "studies report": 53296, + "work utilize": 62857, + "model families": 33880, + "roberta gpt": 48222, + "number speakers": 38037, + "based solution": 6046, + "distinct datasets": 15589, + "roberta albert": 48214, + "deeper insights": 13758, + "dataset providing": 13046, + "average error": 5405, + "words specific": 62519, + "tuning improves": 58918, + "representation pre": 46567, + "bert proposed": 6708, + "alleviates problem": 2425, + "tasks specific": 55902, + "better pre": 6940, + "used submission": 60315, + "task graph": 55109, + "resources needed": 47321, + "models beneficial": 34768, + "propose modular": 43472, + "modular architecture": 35743, + "problem mainly": 42603, + "classification uses": 8579, + "documents experimental": 15877, + "levels results": 30246, + "approaches help": 3839, + "rapidly growing": 44995, + "advances pre": 1920, + "large external": 28878, + "recent unsupervised": 45365, + "cross model": 11867, + "typically focus": 59143, + "pseudo labels": 44278, + "labels generated": 27829, + "quality pseudo": 44568, + "corpus 10": 11263, + "events including": 18793, + "multimodal pre": 36154, + "training goal": 58116, + "english new": 17850, + "trained lexical": 57774, + "information corresponding": 25794, + "latent code": 29118, + "develop annotation": 14571, + "difficult cases": 15158, + "multiple strategies": 36292, + "tool understanding": 57367, + "geometric structure": 22656, + "computational benefits": 9835, + "improves bert": 25117, + "roberta models": 48227, + "efficiency model": 16846, + "half training": 23369, + "achieving improvements": 1414, + "surpass human": 54164, + "carefully design": 7761, + "design self": 14298, + "architectures large": 4113, + "transformers achieve": 58519, + "extensive research": 19910, + "comprehensive empirical": 9786, + "different facets": 14929, + "pretrained encoder": 42155, + "encoder training": 17545, + "paper argues": 39272, + "data generative": 12388, + "model sample": 34333, + "closed book": 8695, + "outperform methods": 38802, + "token sequences": 57308, + "labeling text": 27797, + "bert architecture": 6610, + "larger model": 29081, + "text address": 56425, + "universally applicable": 59551, + "connections different": 10185, + "implementation publicly": 24642, + "propose consider": 43330, + "generation question": 22535, + "generation abstractive": 22409, + "dataset verify": 13133, + "introduce transformer": 26873, + "encoder experimental": 17513, + "improving overall": 25188, + "performance lack": 40404, + "content propose": 10549, + "identify potential": 24436, + "sentences dataset": 49702, + "work measure": 62722, + "based virtual": 6130, + "alexa google": 2254, + "text downstream": 56545, + "data subsequently": 12704, + "resource situations": 47277, + "unseen domain": 59646, + "label semantics": 27724, + "labels experimental": 27821, + "setting pre": 50341, + "replaced token": 46405, + "token detection": 57285, + "task masked": 55207, + "understanding benchmark": 59325, + "bias present": 7038, + "useful future": 60365, + "framework utilises": 21623, + "aims increase": 2200, + "impact factors": 24595, + "bert evaluate": 6654, + "research interests": 47056, + "main research": 31457, + "diverse corpus": 15696, + "augmentation framework": 4954, + "generate multi": 22219, + "model align": 33558, + "work method": 62723, + "improved performances": 24961, + "reasoning graph": 45196, + "graph contains": 23118, + "contains new": 10502, + "question information": 44733, + "approaches especially": 3811, + "teach model": 55988, + "effectively perform": 16753, + "systems challenging": 54449, + "generated utterances": 22333, + "potential improvements": 41395, + "embedding clustering": 17020, + "core components": 11146, + "github https": 22715, + "metrics computed": 33153, + "unlabelled text": 59588, + "tuned specific": 58886, + "xlnet model": 63033, + "bert like": 6675, + "like models": 30486, + "english evaluate": 17801, + "ignore context": 24489, + "automatically selecting": 5201, + "relevant evidence": 46214, + "generate different": 22193, + "relevant question": 46231, + "superior accuracy": 53930, + "domains small": 16292, + "annotations obtained": 2995, + "certain extent": 7941, + "guarantee quality": 23320, + "current text": 12019, + "boundary information": 7284, + "information introduced": 25932, + "recognition datasets": 45499, + "aforementioned challenges": 2036, + "specifically make": 52215, + "task shot": 55371, + "categories propose": 7848, + "model shot": 34371, + "similarity reference": 51115, + "sentences real": 49776, + "classifier used": 8608, + "process introduce": 42796, + "extraction essential": 20061, + "domain address": 16013, + "address limitation": 1776, + "domains performance": 16283, + "average compared": 5404, + "kappa score": 27268, + "issue study": 27080, + "prediction extensive": 41708, + "11 accuracy": 84, + "key ingredients": 27319, + "processing especially": 42869, + "translation code": 58588, + "improvements code": 25056, + "structure new": 53123, + "supervised end": 53982, + "employ graph": 17382, + "different online": 15013, + "key factors": 27310, + "corpus comprises": 11300, + "answers based": 3106, + "types machine": 59100, + "performance corpus": 40268, + "score 84": 48819, + "set corpus": 50131, + "positive training": 41298, + "model memory": 34098, + "requires minimal": 46942, + "spanish data": 51940, + "probabilistic inference": 42462, + "model leveraging": 34059, + "state model": 52704, + "language considered": 28002, + "method termed": 32681, + "outperforms recently": 38939, + "sets recent": 50304, + "experiments work": 19565, + "new adversarial": 37124, + "corpora small": 11243, + "imbalanced datasets": 24568, + "challenge training": 8021, + "datasets application": 13153, + "content non": 10542, + "graph used": 23177, + "granularity levels": 23092, + "documents address": 15853, + "approach considers": 3464, + "maximum accuracy": 31967, + "propose clustering": 43319, + "shot settings": 50643, + "neural modeling": 36972, + "scarcity issue": 48667, + "information respectively": 26057, + "limited target": 30622, + "fewer training": 20741, + "complexity using": 9692, + "achieves 95": 1298, + "step various": 52836, + "human centric": 24121, + "words significantly": 62513, + "relationships paper": 46081, + "representations transformer": 46775, + "training steps": 58273, + "mixing phenomenon": 33416, + "utterance text": 61142, + "text sentiment": 56763, + "transport ot": 58723, + "representing entities": 46811, + "incorporated existing": 25368, + "baselines wide": 6320, + "propose pipeline": 43582, + "pipeline approach": 40891, + "unsupervised multi": 59714, + "texts languages": 56898, + "summarization information": 53886, + "strong zero": 53058, + "additional task": 1702, + "discriminative generative": 15444, + "classification generation": 8476, + "bert albert": 6608, + "com salesforce": 9023, + "expensive acquire": 19202, + "approaches reduce": 3910, + "pseudo label": 44274, + "augment labeled": 4942, + "underlying neural": 59273, + "network leveraging": 36760, + "20 30": 220, + "samples class": 48466, + "accuracy 91": 923, + "applied learn": 3279, + "use bi": 59835, + "method study": 32670, + "person pronouns": 40752, + "recently advanced": 45405, + "advanced nlp": 1892, + "seen surge": 49063, + "propose textit": 43669, + "minimal changes": 33285, + "parameters using": 39728, + "far superior": 20406, + "corresponding question": 11556, + "learning stages": 29893, + "stage fine": 52429, + "effectiveness superiority": 16813, + "input transformer": 26353, + "factual correctness": 20320, + "improving downstream": 25177, + "related training": 45949, + "use automated": 59828, + "models core": 34867, + "performance knowledge": 40402, + "document better": 15769, + "table text": 54689, + "employ encoder": 17379, + "approach terms": 3719, + "transfer domains": 58360, + "standard domain": 52486, + "language setting": 28482, + "domain recently": 16144, + "recently led": 45437, + "bert demonstrate": 6639, + "focuses building": 21235, + "model seen": 34342, + "approach flexible": 3540, + "19 test": 191, + "tools resources": 57384, + "parameter reduction": 39675, + "analysis recently": 2741, + "embeddings combining": 17095, + "retrieval accuracy": 47938, + "performing competitively": 40674, + "en zh": 17418, + "medical diagnosis": 32201, + "gap introduce": 21965, + "different sampling": 15056, + "tasks verify": 55961, + "learning translate": 29922, + "task uses": 55462, + "closing gap": 8717, + "accuracy end": 967, + "related covid": 45893, + "literature work": 30865, + "modeling generate": 34578, + "train generative": 57593, + "transformer gpt": 58489, + "text generator": 56608, + "news wikipedia": 37427, + "included training": 25228, + "dataset pre": 13031, + "language experimental": 28059, + "push limits": 44425, + "effective deep": 16643, + "final set": 20831, + "functions including": 21772, + "information decoding": 25801, + "motivate research": 35862, + "approaches furthermore": 3831, + "content creators": 10516, + "malayalam english": 31678, + "neighbors knn": 36666, + "similar level": 51051, + "bert distilbert": 6643, + "performing bert": 40671, + "traditional classification": 57513, + "accuracy 63": 900, + "oversampling technique": 39104, + "proposed pipeline": 43878, + "defined based": 13782, + "shapley values": 50453, + "networks experimental": 36851, + "art t5": 4421, + "given product": 22773, + "impact user": 24606, + "methods methods": 32945, + "application text": 3182, + "presents study": 42107, + "corpus spanish": 11433, + "purpose model": 44405, + "work possible": 62750, + "process dataset": 42768, + "terms input": 56296, + "leverage multiple": 30280, + "tasks boost": 55530, + "model main": 34081, + "bi level": 7010, + "problem semantic": 42646, + "lstm bert": 31249, + "study systematically": 53465, + "parsers used": 39766, + "make recommendations": 31592, + "despite huge": 14367, + "propose textsc": 43670, + "order construct": 38603, + "t5 model": 54682, + "model demonstrating": 33747, + "benchmarks using": 6549, + "method helps": 32523, + "alleviate drawbacks": 2406, + "approach bring": 3434, + "model linear": 34066, + "using metadata": 60800, + "end consider": 17622, + "text compare": 56499, + "retrieval results": 47968, + "text able": 56420, + "shown pre": 50737, + "results key": 47686, + "tasks significantly": 55889, + "performance challenging": 40231, + "challenging examples": 8094, + "distribution performance": 15647, + "importance data": 24679, + "strategies proposed": 52915, + "view paper": 61600, + "paper analyses": 39264, + "words semantics": 62506, + "speech named": 52271, + "attack method": 4659, + "generation requires": 22539, + "model global": 33941, + "using qualitative": 60887, + "qualitative study": 44482, + "study multiple": 53415, + "content shared": 10556, + "unified view": 59482, + "2010 2020": 249, + "facebook pages": 20245, + "estimation task": 18387, + "evidence retrieval": 18818, + "teams registered": 56011, + "registered participate": 45809, + "evaluation scripts": 18711, + "enable research": 17428, + "furthermore design": 21814, + "models public": 35385, + "embeddings contrast": 17104, + "capacity models": 7638, + "especially shot": 18299, + "datasets applying": 13154, + "standard multilingual": 52510, + "assess effectiveness": 4577, + "representations essential": 46655, + "representation final": 46518, + "approach increases": 3571, + "15 different": 147, + "available date": 5280, + "describes proposed": 14231, + "results transformer": 47891, + "effective task": 16701, + "vision cv": 61635, + "input performs": 26314, + "sentences common": 49691, + "subtasks paper": 53670, + "proposes model": 43934, + "various fine": 61344, + "tuned transformer": 58891, + "tree information": 58745, + "utilizing local": 61125, + "specifically present": 52221, + "models improving": 35116, + "information allows": 25759, + "terms words": 56324, + "leverages pretrained": 30311, + "leverage different": 30265, + "leverage context": 30259, + "dependent context": 14146, + "prediction finally": 41709, + "outperforms neural": 38914, + "study highlights": 53384, + "classification setting": 8548, + "smart devices": 51529, + "voice assistants": 61723, + "order accelerate": 38586, + "information self": 26075, + "entities identified": 18055, + "architectures trained": 4126, + "automatic transcripts": 5132, + "cascaded approach": 7783, + "models poorly": 35328, + "user text": 60452, + "answer natural": 3039, + "relation information": 45985, + "contains main": 10498, + "emphasize importance": 17311, + "utilizing pre": 61127, + "models arabic": 34723, + "platform people": 40950, + "varies significantly": 61258, + "categories linguistic": 7846, + "analyze sentiment": 2828, + "dataset according": 12795, + "challenge focused": 7982, + "match f1": 31896, + "preserving input": 42123, + "realistic settings": 45153, + "development systems": 14704, + "19 related": 190, + "successfully employed": 53744, + "time labor": 57170, + "metrics best": 33142, + "current metrics": 11986, + "corpora make": 11219, + "network performance": 36782, + "better low": 6912, + "personal preferences": 40758, + "limited scale": 30611, + "propose sentiment": 43623, + "end multi": 17687, + "powerful text": 41447, + "benefits large": 6583, + "quantities unlabeled": 44637, + "models extended": 35003, + "improves average": 25114, + "dataset grounded": 12945, + "evaluation period": 18671, + "baseline code": 6162, + "leaderboard https": 29284, + "processing module": 42893, + "structure proposed": 53131, + "better performing": 6939, + "generation scheme": 22542, + "languages recently": 28766, + "given current": 22732, + "given particular": 22768, + "analysis specific": 2763, + "10 20": 31, + "20 training": 232, + "significant time": 50928, + "presents unique": 42109, + "evidence existing": 18809, + "advancements deep": 1897, + "generation research": 22540, + "language improve": 28102, + "furthermore provide": 21837, + "models comparing": 34837, + "systems discuss": 54482, + "discuss promising": 15480, + "available researchers": 5362, + "subject areas": 53551, + "development domain": 14677, + "learning environment": 29625, + "task computational": 54964, + "domain benchmark": 16024, + "shown deep": 50701, + "achieving goal": 1405, + "release test": 46169, + "tasks critical": 55564, + "models unseen": 35648, + "generated candidates": 22274, + "poor quality": 41142, + "information fully": 25885, + "rely handcrafted": 46285, + "candidate ranking": 7576, + "proposed effectively": 43760, + "approach presents": 3650, + "better ranking": 6949, + "compared neural": 9425, + "patterns observed": 39972, + "recent study": 45354, + "models critical": 34873, + "focus short": 21199, + "design considerations": 14268, + "task attracted": 54920, + "31 teams": 365, + "task indicates": 55134, + "choice pre": 8333, + "useful feature": 60363, + "classification existing": 8466, + "grammatical semantic": 23077, + "20 times": 231, + "constituency parser": 10348, + "improve parsing": 24884, + "performance settings": 40556, + "sentences second": 49780, + "samples large": 48480, + "selection mechanism": 49143, + "par performance": 39616, + "underlying task": 59279, + "performed comparably": 40661, + "extremely costly": 20155, + "focus enhancing": 21159, + "context provide": 10698, + "mechanism named": 32130, + "named multi": 36376, + "randomly mask": 44900, + "learning makes": 29718, + "text semantics": 56759, + "decades research": 13542, + "method suitable": 32675, + "knowledge previous": 27577, + "models accurate": 34664, + "query text": 44678, + "online test": 38389, + "nature social": 36486, + "data led": 12461, + "proposed applied": 43719, + "shows higher": 50782, + "labeling based": 27779, + "learning widely": 29941, + "available line": 5322, + "detect errors": 14439, + "extraction texts": 20123, + "semantics syntactic": 49415, + "parsing important": 39782, + "step automated": 52799, + "average gain": 5409, + "user specified": 60449, + "fills gap": 20805, + "coherence generated": 8908, + "particular given": 39847, + "incorporating new": 25390, + "used everyday": 60174, + "aim explore": 2146, + "different events": 14923, + "learning additionally": 29504, + "framework new": 21572, + "seamlessly integrate": 48960, + "gpt based": 22972, + "strategy introduce": 52938, + "robustness evaluation": 48278, + "representation bert": 46495, + "largely attributed": 29051, + "attributed ability": 4904, + "contained sentence": 10479, + "does significantly": 15979, + "encoder generate": 17517, + "models t5": 35576, + "text abstract": 56421, + "typically evaluated": 59141, + "metrics compare": 33150, + "compare generated": 9342, + "aim alleviate": 2135, + "issues proposing": 27102, + "sota language": 51726, + "involving large": 27026, + "despite extensive": 14360, + "models adapted": 34690, + "space experiments": 51862, + "lack fine": 27889, + "retrieved training": 47987, + "data hand": 12398, + "copy words": 11136, + "t5 models": 54683, + "quality knowledge": 44538, + "reasoning existing": 45193, + "features extract": 20581, + "information construct": 25789, + "domains labeled": 16265, + "test efficacy": 56346, + "alignment techniques": 2384, + "domain adaptive": 16010, + "adaptive fine": 1574, + "scores 70": 48887, + "data tackle": 12718, + "problem building": 42514, + "news sentences": 37413, + "graph networks": 23151, + "evidence lower": 18812, + "method sentiment": 32648, + "domains method": 16273, + "obtains comparable": 38243, + "stateof art": 52718, + "approaches report": 3913, + "exploit potential": 19660, + "datasets addition": 13145, + "posts news": 41371, + "underlying structure": 59278, + "paper analyzed": 39266, + "different deep": 14893, + "mainly lack": 31475, + "propose chinese": 43317, + "lack suitable": 27916, + "knowledge instead": 27528, + "need understand": 36598, + "specific meanings": 52109, + "capability proposed": 7613, + "sa license": 48418, + "understanding fundamental": 59346, + "problem information": 42583, + "understanding users": 59414, + "novel query": 37903, + "tasks leverage": 55720, + "novel contrastive": 37790, + "study sentiment": 53457, + "combination approaches": 9032, + "adaptation specific": 1538, + "results cases": 47529, + "encoder use": 17547, + "finetuning data": 21049, + "improve factual": 24853, + "assessed using": 4584, + "word set": 62308, + "model interpretation": 34016, + "remarkable performances": 46359, + "conducted benchmark": 10074, + "better bert": 6854, + "great demand": 23203, + "tasks unified": 55949, + "overall framework": 39043, + "bert variants": 6733, + "outperforms multilingual": 38912, + "available code": 5270, + "tuned downstream": 58873, + "pro pose": 42450, + "coronavirus disease": 11168, + "labeling costs": 27781, + "instances specifically": 26436, + "superiority effectiveness": 53950, + "adverse effects": 1999, + "common errors": 9173, + "hierarchical transformer": 23696, + "different transformer": 15105, + "masked entity": 31861, + "datasets additionally": 13147, + "datasets deep": 13209, + "expensive produce": 19216, + "settings various": 50403, + "classical approaches": 8421, + "efficient multi": 16888, + "efficient propose": 16893, + "improve pre": 24905, + "describes developed": 14222, + "obtained f1": 38209, + "samples data": 48468, + "similar given": 51044, + "addresses gap": 1810, + "unsupervised algorithms": 59680, + "potential reasons": 41404, + "tuning work": 58974, + "demonstrate human": 13919, + "art computational": 4240, + "recognition research": 45532, + "span detection": 51922, + "spans training": 51959, + "demonstrate evaluation": 13910, + "extraction propose": 20098, + "problem speech": 42666, + "account semantic": 880, + "accuracy metric": 1006, + "models ensure": 34971, + "language existing": 28058, + "stage pipeline": 52437, + "stage paper": 52436, + "gains previous": 21941, + "text author": 56447, + "10 years": 55, + "english native": 17848, + "approaches given": 3836, + "gpt gpt": 22980, + "scores significantly": 48921, + "submission ranked": 53573, + "bi grams": 7008, + "text various": 56838, + "employ label": 17384, + "layers different": 29222, + "people understand": 40040, + "span identification": 51924, + "ways expressing": 61841, + "team semeval": 56004, + "analysis specifically": 2764, + "performance subtasks": 40583, + "introduce adaptive": 26776, + "model adaptively": 33535, + "tasks review": 55866, + "shared publicly": 50484, + "tuning techniques": 58969, + "teams total": 56013, + "benchmark including": 6472, + "including translation": 25314, + "use benchmark": 59833, + "benchmark compare": 6434, + "driven systems": 16433, + "team achieved": 56002, + "particular type": 39868, + "causal inference": 7873, + "challenges particular": 8067, + "work try": 62849, + "main topics": 31465, + "similarity embeddings": 51093, + "group similar": 23274, + "context covid": 10601, + "results superiority": 47869, + "approach baselines": 3428, + "transformers pre": 58528, + "strategy allows": 52927, + "trained seq2seq": 57865, + "influence different": 25724, + "score accuracy": 48832, + "framework extracts": 21522, + "information produced": 26026, + "using examples": 60683, + "stepping stone": 52839, + "response given": 47395, + "showed significant": 50673, + "tasks solved": 55898, + "additional annotation": 1653, + "setting performing": 50340, + "demonstrate low": 13932, + "low computational": 31133, + "end document": 17631, + "using classification": 60605, + "problem improve": 42580, + "based notion": 5918, + "datasets investigate": 13305, + "classification labels": 8483, + "tasks framework": 55649, + "benchmark performances": 6487, + "including limited": 25268, + "limited text": 30625, + "focus real": 21192, + "model dependent": 33750, + "making model": 31660, + "noise model": 37601, + "model variant": 34522, + "tuning explore": 58913, + "jointly predict": 27217, + "dataset obtains": 13017, + "english labeled": 17830, + "speech using": 52316, + "higher baseline": 23814, + "standard natural": 52511, + "remains poorly": 46345, + "result suggests": 47453, + "improve coherence": 24832, + "coherence consistency": 8906, + "train gpt": 57594, + "conditioned given": 10015, + "layer pre": 29202, + "realistic text": 45155, + "large lms": 28900, + "class conditional": 8399, + "particular multi": 39855, + "utterance speaker": 61141, + "models prlms": 35358, + "information utterance": 26151, + "doi org": 15988, + "org 10": 38677, + "published articles": 44367, + "contains large": 10497, + "online offline": 38377, + "easier use": 16530, + "identify limitations": 24427, + "ended text": 17739, + "scaling model": 48650, + "training furthermore": 58111, + "training introduce": 58137, + "non pretrained": 37677, + "different entities": 14917, + "non entity": 37651, + "attribute value": 4901, + "modeling relationship": 34619, + "challenges nlp": 8064, + "field training": 20772, + "performance explore": 40338, + "experiments compare": 19378, + "ended language": 17737, + "par existing": 39614, + "2019 evaluation": 283, + "leads improvement": 29317, + "data increasing": 12427, + "perform supervised": 40148, + "rich knowledge": 48105, + "principled method": 42388, + "execution accuracy": 18986, + "augmentation training": 4971, + "aware transformer": 5476, + "end tasks": 17714, + "exploit syntactic": 19666, + "architecture fine": 4049, + "used adapt": 60080, + "unsupervised graph": 59700, + "space order": 51878, + "multiple benchmark": 36172, + "respective tasks": 47356, + "problems caused": 42697, + "primary challenges": 42369, + "ambiguity natural": 2526, + "language lack": 28128, + "data overcome": 12527, + "leverages semantic": 30313, + "paper derive": 39318, + "meaning context": 32000, + "contextual semantic": 10780, + "using strategy": 60965, + "training schemes": 58239, + "supervised contrastive": 53971, + "dataset annotation": 12810, + "original input": 38716, + "positive sample": 41295, + "learns attend": 29953, + "standard text": 52534, + "new manually": 37245, + "argument extraction": 4171, + "used bidirectional": 60108, + "bert experimental": 6655, + "multilingual systems": 36124, + "bert text": 6726, + "sentence contains": 49533, + "labeling approaches": 27778, + "various embedding": 61335, + "present bert": 41857, + "robustness generalization": 48281, + "ability bert": 596, + "segmentation cws": 49081, + "bert provide": 6709, + "convenient use": 10996, + "terms multiple": 56301, + "effort devoted": 16926, + "traditional state": 57545, + "including different": 25250, + "improving current": 25175, + "useful real": 60382, + "attention new": 4800, + "new long": 37240, + "model validate": 34521, + "far satisfactory": 20405, + "way large": 61815, + "network gnn": 36750, + "relations important": 46035, + "develop strong": 14614, + "graph learning": 23146, + "task stage": 55408, + "joint framework": 27171, + "based manually": 5832, + "joint approach": 27165, + "works treat": 62912, + "classification neural": 8509, + "models ignoring": 35100, + "common features": 9175, + "features tasks": 20681, + "form used": 21340, + "contextualized models": 10807, + "experiments classification": 19373, + "resources publicly": 47329, + "score previous": 48867, + "pairs extensive": 39190, + "way future": 61804, + "com csebuetnlp": 9007, + "gold label": 22913, + "experiments 11": 19343, + "language treebank": 28541, + "size parameter": 51393, + "module employed": 35756, + "tagging results": 54750, + "methods explain": 32853, + "mtl framework": 35931, + "input single": 26336, + "used make": 60232, + "make final": 31571, + "learning difficulty": 29591, + "additional large": 1682, + "based t5": 6079, + "transformation method": 58444, + "research needed": 47079, + "data potential": 12549, + "provide deeper": 44046, + "process identify": 42790, + "develop end": 14585, + "end knowledge": 17679, + "sharing mechanism": 50517, + "bert extract": 6658, + "examples data": 18894, + "common method": 9185, + "ood data": 38402, + "softmax loss": 51633, + "media comments": 32161, + "encoders bert": 17553, + "roberta language": 48223, + "level encoding": 30109, + "document information": 15800, + "context relation": 10704, + "document entity": 15791, + "performance public": 40512, + "datasets document": 13236, + "having multiple": 23490, + "compact representation": 9280, + "sequence fine": 49922, + "model supervised": 34429, + "datasets bert": 13167, + "types work": 59129, + "experiments seven": 19521, + "seven benchmark": 50415, + "model ablation": 33486, + "search method": 48975, + "understanding state": 59404, + "certain cases": 7936, + "analyze factors": 2816, + "classification finally": 8471, + "largely based": 29052, + "propose token": 43674, + "increasing focus": 25451, + "recent bert": 45297, + "metrics models": 33182, + "assign high": 4599, + "context data": 10605, + "metrics introduce": 33175, + "metrics perform": 33187, + "accuracy performance": 1022, + "slu tasks": 51459, + "contextualised embeddings": 10792, + "models emerged": 34952, + "knowledge generative": 27493, + "propose generation": 43400, + "tune bert": 58854, + "task select": 55350, + "algorithms provide": 2337, + "generating realistic": 22390, + "contain semantic": 10472, + "use learn": 59930, + "inputs paper": 26366, + "learn structural": 29429, + "techniques improved": 56098, + "specifically firstly": 52203, + "vae based": 61165, + "representations downstream": 46644, + "generated contents": 22278, + "10 training": 53, + "building evaluating": 7444, + "issue proposing": 27079, + "entities second": 18080, + "transfer information": 58368, + "directly map": 15323, + "efficient end": 16870, + "methods higher": 32888, + "fail distinguish": 20334, + "layers models": 29228, + "role various": 48325, + "novel adversarial": 37750, + "processing currently": 42864, + "studies field": 53265, + "aims classify": 2181, + "bert applied": 6609, + "propose ensemble": 43373, + "datasets transformer": 13462, + "capacity large": 7636, + "comprehensively evaluate": 9805, + "settings neural": 50385, + "study models": 53413, + "years task": 63079, + "attention previous": 4811, + "task instead": 55142, + "samples drawn": 48471, + "generative framework": 22590, + "knowledge tasks": 27627, + "framework general": 21526, + "resource high": 47228, + "generates large": 22346, + "generation important": 22474, + "human writing": 24258, + "semantic transfer": 49367, + "baselines pre": 6286, + "abstractive summaries": 771, + "content generated": 10525, + "humans able": 24271, + "transformers achieved": 58520, + "addition evaluating": 1613, + "attempt capture": 4683, + "motivated findings": 35867, + "module developed": 35755, + "generated pre": 22308, + "stronger generalization": 53063, + "principled manner": 42387, + "proposed strategies": 43903, + "generation problems": 22527, + "par better": 39612, + "scores based": 48891, + "tightly coupled": 57110, + "popular entities": 41164, + "dataset previous": 13035, + "quadratic complexity": 44464, + "efforts devoted": 16937, + "heterogeneous knowledge": 23623, + "indicating effectiveness": 25542, + "performance providing": 40511, + "output embeddings": 38970, + "using structured": 60967, + "ability answer": 593, + "better deep": 6876, + "provide guidance": 44083, + "uzbek language": 61156, + "cost work": 11597, + "lack adequate": 27871, + "strong domain": 53026, + "cc nc": 7897, + "based distant": 5684, + "end collect": 17619, + "paper tried": 39598, + "focused classifying": 21217, + "behavior propose": 6396, + "embeddings apply": 17082, + "sets words": 50313, + "enable users": 17431, + "addresses problems": 1815, + "achieved macro": 1248, + "works explore": 62887, + "mlm objective": 33439, + "preliminary evidence": 41802, + "holds potential": 23986, + "evaluation methodologies": 18640, + "classification aim": 8428, + "metric evaluate": 33114, + "art metrics": 4291, + "collected chinese": 8955, + "require understanding": 46895, + "accuracy 53": 898, + "attribution method": 4915, + "shows existing": 50777, + "demonstrated great": 14008, + "datasets conducted": 13188, + "discuss performance": 15477, + "texts traditional": 56936, + "method empirically": 32478, + "parsing dp": 39778, + "detailed evaluation": 14424, + "tasks provides": 55827, + "knowledge critical": 27429, + "research address": 46978, + "search approach": 48964, + "direct access": 15251, + "examples test": 18937, + "highly expressive": 23899, + "training domains": 58074, + "survey presents": 54212, + "explainable ai": 19598, + "nlp model": 37500, + "development maintenance": 14685, + "bart model": 5534, + "time location": 57174, + "temporal spatial": 56193, + "capturing meaning": 7741, + "generic data": 22627, + "domain order": 16126, + "domain tasks": 16202, + "known methods": 27661, + "pretrained contextualized": 42149, + "contextualized text": 10810, + "randomly masked": 44901, + "large entity": 28876, + "wikipedia propose": 62053, + "propose entity": 43374, + "empirical performance": 17336, + "resource efficient": 47225, + "learning analyze": 29516, + "text method": 56658, + "outperforms roberta": 38940, + "approaches evaluating": 3814, + "languages zero": 28827, + "modeling natural": 34603, + "levels propose": 30245, + "order effectively": 38611, + "field recent": 20767, + "paired image": 39163, + "graph encoding": 23133, + "sentences target": 49791, + "high success": 23804, + "graph structural": 23168, + "aims predict": 2206, + "use given": 59900, + "related sentiment": 45935, + "construct heterogeneous": 10386, + "kg information": 27360, + "able enhance": 692, + "datasets demonstrated": 13219, + "recent model": 45320, + "short phrases": 50562, + "sentence multi": 49603, + "types fine": 59089, + "learning effectiveness": 29610, + "similar sentence": 51065, + "relevance generated": 46191, + "training making": 58168, + "robust accurate": 48237, + "modern language": 35707, + "trained mixed": 57790, + "mixed dataset": 33402, + "repository https": 46464, + "creating multiple": 11743, + "comprehension given": 9765, + "given article": 22723, + "sentence generate": 49564, + "questions evaluate": 44786, + "suffer long": 53773, + "input source": 26338, + "outperforms sota": 38944, + "english benchmark": 17777, + "reasoning network": 45211, + "understanding document": 59339, + "final decision": 20819, + "generation code": 22435, + "process knowledge": 42798, + "extracted different": 20008, + "label assigned": 27689, + "sentence introduce": 49573, + "introduce study": 26865, + "corpus input": 11362, + "reducing need": 45711, + "tuned different": 58871, + "varying number": 61434, + "finetuning bert": 21048, + "algorithm provides": 2296, + "aims bring": 2177, + "motivation propose": 35884, + "based transformers": 6111, + "dravidian languages": 16398, + "leader board": 29282, + "trained augmented": 57676, + "feature attribution": 20474, + "approach employed": 3505, + "phases phase": 40808, + "second phase": 49015, + "final submission": 20832, + "achieved micro": 1250, + "structures different": 53183, + "embedding dimensions": 17025, + "requires human": 46933, + "showing high": 50679, + "words small": 62516, + "learning materials": 29722, + "sentences help": 49730, + "students learning": 53218, + "parametric non": 39732, + "array tasks": 4202, + "limits application": 30640, + "training transfer": 58304, + "tasks generate": 55654, + "examples achieve": 18886, + "faces problem": 20254, + "problem sparse": 42663, + "slow convergence": 51449, + "sparse reward": 51971, + "leveraging human": 30326, + "annotations propose": 2997, + "scores code": 48895, + "training make": 58166, + "challenge end": 7979, + "represent entire": 46471, + "entities related": 18077, + "previous text": 42297, + "approaches achieving": 3754, + "input examples": 26275, + "training evaluate": 58089, + "models resulting": 35455, + "corpora makes": 11220, + "constraint based": 10370, + "various combinations": 61315, + "models contain": 34858, + "field work": 20774, + "work look": 62713, + "models remains": 35434, + "remains unexplored": 46354, + "use metric": 59948, + "broad study": 7356, + "hampered lack": 23379, + "standard classification": 52475, + "ability reason": 639, + "improvements average": 25049, + "rarely studied": 45007, + "tokens different": 57325, + "results https": 47661, + "ability identify": 612, + "net model": 36689, + "estimation model": 18385, + "performance gpt": 40369, + "creating dataset": 11739, + "gpt generated": 22978, + "generated gpt": 22291, + "processing works": 42969, + "explicit alignment": 19611, + "alignment objective": 2378, + "dataset single": 13088, + "mean standard": 31997, + "contribute model": 10931, + "examples used": 18940, + "consistently significantly": 10310, + "analyses reveal": 2604, + "documents long": 15894, + "need manually": 36582, + "supervision target": 54096, + "using minimal": 60804, + "given limited": 22759, + "variety training": 61295, + "datasets pre": 13372, + "building model": 7454, + "predict performance": 41651, + "insights model": 26391, + "related sentences": 45934, + "end setting": 17708, + "weighted graph": 61930, + "score 63": 48799, + "decoder predict": 13612, + "predict output": 41650, + "based decoders": 5669, + "methods ignore": 32893, + "ignore fact": 24490, + "suboptimal performance": 53595, + "semantic signals": 49344, + "embeddings previous": 17194, + "models empirically": 34954, + "integral natural": 26502, + "recently different": 45420, + "improvement zero": 25040, + "work code": 62597, + "com swarnahub": 9024, + "current sota": 12009, + "techniques widely": 56151, + "biases model": 7056, + "clustering experiments": 8740, + "sentences perform": 49767, + "supervised losses": 54012, + "changes training": 8182, + "learning combine": 29561, + "extraction use": 20128, + "chinese arabic": 8297, + "robust representations": 48264, + "lexical cues": 30360, + "bert shows": 6718, + "context includes": 10656, + "provided context": 44159, + "shows bert": 50763, + "challenges data": 8037, + "sets evaluation": 50292, + "set topics": 50267, + "cost efficient": 11581, + "task introducing": 55145, + "challenging benchmarks": 8084, + "dialogues existing": 14798, + "approaches current": 3791, + "limited model": 30599, + "sota approaches": 51724, + "benchmarks furthermore": 6525, + "multilingual scenarios": 36116, + "like mbert": 30484, + "languages easily": 28649, + "scenarios work": 48712, + "baselines supervised": 6306, + "despite rapid": 14380, + "multiple candidate": 36176, + "14 absolute": 136, + "strong retrieval": 53047, + "challenge future": 7983, + "maintain performance": 31483, + "based zero": 6143, + "critical importance": 11782, + "complex nature": 9640, + "language growing": 28095, + "challenging evaluation": 8093, + "knowledge task": 27626, + "settings compared": 50361, + "learning despite": 29588, + "approach including": 3568, + "including multilingual": 25276, + "employ pre": 17388, + "using majority": 60785, + "high agreement": 23708, + "labels models": 27839, + "random guess": 44880, + "low levels": 31158, + "future data": 21865, + "features play": 20642, + "play essential": 40969, + "case based": 7788, + "method code": 32415, + "method motivated": 32580, + "build robust": 7424, + "evaluation protocols": 18687, + "multi reference": 36000, + "study multi": 53414, + "pretrained using": 42192, + "tasks zero": 55974, + "limitations data": 30545, + "shallow heuristics": 50439, + "challenge data": 7973, + "corpora analysis": 11174, + "scale work": 48640, + "work improves": 62684, + "generalization training": 22132, + "generated examples": 22286, + "improves generalization": 25131, + "pair method": 39154, + "make data": 31558, + "code freely": 8814, + "performance comparing": 40259, + "explore training": 19743, + "tuning generative": 58916, + "loss fine": 31091, + "loss experiments": 31088, + "queries keys": 44653, + "potential enhance": 41388, + "output learning": 38983, + "share knowledge": 50459, + "propose relation": 43603, + "improves prior": 25153, + "superior capability": 53932, + "experts moe": 19590, + "word substitution": 62317, + "model ensembling": 33828, + "annotate dataset": 2872, + "effective solutions": 16697, + "information chinese": 25779, + "arguments propose": 4181, + "adversarial test": 1987, + "data textual": 12733, + "mechanism make": 32128, + "longer sentences": 31052, + "different source": 15073, + "translation module": 58636, + "number context": 37991, + "train modules": 57611, + "academic community": 791, + "especially natural": 18289, + "interaction human": 26600, + "make models": 31583, + "models yielded": 35692, + "framework comprising": 21475, + "parameters task": 39723, + "recover original": 45593, + "uncertainty aware": 59227, + "distribution model": 15645, + "specific evaluation": 52081, + "datasets cases": 13171, + "texts according": 56856, + "meaning input": 32003, + "outputs propose": 39019, + "graph transformer": 23176, + "art bert": 4228, + "challenges design": 8038, + "consistently achieves": 10289, + "training conduct": 57955, + "children learning": 8294, + "combination data": 9037, + "low correlation": 31136, + "model apply": 33570, + "shows models": 50790, + "create evaluation": 11697, + "seed examples": 49043, + "build work": 7434, + "generalization robustness": 22130, + "fluent language": 21130, + "encourage future": 17593, + "attention especially": 4744, + "wise attention": 62079, + "tasks words": 55967, + "frequency tokens": 21679, + "analysis long": 2691, + "generation incorporating": 22477, + "consistency model": 10269, + "unidirectional language": 59463, + "task solved": 55384, + "proposed outperforms": 43873, + "specific content": 52059, + "used bert": 60107, + "time evaluation": 57151, + "reduced model": 45685, + "explore influence": 19711, + "decoding results": 13643, + "considerably large": 10241, + "unseen cases": 59643, + "bi modal": 7017, + "perform downstream": 40093, + "commonsense question": 9237, + "studies knowledge": 53276, + "leveraging machine": 30332, + "24 hours": 328, + "set obtain": 50205, + "evaluated dataset": 18527, + "ability different": 602, + "models boost": 34792, + "language culture": 28014, + "model allow": 33560, + "outputs different": 39013, + "documents new": 15898, + "shot approaches": 50599, + "approach extensive": 3532, + "classifiers identify": 8616, + "development pre": 14698, + "verification model": 61527, + "detection demonstrate": 14473, + "uses transformer": 60541, + "perform various": 40159, + "winning solution": 62071, + "results identifying": 47666, + "using transformers": 61001, + "interactive systems": 26633, + "problem fine": 42568, + "particular achieve": 39831, + "media use": 32186, + "prevents model": 42235, + "instead predicting": 26459, + "factors affecting": 20306, + "results classifying": 47533, + "datasets involving": 13306, + "goal mind": 22891, + "models scratch": 35471, + "quantities data": 44635, + "including twitter": 25315, + "based multimodal": 5891, + "propose conditional": 43328, + "level extensive": 30116, + "task focusing": 55094, + "code replicate": 8854, + "despite tremendous": 14399, + "progress limited": 43102, + "translation non": 58649, + "performance 12": 40166, + "corpus non": 11392, + "scarcity parallel": 48674, + "furthermore different": 21816, + "model sub": 34420, + "distillation propose": 15576, + "constituent parts": 10356, + "generalization accuracy": 22117, + "evaluation conduct": 18594, + "social commonsense": 51556, + "strong state": 53051, + "models roberta": 35464, + "task validate": 55466, + "recent focus": 45312, + "downstream datasets": 16337, + "models distribution": 34931, + "ability discriminate": 603, + "framework cross": 21483, + "original datasets": 38709, + "identify critical": 24419, + "work follow": 62672, + "effectively work": 16764, + "shot methods": 50632, + "serves strong": 50091, + "split training": 52347, + "roberta xlnet": 48230, + "interpret human": 26709, + "framework applied": 21458, + "datasets widely": 13484, + "used public": 60280, + "experiments ablation": 19344, + "model mlm": 34103, + "general pre": 22080, + "studies validate": 53310, + "discuss major": 15474, + "improves sota": 25161, + "leverage contextual": 30260, + "enhance neural": 17917, + "leveraging contextual": 30320, + "build graph": 7402, + "analyses illustrate": 2597, + "illustrate effectiveness": 24515, + "review process": 48036, + "multiple categories": 36178, + "previous papers": 42269, + "time budget": 57123, + "results alternative": 47497, + "works surprisingly": 62911, + "approach augment": 3423, + "success various": 53730, + "tasks efficiently": 55604, + "simple lightweight": 51184, + "flexible efficient": 21109, + "able jointly": 703, + "forgetting problem": 21309, + "agnostic framework": 2090, + "algorithm named": 2286, + "knowledge retrieval": 27601, + "multilingual benchmark": 36063, + "methods single": 33044, + "token predictions": 57302, + "improve ability": 24821, + "approach ranked": 3664, + "systems predict": 54594, + "evaluation settings": 18715, + "set perform": 50216, + "practical setting": 41471, + "better aligned": 6848, + "corpus benchmark": 11287, + "contrastive pre": 10916, + "instance wise": 26431, + "effective sentence": 16694, + "understanding cross": 59335, + "documents typically": 15922, + "consider aspects": 10207, + "experiments newly": 19482, + "results findings": 47634, + "findings motivate": 20909, + "motivate future": 35859, + "achieved superior": 1278, + "hold true": 23981, + "suggest different": 53815, + "learn different": 29358, + "drawn attention": 16408, + "users examine": 60461, + "performance including": 40388, + "classification respectively": 8536, + "articles contain": 4465, + "detection fine": 14487, + "models consistent": 34850, + "paper question": 39562, + "evaluate robustness": 18502, + "robustness different": 48276, + "issues need": 27095, + "time significantly": 57213, + "influence functions": 25725, + "time efficient": 57147, + "language develop": 28026, + "techniques build": 56066, + "algorithm trained": 2306, + "generate corresponding": 22190, + "provides high": 44202, + "integrate pre": 26508, + "experiments comparing": 19380, + "work analyzing": 62566, + "models observe": 35270, + "led development": 29988, + "challenging datasets": 8089, + "set generated": 50160, + "narrative generation": 36382, + "growing field": 23297, + "generation open": 22511, + "dependencies natural": 14109, + "primarily focus": 42362, + "construct novel": 10398, + "datasets popular": 13371, + "consider various": 10226, + "gated graph": 21995, + "domain testing": 16208, + "provide basis": 44015, + "weight distribution": 61917, + "matching loss": 31913, + "methods baselines": 32766, + "task observe": 55245, + "scores previous": 48913, + "ordering information": 38666, + "provide benchmarks": 44017, + "best existing": 6762, + "nlp propose": 37516, + "need task": 36592, + "similar meanings": 51052, + "retrieval sentence": 47969, + "ai models": 2119, + "good starting": 22945, + "results hybrid": 47665, + "focus building": 21147, + "supervision strategy": 54095, + "approach simultaneously": 3696, + "general corpora": 22048, + "box nature": 7292, + "measure effectiveness": 32051, + "generated explanations": 22288, + "explanations results": 19607, + "building reliable": 7467, + "systems improving": 54528, + "sequence transformer": 50015, + "evidence suggesting": 18820, + "specific supervision": 52151, + "requires supervision": 46954, + "step use": 52833, + "sentence paraphrasing": 49616, + "documents news": 15899, + "salient words": 48443, + "help establish": 23559, + "performance distribution": 40297, + "distribution test": 15652, + "setting analysis": 50316, + "contrastive regularization": 10918, + "introduced capture": 26882, + "better estimate": 6884, + "augmentation adversarial": 4947, + "settings extensive": 50372, + "wide applicability": 61957, + "research years": 47145, + "availability parallel": 5255, + "scores baseline": 48892, + "data despite": 12275, + "larger previous": 29085, + "including dialogue": 25249, + "resolution tasks": 47196, + "description data": 14242, + "annotations experiments": 2992, + "benchmark proposed": 6488, + "approach yield": 3743, + "spoken text": 52365, + "highly noisy": 23906, + "addition evaluate": 1612, + "mixed sentences": 33409, + "corresponding human": 11552, + "metric evaluation": 33116, + "correctly identifying": 11493, + "domains address": 16234, + "framework open": 21575, + "answer related": 3052, + "unique advantages": 59508, + "decoding processes": 13641, + "information attention": 25763, + "framework consistently": 21477, + "code github": 8817, + "synthetic question": 54379, + "action sequence": 1456, + "weights encoder": 61939, + "forcing model": 21291, + "learning capacity": 29552, + "various architectures": 61301, + "dramatically improved": 16388, + "provide realistic": 44119, + "improvement 25": 24981, + "learning generated": 29661, + "hot vectors": 24032, + "errors existing": 18238, + "generation metrics": 22493, + "ways combine": 61840, + "novel use": 37949, + "tail entities": 54764, + "dealing long": 13521, + "combining neural": 9118, + "easy integration": 16563, + "inference datasets": 25652, + "features bi": 20533, + "architectures recent": 4122, + "parameters results": 39721, + "long studied": 31034, + "style sentiment": 53498, + "rely solely": 46300, + "models quality": 35390, + "algorithms text": 2342, + "algorithms different": 2325, + "addition paper": 1631, + "result best": 47434, + "achieved 96": 1216, + "humans communicate": 24274, + "specific neural": 52118, + "includes multiple": 25231, + "supervised objectives": 54029, + "generating target": 22399, + "consistent patterns": 10280, + "noisy signals": 37624, + "result performance": 47446, + "model supervision": 34430, + "labeling process": 27791, + "process sentence": 42829, + "dataset existing": 12914, + "english centric": 17781, + "analysis sentence": 2750, + "relationships documents": 46078, + "samples classes": 48467, + "mapping function": 31801, + "showing improved": 50680, + "require lot": 46876, + "sentence bert": 49520, + "suffer severe": 53779, + "regularization improve": 45837, + "calibration methods": 7536, + "tasks commonly": 55544, + "tasks area": 55507, + "development research": 14700, + "unified format": 59471, + "model chosen": 33657, + "accuracy increase": 992, + "producing natural": 43040, + "perform analyses": 40067, + "multi role": 36002, + "training synthetic": 58281, + "improvement 10": 24976, + "knowledge multi": 27554, + "effective robust": 16691, + "new decoding": 37169, + "trained positive": 57839, + "assess extent": 4578, + "pretrained sequence": 42183, + "recent text": 45359, + "transfer transformer": 58427, + "transformer t5": 58511, + "unified text": 59480, + "led increased": 29991, + "data tokens": 12736, + "humans create": 24275, + "new existing": 37199, + "output examples": 38971, + "range simple": 44934, + "training long": 58158, + "shot domain": 50611, + "performance improve": 40381, + "unseen target": 59654, + "model synthetic": 34435, + "synthetic samples": 54381, + "requires labeled": 46935, + "evaluations indicate": 18760, + "reducing gap": 45708, + "shot supervised": 50648, + "supervised performance": 54031, + "performance smaller": 40567, + "generating concise": 22369, + "directly using": 15342, + "experiments training": 19547, + "potentially used": 41419, + "tasks context": 55558, + "adaptive pre": 1578, + "propose feature": 43387, + "bert bart": 6611, + "models research": 35447, + "available cross": 5274, + "new shared": 37315, + "events cause": 18791, + "language built": 27982, + "models mainstream": 35210, + "align human": 2352, + "method align": 32376, + "level contrastive": 30086, + "generation current": 22440, + "model ranking": 34270, + "use variational": 60069, + "space generating": 51868, + "pre learned": 41504, + "datasets prior": 13377, + "corpus retrieval": 11423, + "intensive tasks": 26561, + "minimal pairs": 33290, + "pairs building": 39172, + "features evaluation": 20573, + "indian english": 25517, + "examples demonstrate": 18895, + "requires domain": 46925, + "thousands examples": 57079, + "supervision method": 54085, + "data active": 12116, + "alleviate challenges": 2401, + "method finally": 32507, + "time standard": 57223, + "vocabulary generation": 61703, + "representations unsupervised": 46780, + "knowledge english": 27461, + "information information": 25923, + "understand information": 59299, + "bert best": 6629, + "insights future": 26388, + "composed modules": 9733, + "tree graph": 58744, + "dataset build": 12832, + "dependent model": 14148, + "proposed incorporate": 43795, + "model decoder": 33740, + "work novel": 62737, + "representation contextual": 46500, + "costly collect": 11600, + "interface web": 26662, + "bias reduction": 7041, + "using downstream": 60669, + "challenges remain": 8076, + "lm fine": 30906, + "demonstrated significant": 14018, + "significant inference": 50893, + "structure given": 53108, + "input context": 26260, + "context extracted": 10636, + "reasoning benchmarks": 45186, + "outperform supervised": 38827, + "properties example": 43261, + "text proposed": 56717, + "datasets despite": 13224, + "additional lexical": 1684, + "resources code": 47296, + "approaches detect": 3796, + "bert embedding": 6650, + "using advanced": 60553, + "2020 using": 294, + "scores word": 48930, + "questions contain": 44779, + "task description": 55006, + "scale domain": 48566, + "dialogue tasks": 14790, + "model annotated": 33565, + "generating textual": 22402, + "score 27": 48787, + "token input": 57293, + "scoring module": 48938, + "scratch fine": 48944, + "coronavirus pandemic": 11169, + "learning hierarchical": 29670, + "quality instances": 44536, + "method significant": 32652, + "method classify": 32414, + "explain predictions": 19595, + "bidirectional decoder": 7066, + "sets human": 50295, + "need robust": 36590, + "techniques mitigate": 56111, + "datasets expensive": 13261, + "highly domain": 23895, + "better machine": 6914, + "thoroughly investigated": 57072, + "corpus sentence": 11426, + "volumes text": 61732, + "ml algorithms": 33428, + "labels present": 27845, + "augmenting data": 4987, + "data discuss": 12288, + "bert electra": 6648, + "word masked": 62243, + "models transformers": 35631, + "evaluation open": 18665, + "demonstrate automatically": 13873, + "result proposed": 47448, + "tasks outperforms": 55780, + "consuming error": 10443, + "recognize unseen": 45551, + "classification previous": 8519, + "ignored paper": 24495, + "recent pre": 45333, + "model objectives": 34135, + "experiments downstream": 19424, + "feature model": 20498, + "fair evaluation": 20357, + "seeks address": 49056, + "topic language": 57412, + "additional tasks": 1703, + "relevant literature": 46223, + "document analysis": 15767, + "continued pre": 10831, + "baselines generating": 6267, + "mitigation methods": 33396, + "analysis article": 2617, + "main methods": 31446, + "method pre": 32617, + "novel large": 37851, + "work datasets": 62618, + "innovative approach": 26250, + "correlations different": 11534, + "interactions social": 26622, + "hierarchical levels": 23675, + "tuning strategies": 58962, + "encoding used": 17580, + "achieving similar": 1423, + "conversational setting": 11051, + "retain information": 47921, + "absolute error": 739, + "systems vulnerable": 54668, + "indonesian language": 25600, + "resources experiments": 47302, + "suffer catastrophic": 53759, + "domain methods": 16111, + "proposed solve": 43899, + "occur training": 38270, + "documents documents": 15872, + "sentence graph": 49567, + "graph semantic": 23165, + "different frameworks": 14939, + "mask based": 31856, + "based augmentation": 5585, + "studies revealed": 53298, + "reasoning path": 45213, + "providing comprehensive": 44239, + "logical rules": 30989, + "reasoning experiments": 45194, + "speech inputs": 52267, + "change paper": 8172, + "human use": 24254, + "evidence annotations": 18807, + "literature task": 30864, + "extraction traditional": 20125, + "learning solve": 29886, + "consistency regularization": 10270, + "regularization based": 45836, + "construct word": 10404, + "representations novel": 46726, + "sets code": 50284, + "trained datasets": 57705, + "emotion information": 17290, + "derived text": 14203, + "controlled experiment": 10983, + "analysis compare": 2632, + "editing approach": 16596, + "opens possibility": 38484, + "dataset sentence": 13079, + "evaluating proposed": 18567, + "weighting method": 61934, + "errors based": 18235, + "models findings": 35026, + "resource availability": 47210, + "detection english": 14476, + "bart based": 5533, + "years pre": 63069, + "data uncertainty": 12751, + "loss finally": 31090, + "uncertainty estimation": 59230, + "hindi bengali": 23935, + "effective strategies": 16698, + "community https": 9265, + "https huggingface": 24060, + "based dual": 5692, + "models targeted": 35584, + "bert makes": 6678, + "dataset generation": 12943, + "new setting": 37314, + "negative mining": 36626, + "auxiliary entity": 5231, + "methods achieves": 32734, + "describes contribution": 14219, + "models potential": 35333, + "performance equivalent": 40322, + "source embeddings": 51769, + "embeddings according": 17076, + "accompanying dataset": 844, + "capturing complex": 7730, + "documents previous": 15903, + "using discourse": 60657, + "document length": 15802, + "propose emph": 43365, + "utterance model": 61138, + "input use": 26354, + "models case": 34807, + "single character": 51288, + "single head": 51306, + "using handcrafted": 60722, + "results allow": 47496, + "segmenting long": 49094, + "neural inference": 36960, + "cues present": 11934, + "provide generic": 44078, + "resources based": 47294, + "end study": 17711, + "sequential transfer": 50052, + "explore number": 19722, + "number research": 38033, + "algorithms recent": 2338, + "work fine": 62667, + "characteristics chinese": 8234, + "offer insights": 38292, + "auxiliary objectives": 5237, + "semantically unrelated": 49396, + "based filtering": 5729, + "style tasks": 53500, + "corpora state": 11245, + "pretraining process": 42215, + "classification evaluation": 8465, + "proposed bert": 43745, + "entire sequence": 18028, + "purpose pretrained": 44409, + "tasks pretraining": 55809, + "lms learn": 30920, + "data adopt": 12125, + "encode syntactic": 17471, + "interpret results": 26712, + "task problem": 55295, + "problem order": 42618, + "set final": 50157, + "major focus": 31509, + "data prior": 12561, + "create benchmark": 11692, + "robust data": 48242, + "identify correct": 24418, + "increase computational": 25408, + "generate single": 22248, + "representations latent": 46702, + "space generate": 51867, + "questions study": 44811, + "attention input": 4760, + "previously introduced": 42335, + "predicted answers": 41665, + "achieves sota": 1376, + "lingual ner": 30716, + "data newly": 12512, + "seven teams": 50421, + "results highest": 47656, + "effect data": 16611, + "attention global": 4754, + "deletion insertion": 13823, + "better label": 6906, + "investigate existing": 26957, + "simple alternative": 51134, + "based decomposition": 5671, + "allows flexible": 2465, + "level relations": 30190, + "domain furthermore": 16075, + "lexical choice": 30356, + "enabling efficient": 17454, + "single instance": 51309, + "makes learning": 31627, + "single text": 51347, + "data labels": 12450, + "despite long": 14373, + "supervised fashion": 53984, + "adopt pre": 1866, + "maximum improvement": 31970, + "coherent meaningful": 8917, + "issues arise": 27084, + "discuss shortcomings": 15482, + "shortcomings previous": 50581, + "explored using": 19768, + "data increase": 12426, + "self learning": 49200, + "sentences various": 49805, + "summaries based": 53869, + "specific paper": 52121, + "spur research": 52383, + "negative pairs": 36628, + "failing capture": 20348, + "text combine": 56497, + "utilize textual": 61104, + "popular pre": 41178, + "resources train": 47336, + "performance cost": 40269, + "paper ask": 39273, + "evaluation strategy": 18728, + "suggest improvements": 53821, + "contribute research": 10933, + "nlp solutions": 37525, + "including global": 25258, + "financial reports": 20893, + "present tool": 42042, + "density estimation": 14093, + "task formulation": 55096, + "leaving room": 29984, + "task descriptions": 55007, + "score 12": 48784, + "thanks availability": 56999, + "rl algorithms": 48173, + "sentences fed": 49723, + "learning scl": 29861, + "texts long": 56901, + "close supervised": 8693, + "texts despite": 56871, + "experiments compared": 19379, + "difficulty capturing": 15198, + "aspects natural": 4547, + "new synthetic": 37330, + "improvement different": 25002, + "approaching human": 3959, + "common benchmarks": 9167, + "benchmarks multiple": 6533, + "clear evidence": 8653, + "gain deeper": 21906, + "problem pre": 42625, + "increasingly studied": 25477, + "perform different": 40089, + "tasks performed": 55797, + "provided test": 44173, + "set shows": 50245, + "auto generated": 5019, + "experiments fine": 19436, + "suggest bert": 53814, + "content form": 10524, + "automatically identifies": 5180, + "increasing amounts": 25443, + "method methods": 32576, + "proposed achieve": 43710, + "multiple popular": 36263, + "detection best": 14464, + "bilstm bert": 7128, + "dataset indicate": 12967, + "bert learn": 6672, + "technique improve": 56033, + "model concepts": 33693, + "aims develop": 2187, + "compare using": 9375, + "language typically": 28542, + "studies investigate": 53273, + "000 articles": 4, + "datasets manually": 13326, + "information constituency": 25788, + "use interface": 59915, + "search strategies": 48985, + "extraction low": 20078, + "limitation paper": 30537, + "superiority approach": 53949, + "systems key": 54537, + "representative text": 46802, + "large complex": 28860, + "retrain model": 47933, + "provide various": 44154, + "human life": 24198, + "performance greatly": 40370, + "current model": 11987, + "accuracy 13": 895, + "evaluations conducted": 18753, + "important high": 24730, + "selection lead": 49142, + "number authors": 37983, + "tasks improving": 55675, + "task leads": 55166, + "knowledge injection": 27526, + "facilitate downstream": 20266, + "potential improvement": 41394, + "serve good": 50078, + "challenge aims": 7967, + "developing novel": 14662, + "performance information": 40394, + "tasks meta": 55744, + "trained input": 57751, + "individual representations": 25577, + "novel dependency": 37804, + "linguistic ambiguity": 30746, + "matching query": 31920, + "knowledge domains": 27449, + "learn single": 29424, + "method facilitates": 32506, + "second contribution": 49001, + "ir systems": 27035, + "identify types": 24449, + "dataset arabic": 12814, + "models classification": 34815, + "techniques rely": 56131, + "estimated using": 18377, + "frequently observed": 21685, + "space latent": 51872, + "use alternative": 59818, + "exploiting large": 19674, + "modelling framework": 34642, + "language bert": 27976, + "research explored": 47033, + "making suitable": 31668, + "applying state": 3377, + "shows best": 50764, + "best submission": 6826, + "pair level": 39153, + "quality issues": 44537, + "make work": 31611, + "evaluating text": 18570, + "texts use": 56939, + "carried using": 7771, + "distribution shift": 15650, + "media important": 32167, + "using crowd": 60638, + "augmentation data": 4952, + "methods neglect": 32955, + "tokens propose": 57333, + "significantly superior": 51014, + "training achieved": 57924, + "far away": 20397, + "concepts present": 9940, + "model potential": 34214, + "computational studies": 9864, + "different mechanisms": 14986, + "conventional transformer": 11017, + "generated datasets": 22284, + "framework robust": 21593, + "high f1": 23736, + "level specifically": 30214, + "study illustrate": 53386, + "demonstrated ability": 14002, + "models select": 35473, + "existing researches": 19137, + "knowledge enhancement": 27464, + "candidates proposed": 7587, + "task providing": 55310, + "providing additional": 44236, + "applied data": 3266, + "framework natural": 21570, + "based inter": 5788, + "task performed": 55274, + "inherent ambiguity": 26200, + "annotation strategy": 2971, + "unseen labels": 59650, + "method investigate": 32552, + "propose curriculum": 43344, + "aspects generated": 4539, + "correct target": 11476, + "sequence architecture": 49909, + "score used": 48880, + "role label": 48310, + "witnessed great": 62092, + "news generation": 37404, + "progress challenges": 43094, + "potentially harmful": 41413, + "modal language": 33460, + "focus english": 21158, + "building cross": 7440, + "model trains": 34484, + "normalization method": 37705, + "available model": 5327, + "sampling large": 48501, + "corpora furthermore": 11205, + "release datasets": 46153, + "automatically discovers": 5165, + "quality samples": 44577, + "manner extensive": 31718, + "limitations propose": 30555, + "tuning transformer": 58971, + "result obtain": 47443, + "shows high": 50781, + "model research": 34314, + "work performance": 62749, + "tuning limited": 58925, + "score 46": 48793, + "second issue": 49008, + "used facilitate": 60184, + "advantage explicit": 1939, + "graph generate": 23139, + "surpass strong": 54168, + "model attempt": 33582, + "reduce impact": 45665, + "information longer": 25959, + "bilstm encoder": 7133, + "combines pre": 9100, + "mtl approach": 35930, + "tasks dataset": 55569, + "based mathematical": 5836, + "little progress": 30883, + "perform topic": 40156, + "saliency based": 48437, + "limitation using": 30541, + "latent spaces": 29137, + "world language": 62944, + "better precision": 6941, + "classifier fine": 8597, + "manifold mixup": 31706, + "methods easy": 32831, + "speech form": 52264, + "using autoregressive": 60577, + "private test": 42443, + "relevant research": 46232, + "documents methods": 15895, + "methods manual": 32941, + "sample size": 48455, + "focus capturing": 21148, + "view propose": 61602, + "bert family": 6659, + "roberta distilbert": 48220, + "graph automatically": 23099, + "complexity text": 9691, + "existing computational": 19047, + "analysis traditional": 2784, + "poor accuracy": 41132, + "tasks prior": 55814, + "nature word": 36492, + "corpora approach": 11177, + "specific patterns": 52125, + "short document": 50553, + "makes comparison": 31617, + "problem design": 42533, + "problem high": 42577, + "directly extracted": 15315, + "works demonstrated": 62883, + "rely language": 46291, + "pairs joint": 39198, + "space source": 51898, + "information efficiently": 25826, + "performances downstream": 40642, + "aims map": 2205, + "scenarios propose": 48706, + "competitive previous": 9558, + "global perspective": 22839, + "labels better": 27812, + "challenge knowledge": 7987, + "achieve 93": 1108, + "benchmark various": 6504, + "dataset benchmarks": 12827, + "domain explore": 16069, + "directly generates": 15319, + "reproducibility code": 46827, + "recognition benchmarks": 45495, + "shown potential": 50735, + "focus incorporating": 21172, + "artificial languages": 4496, + "depending language": 14156, + "rules natural": 48392, + "domains investigate": 16263, + "relations introducing": 46038, + "features instance": 20605, + "conducted widely": 10098, + "scientific document": 48758, + "lastly demonstrate": 29107, + "empirical experiment": 17327, + "labels high": 27832, + "methods greatly": 32882, + "tuned dataset": 58870, + "decoding propose": 13642, + "generation sentences": 22544, + "performance input": 40395, + "sub domains": 53516, + "cnn bilstm": 8763, + "training pairs": 58200, + "embeddings usually": 17241, + "provide code": 44027, + "information interaction": 25930, + "evaluation work": 18749, + "automatic assessment": 5072, + "dataset validate": 13131, + "models ablation": 34652, + "advancements neural": 1903, + "aims build": 2178, + "improve accessibility": 24822, + "models assist": 34730, + "applications provide": 3238, + "language format": 28078, + "outperforms multi": 38911, + "driven end": 16422, + "essential components": 18324, + "domain labeled": 16095, + "model generated": 33932, + "data allowing": 12130, + "automated generation": 5045, + "dataset support": 13109, + "evaluation automated": 18577, + "automatically detect": 5157, + "using images": 60734, + "develop multimodal": 14601, + "implications future": 24656, + "challenge organized": 8003, + "model separate": 34357, + "proposed achieves": 43711, + "reduce influence": 45666, + "predictions models": 41764, + "dataset general": 12941, + "traditional knowledge": 57522, + "knowledge single": 27611, + "current practices": 11998, + "additional benefit": 1655, + "transformer encoders": 58485, + "problem leveraging": 42595, + "tuning stages": 58960, + "tree text": 58761, + "gains compared": 21935, + "work offers": 62740, + "universal representation": 59543, + "incorporate different": 25350, + "surpasses baselines": 54171, + "approach constructing": 3470, + "dataset open": 13018, + "distilled data": 15582, + "quality measures": 44550, + "structure texts": 53142, + "multiple decoders": 36195, + "finding models": 20899, + "studies benchmark": 53250, + "dialogue graph": 14775, + "global graph": 22829, + "role labelling": 48313, + "layers transformer": 29236, + "layers finally": 29224, + "domain long": 16105, + "benchmark training": 6502, + "specifically define": 52189, + "tasks better": 55526, + "plms bert": 41016, + "wall clock": 61761, + "effectiveness generalization": 16782, + "language improves": 28103, + "important characteristics": 24710, + "superficial cues": 53925, + "samples work": 48495, + "relevant dimensions": 46209, + "tasks lexical": 55722, + "represented different": 46806, + "layers pre": 29232, + "trained roberta": 57855, + "parsing benchmark": 39774, + "methods capable": 32776, + "advances large": 1914, + "lm pretraining": 30912, + "multilingual transformers": 36132, + "constraints paper": 10376, + "word span": 62313, + "observe different": 38131, + "outperform multiple": 38805, + "multiple existing": 36213, + "explanations model": 19606, + "incorrect predictions": 25400, + "difficult adapt": 15156, + "long input": 31015, + "models introducing": 35145, + "supervised shot": 54047, + "based strategy": 6061, + "inferior performance": 25708, + "design task": 14303, + "learning encoder": 29620, + "representation approaches": 46493, + "data smaller": 12670, + "primarily lack": 42365, + "largest model": 29098, + "articles study": 4480, + "conducted human": 10086, + "accuracy detecting": 958, + "length propose": 30033, + "structure large": 53114, + "attempt evaluate": 4684, + "develop effective": 14583, + "com ukplab": 9028, + "study set": 53458, + "models set": 35487, + "performance decreases": 40279, + "report introduces": 46438, + "token vocabulary": 57313, + "understanding role": 59396, + "tasks improvement": 55674, + "method encourages": 32482, + "align representation": 2353, + "process generate": 42785, + "time consumption": 57139, + "models greatly": 35071, + "loss landscape": 31098, + "relatively understudied": 46134, + "augmented fine": 4978, + "trained annotators": 57673, + "detection provide": 14514, + "supervision approach": 54076, + "t5 transformer": 54684, + "retrieved evidence": 47984, + "copy network": 11135, + "conducted dataset": 10078, + "available project": 5348, + "reduces inference": 45691, + "compared transformer": 9468, + "models adopt": 34696, + "assign labels": 4600, + "lead models": 29265, + "able assign": 676, + "knowledge downstream": 27450, + "existing newly": 19119, + "aspects data": 4537, + "reveal challenges": 48008, + "personally identifiable": 40767, + "identifiable information": 24382, + "training named": 58185, + "models base": 34752, + "recall scores": 45247, + "score 98": 48831, + "f_1 points": 20236, + "context analysis": 10583, + "work retrieval": 62810, + "datasets sentiment": 13416, + "robustness method": 48284, + "prediction process": 41730, + "multilingual monolingual": 36099, + "allows control": 2455, + "access model": 827, + "data highlight": 12404, + "thorough review": 57064, + "used introduce": 60218, + "input predict": 26316, + "based constituency": 5638, + "especially helpful": 18278, + "model seq2seq": 34359, + "novel pretraining": 37896, + "based commonsense": 5626, + "input approach": 26255, + "limitations model": 30551, + "query representation": 44676, + "setting investigate": 50328, + "learning lack": 29693, + "learning era": 29627, + "generation ability": 22407, + "gap work": 21985, + "feature fusion": 20491, + "tunes pre": 58895, + "english foreign": 17805, + "focus research": 21196, + "dynamic graph": 16485, + "relation representation": 45994, + "collection diverse": 8982, + "way understanding": 61834, + "understanding world": 59419, + "providing human": 44247, + "directions task": 15301, + "crowdsourcing workers": 11891, + "2021 shared": 298, + "proposed ensemble": 43768, + "correctly classified": 11490, + "present architecture": 41849, + "task formulations": 55097, + "applications current": 3193, + "constrained devices": 10365, + "attention specific": 4830, + "lower model": 31218, + "size existing": 51382, + "lightweight model": 30458, + "originally designed": 38743, + "designed provide": 14329, + "ii incorporating": 24504, + "approach creating": 3472, + "token word": 57315, + "model affect": 33548, + "provide case": 44023, + "graph words": 23179, + "standard procedure": 52519, + "paper fine": 39375, + "proposed fine": 43777, + "evaluate range": 18497, + "nlp used": 37559, + "phenomena paper": 40812, + "design based": 14265, + "despite use": 14401, + "efficient memory": 16883, + "toolkit available": 57371, + "topic field": 57407, + "confuse model": 10157, + "fail recognize": 20345, + "trained recognize": 57849, + "heavily relying": 23536, + "research tree": 47134, + "type single": 59069, + "embedding position": 17053, + "input recurrent": 26325, + "achieved compared": 1226, + "text generate": 56590, + "bias improve": 7028, + "use prediction": 59977, + "encoders perform": 17558, + "extraction requires": 20104, + "highly context": 23889, + "responses work": 47406, + "aim address": 2134, + "proposed annotation": 43718, + "dataset ii": 12957, + "promising source": 43186, + "required fine": 46900, + "models zero": 35693, + "improving zero": 25201, + "code fine": 8813, + "present comparative": 41867, + "datasets hope": 13290, + "specific parts": 52124, + "classifier proposed": 8604, + "precision f1": 41613, + "features dataset": 20552, + "training generating": 58113, + "augmentation using": 4972, + "approach compares": 3457, + "baseline evaluation": 6164, + "main finding": 31439, + "models don": 34939, + "art contextual": 4241, + "text prompts": 56714, + "extending previous": 19841, + "decompose complex": 13653, + "strategies combined": 52895, + "com gt": 9014, + "gt salt": 23317, + "problems work": 42738, + "applies machine": 3318, + "sentiment data": 49838, + "context speaker": 10723, + "yield effective": 63095, + "texts address": 56857, + "form texts": 21339, + "capture key": 7687, + "model match": 34093, + "word overlapping": 62259, + "built based": 7482, + "words selected": 62504, + "images video": 24557, + "content generation": 10526, + "set models": 50195, + "context shot": 10718, + "context examples": 10631, + "based prompt": 5956, + "datasets yield": 13489, + "methods time": 33074, + "need distinguish": 36556, + "context form": 10640, + "model contextual": 33712, + "contextual string": 10784, + "pairs text": 39220, + "dataset natural": 13006, + "tools models": 57381, + "timely manner": 57245, + "result experiments": 47438, + "studies existing": 53262, + "pre process": 41507, + "critical tasks": 11796, + "research far": 47035, + "issues data": 27088, + "methods normally": 32962, + "absence explicit": 734, + "lms fine": 30918, + "designed improve": 14321, + "corpus automatically": 11283, + "detailed experimental": 14425, + "step ahead": 52797, + "bias lead": 7031, + "similarity original": 51111, + "little additional": 30869, + "evaluation compared": 18592, + "detect classify": 14436, + "course conversation": 11636, + "typical approach": 59131, + "proposed recent": 43884, + "features learn": 20614, + "understand semantics": 59312, + "baselines publicly": 6292, + "components pre": 9723, + "based roberta": 6002, + "architecture large": 4057, + "adaptive approach": 1573, + "number candidate": 37986, + "large fraction": 28880, + "aims promote": 2210, + "high levels": 23749, + "based constraints": 5639, + "focus models": 21183, + "classification perform": 8513, + "tool designed": 57361, + "attention computational": 4728, + "linguistics community": 30822, + "community previous": 9271, + "goal directed": 22882, + "humans models": 24282, + "model overcomes": 34171, + "lack generalization": 27890, + "data shortage": 12657, + "framework highly": 21534, + "decoding experiments": 13630, + "explicit linguistic": 19619, + "parameters frozen": 39700, + "linguistics literature": 30823, + "novel adaptation": 37748, + "generated human": 22292, + "solve sub": 51690, + "utilizing multi": 61126, + "perform differently": 40090, + "utilize different": 61090, + "encoded graph": 17478, + "considering multiple": 10260, + "analyses conducted": 2590, + "conducted analyze": 10073, + "used entity": 60167, + "evaluation text": 18739, + "focused text": 21231, + "document image": 15799, + "focuses developing": 21237, + "contains 30": 10491, + "000 pairs": 10, + "attack models": 4660, + "showing training": 50691, + "processing particularly": 42921, + "data reduce": 12591, + "stages development": 52449, + "information considering": 25787, + "diversity complexity": 15732, + "pass model": 39917, + "generate texts": 22255, + "models reveals": 35461, + "tasks instance": 55692, + "learning ensemble": 29623, + "bias make": 7032, + "use conditional": 59850, + "learning respectively": 29847, + "focus previous": 21190, + "performance image": 40378, + "political discourse": 41108, + "tasks increasingly": 55685, + "set multi": 50196, + "achieve low": 1167, + "models score": 35470, + "individually ignoring": 25590, + "efficiently incorporate": 16917, + "efficient transformer": 16906, + "achieve use": 1212, + "models determine": 34907, + "trained explicit": 57728, + "enables evaluation": 17440, + "articles news": 4472, + "heterogeneous document": 23618, + "challenge text": 8020, + "work addressed": 62557, + "appropriate training": 3967, + "finally summarize": 20883, + "large spectrum": 29016, + "language consisting": 28003, + "using widely": 61025, + "factors impact": 20308, + "contrastive losses": 10913, + "yield similar": 63100, + "lack language": 27900, + "used multilingual": 60244, + "base language": 5544, + "domain framework": 16074, + "transfer source": 58422, + "systems handle": 54517, + "contains total": 10506, + "availability domain": 5247, + "robust dataset": 48243, + "adapts model": 1584, + "performance boosts": 40225, + "pairs utterances": 39229, + "ability memorize": 624, + "methods real": 33005, + "framework encodes": 21503, + "multiple bert": 36175, + "aim understand": 2161, + "better best": 6855, + "predicting future": 41675, + "larger language": 29080, + "datasets combined": 13178, + "models likely": 35188, + "paper explains": 39359, + "usually hard": 61051, + "feature distribution": 20480, + "roberta xlm": 48229, + "science linguistics": 48747, + "capabilities limitations": 7600, + "single architecture": 51283, + "labels text": 27852, + "single set": 51336, + "procedure based": 42742, + "benchmark accuracy": 6423, + "underrepresented groups": 59285, + "attention use": 4843, + "examples question": 18927, + "applied directly": 3268, + "model retraining": 34324, + "strong models": 53037, + "competitive recent": 9559, + "constructing large": 10422, + "label quality": 27722, + "100 training": 64, + "models acquire": 34686, + "datasets mainly": 13323, + "words definitions": 62394, + "future paper": 21881, + "easily available": 16538, + "specifically conduct": 52186, + "using template": 60983, + "reasoning datasets": 45192, + "propose light": 43441, + "bayesian learning": 6359, + "self explanatory": 49196, + "method contributes": 32443, + "confirm method": 10132, + "unified multimodal": 59476, + "annotation existing": 2948, + "information additional": 25755, + "label generation": 27709, + "method surpasses": 32677, + "improvement score": 25022, + "improvement significant": 25023, + "representative methods": 46798, + "life settings": 30440, + "focus context": 21149, + "allows reduce": 2476, + "methods future": 32874, + "model scale": 34335, + "languages respectively": 28774, + "generation focused": 22463, + "extraction documents": 20058, + "remains relatively": 46347, + "present hybrid": 41926, + "hybrid framework": 24316, + "datasets able": 13140, + "translation bilingual": 58586, + "aim achieve": 2133, + "study zero": 53477, + "parallel datasets": 39649, + "based contextualized": 5645, + "effective bert": 16633, + "medium sized": 32220, + "verify model": 61541, + "generated comments": 22275, + "introduce small": 26862, + "annotation budget": 2938, + "curated datasets": 11950, + "problems addressed": 42693, + "speed memory": 52323, + "retaining high": 47925, + "model leading": 34048, + "types code": 59079, + "work leverages": 62710, + "style text": 53501, + "using pointer": 60860, + "methods assume": 32757, + "input given": 26283, + "following previous": 21266, + "fusion approach": 21851, + "researchers focus": 47158, + "increased attention": 25429, + "adaptation propose": 1536, + "auxiliary model": 5235, + "base large": 5545, + "average points": 5413, + "datasets studies": 13444, + "cases domain": 7806, + "real scenarios": 45110, + "existing single": 19143, + "analyses provide": 2602, + "researchers understand": 47167, + "information vital": 26156, + "data cross": 12262, + "relevant external": 46215, + "time detection": 57144, + "extensive data": 19860, + "learning specific": 29888, + "datasets automatic": 13161, + "terms fluency": 56291, + "simplify process": 51243, + "step improve": 52812, + "attention transformers": 4841, + "learning works": 29947, + "scores model": 48908, + "tree main": 58750, + "models codes": 34823, + "rich history": 48101, + "instead focusing": 26452, + "set different": 50135, + "typically need": 59149, + "domain typically": 16220, + "selecting data": 49125, + "comparisons state": 9513, + "relatively limited": 46120, + "efficient text": 16903, + "accuracy 89": 921, + "resulted best": 47459, + "baseline given": 6174, + "augmentation models": 4964, + "task pretrained": 55289, + "provides dataset": 44191, + "000 examples": 6, + "global optimum": 22837, + "strategies perform": 52912, + "problem incorporating": 42582, + "bilingual model": 7111, + "set social": 50250, + "works field": 62889, + "sequential decoding": 50039, + "outputs experiments": 39014, + "extraction work": 20131, + "current trend": 12023, + "models requiring": 35446, + "models construction": 34857, + "implemented different": 24647, + "fields work": 20785, + "work defines": 62624, + "online used": 38391, + "propose user": 43695, + "informal nature": 25741, + "standard sentence": 52525, + "solutions proposed": 51672, + "additional pretraining": 1695, + "directly input": 15321, + "depend context": 14099, + "lead robust": 29268, + "tasks field": 55639, + "based empirical": 5697, + "significantly enhance": 50955, + "experimentally evaluated": 19333, + "semeval 2021": 49438, + "2021 task": 300, + "integrated gradients": 26515, + "best submissions": 6827, + "research english": 47029, + "compare pre": 9358, + "attention promising": 4813, + "studies explored": 53264, + "mel spectrograms": 32227, + "study indicates": 53390, + "research past": 47092, + "research typically": 47136, + "role pre": 48319, + "tuned multilingual": 58881, + "consistently best": 10290, + "supplementary material": 54106, + "input label": 26287, + "relations finally": 46033, + "model fails": 33879, + "easily learn": 16548, + "current transformer": 12021, + "settings limited": 50381, + "average 18": 5399, + "text dynamic": 56546, + "strategies compare": 52896, + "age groups": 2047, + "mitigate negative": 33388, + "regression tasks": 45822, + "knowledge scientific": 27603, + "systems detecting": 54477, + "strategy yields": 52956, + "shot training": 50653, + "lead poor": 29267, + "context domain": 10617, + "words relatively": 62495, + "jointly leverages": 27204, + "results biomedical": 47527, + "knowledge represented": 27593, + "hope survey": 24015, + "directly extract": 15314, + "performance good": 40368, + "multi modality": 35992, + "high resolution": 23790, + "relatively explored": 46115, + "use datasets": 59862, + "learning classify": 29558, + "higher levels": 23833, + "suggest human": 53820, + "t5 large": 54681, + "information achieved": 25752, + "intuitive humans": 26910, + "reasoning method": 45202, + "overall findings": 39042, + "donald trump": 16315, + "domains healthcare": 16259, + "dataset valuable": 13132, + "utility dataset": 61081, + "https doi": 24056, + "10 5281": 33, + "5281 zenodo": 437, + "knowledge linguistic": 27549, + "online platform": 38378, + "cc sa": 7899, + "attention uses": 4845, + "softmax attention": 51628, + "recency bias": 45275, + "combining representations": 9121, + "known text": 27668, + "need expensive": 36560, + "expensive annotations": 19204, + "task metrics": 55213, + "systems required": 54621, + "dynamic vocabulary": 16493, + "building accurate": 7436, + "driven learning": 16426, + "algorithm effectively": 2271, + "outperforms supervised": 38952, + "gap using": 21984, + "code accessed": 8791, + "human decision": 24132, + "answering existing": 3072, + "mutual interaction": 36350, + "way experiments": 61802, + "corpora corpus": 11188, + "candidate entities": 7570, + "contextual meaning": 10775, + "role nlp": 48318, + "human provided": 24218, + "knowledge parameters": 27566, + "complex pipelines": 9646, + "data studied": 12701, + "harder tasks": 23455, + "available easy": 5285, + "systems general": 54510, + "addressing question": 1824, + "encoded language": 17480, + "procedure using": 42746, + "models bias": 34784, + "data showing": 12658, + "unsatisfactory performance": 59641, + "data abstract": 12105, + "examples hard": 18908, + "develop evaluate": 14587, + "fleiss kappa": 21105, + "leverage recent": 30286, + "achieve goals": 1144, + "context instead": 10661, + "known benchmark": 27654, + "tuning nlp": 58935, + "19 patients": 189, + "human resources": 24234, + "information generating": 25893, + "design framework": 14284, + "social issues": 51566, + "domains low": 16272, + "resource multilingual": 47254, + "techniques fine": 56089, + "human preferences": 24217, + "focus types": 21209, + "introduce idea": 26813, + "powerful new": 41439, + "analysis network": 2705, + "classification fundamental": 8474, + "particularly suitable": 39891, + "training limited": 58156, + "simple training": 51221, + "standard single": 52530, + "text does": 56542, + "provides reasonable": 44221, + "assist humans": 4610, + "understanding people": 59380, + "unique properties": 59516, + "handling problem": 23427, + "improvements f1": 25075, + "improvement 11": 24977, + "absolute points": 749, + "dataset multiple": 13004, + "dataset baseline": 12823, + "studies introduced": 53272, + "rnn lm": 48198, + "robustness existing": 48279, + "model conducted": 33697, + "challenging cases": 8085, + "factually consistent": 20326, + "impressive capabilities": 24809, + "including zero": 25322, + "slow inference": 51450, + "novel machine": 37859, + "attention problem": 4812, + "given dialogue": 22735, + "vital information": 61691, + "datasets user": 13473, + "great performance": 23209, + "model perplexity": 34207, + "baseline absolute": 6149, + "absolute 10": 737, + "models construct": 34855, + "code mix": 8822, + "handle code": 23406, + "using code": 60609, + "resource researchers": 47267, + "current sequence": 12007, + "best tasks": 6831, + "t5 gpt": 54680, + "traditional fine": 57520, + "benchmark best": 6429, + "knowledge additional": 27391, + "prompt engineering": 43203, + "using type": 61005, + "efforts make": 16941, + "web app": 61876, + "task form": 55095, + "designed enable": 14312, + "confidence based": 10111, + "early training": 16517, + "limited address": 30563, + "effect translation": 16621, + "languages combining": 28616, + "reflect human": 45773, + "missing important": 33362, + "generally rely": 22170, + "phase pre": 40805, + "effectiveness pre": 16801, + "continuing pre": 10837, + "grained annotation": 23022, + "significant work": 50930, + "tweets extracted": 59015, + "evaluation platform": 18674, + "robustness analysis": 48274, + "models aspects": 34728, + "input time": 26348, + "possible model": 41333, + "datasets difficult": 13232, + "entities sentences": 18083, + "size complexity": 51377, + "samples given": 48477, + "metric language": 33118, + "sample level": 48452, + "evolving nature": 18842, + "provide researchers": 44122, + "posts using": 41376, + "particular important": 39848, + "level ones": 30170, + "dense retrieval": 14080, + "hard negatives": 23448, + "recall 10": 45239, + "languages manually": 28727, + "evaluate improve": 18464, + "rely lexical": 46294, + "critical understanding": 11797, + "dataset supports": 13110, + "annotation guideline": 2952, + "tasks average": 55515, + "category text": 7863, + "diversity fluency": 15734, + "11 datasets": 85, + "linear complexity": 30654, + "aims convert": 2182, + "maintaining accuracy": 31486, + "transformers work": 58533, + "improving inference": 25182, + "handle multi": 23412, + "necessary condition": 36530, + "improve supervised": 24930, + "embeddings tokens": 17230, + "follow recent": 21254, + "languages performing": 28751, + "outperform results": 38818, + "propose plug": 43583, + "development training": 14709, + "method especially": 32487, + "84 f1": 532, + "quality unsupervised": 44594, + "task matching": 55209, + "leverage self": 30288, + "test scores": 56367, + "scores paper": 48912, + "hinders model": 23933, + "improvement paper": 25012, + "accelerate model": 801, + "trainable neural": 57663, + "set small": 50249, + "example paper": 18880, + "known knowledge": 27659, + "model extra": 33870, + "achieves 86": 1294, + "99 accuracy": 575, + "translation dialogue": 58599, + "information neighboring": 25989, + "finally analyse": 20835, + "time particular": 57189, + "extract various": 20002, + "constantly changing": 10344, + "news agencies": 37383, + "novel vocabulary": 37951, + "examples similar": 18932, + "extraction document": 20057, + "valuable knowledge": 61204, + "provide ablation": 44002, + "wider context": 62027, + "major issue": 31512, + "news portals": 37411, + "trained attention": 57675, + "results combined": 47537, + "embeddings approaches": 17084, + "transfer low": 58401, + "dependency analysis": 14117, + "ethical considerations": 18417, + "extremely limited": 20162, + "english high": 17819, + "multilingual low": 36092, + "instead used": 26466, + "recently models": 45439, + "model capabilities": 33642, + "parsers present": 39764, + "examples generate": 18905, + "stage pre": 52438, + "training intermediate": 58136, + "guides model": 23355, + "understand linguistic": 59303, + "representation multi": 46559, + "measures different": 32077, + "expand corpus": 19184, + "capable identifying": 7623, + "construction grammar": 10426, + "novel span": 37925, + "task focused": 55092, + "advances text": 1929, + "enables models": 17445, + "english utterances": 17898, + "existing methodologies": 19091, + "using ensembles": 60678, + "research article": 46985, + "information units": 26137, + "subject domain": 53552, + "techniques utilized": 56150, + "hard follow": 23442, + "specific metrics": 52111, + "problems provide": 42726, + "encourage consistency": 17590, + "external supervision": 19955, + "strong text": 53055, + "trained achieve": 57669, + "foster future": 21410, + "advancements field": 1899, + "research gaps": 47045, + "capitalization punctuation": 7641, + "000 hours": 7, + "manually labeling": 31785, + "detecting domain": 14447, + "gan based": 21953, + "works directly": 62885, + "growing popularity": 23300, + "help models": 23580, + "performance boosting": 40224, + "relation classifiers": 45968, + "label words": 27734, + "tuning better": 58903, + "setting fine": 50324, + "using strong": 60966, + "dataset https": 12952, + "rnn baselines": 48189, + "multihead attention": 36053, + "reading natural": 45089, + "controllable language": 10978, + "control codes": 10961, + "automated evaluations": 5043, + "tasks benchmarks": 55521, + "studies deep": 53256, + "annotations work": 3008, + "low confidence": 31135, + "calibration method": 7535, + "conduct preliminary": 10057, + "present insights": 41931, + "limits applicability": 30639, + "based tagging": 6081, + "dependencies improve": 14107, + "art accuracies": 4208, + "individual feature": 25567, + "faithful explanations": 20365, + "meaning original": 32007, + "processing resources": 42935, + "lms gpt": 30919, + "models collection": 34824, + "collection datasets": 8981, + "meta dataset": 32330, + "quickly new": 44824, + "baselines variety": 6318, + "likely contain": 30525, + "widespread attention": 62032, + "performing fine": 40678, + "models resulted": 35454, + "generate rich": 22238, + "tagging based": 54737, + "model motivated": 34107, + "careful design": 7756, + "integrate different": 26505, + "recently bidirectional": 45412, + "tuning paradigm": 58937, + "modeling ability": 34558, + "global topic": 22846, + "scores best": 48893, + "use public": 59985, + "world facts": 62941, + "style prompts": 53493, + "raise important": 44855, + "conduct set": 10062, + "tasks measure": 55743, + "method evaluation": 32492, + "art hybrid": 4267, + "data analyze": 12134, + "draw insights": 16401, + "leverage task": 30291, + "data self": 12635, + "data extensive": 12350, + "approach significant": 3691, + "data currently": 12266, + "evaluation research": 18697, + "models sufficiently": 35558, + "methods 10": 32723, + "task predicts": 55286, + "100 200": 58, + "scientific abstracts": 48754, + "performance trade": 40602, + "multiple model": 36250, + "voting approach": 61740, + "texts english": 56876, + "topics propose": 57459, + "evaluate baseline": 18442, + "used interpret": 60217, + "parser achieve": 39758, + "challenging state": 8142, + "better solve": 6967, + "external datasets": 19933, + "challenge previous": 8007, + "response present": 47398, + "layer layers": 29186, + "stack transformer": 52419, + "model add": 33537, + "non target": 37684, + "output representations": 38997, + "work current": 62616, + "evaluations text": 18770, + "methods exploiting": 32855, + "literature dataset": 30857, + "studies large": 53277, + "contextualised language": 10793, + "highly useful": 23922, + "models plm": 35324, + "abundant labeled": 782, + "perform reasoning": 40133, + "challenge design": 7976, + "framework extracting": 21521, + "interpretable reasoning": 26728, + "exist text": 19017, + "bart t5": 5535, + "perform decently": 40085, + "outputs generated": 39015, + "generate effective": 22196, + "reduce performance": 45679, + "significantly enhances": 50956, + "modern transformer": 35723, + "knowledge essential": 27468, + "essential downstream": 18326, + "summarization multi": 53894, + "evaluations confirm": 18754, + "scale user": 48638, + "ai assisted": 2114, + "requires capturing": 46916, + "representations experiment": 46659, + "directions recent": 15299, + "points better": 41069, + "training important": 58124, + "lack transparency": 27924, + "causal explanations": 7872, + "common text": 9205, + "model transferred": 34488, + "inference test": 25698, + "address gaps": 1760, + "work highlight": 62678, + "highlight limitations": 23865, + "code provide": 8847, + "length prediction": 30032, + "oriented semantic": 38699, + "datasets designed": 13223, + "extract spans": 19995, + "texts recent": 56916, + "article focus": 4449, + "embeddings studied": 17222, + "components using": 9728, + "tasks findings": 55642, + "changing model": 8186, + "tend low": 56202, + "questions regarding": 44802, + "approach integrates": 3575, + "smaller ones": 51525, + "summarization document": 53882, + "assume access": 4633, + "access high": 822, + "metric used": 33128, + "directly source": 15336, + "perturbation based": 40790, + "generate entity": 22198, + "content planning": 10546, + "demonstrate empirically": 13909, + "experimental conditions": 19259, + "event information": 18785, + "crosslingual transfer": 11877, + "encoders task": 17559, + "recently prompt": 45453, + "prompt tuning": 43209, + "prompt template": 43207, + "representation structured": 46587, + "weight word": 61921, + "domain evaluations": 16060, + "seek improve": 49050, + "using cosine": 60633, + "dataset news": 13011, + "different news": 15008, + "51 accuracy": 431, + "paper looks": 39419, + "classes text": 8418, + "previous non": 42267, + "contrastive samples": 10921, + "studies real": 53292, + "targeting specific": 54863, + "presents research": 42101, + "research paradigm": 47091, + "application needs": 3172, + "efficient approaches": 16862, + "setting models": 50331, + "related downstream": 45902, + "appropriate pre": 3965, + "multiple methods": 36247, + "models highest": 35088, + "introduce stage": 26864, + "learn small": 29425, + "models finetuning": 35034, + "architecture leverages": 4061, + "transfer natural": 58408, + "gpt t5": 22989, + "suffer performance": 53775, + "improvement code": 24996, + "lm pre": 30911, + "embeddings sensitive": 17210, + "factual consistency": 20319, + "dataset dialogue": 12896, + "meta evaluation": 32334, + "makes models": 31629, + "models visual": 35675, + "significant robustness": 50921, + "data instance": 12433, + "classification multiple": 8503, + "approaches best": 3777, + "representation propose": 46571, + "apply methodology": 3334, + "problem standard": 42667, + "context specifically": 10725, + "machines ability": 31397, + "introduce publicly": 26855, + "better supervised": 6973, + "trade performance": 57503, + "knowledge produce": 27578, + "problem systems": 42671, + "mechanisms improve": 32151, + "tasks conversational": 55560, + "retrieval method": 47953, + "question understanding": 44755, + "semantic connections": 49256, + "datasets detailed": 13225, + "knowledge shared": 27608, + "extract domain": 19972, + "domain invariance": 16089, + "sentiment features": 49845, + "reasoning question": 45219, + "complexity real": 9688, + "train bilingual": 57569, + "combining semantic": 9122, + "similarities sentences": 51080, + "corpus derived": 11322, + "generates accurate": 22337, + "contributions present": 10956, + "linguistic capabilities": 30751, + "remain unsolved": 46318, + "systems potential": 54591, + "experiments wide": 19560, + "setups demonstrate": 50413, + "labels extensive": 27824, + "domain textual": 16211, + "irrelevant sentences": 27043, + "model partially": 34182, + "biases learned": 7055, + "input structure": 26343, + "knowledge fully": 27486, + "adversarial data": 1966, + "use synthetic": 60038, + "generation make": 22488, + "robust human": 48250, + "develop data": 14579, + "generation pipeline": 22518, + "time average": 57120, + "soft prompts": 51625, + "frozen language": 21696, + "learned approach": 29451, + "using t5": 60976, + "billions parameters": 7124, + "prefix tuning": 41796, + "summarization experimental": 53883, + "work usually": 62856, + "based corresponding": 5653, + "fail provide": 20344, + "addressing issues": 1820, + "label imbalance": 27711, + "create multiple": 11710, + "retriever generator": 47989, + "generator framework": 22617, + "datasets construct": 13195, + "pairs high": 39194, + "progress nlp": 43108, + "tasks researchers": 55859, + "propose neuro": 43492, + "memory stores": 32284, + "contain complex": 10460, + "settings study": 50398, + "evidence provided": 18817, + "output pairs": 38989, + "seen tasks": 49065, + "terms generalization": 56293, + "significant room": 50923, + "quantitatively measure": 44632, + "based criteria": 5659, + "created task": 11732, + "adaptation uda": 1546, + "training results": 58232, + "reduces model": 45693, + "model overfitting": 34172, + "vastly outperforms": 61443, + "dataset textit": 13118, + "settings achieve": 50358, + "improvement release": 25021, + "achieves 85": 1293, + "prior posterior": 42410, + "decoder networks": 13604, + "networks considering": 36841, + "original documents": 38711, + "various modalities": 61364, + "challenges like": 8058, + "speech english": 52262, + "malayalam tamil": 31679, + "models commonsense": 34833, + "masked token": 31869, + "inference latent": 25666, + "swedish danish": 54249, + "leverage neural": 30281, + "models news": 35260, + "sources model": 51835, + "designed facilitate": 14315, + "shallow models": 50441, + "scenarios particularly": 48704, + "puts forward": 44432, + "models common": 34831, + "research efficient": 47025, + "tuning experiments": 58912, + "solution proposed": 51660, + "enhanced transformer": 17940, + "hierarchical relations": 23688, + "model augment": 33587, + "instead modeling": 26458, + "questions compared": 44778, + "dataset novel": 13013, + "key design": 27306, + "especially unseen": 18309, + "concepts training": 9945, + "carlo sampling": 7767, + "document finally": 15794, + "particular best": 39834, + "achieves micro": 1346, + "gpt transformer": 22990, + "performance 59": 40170, + "suggesting models": 53837, + "questions address": 44768, + "possible reduce": 41335, + "dependence labeled": 14102, + "manual methods": 31748, + "languages research": 28772, + "poor robustness": 41144, + "measuring model": 32086, + "detection evaluate": 14481, + "reveal significant": 48013, + "topic recent": 57422, + "features syntactic": 20679, + "models increases": 35124, + "shot context": 50606, + "model scales": 34336, + "tasks shot": 55884, + "aware dialogue": 5448, + "inference costs": 25648, + "comprehensive analyses": 9781, + "approach mitigate": 3601, + "issue learning": 27066, + "75 f1": 503, + "model filter": 33886, + "majority studies": 31533, + "propose semantics": 43618, + "scale multiple": 48602, + "extraction specifically": 20113, + "adversarial example": 1969, + "matches exceeds": 31904, + "use real": 59986, + "benchmark multiple": 6480, + "approaches multilingual": 3878, + "effective ways": 16715, + "focus cross": 21150, + "specific representation": 52137, + "approaches experimental": 3816, + "threshold based": 57090, + "data requiring": 12608, + "scaling number": 48651, + "given fixed": 22744, + "called graph": 7546, + "demonstrate improvement": 13922, + "features developed": 20559, + "corpus allows": 11272, + "words convey": 62389, + "learning enables": 29618, + "accuracy use": 1067, + "popular knowledge": 41165, + "benchmark existing": 6467, + "layers training": 29235, + "like self": 30499, + "simple naive": 51198, + "extraction relation": 20103, + "detect semantic": 14441, + "usually apply": 61035, + "sparse datasets": 51968, + "efficient existing": 16872, + "propose tasks": 43660, + "datasets baselines": 13166, + "relevant natural": 46224, + "models driven": 34942, + "analysis benchmark": 2622, + "set recommendations": 50234, + "significant human": 50869, + "processing aims": 42848, + "consists 10": 10320, + "strategy enhance": 52932, + "enhance information": 17913, + "huge text": 24078, + "real nlp": 45108, + "lack labelled": 27899, + "reasoning step": 45224, + "understand reason": 59309, + "knowledge build": 27419, + "encoding syntactic": 17578, + "dependency edges": 14119, + "explainable predictions": 19599, + "comparison paper": 9500, + "evaluated experiments": 18532, + "probe models": 42487, + "inter dependence": 26579, + "augmentation da": 4951, + "lead wrong": 29280, + "outperforms model": 38909, + "explicit content": 19612, + "content language": 10535, + "retrieval multilingual": 47958, + "advances transformer": 1930, + "datasets usually": 13475, + "research document": 47022, + "additional work": 1710, + "continuously updated": 10860, + "network demonstrate": 36729, + "aims select": 2214, + "containing information": 10483, + "texts tweets": 56938, + "paper process": 39488, + "tuning neural": 58933, + "settings task": 50399, + "distinct language": 15592, + "feasibility using": 20470, + "increase usage": 25426, + "language despite": 28024, + "study discuss": 53363, + "expressions text": 19811, + "limited research": 30608, + "critical challenge": 11778, + "contribute new": 10932, + "nlp ml": 37499, + "limited capacity": 30573, + "continue pre": 10829, + "design training": 14304, + "biases language": 7054, + "experiments commonly": 19376, + "language automatically": 27970, + "learning existing": 29632, + "explored recent": 19764, + "established datasets": 18354, + "datasets arabic": 13159, + "provides potential": 44219, + "text small": 56774, + "need access": 36543, + "synthetic labels": 54377, + "sensitive semantic": 49504, + "testing sets": 56412, + "intermediate results": 26678, + "building multi": 7456, + "systems conversational": 54460, + "detailed descriptions": 14419, + "play central": 40961, + "google bert": 22953, + "outperform human": 38800, + "generates meaningful": 22347, + "help human": 23567, + "properties different": 43260, + "leverage bert": 30257, + "based labels": 5801, + "major topics": 31523, + "practitioners researchers": 41492, + "tuned target": 58888, + "layer using": 29212, + "glove embedding": 22857, + "built transformer": 7492, + "motivate new": 35861, + "strong challenge": 53020, + "unique opportunity": 59515, + "tasks result": 55861, + "nlp challenges": 37471, + "identify challenges": 24415, + "task currently": 54990, + "applying model": 3368, + "graph theory": 23175, + "general approaches": 22046, + "efficient paper": 16891, + "introduce contrastive": 26790, + "maximize mutual": 31958, + "interaction learning": 26604, + "articles multiple": 4470, + "overcome propose": 39073, + "evaluation various": 18747, + "shot manner": 50631, + "methods rarely": 33004, + "testing scenarios": 56410, + "aware machine": 5457, + "guided attention": 23344, + "layer outputs": 29198, + "significantly degrades": 50951, + "specific ones": 52120, + "input set": 26334, + "lexically diverse": 30401, + "lexicon information": 30412, + "bert combined": 6637, + "bert paper": 6697, + "external lexicon": 19947, + "trained manually": 57781, + "documents extract": 15880, + "combining models": 9116, + "years online": 63067, + "increasing volume": 25467, + "reasoning types": 45230, + "tasks baselines": 55517, + "tunes model": 58894, + "generation addition": 22411, + "datasets include": 13298, + "offer limited": 38293, + "datasets long": 13319, + "realistic scenario": 45150, + "finding work": 20903, + "data gold": 12393, + "mt5 model": 35928, + "model convergence": 33717, + "state encoder": 52697, + "prediction approaches": 41695, + "mixture expert": 33418, + "success modeling": 53710, + "sequence generated": 49924, + "context token": 10733, + "topic knowledge": 57411, + "relation using": 46000, + "capture interaction": 7684, + "years datasets": 63053, + "involves use": 27022, + "aims detecting": 2185, + "issue low": 27067, + "types introduce": 59095, + "source corpora": 51758, + "release pretrained": 46165, + "pretraining corpus": 42197, + "issues current": 27087, + "importance nlp": 24686, + "labels method": 27837, + "existing high": 19075, + "provides automatic": 44182, + "token label": 57294, + "tagging scheme": 54751, + "incompleteness knowledge": 25333, + "ones previous": 38342, + "aims investigate": 2201, + "dialogue understanding": 14794, + "plms paper": 41019, + "core content": 11148, + "existing plms": 19126, + "witnessed increasing": 62093, + "experiments challenging": 19370, + "data produce": 12566, + "produce synthetic": 43014, + "hallucination problem": 23374, + "generating fluent": 22375, + "models weak": 35678, + "experiments self": 19516, + "albert roberta": 2251, + "using retrieval": 60910, + "paid little": 39143, + "convolution networks": 11095, + "module designed": 35754, + "extraction pre": 20094, + "different relations": 15049, + "tools like": 57380, + "tune plms": 58860, + "manually designing": 31777, + "language prompts": 28446, + "propose prompt": 43594, + "construct prompts": 10399, + "labels fine": 27827, + "language news": 28358, + "shot models": 50634, + "based support": 6072, + "art evaluated": 4259, + "proper knowledge": 43252, + "problems task": 42733, + "given situation": 22785, + "initial training": 26220, + "pretrained domain": 42153, + "2020 work": 295, + "experiments evaluations": 19432, + "analysis new": 2707, + "benchmarks evaluation": 6522, + "adapt various": 1515, + "results shot": 47835, + "information current": 25798, + "gap current": 21959, + "annotation platform": 2959, + "current dataset": 11967, + "respect number": 47349, + "paradigm called": 39622, + "multi armed": 35938, + "armed bandit": 4191, + "relevant texts": 46240, + "classification research": 8535, + "performance article": 40200, + "enable efficient": 17424, + "based annotations": 5567, + "method increase": 32541, + "particularly strong": 39890, + "analysis studies": 2769, + "propose masked": 43448, + "information layers": 25947, + "universal feature": 59540, + "standard pre": 52517, + "collecting high": 8974, + "architectures end": 4108, + "recent paper": 45330, + "level global": 30126, + "respectively work": 47386, + "accuracy traditional": 1063, + "extend prior": 19828, + "data accessible": 12108, + "recent advancement": 45278, + "learning learns": 29705, + "finding answer": 20897, + "modeling pairwise": 34608, + "capture higher": 7678, + "model explainability": 33855, + "samples method": 48481, + "selects subset": 49171, + "token free": 57288, + "free models": 21645, + "framework obtains": 21574, + "adaptive pretraining": 1580, + "bridge semantic": 7322, + "span boundaries": 51921, + "english widely": 17903, + "entities finally": 18053, + "finally benchmark": 20840, + "existing shot": 19142, + "predictions methods": 41762, + "based prototypical": 5961, + "growing attention": 23289, + "efforts focus": 16938, + "settings proposed": 50392, + "2021 challenge": 297, + "setting outperforms": 50337, + "texts structured": 56930, + "task challenge": 54947, + "network bert": 36710, + "performance address": 40186, + "precision points": 41614, + "handling multiple": 23426, + "unified way": 59483, + "remains limited": 46338, + "methods existing": 32849, + "design auxiliary": 14264, + "widely reported": 62001, + "content lack": 10534, + "specifically fine": 52202, + "high lexical": 23750, + "model adapting": 33534, + "sentences study": 49790, + "increased complexity": 25430, + "source attention": 51741, + "generates set": 22357, + "resource downstream": 47224, + "annotation approach": 2934, + "propose annotation": 43293, + "based types": 6116, + "models annotation": 34714, + "support systems": 54127, + "answering sentiment": 3095, + "make strong": 31601, + "dataset confirm": 12858, + "output probability": 38994, + "history model": 23968, + "study confirms": 53346, + "computation resources": 9829, + "specifically train": 52230, + "model encourage": 33820, + "hidden space": 23646, + "exhibits superior": 19013, + "superior generalization": 53934, + "architectures techniques": 4125, + "knowledge interaction": 27532, + "need efficient": 36558, + "informative summaries": 26177, + "method 10": 32353, + "score 57": 48795, + "resources pre": 47326, + "opportunities challenges": 38511, + "production settings": 43051, + "model multimodal": 34112, + "explore robustness": 19733, + "plms typically": 41022, + "information exists": 25846, + "human rating": 24223, + "scoring systems": 48939, + "way evaluate": 61800, + "task long": 55192, + "diverse types": 15725, + "subjective objective": 53566, + "generation diverse": 22449, + "articles using": 4482, + "different formats": 14937, + "evaluations recent": 18768, + "datasets effective": 13240, + "qa paper": 44456, + "suggest large": 53822, + "fashion model": 20415, + "sets finally": 50294, + "review state": 48040, + "approaches new": 3884, + "respectively code": 47363, + "representation different": 46503, + "task labels": 55157, + "improves domain": 25125, + "based fact": 5726, + "benefit joint": 6563, + "tasks previously": 55813, + "used shot": 60302, + "complexity input": 9678, + "effective long": 16666, + "hierarchical way": 23698, + "sentence transformer": 49661, + "context finally": 10639, + "strategy learn": 52939, + "uniform prior": 59486, + "driven knowledge": 16424, + "different weight": 15129, + "enhanced text": 17938, + "directed edges": 15266, + "studies usually": 53308, + "degeneration problem": 13799, + "propose local": 43443, + "similarity source": 51121, + "explore semantic": 19735, + "topical coherence": 57439, + "constraints language": 10374, + "available unlabeled": 5384, + "domain speech": 16196, + "available target": 5374, + "resources particular": 47325, + "attention unit": 4842, + "studies provide": 53290, + "output attention": 38965, + "analysis empirical": 2656, + "com declare": 9008, + "declare lab": 13580, + "transformers language": 58525, + "information domains": 25822, + "method reduces": 32636, + "processing bert": 42857, + "features complex": 20543, + "directions including": 15297, + "knowledge question": 27583, + "control level": 10967, + "spurious correlation": 52387, + "train classification": 57572, + "methods fully": 32871, + "settings code": 50359, + "com rucaibox": 9022, + "systems generalize": 54511, + "strategies affect": 52892, + "tweets different": 59014, + "health support": 23520, + "main obstacle": 31449, + "challenge study": 8017, + "sentences according": 49676, + "largely limited": 29058, + "general discourse": 22051, + "respectively demonstrate": 47365, + "methods struggle": 33055, + "models retrieval": 35459, + "methods exist": 32848, + "perform case": 40073, + "etal 2019": 18414, + "data essential": 12328, + "detection question": 14516, + "auxiliary self": 5238, + "original pre": 38723, + "tasks introduced": 55695, + "advance current": 1880, + "models biased": 34785, + "compared language": 9416, + "adopted neural": 1870, + "history current": 23967, + "employ end": 17380, + "sparsity paper": 51981, + "brings challenges": 7339, + "corpus words": 11460, + "different attributes": 14847, + "semantically correlated": 49384, + "methods major": 32938, + "adapting pre": 1568, + "asked identify": 4521, + "preserving high": 42122, + "modules trained": 35775, + "reasoning processes": 45218, + "huge search": 24076, + "generates better": 22338, + "sota baselines": 51725, + "explore trade": 19742, + "semantic cues": 49266, + "scarcity datasets": 48664, + "settings experimental": 50369, + "language contexts": 28007, + "modeling question": 34618, + "memory language": 32257, + "hierarchical variational": 23697, + "samples extensive": 48474, + "evaluations datasets": 18755, + "sentences extracted": 49722, + "models utilized": 35664, + "utilized generate": 61108, + "high costs": 23722, + "document processing": 15822, + "length limit": 30029, + "limited settings": 30614, + "relative distance": 46091, + "model public": 34260, + "set evaluations": 50149, + "predominantly focused": 41785, + "performance report": 40533, + "coherence diversity": 8907, + "high probabilities": 23765, + "based modules": 5886, + "speech domain": 52261, + "accuracy domain": 961, + "popular dataset": 41161, + "score macro": 48857, + "stage train": 52444, + "model including": 33989, + "aims finding": 2194, + "humans make": 24281, + "copy mechanisms": 11134, + "mask mechanism": 31859, + "mechanism pre": 32136, + "limited annotated": 30566, + "efficient outperforms": 16890, + "codes datasets": 8877, + "based unified": 6118, + "exploit pre": 19661, + "model bart": 33597, + "tremendous improvements": 58773, + "consider time": 10224, + "model defines": 33743, + "representations identify": 46683, + "compared humans": 9415, + "context helps": 10652, + "field artificial": 20750, + "plays essential": 40995, + "dynamic attention": 16482, + "used transformer": 60341, + "understand relative": 59310, + "noise injection": 37598, + "art character": 4231, + "methods commonly": 32790, + "corpus showed": 11428, + "miss important": 33357, + "application scenario": 3178, + "directly compared": 15311, + "information ignored": 25907, + "dataset benchmark": 12825, + "tables wikipedia": 54692, + "finally develop": 20851, + "develop baseline": 14576, + "text tables": 56802, + "text references": 56729, + "proposed overcome": 43874, + "bias variance": 7047, + "survey present": 54211, + "new taxonomy": 37337, + "challenge open": 8002, + "simply changing": 51248, + "metrics assessing": 33138, + "layer stacked": 29208, + "reflect real": 45774, + "different vocabulary": 15126, + "labels word": 27860, + "program synthesis": 43077, + "challenging set": 8139, + "similar semantics": 51064, + "models xlnet": 35690, + "context low": 10670, + "weighting mechanism": 61933, + "prompt based": 43198, + "underlying reasoning": 59274, + "task facilitate": 55077, + "explored problem": 19763, + "intellectual property": 26533, + "used analyzing": 60087, + "probe language": 42486, + "build baseline": 7387, + "model 12": 33482, + "including support": 25305, + "paper fills": 39373, + "generation challenge": 22431, + "uses generative": 60511, + "generation unlike": 22573, + "quality predictions": 44564, + "sets documents": 50289, + "success language": 53703, + "gradient information": 23008, + "response prediction": 47397, + "cascaded model": 7784, + "attribution scores": 4917, + "makes inference": 31626, + "processes dpps": 42845, + "generation strategies": 22553, + "compare algorithms": 9327, + "articles specifically": 4479, + "modeling code": 34565, + "positive pairs": 41292, + "single utterance": 51355, + "level scores": 30199, + "level local": 30152, + "embeddings evaluate": 17128, + "representation form": 46520, + "limited performance": 30604, + "leads high": 29313, + "strategies present": 52913, + "dataset f1": 12924, + "creating training": 11746, + "bi encoder": 7005, + "cross encoder": 11820, + "datasets shot": 13421, + "data augmentations": 12168, + "augmented versions": 4985, + "set experimental": 50152, + "highly rely": 23912, + "rely explicit": 46279, + "explicit text": 19628, + "models modest": 35232, + "sentence discourse": 49542, + "wide adoption": 61956, + "training jointly": 58139, + "researchers proposed": 47165, + "capabilities model": 7602, + "restricted limited": 47423, + "knowledge selection": 27604, + "strategies experimental": 52901, + "describes development": 14223, + "generated social": 22318, + "annotated sentiment": 2916, + "experiments establish": 19429, + "propose embedding": 43364, + "performance entity": 40321, + "type propose": 59065, + "using interpretable": 60740, + "using rules": 60915, + "transfer datasets": 58357, + "language benchmark": 27975, + "achieve considerable": 1129, + "relation specific": 45997, + "overlapping problem": 39092, + "conduct exhaustive": 10040, + "exhaustive experiments": 18998, + "layers order": 29231, + "set improve": 50167, + "examples based": 18890, + "better metrics": 6918, + "metrics compared": 33151, + "provide automatic": 44011, + "representations graph": 46679, + "representation fed": 46517, + "analysis demonstrating": 2648, + "optimization objectives": 38552, + "robustness methods": 48285, + "encourage models": 17597, + "good generalization": 22933, + "accuracy explainability": 974, + "contrastive training": 10923, + "existing labeled": 19080, + "design contrastive": 14270, + "iterative manner": 27124, + "parameters large": 39705, + "data centric": 12201, + "analysis discourse": 2652, + "mainly relies": 31476, + "output graph": 38976, + "sentence types": 49664, + "method develop": 32461, + "punctuation restoration": 44389, + "experiments joint": 19449, + "text transcripts": 56820, + "heavily used": 23537, + "performance analyze": 40193, + "increasing scale": 25463, + "critical challenges": 11779, + "knowledge mining": 27551, + "model selected": 34344, + "robust baseline": 48241, + "labeled domain": 27757, + "models active": 34687, + "techniques automated": 56062, + "analyze ability": 2805, + "systems social": 54635, + "generation empirical": 22451, + "high fidelity": 23737, + "answer using": 3058, + "framework existing": 21511, + "terms task": 56316, + "work bridge": 62590, + "terms experiments": 56288, + "studied work": 53240, + "performance increasing": 40390, + "confirm hypothesis": 10131, + "satisfying performance": 48530, + "study nlp": 53420, + "general specific": 22092, + "challenges encountered": 8042, + "construct robust": 10401, + "documents present": 15902, + "scores various": 48929, + "method performance": 32611, + "contains text": 10505, + "large standard": 29018, + "temporal context": 56183, + "set guidelines": 50162, + "data setting": 12653, + "research landscape": 47062, + "counterfactual data": 11617, + "rely supervised": 46302, + "training supervised": 58279, + "studies explore": 53263, + "need novel": 36586, + "models properly": 35368, + "models fewer": 35023, + "performances wide": 40653, + "task combining": 54956, + "enable researchers": 17429, + "define multi": 13775, + "compare english": 9340, + "multiple classification": 36184, + "languages diverse": 28644, + "inform future": 25738, + "using higher": 60729, + "propose bi": 43314, + "diversity experiments": 15733, + "ood performance": 38403, + "according context": 855, + "making work": 31675, + "invariant information": 26919, + "aimed improve": 2165, + "text discrete": 56539, + "processing sentence": 42937, + "performance transformers": 40611, + "attention score": 4825, + "decoding method": 13633, + "task unlike": 55456, + "employed various": 17397, + "cases text": 7815, + "addition analysis": 1599, + "annotation toolkit": 2977, + "information solve": 26093, + "approach handle": 3551, + "including audio": 25240, + "information modalities": 25974, + "asian translation": 4515, + "previous solutions": 42278, + "loss experimental": 31086, + "participants required": 39814, + "contextualized token": 10811, + "graph finally": 23138, + "text topic": 56815, + "models llms": 35195, + "recognize words": 45552, + "joint information": 27173, + "corresponding entity": 11551, + "probabilistic distribution": 42457, + "10 30": 32, + "studied research": 53235, + "decision based": 13559, + "robust multi": 48257, + "years end": 63057, + "improved ability": 24943, + "making decisions": 31650, + "problem need": 42615, + "set obtained": 50206, + "resources tools": 47335, + "varying sizes": 61435, + "data splits": 12690, + "data hope": 12407, + "survey help": 54208, + "language application": 27963, + "different ones": 15012, + "annotations using": 3006, + "task considered": 54970, + "samples available": 48464, + "propose tackle": 43657, + "necessary training": 36535, + "achieved performance": 1257, + "np hard": 37966, + "evolutionary algorithm": 18835, + "identification language": 24389, + "tokens order": 57330, + "systems small": 54634, + "various speech": 61394, + "based logistic": 5819, + "network transformer": 36818, + "data examine": 12333, + "shown experiments": 50708, + "strongly influenced": 53071, + "used propose": 60277, + "balanced training": 5516, + "proved challenging": 43985, + "generate consistent": 22187, + "content introduce": 10533, + "tools allow": 57375, + "manner previous": 31723, + "specifically learn": 52212, + "require specialized": 46888, + "set parallel": 50213, + "order explore": 38617, + "high computation": 23712, + "provides highly": 44203, + "shot prompting": 50636, + "research real": 47109, + "models noisy": 35266, + "footnote https": 21280, + "results make": 47712, + "questions present": 44799, + "struggle tasks": 53202, + "reasoning work": 45233, + "examples require": 18928, + "model t5": 34436, + "shown produce": 50739, + "task leaderboard": 55165, + "domain benchmarks": 16025, + "robustness neural": 48288, + "utilizes pre": 61116, + "improvement test": 25033, + "systems spoken": 54637, + "documents improve": 15886, + "benchmark chinese": 6431, + "typing model": 59162, + "strategies based": 52893, + "existing graph": 19074, + "text paired": 56685, + "datasets typically": 13465, + "contain small": 10474, + "dataset tasks": 13114, + "observations motivate": 38127, + "problem efficiently": 42548, + "preceding context": 41607, + "entire process": 18026, + "solutions problems": 51670, + "new work": 37363, + "tasks far": 55638, + "gains different": 21936, + "switched text": 54258, + "different code": 14866, + "practitioners use": 41493, + "method adversarial": 32374, + "propose confidence": 43329, + "manual process": 31749, + "conducted comprehensive": 10077, + "texts make": 56902, + "metrics commonly": 33149, + "aims translate": 2220, + "significantly surpasses": 51015, + "shown efficient": 50704, + "considered difficult": 10246, + "strategies data": 52897, + "leads gains": 29312, + "cross encoders": 11821, + "explanatory power": 19609, + "framework fine": 21523, + "new classes": 37151, + "20 newsgroups": 229, + "health crisis": 23514, + "topics based": 57444, + "significantly increase": 50982, + "loss compared": 31084, + "compared pre": 9431, + "good predictive": 22938, + "process various": 42839, + "multilingual xlm": 36135, + "baselines consider": 6245, + "2019 data": 281, + "beneficial task": 6556, + "local structure": 30950, + "text build": 56460, + "makes impossible": 31625, + "generative method": 22595, + "approach exhibits": 3523, + "settings experiments": 50371, + "information granularity": 25897, + "performance quality": 40515, + "depending sentence": 14157, + "lm training": 30914, + "error analyses": 18212, + "architecture incorporates": 4052, + "proprietary datasets": 43952, + "denoising objective": 14067, + "tuning propose": 58951, + "especially small": 18300, + "use twitter": 60063, + "model 30": 33484, + "datasets help": 13288, + "help nlp": 23581, + "community develop": 9262, + "effectiveness combining": 16770, + "comments written": 9149, + "performance fact": 40343, + "traditional ml": 57531, + "time task": 57229, + "hard learn": 23445, + "inference cost": 25647, + "best ensemble": 6761, + "study reports": 53453, + "efforts improve": 16940, + "common human": 9179, + "societal impact": 51608, + "different random": 15042, + "approaches improved": 3844, + "modules semantic": 35774, + "attributes text": 4911, + "level attributes": 30068, + "quality code": 44498, + "objective used": 38107, + "data pretraining": 12559, + "require fine": 46856, + "trending topic": 58779, + "reverse order": 48023, + "model vulnerable": 34531, + "bottleneck layers": 7277, + "adapter layers": 1558, + "network combines": 36723, + "nlp resources": 37524, + "models published": 35388, + "lingual datasets": 30697, + "task tweet": 55451, + "sentiment detection": 49841, + "restricted english": 47422, + "cost inference": 11586, + "paper applies": 39268, + "t5 base": 54678, + "selecting small": 49130, + "employing pre": 17400, + "increasing concern": 25448, + "settings shows": 50397, + "languages hope": 28688, + "hope findings": 24008, + "efficient sequence": 16899, + "dataset average": 12821, + "control important": 10965, + "framework helps": 21533, + "information variety": 26153, + "combined text": 9086, + "financial documents": 20890, + "number ways": 38054, + "embeddings output": 17185, + "goal provide": 22899, + "evaluation techniques": 18737, + "fundamental important": 21780, + "ranked systems": 44959, + "loop approach": 31074, + "huggingface datasets": 24085, + "models focusing": 35039, + "focusing specific": 21247, + "time dimension": 57146, + "released url": 46184, + "reducing model": 45710, + "information massive": 25967, + "massive labeled": 31886, + "work time": 62843, + "results pre": 47770, + "multiple research": 36276, + "task conducted": 54968, + "use annotation": 59822, + "depth qualitative": 14188, + "model generations": 33936, + "tasks exhibit": 55624, + "provided models": 44166, + "model lower": 34078, + "inconsistent predictions": 25340, + "tasks pertaining": 55798, + "classified according": 8586, + "bayes support": 6354, + "limited attention": 30570, + "demonstrate generated": 13917, + "task 2021": 54870, + "information helpful": 25901, + "design encoder": 14278, + "models increasing": 35125, + "achieve optimal": 1176, + "target knowledge": 54820, + "tool available": 57358, + "available literature": 5323, + "uniformly distributed": 59489, + "opinions various": 38508, + "12 relative": 110, + "bert obtain": 6695, + "text time": 56813, + "idf word2vec": 24479, + "ensure model": 17989, + "leads comparable": 29308, + "previous supervised": 42294, + "data generates": 12385, + "used pretrained": 60269, + "grained manner": 23041, + "functions propose": 21773, + "annotating data": 2931, + "measuring inter": 32085, + "detailed statistics": 14429, + "analyses results": 2603, + "produce sentence": 43007, + "attention proposed": 4815, + "integration external": 26528, + "multiple online": 36258, + "media challenging": 32160, + "baselines detecting": 6252, + "attention years": 4855, + "information similar": 26088, + "model boost": 33633, + "boost classification": 7252, + "ranks 2nd": 44982, + "fined tuned": 21035, + "metrics outperform": 33185, + "knowledge researchers": 27596, + "studied machine": 53229, + "task suffers": 55423, + "bert framework": 6663, + "source platform": 51789, + "networks furthermore": 36856, + "used systems": 60321, + "different quality": 15040, + "furthermore models": 21830, + "datasets provides": 13387, + "research investigate": 47058, + "tuning existing": 58911, + "datasets twitter": 13464, + "learning social": 29883, + "work collect": 62598, + "attention entity": 4743, + "obtain higher": 38177, + "sequence pre": 49966, + "models transferred": 35628, + "information accurately": 25750, + "bert novel": 6694, + "kg based": 27358, + "problem determining": 42535, + "baselines benchmark": 6239, + "lead suboptimal": 29276, + "purpose introduce": 44402, + "labels finally": 27826, + "corpus relevant": 11419, + "metrics able": 33133, + "able match": 706, + "embedding using": 17069, + "language survey": 28514, + "dataset extract": 12922, + "results code": 47535, + "methods retrieve": 33024, + "classification setup": 8549, + "methods assign": 32756, + "way interaction": 61813, + "tasks properly": 55817, + "approaches addition": 3756, + "results common": 47539, + "perturbation methods": 40791, + "small changes": 51465, + "task inference": 55136, + "effort invested": 16927, + "simple structure": 51215, + "data unavailable": 12750, + "benchmark code": 6432, + "framework achieve": 21447, + "proposed span": 43900, + "new emerging": 37182, + "alleviate discrepancy": 2405, + "task popular": 55277, + "matching method": 31914, + "intelligence tasks": 26540, + "small language": 51479, + "better shot": 6965, + "solely trained": 51646, + "produce pseudo": 43001, + "pattern exploiting": 39960, + "exploiting training": 19676, + "training pet": 58207, + "task enable": 55043, + "process providing": 42822, + "providing valuable": 44255, + "model intrinsic": 34017, + "time approaches": 57118, + "produce promising": 42999, + "construct release": 10400, + "currently popular": 12037, + "systems prone": 54603, + "data construction": 12247, + "dataset widely": 13136, + "competitive non": 9552, + "way make": 61818, + "dense passage": 14077, + "corpus standard": 11437, + "learning encode": 29619, + "denoising training": 14068, + "benchmark using": 6503, + "experiments performance": 19486, + "languages total": 28806, + "batch negatives": 6340, + "text relation": 56732, + "performed benchmark": 40657, + "verbal communication": 61513, + "modelling approach": 34641, + "critical review": 11790, + "exploring different": 19777, + "present summary": 42030, + "uses machine": 60520, + "guide selection": 23341, + "leverages power": 30308, + "robust nlp": 48258, + "tasks employ": 55609, + "new fine": 37206, + "different label": 14961, + "transfer domain": 58359, + "industry recent": 25621, + "users existing": 60462, + "use contrastive": 59854, + "knn classifier": 27383, + "propose heterogeneous": 43408, + "model plm": 34210, + "text including": 56626, + "knowledge topic": 27631, + "outperforms pre": 38918, + "benchmarks designed": 6517, + "training develop": 58062, + "decisions paper": 13575, + "space code": 51851, + "adaptation text": 1544, + "metrics furthermore": 33168, + "human quality": 24219, + "construction framework": 10425, + "based labeled": 5800, + "data flow": 12367, + "approach publicly": 3660, + "processing lack": 42880, + "related corpora": 45891, + "cases data": 7805, + "measure effect": 32050, + "mitigate effect": 33382, + "alleviate catastrophic": 2399, + "resources work": 47341, + "classification errors": 8462, + "score 30": 48788, + "shows data": 50772, + "domains specifically": 16293, + "problem class": 42519, + "tuning framework": 58915, + "objective pre": 38099, + "examples conduct": 18893, + "studies analyze": 53245, + "modeling results": 34620, + "gpt neo": 22985, + "examples specifically": 18933, + "achieves 80": 1292, + "processing typically": 42961, + "largely reduce": 29062, + "empirically proposed": 17366, + "method naturally": 32587, + "implementation work": 24644, + "usually depend": 61045, + "issues exist": 27089, + "based generated": 5747, + "data modeling": 12492, + "introduce adversarial": 26778, + "based sampling": 6005, + "robustness study": 48295, + "text table": 56801, + "additional techniques": 1704, + "consider text": 10222, + "curate new": 11945, + "significant overlap": 50901, + "assume gold": 4634, + "extraction component": 20054, + "researchers propose": 47164, + "11 indic": 87, + "answering framework": 3074, + "corpus specific": 11434, + "focuses extracting": 21238, + "sources target": 51840, + "improvements automatic": 25047, + "explicitly handle": 19635, + "node embeddings": 37585, + "embeddings key": 17155, + "texts specifically": 56929, + "achieve outstanding": 1177, + "attention training": 4839, + "improvement single": 25024, + "unlikelihood training": 59613, + "contrastive representation": 10919, + "stage propose": 52443, + "data annotations": 12138, + "linearly interpolating": 30684, + "improve faithfulness": 24854, + "pipeline composed": 40896, + "components demonstrate": 9717, + "training downstream": 58075, + "token dependencies": 57284, + "models spanish": 35523, + "domain pretraining": 16137, + "transfer approaches": 58352, + "higher performances": 23836, + "mechanism enhance": 32113, + "involves multiple": 27020, + "issues model": 27094, + "strategies utilize": 52921, + "theoretic perspective": 57016, + "framework measure": 21561, + "steer generation": 52789, + "memory efficiency": 32253, + "adequately evaluate": 1835, + "receiving attention": 45272, + "models seq2seq": 35483, + "dialogue work": 14796, + "substantially faster": 53634, + "underlying assumption": 59262, + "robust features": 48248, + "problem error": 42551, + "unified knowledge": 59473, + "overall better": 39036, + "codes released": 8881, + "concerns paper": 9962, + "quality source": 44581, + "cases propose": 7812, + "missing tokens": 33364, + "performance research": 40535, + "simply using": 51254, + "model ensure": 33829, + "14 dataset": 137, + "study key": 53401, + "benchmark different": 6463, + "better competing": 6867, + "generative qa": 22607, + "existing zero": 19178, + "unseen domains": 59647, + "detection accuracies": 14454, + "critical effective": 11781, + "knowledge relevant": 27590, + "training expensive": 58097, + "data ignoring": 12413, + "results leveraging": 47699, + "able create": 685, + "limited studies": 30619, + "reasons effectiveness": 45235, + "evaluated datasets": 18528, + "models example": 34982, + "systems central": 54448, + "special emphasis": 52018, + "efficient fine": 16873, + "task adapters": 54881, + "extraction ee": 20059, + "dependencies entities": 14106, + "exploit label": 19658, + "incorporated pre": 25370, + "document experiments": 15793, + "datasets ranging": 13391, + "baselines shot": 6298, + "lay foundation": 29176, + "propose evaluating": 43376, + "annotations enable": 2988, + "bias different": 7024, + "select data": 49103, + "label embedding": 27707, + "train multiple": 57615, + "capturing information": 7735, + "coherence aware": 8904, + "challenging long": 8110, + "lack effective": 27887, + "learning design": 29587, + "settings respectively": 50395, + "efficiency quality": 16854, + "guiding model": 23357, + "aware contrastive": 5445, + "level use": 30229, + "models memorize": 35221, + "contextual sentences": 10782, + "models ptms": 35384, + "specific decoders": 52068, + "provide critical": 44044, + "deductive reasoning": 13678, + "level english": 30110, + "paper point": 39441, + "tuning plms": 58942, + "strategy proposed": 52948, + "languages improved": 28692, + "readable format": 45072, + "span pair": 51928, + "results enhanced": 47612, + "lack multilingual": 27903, + "remains low": 46339, + "increasing need": 25456, + "phrases context": 40849, + "retaining performance": 47927, + "baselines cross": 6246, + "based measure": 5839, + "results analyses": 47498, + "available transformer": 5382, + "trained noisy": 57833, + "documents complex": 15864, + "generation study": 22555, + "implications findings": 24655, + "fusion based": 21853, + "additionally build": 1714, + "space address": 51848, + "propose potential": 43585, + "semantic connection": 49255, + "art comparable": 4237, + "deployment production": 14177, + "language multi": 28351, + "validate findings": 61178, + "propose controllable": 43339, + "generation significantly": 22548, + "content diversity": 10521, + "novel adaptive": 37749, + "directly output": 15329, + "increasingly focused": 25472, + "tuning tasks": 58967, + "interpretable method": 26724, + "corpus investigate": 11364, + "context sensitivity": 10712, + "characteristics make": 8239, + "t5 bart": 54677, + "dramatically reduce": 16389, + "semantic errors": 49276, + "largely focus": 29054, + "focus monolingual": 21184, + "lms used": 30925, + "prompt templates": 43208, + "ability learning": 620, + "text argue": 56440, + "regression random": 45817, + "findings reveal": 20915, + "task effectively": 55037, + "main factors": 31436, + "knowledge apply": 27396, + "task transformer": 55448, + "nn based": 37579, + "longer input": 31051, + "outputs approach": 39012, + "approach computationally": 3462, + "based meaning": 5838, + "benchmark compared": 6435, + "non semantic": 37681, + "encode sentences": 17470, + "extremely small": 20167, + "lm generate": 30908, + "effectively optimize": 16752, + "specific tokens": 52160, + "study ways": 53475, + "smaller faster": 51518, + "high stakes": 23803, + "based gaussian": 5745, + "effectively alleviates": 16723, + "practical recommendations": 41468, + "based svm": 6075, + "annotated evaluation": 2894, + "generate multiple": 22220, + "models avoid": 34750, + "t5 based": 54679, + "understanding behavior": 59324, + "research benchmark": 46993, + "performance verify": 40626, + "hurting performance": 24306, + "benchmark sentiment": 6491, + "key parts": 27324, + "models surpassed": 35569, + "work shed": 62818, + "dense retriever": 14081, + "labeling problems": 27790, + "explore domain": 19702, + "importance domain": 24682, + "uses contrastive": 60500, + "particular method": 39853, + "trip translation": 58801, + "results complex": 47555, + "shown able": 50693, + "serve valuable": 50085, + "types sentences": 59117, + "inference present": 25682, + "way obtaining": 61824, + "similar data": 51035, + "experiments applied": 19352, + "relevant language": 46222, + "information called": 25772, + "based multitask": 5894, + "introduces simple": 26896, + "applied widely": 3313, + "available textual": 5378, + "datasets finding": 13274, + "context tackle": 10728, + "using shot": 60938, + "method datasets": 32451, + "time intervals": 57167, + "domain social": 16162, + "compared commonly": 9393, + "char level": 8193, + "level bert": 30070, + "neighbor retrieval": 36658, + "based heuristic": 5767, + "arabic chinese": 3994, + "augmentation improves": 4957, + "problem real": 42641, + "lightweight effective": 30457, + "key finding": 27313, + "exploit structural": 19664, + "includes subtasks": 25234, + "tuning achieves": 58898, + "train accurate": 57563, + "processing complex": 42859, + "research carried": 46997, + "great improvement": 23207, + "result showed": 47450, + "multilingual fine": 36085, + "performance additional": 40184, + "potential benefit": 41383, + "improving interpretability": 25183, + "gap compared": 21958, + "attack text": 4661, + "improvements prior": 25095, + "models sample": 35468, + "performance sota": 40568, + "examine large": 18865, + "causal analysis": 7868, + "bias existing": 7027, + "contributions different": 10952, + "method augmenting": 32391, + "contain lot": 10465, + "current results": 12005, + "results long": 47703, + "individual instances": 25569, + "set carefully": 50119, + "conversational semantic": 11050, + "input different": 26265, + "light recent": 30453, + "strategies increase": 52908, + "data design": 12274, + "triple level": 58803, + "gnn based": 22873, + "languages developed": 28639, + "lingual experiments": 30702, + "problems text": 42734, + "proposed simple": 43896, + "method entity": 32486, + "problem build": 42513, + "data significant": 12661, + "published work": 44375, + "effective handling": 16657, + "conduct automatic": 10028, + "level results": 30195, + "benchmark recently": 6489, + "text answers": 56435, + "information prediction": 26017, + "features leads": 20613, + "model resolve": 34315, + "input specific": 26341, + "tokens source": 57337, + "handle diverse": 23408, + "work assess": 62575, + "drawn increasing": 16410, + "text alignments": 56428, + "parameter model": 39673, + "researchers explored": 47156, + "bilstm cnn": 7129, + "relationships multiple": 46080, + "paper follow": 39382, + "means communication": 32040, + "task teams": 55431, + "baselines best": 6242, + "multiple candidates": 36177, + "context guided": 10650, + "pairwise comparison": 39237, + "bi encoders": 7007, + "model termed": 34450, + "encoder cross": 17494, + "model optimized": 34149, + "focused single": 21229, + "metrics better": 33143, + "impact classification": 24590, + "questions annotated": 44769, + "new protocol": 37290, + "accuracy current": 953, + "domain open": 16125, + "models enabled": 34959, + "tasks systematically": 55923, + "provide consistent": 44042, + "models prompt": 35366, + "predict class": 41636, + "transferred languages": 58434, + "million english": 33252, + "accuracy simple": 1048, + "text perturbation": 56698, + "varying degree": 61429, + "words standard": 62521, + "models 10": 34648, + "score 52": 48794, + "train release": 57626, + "input user": 26355, + "uses existing": 60509, + "tweets social": 59022, + "set terms": 50260, + "vocabulary terms": 61715, + "potential models": 41401, + "pretraining downstream": 42201, + "latest state": 29158, + "establish benchmark": 18343, + "data inference": 12429, + "achieves 97": 1299, + "application models": 3169, + "optimization finally": 38547, + "survey provide": 54213, + "systematically categorize": 54409, + "scale using": 48639, + "text key": 56635, + "analyses based": 2589, + "inference module": 25672, + "reference text": 45748, + "setting open": 50336, + "poses challenging": 41246, + "evaluate baselines": 18443, + "reader models": 45074, + "linearly combines": 30683, + "based frequency": 5741, + "distribution divergence": 15637, + "mask language": 31857, + "training free": 58110, + "create synthetic": 11717, + "modular design": 35744, + "task resulting": 55345, + "label associated": 27691, + "open book": 38411, + "order guarantee": 38623, + "parameters maintaining": 39707, + "original performance": 38722, + "comprehensive literature": 9794, + "networks designed": 36843, + "learning module": 29766, + "propose asr": 43300, + "fully understanding": 21747, + "solves problem": 51699, + "domain train": 16213, + "manual data": 31736, + "mentions given": 32306, + "retrieval reading": 47966, + "demonstrate surprising": 13989, + "aims enhance": 2190, + "explores impact": 19772, + "unlabeled texts": 59584, + "methods treat": 33085, + "following training": 21272, + "control number": 10969, + "severe performance": 50425, + "semantic associations": 49238, + "learning pipelines": 29806, + "parameters deep": 39689, + "increasingly challenging": 25469, + "resource regimes": 47265, + "datasets distinct": 13233, + "indicate training": 25536, + "complementary tasks": 9592, + "adaptation da": 1521, + "report aim": 46425, + "sentence spans": 49650, + "account contextual": 878, + "way approach": 61793, + "capture task": 7716, + "important end": 24721, + "multiple input": 36228, + "analyses proposed": 2601, + "simultaneously paper": 51274, + "paper forward": 39385, + "strategy significantly": 52951, + "parameters extensive": 39698, + "performing task": 40690, + "reasoning long": 45200, + "yields performance": 63127, + "set does": 50139, + "learned small": 29481, + "explore useful": 19749, + "iteratively perform": 27132, + "labels self": 27848, + "human rationales": 24225, + "relatedness datasets": 45958, + "grained scores": 23043, + "sentences semantically": 49783, + "representation various": 46602, + "based paper": 5932, + "especially recent": 18296, + "limitations future": 30549, + "earlier approaches": 16506, + "ability large": 617, + "work aimed": 62563, + "measure impact": 32054, + "introduce approaches": 26780, + "model alignment": 33559, + "socio cultural": 51612, + "common approaches": 9165, + "estimate uncertainty": 18375, + "used ai": 60084, + "kind text": 27368, + "present transformer": 42045, + "single transformer": 51351, + "dataset construct": 12862, + "datasets capture": 13170, + "evaluation challenging": 18588, + "classifiers make": 8618, + "previously predicted": 42339, + "domains domain": 16248, + "using prompt": 60875, + "number domain": 37997, + "fully fine": 21732, + "decoding stage": 13645, + "continuous prompts": 10849, + "performance dramatically": 40308, + "different varieties": 15122, + "relied heavily": 46261, + "multimodal fusion": 36149, + "techniques achieving": 56055, + "reasoning methods": 45203, + "entities query": 18076, + "lexical matching": 30371, + "trained imitate": 57750, + "despite small": 14392, + "practical value": 41479, + "increase available": 25405, + "style sentence": 53497, + "sentence preserving": 49623, + "design adversarial": 14261, + "generate complete": 22184, + "external corpora": 19930, + "embeddings documents": 17115, + "causal commonsense": 7869, + "lightweight models": 30459, + "dialogue summarization": 14787, + "performance important": 40380, + "hybrid data": 24313, + "task structured": 55413, + "performance inference": 40393, + "technique training": 56048, + "development cycle": 14671, + "compare recent": 9362, + "dataset 10k": 12790, + "contain important": 10462, + "order use": 38659, + "specific contextual": 52062, + "work pre": 62751, + "dataset features": 12928, + "latent document": 29123, + "potentially improve": 41414, + "techniques generating": 56092, + "semantic diversity": 49271, + "retrieve generate": 47977, + "technique create": 56030, + "right reasons": 48141, + "improvement present": 25016, + "hierarchical knowledge": 23673, + "researchers focused": 47159, + "annotated spans": 2917, + "embedding contextual": 17022, + "training train": 58302, + "mention context": 32296, + "languages unseen": 28813, + "methods induce": 32903, + "topics user": 57463, + "tasks train": 55937, + "training mitigate": 58176, + "present literature": 41938, + "empirical survey": 17353, + "leverages recent": 30312, + "direct approach": 15253, + "generation understanding": 22572, + "covers different": 11664, + "focusing different": 21246, + "keywords used": 27356, + "strategies make": 52909, + "framework human": 21535, + "impact noise": 24602, + "outperforms matches": 38907, + "consists training": 10335, + "suffer spurious": 53783, + "poor domain": 41133, + "paradigm propose": 39628, + "specific loss": 52108, + "classification structured": 8557, + "propose textbf": 43668, + "lm architecture": 30904, + "architecture introduce": 4055, + "study unsupervised": 53469, + "learning solution": 29884, + "bypassing need": 7509, + "data comprehensive": 12228, + "information facilitate": 25870, + "generate additional": 22175, + "demonstrated superior": 14023, + "problems remain": 42728, + "performance multimodal": 40449, + "engineering efforts": 17768, + "supervision data": 54080, + "applied zero": 3314, + "wrong predictions": 63016, + "novel entities": 37817, + "documents existing": 15876, + "provide weak": 44155, + "given models": 22761, + "template filling": 56176, + "predict model": 41646, + "nature models": 36483, + "variants bert": 61235, + "interpretable machine": 26721, + "models outputs": 35294, + "bias english": 7025, + "available building": 5268, + "trained universal": 57905, + "present important": 41927, + "problem simple": 42656, + "boost overall": 7255, + "voting ensemble": 61742, + "visually rich": 61688, + "human ai": 24093, + "power deep": 41424, + "specific scenarios": 52142, + "adopt self": 1867, + "effective especially": 16649, + "input second": 26328, + "autoregressive non": 5223, + "favorable performance": 20454, + "span sentence": 51932, + "various strong": 61399, + "faster lighter": 20440, + "existing sota": 19145, + "methods select": 33028, + "lot manual": 31115, + "topic article": 57390, + "articles approach": 4463, + "introduce high": 26812, + "analysis shared": 2753, + "legal nlp": 30007, + "contains examples": 10494, + "tune transformer": 58865, + "obtaining f1": 38233, + "tasks naturally": 55761, + "different experiments": 14927, + "modeling widely": 34636, + "furthermore methods": 21828, + "domain scientific": 16151, + "recognition synthesis": 45540, + "experiments varying": 19558, + "baselines quality": 6293, + "extraction sequence": 20110, + "learning rules": 29855, + "impressive improvements": 24810, + "interaction text": 26613, + "remained unexplored": 46321, + "explicitly incorporating": 19638, + "especially challenging": 18265, + "classify texts": 8633, + "complementary techniques": 9593, + "improvement 12": 24978, + "framework including": 21542, + "terms classification": 56275, + "learning analysis": 29515, + "metropolis hastings": 33211, + "generating multiple": 22382, + "bert natural": 6692, + "experience end": 19226, + "needed paper": 36603, + "tool help": 57363, + "prediction benchmark": 41697, + "detect lexical": 14440, + "extracted texts": 20023, + "prediction aims": 41692, + "study investigated": 53398, + "annotated human": 2900, + "conclusions study": 9979, + "area work": 4150, + "languages observed": 28741, + "pooling layers": 41126, + "base classifiers": 5540, + "demonstrated human": 14009, + "evaluation case": 18587, + "proposed enable": 43761, + "achieved 1st": 1214, + "content trained": 10565, + "independent datasets": 25496, + "deployment real": 14178, + "frameworks proposed": 21632, + "comprehensively study": 9806, + "extremely high": 20158, + "techniques fail": 56087, + "structured sparsity": 53176, + "policy gradients": 41097, + "domain despite": 16046, + "embeddings close": 17094, + "challenge researchers": 8014, + "inherent challenges": 26201, + "limited dataset": 30579, + "performance applying": 40195, + "practical implications": 41464, + "additional research": 1696, + "aiming extract": 2168, + "extract text": 19998, + "usually small": 61068, + "training english": 58087, + "develop natural": 14602, + "sentences identify": 49735, + "propose combination": 43322, + "better address": 6846, + "positive rate": 41293, + "preprocessing methods": 41827, + "accuracy computational": 951, + "collected real": 8966, + "corpus persian": 11403, + "evaluations present": 18765, + "improvements data": 25067, + "requires lot": 46940, + "carefully crafted": 7760, + "parsing data": 39775, + "score 33": 48789, + "humans task": 24288, + "corpus code": 11293, + "models expected": 34990, + "learning enabled": 29617, + "performance ability": 40176, + "knowledge response": 27599, + "context produce": 10694, + "models conversational": 34864, + "lower human": 31211, + "transformers models": 58527, + "conduct zero": 10070, + "specific settings": 52147, + "massive datasets": 31885, + "domains furthermore": 16257, + "equivalent performance": 18202, + "representative data": 46796, + "vector use": 61471, + "work researchers": 62809, + "intrinsic quality": 26773, + "pretrain models": 42145, + "models subtasks": 35550, + "provide platform": 44112, + "release novel": 46161, + "hope release": 24012, + "groups using": 23283, + "models try": 35635, + "modal contrastive": 33454, + "visual knowledge": 61658, + "following success": 21270, + "strategies evaluated": 52900, + "works evaluate": 62886, + "thoroughly investigate": 57071, + "existing task": 19154, + "transformer structure": 58510, + "achieves 58": 1289, + "effectively leverages": 16748, + "improved data": 24947, + "bert classifiers": 6636, + "truth datasets": 58837, + "emphasize need": 17312, + "aware learning": 5456, + "problems current": 42698, + "documents user": 15925, + "train paper": 57622, + "field lacks": 20757, + "lacks systematic": 27931, + "techniques effective": 56080, + "having high": 23487, + "sentence different": 49541, + "similarity comparison": 51089, + "sentences finally": 49724, + "semantic correlations": 49263, + "retrieval aims": 47939, + "global interactions": 22832, + "new graph": 37215, + "reward based": 48066, + "use applications": 59824, + "studied effect": 53223, + "newly designed": 37375, + "results lack": 47689, + "available experimental": 5290, + "encoder input": 17518, + "positive pair": 41291, + "process method": 42804, + "runtime performance": 48410, + "language l1": 28127, + "factors related": 20314, + "suitable data": 53855, + "resource task": 47281, + "investigate strategies": 26988, + "knowledge gap": 27488, + "great practical": 23211, + "specific focus": 52087, + "noisy web": 37630, + "finally best": 20841, + "results exhibit": 47622, + "pairs contrastive": 39175, + "documents knowledge": 15890, + "diverse expressions": 15703, + "evaluation pipeline": 18673, + "accuracy 30": 897, + "providing accurate": 44235, + "autoregressive fashion": 5214, + "answer propose": 3045, + "implementation details": 24640, + "research identifying": 47051, + "primarily focuses": 42364, + "separate training": 49879, + "unified encoder": 59468, + "models success": 35551, + "dataset published": 13050, + "dataset accuracy": 12796, + "learns transform": 29977, + "25 000": 333, + "description corpus": 14241, + "aryan language": 4508, + "develop annotated": 14570, + "task generation": 55106, + "purpose models": 44406, + "designed novel": 14327, + "multi dialect": 35950, + "score best": 48837, + "complex semantics": 9659, + "capabilities large": 7599, + "human values": 24256, + "shot knowledge": 50621, + "demonstrate fine": 13912, + "challenging limited": 8108, + "validate models": 61182, + "public available": 44305, + "strategy select": 52950, + "strategy achieving": 52926, + "languages conduct": 28622, + "translated parallel": 58556, + "text translated": 56824, + "speech task": 52306, + "task widely": 55470, + "semeval 2022": 49440, + "2022 task": 305, + "binary class": 7143, + "develop benchmark": 14577, + "achieved 86": 1215, + "despite promising": 14378, + "benchmark natural": 6481, + "statistical analyses": 52734, + "including multiple": 25277, + "highly consistent": 23888, + "cl methods": 8379, + "improve end": 24849, + "make best": 31542, + "analyze robustness": 2827, + "available publicly": 5353, + "progress various": 43119, + "ability understanding": 649, + "domains knowledge": 16264, + "studies multi": 53284, + "datasets comparison": 13185, + "specific tools": 52161, + "robust perturbations": 48263, + "psycholinguistic features": 44288, + "pairs unstructured": 39225, + "approaches suggest": 3931, + "accuracy multilingual": 1010, + "useful detecting": 60360, + "systems making": 54560, + "features datasets": 20553, + "improvements 15": 25044, + "interaction mechanism": 26605, + "maintain high": 31482, + "knowledge design": 27436, + "arabic bert": 3993, + "depth understanding": 14190, + "sensitive domains": 49498, + "try use": 58846, + "knowledge augmentation": 27400, + "based relational": 5990, + "code work": 8868, + "multiple genres": 36219, + "generated corpora": 22280, + "semantic components": 49250, + "inference phase": 25681, + "datasets non": 13348, + "primarily studied": 42366, + "tasks problem": 55815, + "evaluate generalization": 18460, + "shot baseline": 50600, + "generates question": 22353, + "new machine": 37244, + "additional neural": 1690, + "token generated": 57291, + "scenarios models": 48701, + "set knowledge": 50174, + "effectiveness improving": 16783, + "style experimental": 53484, + "advances pretrained": 1922, + "abstractive approach": 769, + "training inspired": 58133, + "effectively transfer": 16759, + "makes training": 31640, + "classifiers evaluate": 8615, + "suggests neural": 53849, + "model driven": 33792, + "statistical distribution": 52741, + "google speech": 22958, + "dataset social": 13092, + "presents depth": 42079, + "approaches lack": 3852, + "results require": 47810, + "limits usage": 30644, + "prediction question": 41734, + "systems providing": 54608, + "conditional neural": 10000, + "massive scale": 31887, + "knowledge facilitate": 27481, + "models produces": 35363, + "provide user": 44150, + "rarely available": 45002, + "20 absolute": 221, + "representation entire": 46509, + "scenario given": 48687, + "different hyper": 14951, + "parameters experimental": 39695, + "domains second": 16291, + "existing meta": 19089, + "integrate word": 26511, + "phrases text": 40855, + "competitive neural": 9551, + "second demonstrate": 49003, + "related domains": 45901, + "contrast model": 10879, + "languages vary": 28819, + "motivates need": 35877, + "robustness compared": 48275, + "proposed taxonomy": 43910, + "training suggesting": 58278, + "models importantly": 35108, + "causal knowledge": 7874, + "significantly underperform": 51016, + "proposed pretraining": 43881, + "extraction refers": 20102, + "computation overhead": 9828, + "despite showing": 14387, + "gap pre": 21972, + "languages domain": 28646, + "high uncertainty": 23807, + "model pseudo": 34259, + "training baselines": 57943, + "dataset indian": 12966, + "conversations work": 11066, + "important limitations": 24741, + "traditional data": 57514, + "model reinforcement": 34295, + "outperforms heuristic": 38904, + "logical relation": 30987, + "despite efforts": 14359, + "paper recent": 39563, + "research knowledge": 47060, + "known unknown": 27669, + "jointly represent": 27219, + "languages achieved": 28591, + "detection languages": 14496, + "adopt supervised": 1868, + "effectiveness supervised": 16815, + "tasks automated": 55513, + "current task": 12017, + "low probability": 31166, + "13 datasets": 122, + "efficiently using": 16921, + "techniques achieved": 56054, + "relevant papers": 46227, + "billion parameter": 7118, + "overall word": 39054, + "integration multiple": 26530, + "directional transformer": 15286, + "iterative refinement": 27126, + "method information": 32544, + "model retain": 34322, + "range existing": 44919, + "explicit interaction": 19616, + "road map": 48211, + "ability transformer": 646, + "experiments shot": 19522, + "dataset public": 13047, + "shot finetuning": 50618, + "lastly explore": 29108, + "examples approach": 18888, + "scores input": 48904, + "original meaning": 38719, + "latest deep": 29155, + "influence training": 25729, + "model aggregation": 33550, + "structure context": 53094, + "results representative": 47809, + "driven neural": 16432, + "studies task": 53303, + "key advantages": 27294, + "terms used": 56320, + "related news": 45920, + "allows seamlessly": 2478, + "classification domain": 8458, + "data input": 12431, + "text having": 56613, + "obtained dataset": 38206, + "addition study": 1644, + "work carry": 62595, + "dataset potential": 13030, + "news videos": 37425, + "components text": 9727, + "utility incorporating": 61082, + "networks combined": 36839, + "models revolutionized": 35462, + "using predictive": 60870, + "effective human": 16658, + "datasets developed": 13227, + "domains covering": 16241, + "samples high": 48478, + "large class": 28854, + "using conventional": 60626, + "light new": 30452, + "results establish": 47616, + "errors commonly": 18237, + "work textual": 62842, + "limited end": 30583, + "understand effect": 59292, + "work motivated": 62727, + "important impact": 24731, + "extraction unified": 20127, + "practical utility": 41478, + "specific prompts": 52132, + "tasks prediction": 55806, + "english release": 17863, + "behavior paper": 6395, + "task monolingual": 55222, + "test state": 56382, + "semantic complexity": 49249, + "series controlled": 50060, + "task determine": 55014, + "detection feature": 14486, + "critical real": 11788, + "using class": 60604, + "code training": 8864, + "methods conduct": 32798, + "lead state": 29272, + "open text": 38465, + "challenging human": 8099, + "extraction achieves": 20044, + "unsupervised contrastive": 59687, + "task average": 54927, + "tasks systems": 55924, + "little performance": 30882, + "present recent": 41998, + "studied recent": 53233, + "framework brings": 21463, + "introduces multi": 26892, + "information iii": 25910, + "works demonstrate": 62882, + "multiple feature": 36217, + "detection experimental": 14482, + "mixing languages": 33415, + "extracted single": 20020, + "performance argue": 40199, + "wider variety": 62029, + "issue design": 27060, + "soft prompt": 51624, + "pretraining strategy": 42217, + "does increase": 15954, + "built dataset": 7483, + "token spans": 57309, + "document contexts": 15779, + "heuristics used": 23634, + "plms fine": 41017, + "evidence models": 18816, + "chinese news": 8314, + "environments work": 18177, + "results particular": 47762, + "analysis highlights": 2672, + "attribute words": 4902, + "benchmark text": 6500, + "discriminative representations": 15448, + "desired attributes": 14347, + "relevant background": 46200, + "using labelled": 60751, + "ai technologies": 2124, + "focus given": 21168, + "news news": 37408, + "model gain": 33917, + "modeling capabilities": 34563, + "information following": 25883, + "text suitable": 56795, + "dataset high": 12948, + "handcrafted rules": 23401, + "exploit semantic": 19663, + "special treatment": 52023, + "objective designed": 38085, + "benefits training": 6589, + "posts comments": 41370, + "order develop": 38610, + "shown existing": 50707, + "methods gradient": 32881, + "representations dynamically": 46645, + "accuracy target": 1057, + "context short": 10717, + "chain thought": 7961, + "model just": 34027, + "present details": 41892, + "superior zero": 53946, + "required resources": 46904, + "using gpt": 60710, + "tasks label": 55706, + "high flexibility": 23738, + "indicate specific": 25535, + "external datastore": 19934, + "major bottleneck": 31502, + "models practice": 35337, + "scarcity large": 48671, + "propose progressive": 43593, + "essential developing": 18325, + "methods increase": 32902, + "optimized end": 38566, + "linguistic computational": 30756, + "10 15": 30, + "model hypothesize": 33968, + "needed train": 36604, + "normalizing flow": 37710, + "proposes multi": 43935, + "layers propose": 29234, + "making unsuitable": 31672, + "understanding impact": 59351, + "improved paper": 24955, + "uses auxiliary": 60494, + "sentence sentiment": 49644, + "advent deep": 1957, + "issue investigate": 27065, + "data reducing": 12592, + "training targets": 58284, + "models receive": 35405, + "make generated": 31573, + "paper datasets": 39312, + "comparing generated": 9481, + "natural artificial": 36409, + "tasks set": 55881, + "build comprehensive": 7391, + "algorithm reduces": 2297, + "sentence features": 49560, + "based genetic": 5753, + "expressed multiple": 19798, + "content use": 10567, + "advanced language": 1888, + "major impact": 31510, + "trained google": 57744, + "acceptable performance": 813, + "models overall": 35295, + "generation limited": 22485, + "datasets associated": 13160, + "datasets metrics": 13333, + "different questions": 15041, + "logical relations": 30988, + "times parameters": 57254, + "incorporating different": 25383, + "sentence scoring": 49636, + "combined machine": 9080, + "scale diversity": 48565, + "train lstm": 57602, + "bengali dataset": 6592, + "cost terms": 11594, + "methods adopted": 32741, + "specific general": 52088, + "usually represented": 61063, + "furthermore conduct": 21810, + "introduce lightweight": 26819, + "data characteristics": 12207, + "problem traditional": 42676, + "adequately capture": 1834, + "contextual feature": 10768, + "variations training": 61252, + "demonstrates strong": 14044, + "question ask": 44719, + "survey focus": 54207, + "helps alleviate": 23603, + "examples class": 18891, + "coherent diverse": 8914, + "strategy results": 52949, + "introduce various": 26877, + "level proposed": 30184, + "prediction error": 41701, + "techniques capture": 56067, + "relevant baselines": 46201, + "challenge large": 7990, + "decoder transformers": 13619, + "work chinese": 62596, + "states language": 52720, + "types propose": 59112, + "original words": 38740, + "classification pipeline": 8517, + "corpora shown": 11241, + "translation supervised": 58684, + "advancements natural": 1901, + "learn generic": 29375, + "model reducing": 34292, + "sequential manner": 50045, + "data computationally": 12231, + "approaches supervised": 3932, + "models tuned": 35636, + "argue models": 4164, + "especially zero": 18312, + "discrete prompts": 15426, + "applying nlp": 3373, + "analyze recent": 2825, + "naive approaches": 36362, + "particular observe": 39856, + "cognitive biases": 8890, + "input prompt": 26319, + "lexical overlaps": 30375, + "effectively adapted": 16721, + "real synthetic": 45111, + "errors impact": 18241, + "extend current": 19820, + "work multimodal": 62730, + "task contrast": 54978, + "articles given": 4468, + "word makes": 62242, + "consisting multiple": 10317, + "better context": 6868, + "perform self": 40135, + "abundant unlabeled": 783, + "variety fields": 61273, + "capture correlations": 7658, + "prompting methods": 43220, + "based span": 6049, + "equal number": 18189, + "statistical tests": 52766, + "prompt generation": 43204, + "based early": 5694, + "performance fewer": 40345, + "texts lack": 56896, + "addition content": 1604, + "modules used": 35776, + "set including": 50169, + "cost code": 11577, + "forgetting previous": 21307, + "behavior use": 6397, + "minimal performance": 33291, + "outperformed conventional": 38837, + "structured meaning": 53164, + "tuning shot": 58953, + "theorem proving": 57009, + "heavily large": 23532, + "new mechanism": 37249, + "low variance": 31204, + "results literature": 47702, + "documents text": 15918, + "combined pre": 9082, + "models examples": 34983, + "_1 score": 580, + "approaches alleviate": 3760, + "propose target": 43658, + "metric named": 33122, + "baseline proposed": 6203, + "tuning significantly": 58954, + "inference extensive": 25657, + "achieving superior": 1427, + "advancements machine": 1900, + "nlp pre": 37512, + "enhanced training": 17939, + "using active": 60549, + "existing architectures": 19034, + "results substantially": 47864, + "level edits": 30105, + "framework works": 21628, + "tuning different": 58906, + "adapter tuning": 1560, + "parameters multi": 39711, + "instruction based": 26479, + "questions set": 44808, + "datasets news": 13346, + "systems semeval": 54628, + "need design": 36553, + "attention various": 4849, + "techniques performance": 56120, + "work main": 62717, + "carefully annotated": 7758, + "learning emerging": 29616, + "novel prompt": 37900, + "performance performance": 40484, + "previous joint": 42256, + "trained linguistic": 57776, + "tree encoder": 58742, + "creating need": 11744, + "model textual": 34462, + "data computation": 12229, + "results resulting": 47813, + "data texts": 12732, + "better improve": 6902, + "improve bert": 24827, + "obtain complete": 38169, + "continually learn": 10825, + "tuning parameter": 58938, + "propose techniques": 43663, + "score significantly": 48873, + "texts extracted": 56880, + "performance precision": 40489, + "manually assigned": 31764, + "consistent evaluation": 10274, + "tasks aims": 55496, + "multiple outputs": 36259, + "diverse reasoning": 15715, + "seq models": 49889, + "modifying training": 35740, + "tune task": 58864, + "training reward": 58233, + "proved successful": 43989, + "learning does": 29603, + "new interactive": 37227, + "accuracy prior": 1031, + "strategy model": 52942, + "techniques label": 56103, + "interactions entities": 26617, + "augmentation work": 4973, + "understanding relationship": 59392, + "work direction": 62638, + "agnostic representation": 2096, + "use prompt": 59983, + "based plms": 5938, + "signals including": 50834, + "queries different": 44651, + "spanning languages": 51954, + "hope study": 24014, + "tokens word": 57344, + "size pre": 51394, + "pairs approach": 39169, + "10 dataset": 38, + "datasets general": 13282, + "better convergence": 6870, + "recently various": 45474, + "typically fail": 59142, + "pretrained lm": 42164, + "prompting approach": 43214, + "substantially worse": 53650, + "common strategy": 9202, + "knowledge extensive": 27476, + "way incorporating": 61811, + "models vision": 35674, + "decoder paper": 13608, + "samples different": 48470, + "metric work": 33131, + "text respectively": 56745, + "existing transformer": 19165, + "development open": 14696, + "higher correlations": 23819, + "example generation": 18877, + "including gpt": 25259, + "datasets findings": 13275, + "online posts": 38380, + "gender agreement": 22034, + "corpus essential": 11330, + "language end": 28049, + "method variety": 32702, + "individual methods": 25572, + "evaluated languages": 18534, + "generation remains": 22538, + "model likely": 34064, + "evaluation reveals": 18706, + "transformers shown": 58531, + "model logical": 34071, + "select candidate": 49101, + "modular approach": 35742, + "recent sota": 45346, + "problem distinguishing": 42542, + "article proposed": 4457, + "rich relations": 48114, + "reasoning needed": 45210, + "networks good": 36864, + "produce poor": 42998, + "performance suggest": 40585, + "instances available": 26433, + "parameter efficiency": 39667, + "costly human": 11601, + "words improve": 62433, + "various large": 61353, + "design prompt": 14297, + "instead conventional": 26447, + "texts word": 56946, + "combines best": 9093, + "token representation": 57303, + "automatically translating": 5204, + "specific emotion": 52077, + "goal understand": 22904, + "work inspire": 62688, + "models hallucinate": 35073, + "conducting human": 10104, + "model selecting": 34345, + "challenging low": 8111, + "generalized model": 22152, + "overfitting issue": 39082, + "sets task": 50307, + "task aim": 54891, + "text set": 56767, + "interpretability analysis": 26714, + "results underline": 47893, + "performance small": 40566, + "little understanding": 30888, + "language templates": 28524, + "temporal knowledge": 56189, + "library provides": 30425, + "training conducted": 57956, + "diverse downstream": 15701, + "existing theories": 19161, + "express thoughts": 19795, + "aims present": 2208, + "data selected": 12632, + "able reconstruct": 720, + "rarely considered": 45004, + "requires precise": 46947, + "precise understanding": 41610, + "dl model": 15753, + "aware mechanism": 5460, + "combines existing": 9095, + "domain ner": 16119, + "building text": 7475, + "competitive alternative": 9538, + "provide support": 44139, + "generation generation": 22470, + "assessment models": 4592, + "2021 workshop": 301, + "f1 test": 20231, + "systems given": 54514, + "model hard": 33953, + "objective optimization": 38097, + "architecture important": 4051, + "systems production": 54602, + "form context": 21317, + "context account": 10579, + "inconsistency problem": 25338, + "focus solving": 21203, + "text ignoring": 56620, + "expansion based": 19189, + "novel general": 37829, + "2022 shared": 303, + "parameters existing": 39694, + "github paper": 22718, + "rich annotated": 48092, + "approaches self": 3917, + "multi pass": 35998, + "dataset relative": 13059, + "alleviate limitations": 2413, + "divergence based": 15686, + "conducted publicly": 10089, + "extracted automatically": 20006, + "effectiveness bert": 16769, + "contribute meaning": 10930, + "processing linguistic": 42883, + "guided multi": 23347, + "automated speech": 5058, + "texts high": 56886, + "approximate posterior": 3979, + "learn alignment": 29345, + "language corresponding": 28010, + "decoding mechanism": 13632, + "train datasets": 57576, + "information underlying": 26135, + "study challenges": 53338, + "summarization research": 53898, + "tasks adapting": 55491, + "shot examples": 50614, + "substantially larger": 53641, + "domain shifts": 16160, + "model infers": 34002, + "module introduced": 35762, + "levels present": 30244, + "used automated": 60096, + "general english": 22059, + "evaluation studies": 18729, + "news using": 37424, + "expected provide": 19198, + "augmented input": 4979, + "accurate model": 1080, + "transformation rules": 58445, + "results surpass": 47872, + "dependent target": 14151, + "guide decoding": 23329, + "learning diverse": 29598, + "tokens learn": 57328, + "majority research": 31532, + "gpt experiments": 22976, + "learning leverages": 29707, + "data widely": 12774, + "synthesis method": 54358, + "create noisy": 11713, + "corpus create": 11313, + "translation test": 58689, + "hope facilitate": 24007, + "generation different": 22447, + "studies work": 53313, + "results able": 47482, + "based users": 6123, + "plm based": 41013, + "samples using": 48494, + "construct datasets": 10385, + "time leads": 57173, + "method new": 32589, + "results tested": 47882, + "identify gaps": 24423, + "prediction demonstrate": 41699, + "introduce methods": 26823, + "word removal": 62281, + "enables easy": 17438, + "end compare": 17620, + "supervisory signals": 54103, + "alternative fine": 2501, + "detecting semantic": 14450, + "model follows": 33907, + "results perform": 47763, + "detection compare": 14468, + "using prompts": 60877, + "understand differences": 59290, + "high latency": 23744, + "propose cluster": 43318, + "information fact": 25871, + "linguistic skills": 30795, + "distinct types": 15596, + "addition experiments": 1615, + "used target": 60322, + "english proposed": 17862, + "evaluation low": 18633, + "accuracy introduce": 995, + "evaluating multi": 18564, + "produce low": 42990, + "memory cost": 32251, + "models control": 34862, + "hierarchical encoding": 23669, + "hierarchical document": 23666, + "based auto": 5586, + "inference times": 25702, + "used enrich": 60166, + "enrich training": 17961, + "samples paper": 48485, + "models internal": 35140, + "dataset lastly": 12981, + "corpus performance": 11402, + "label semantic": 27723, + "baselines release": 6296, + "learned previous": 29477, + "methods formulate": 32870, + "evaluate wide": 18517, + "generative sequence": 22610, + "training accuracy": 57922, + "methods specific": 33048, + "amr semantic": 2576, + "multiple embedding": 36207, + "task assessing": 54917, + "personal experience": 40755, + "evaluation 12": 18572, + "finetuning strategy": 21052, + "unique dataset": 59512, + "plms like": 41018, + "knowledge unstructured": 27640, + "attention tokens": 4836, + "questions important": 44792, + "experiments including": 19445, + "underlying information": 59266, + "accurate natural": 1082, + "setting provide": 50345, + "interpretable manner": 26723, + "common english": 9172, + "relations previous": 46052, + "textual signals": 56980, + "lm performance": 30910, + "types syntactic": 59119, + "play critical": 40963, + "automatically capture": 5146, + "examples adversarial": 18887, + "task case": 54946, + "similar features": 51042, + "set addition": 50104, + "independent tasks": 25505, + "task shows": 55374, + "emotional context": 17297, + "train various": 57658, + "tuned t5": 58887, + "accuracy 11": 894, + "setting recent": 50346, + "performs inference": 40708, + "important real": 24758, + "square error": 52399, + "noise types": 37606, + "performance base": 40206, + "work explicitly": 62655, + "limited pre": 30605, + "easy data": 16559, + "augmentation eda": 4953, + "dataset making": 12990, + "dataset demonstrates": 12888, + "studies cross": 53254, + "content various": 10569, + "datasets suitable": 13447, + "task presents": 55288, + "annotator agreements": 3012, + "kg paper": 27361, + "ignore semantic": 24493, + "novel structure": 37931, + "reference sentences": 45745, + "test task": 56386, + "14 f1": 139, + "potential methods": 41400, + "separately different": 49882, + "passage level": 39920, + "competing systems": 9532, + "generation generating": 22469, + "strategy make": 52941, + "motivating development": 35881, + "modeling interactions": 34586, + "texts general": 56882, + "annotated multi": 2905, + "readable form": 45071, + "work typically": 62850, + "indicate effectiveness": 25525, + "supervision pre": 54088, + "new rules": 37307, + "demonstrated different": 14003, + "neural rankers": 37087, + "present textbf": 42040, + "helps overcome": 23612, + "resources machine": 47315, + "smaller language": 51519, + "different prompt": 15038, + "dataset focusing": 12934, + "maintaining original": 31495, + "examples compared": 18892, + "domains computer": 16240, + "process release": 42825, + "joint reasoning": 27186, + "level correlations": 30093, + "opinion paper": 38502, + "semantic attributes": 49239, + "tasks future": 55651, + "link https": 30827, + "correlations paper": 11538, + "mixed languages": 33407, + "shot retrieval": 50638, + "tasks assess": 55511, + "nature dataset": 36478, + "systems primarily": 54598, + "input format": 26280, + "model constraint": 33706, + "annotation large": 2954, + "documents sentence": 15911, + "useful human": 60366, + "research code": 47000, + "google scholar": 22957, + "compared domain": 9402, + "method development": 32462, + "training stability": 58267, + "using 100": 60546, + "100 samples": 63, + "dataset assessing": 12815, + "respectively benchmark": 47361, + "updated new": 59767, + "training lm": 58157, + "demonstrated efficacy": 14007, + "knowledge known": 27538, + "designed make": 14323, + "com alibaba": 9004, + "annotations training": 3004, + "concrete recommendations": 9982, + "leading sub": 29301, + "usage propose": 59805, + "set framework": 50159, + "rich morphological": 48112, + "created annotated": 11722, + "analysis nlp": 2709, + "sentences jointly": 49742, + "com amazon": 9006, + "inference recent": 25689, + "generated adversarial": 22267, + "close original": 8689, + "original inputs": 38717, + "adapt knowledge": 1503, + "layer multi": 29191, + "10 12": 29, + "proposed alleviate": 43717, + "work leveraged": 62709, + "training mixed": 58177, + "model fitting": 33901, + "present promising": 41989, + "provides reliable": 44222, + "unified task": 59479, + "encoded input": 17479, + "models restricted": 35452, + "develop multi": 14600, + "created corpus": 11723, + "addition test": 1646, + "transformer multi": 58502, + "level fusion": 30123, + "humans acquire": 24272, + "users feedback": 60465, + "architecture combined": 4034, + "position dependent": 41263, + "tree generation": 58743, + "tight coupling": 57107, + "tasks reduce": 55842, + "lms pretrained": 30922, + "easily used": 16553, + "severely limits": 50428, + "aims assign": 2174, + "structures human": 53185, + "potentially noisy": 41416, + "probability label": 42477, + "model remaining": 34306, + "labels experiments": 27823, + "features context": 20546, + "prompting large": 43216, + "model llm": 34068, + "text davinci": 56526, + "strategy achieve": 52924, + "classification goal": 8477, + "considerable progress": 10235, + "datasets prove": 13385, + "dataset level": 12983, + "features sentences": 20663, + "studies examined": 53261, + "finally observe": 20870, + "observe proposed": 38140, + "models attempt": 34733, + "input automatically": 26257, + "approaches approach": 3766, + "metric training": 33127, + "set multiple": 50197, + "experiment bert": 19233, + "representative datasets": 46797, + "models ignore": 35099, + "augmentation improve": 4956, + "words provide": 62488, + "documents generated": 15882, + "surpass current": 54162, + "art existing": 4260, + "parameters achieve": 39687, + "supervised finetuning": 53987, + "small medium": 51482, + "instruction tuning": 26481, + "look problem": 31066, + "problem practical": 42624, + "different numbers": 15011, + "prompt learning": 43205, + "learning outperform": 29792, + "annotation domain": 2944, + "absolute score": 750, + "level selection": 30202, + "advances self": 1925, + "question state": 44751, + "need extensive": 36563, + "gender occupation": 22038, + "phenomenon known": 40815, + "knowledge speaker": 27615, + "reasoning including": 45197, + "dataset table": 13111, + "entities like": 18063, + "challenges document": 8041, + "models associated": 34731, + "intent classifier": 26566, + "shown fine": 50709, + "labeled utterances": 27772, + "analysis deep": 2644, + "achieves 10": 1284, + "generation primarily": 22524, + "words time": 62532, + "processing procedure": 42928, + "approach reducing": 3671, + "improves best": 25118, + "intelligence applications": 26537, + "ranking accuracy": 44964, + "produced bert": 43017, + "demonstrate accuracy": 13860, + "obtains performance": 38254, + "evaluation remains": 18696, + "benchmark called": 6430, + "using languages": 60755, + "level generative": 30124, + "capture user": 7720, + "deployed online": 14171, + "sentences article": 49682, + "largest existing": 29095, + "gains standard": 21942, + "dual encoders": 16460, + "trees propose": 58770, + "lot training": 31122, + "method combination": 32418, + "underexplored paper": 59252, + "belief propagation": 6407, + "11b parameters": 100, + "testing dataset": 56403, + "short description": 50552, + "models tailored": 35580, + "easily understandable": 16551, + "using finite": 60698, + "given labeled": 22753, + "contrastive objectives": 10915, + "shot data": 50610, + "risk assessment": 48161, + "autoregressive manner": 5220, + "data growing": 12396, + "applications prior": 3236, + "words corpora": 62390, + "vast knowledge": 61439, + "data construct": 12245, + "informative knowledge": 26173, + "languages remain": 28769, + "performance set": 40555, + "retaining original": 47926, + "level perturbations": 30175, + "syntactic parser": 54310, + "drastically reduce": 16393, + "networks generate": 36861, + "15 higher": 148, + "higher diversity": 23822, + "produced different": 43019, + "target oriented": 54834, + "study compares": 53341, + "algorithms analyze": 2319, + "certain attributes": 7935, + "samples multiple": 48483, + "time high": 57161, + "propose template": 43664, + "adapting large": 1565, + "adapters small": 1562, + "outperform fine": 38798, + "finetuning models": 21050, + "models seven": 35490, + "difference performance": 14819, + "rapid adaptation": 44986, + "higher frequency": 23826, + "methods remove": 33014, + "adaptive learning": 1576, + "level despite": 30099, + "theoretically analyze": 57028, + "corpus adapt": 11269, + "significantly longer": 50986, + "metrics automatic": 33139, + "baseline 12": 6147, + "model gram": 33945, + "transfer cross": 58355, + "chinese dialogue": 8306, + "research build": 46995, + "simple easy": 51148, + "character features": 8202, + "ce loss": 7904, + "subset input": 53608, + "systematic investigation": 54400, + "models augment": 34740, + "model retrieves": 34326, + "explore challenging": 19691, + "western countries": 61948, + "require access": 46840, + "additional fine": 1670, + "interesting aspects": 26648, + "considering entire": 10259, + "learns mapping": 29966, + "improvement 20": 24980, + "viable option": 61571, + "approaches tend": 3937, + "general effective": 22058, + "knowledge especially": 27467, + "data evidence": 12332, + "data parameters": 12537, + "model maintains": 34083, + "generate non": 22224, + "data explicitly": 12347, + "designed training": 14335, + "language effect": 28040, + "leverage cross": 30261, + "achieves consistently": 1322, + "detection module": 14503, + "proposed previous": 43882, + "crowdsourced datasets": 11887, + "datasets outperforming": 13358, + "tokens time": 57339, + "requires multiple": 46946, + "hierarchical contrastive": 23664, + "deal long": 13518, + "modeling loss": 34594, + "generation time": 22568, + "adaptation training": 1545, + "important improving": 24733, + "demonstrates great": 14034, + "language testing": 28526, + "dataset represents": 13063, + "effect text": 16619, + "language lacks": 28129, + "useful resources": 60386, + "previous zero": 42325, + "set achieves": 50103, + "information promote": 26027, + "especially new": 18291, + "algorithm evaluate": 2273, + "financial text": 20894, + "extract large": 19983, + "task self": 55351, + "leverages sentence": 30314, + "identify biases": 24414, + "linguistic concepts": 30757, + "learn long": 29392, + "fed model": 20705, + "framework unifies": 21617, + "scientific terms": 48770, + "accuracy interpretability": 994, + "study carried": 53337, + "generating summaries": 22397, + "perform post": 40129, + "identifying semantically": 24466, + "language architecture": 27967, + "sentences novel": 49759, + "aspects discussed": 4538, + "train encoder": 57583, + "includes pre": 25233, + "memory costs": 32252, + "method given": 32518, + "sentence sentences": 49643, + "sentences higher": 49732, + "alternative evaluation": 2500, + "information despite": 25808, + "lacking explicit": 27929, + "tasks generation": 55656, + "complex high": 9627, + "detection different": 14474, + "dynamically adapt": 16496, + "19 paper": 188, + "components propose": 9725, + "intensity prediction": 26557, + "resource future": 47227, + "optimize performance": 38564, + "leads precise": 29324, + "training main": 58165, + "significant implications": 50871, + "languages able": 28589, + "need make": 36580, + "involve multiple": 27014, + "problem models": 42610, + "learned pretrained": 29476, + "texts target": 56931, + "manually create": 31770, + "facilitate process": 20272, + "descriptions natural": 14253, + "various degrees": 61324, + "support tasks": 54129, + "tasks leading": 55716, + "models relevant": 35427, + "enables build": 17437, + "able advantage": 673, + "real use": 45116, + "aims achieve": 2171, + "reducing annotation": 45703, + "studies human": 53269, + "phrases extracted": 40851, + "features performance": 20641, + "studies approach": 53247, + "outputs word": 39021, + "provide clues": 44026, + "consider local": 10213, + "provide brief": 44022, + "text refers": 56730, + "defined terms": 13787, + "akin human": 2227, + "prompting method": 43219, + "benchmarks provide": 6540, + "need comprehensive": 36550, + "abilities large": 590, + "low perplexity": 31164, + "stage pretraining": 52440, + "emphasizing importance": 17316, + "practical importance": 41465, + "novel shot": 37922, + "sentences paired": 49761, + "compression ratio": 9813, + "paper submitted": 39582, + "benchmarks use": 6547, + "task counterparts": 54982, + "submitted models": 53583, + "classification challenging": 8441, + "graph capture": 23113, + "capture relationship": 7702, + "capable processing": 7627, + "reward shaping": 48070, + "making robust": 31667, + "need look": 36579, + "capabilities llms": 7601, + "study attempts": 53330, + "suggest use": 53831, + "human studies": 24243, + "points language": 41077, + "datasets obtain": 13351, + "conducted test": 10096, + "noisy samples": 37623, + "semantics data": 49401, + "categories like": 7845, + "furthermore build": 21806, + "embedding attention": 17013, + "addition fine": 1617, + "scores provide": 48917, + "avoid problems": 5434, + "architecture designs": 4042, + "difficult nlp": 15177, + "case paper": 7793, + "79 respectively": 515, + "function language": 21755, + "algorithms detect": 2323, + "respect linguistic": 47347, + "scores training": 48926, + "method collect": 32417, + "level score": 30198, + "methods leveraged": 32926, + "contain richer": 10471, + "investigate benefits": 26945, + "science psychology": 48749, + "official submission": 38311, + "models impressive": 35109, + "strong robustness": 53048, + "corpus development": 11324, + "state research": 52709, + "progress tasks": 43117, + "negligible cost": 36653, + "keeping competitive": 27278, + "lexical processing": 30377, + "volume variety": 61730, + "vary depending": 61420, + "generated best": 22272, + "introduce text": 26871, + "apply graph": 3330, + "information modeling": 25976, + "language produced": 28443, + "learning integrated": 29685, + "experiments result": 19513, + "resulting sub": 47478, + "corresponding actions": 11545, + "extract local": 19985, + "strategies reduce": 52916, + "functions different": 21771, + "makes research": 31634, + "discuss existing": 15465, + "finally open": 20872, + "important form": 24728, + "tasks crucial": 55566, + "specific adapters": 52042, + "evaluate validity": 18515, + "allow models": 2438, + "consistent word": 10287, + "ignore information": 24492, + "allows combine": 2453, + "tokens paper": 57331, + "models defined": 34891, + "specific parameters": 52123, + "domain zero": 16229, + "curate release": 11946, + "benchmark open": 6484, + "domain end": 16056, + "documents generate": 15881, + "systems aims": 54429, + "knowledge address": 27392, + "enhanced generative": 17931, + "methods overcome": 32971, + "modal transformer": 33466, + "methods popular": 32983, + "popular widely": 41198, + "neglected paper": 36649, + "electra model": 16964, + "13 improvement": 125, + "utilizing deep": 61121, + "performance architecture": 40198, + "performing method": 40680, + "articles results": 4478, + "provides accurate": 44179, + "problems especially": 42699, + "data verify": 12771, + "learning demonstrated": 29586, + "context article": 10585, + "analysis establish": 2658, + "models random": 35394, + "development artificial": 14668, + "combine context": 9063, + "resources open": 47322, + "help enhance": 23558, + "challenges problem": 8070, + "proposed deal": 43754, + "task combination": 54954, + "extracted high": 20011, + "learn fine": 29371, + "provide domain": 44056, + "knowledge grounding": 27509, + "models supports": 35567, + "modality model": 33477, + "accurate estimation": 1078, + "leverages context": 30301, + "finally examine": 20855, + "cloud platform": 8721, + "addition pre": 1633, + "attention variants": 4848, + "consists tasks": 10334, + "learning technology": 29909, + "span masking": 51926, + "book question": 7246, + "words unknown": 62538, + "feelings opinions": 20728, + "hold promise": 23980, + "level inter": 30136, + "phrase phrase": 40842, + "matching dataset": 31911, + "availability gold": 5248, + "entities complex": 18040, + "model arabic": 33574, + "time end": 57149, + "available zero": 5393, + "time prior": 57199, + "improvement perplexity": 25014, + "explore transfer": 19744, + "data objective": 12517, + "data coverage": 12257, + "public corpus": 44310, + "corpus unlabeled": 11452, + "selecting representative": 49128, + "models showcase": 35494, + "evaluation pre": 18678, + "techniques solve": 56138, + "systems multiple": 54566, + "representations content": 46631, + "changes neural": 8180, + "tasks terms": 55928, + "research release": 47115, + "challenges include": 8053, + "text preserves": 56705, + "rapidly developing": 44993, + "type paper": 59063, + "subjective evaluations": 53563, + "traditional transformer": 57554, + "languages supported": 28798, + "classification large": 8484, + "maintaining grammatical": 31491, + "use proposed": 59984, + "traditional multi": 57534, + "pretrained deep": 42152, + "task suffer": 55422, + "dataset suitable": 13107, + "propose shot": 43630, + "mechanisms results": 32153, + "explore properties": 19729, + "embedding state": 17064, + "experiment data": 19235, + "adaptation pre": 1534, + "offers comprehensive": 38301, + "mitigate impact": 33384, + "work shot": 62820, + "optimal model": 38528, + "specific way": 52173, + "novel research": 37909, + "design pre": 14295, + "computes attention": 9902, + "using similar": 60941, + "text complexity": 56502, + "wikipedia corpora": 62045, + "text remains": 56735, + "provides data": 44190, + "insights human": 26390, + "language annotation": 27961, + "robustness achieved": 48271, + "sample selection": 48454, + "type user": 59074, + "detection purpose": 14515, + "techniques multi": 56113, + "languages scarce": 28777, + "related non": 45921, + "results generating": 47648, + "associated increased": 4622, + "different tokens": 15101, + "context set": 10716, + "datasets resource": 13406, + "reasonable time": 45175, + "negative sample": 36632, + "detecting entities": 14448, + "supervised multi": 54023, + "generated candidate": 22273, + "problems lack": 42705, + "surprising findings": 54182, + "metrics designed": 33159, + "model samples": 34334, + "handling unseen": 23429, + "context inspired": 10660, + "finally leverage": 20866, + "determine relevance": 14558, + "domains trained": 16297, + "paths entities": 39952, + "entities far": 18052, + "trained static": 57884, + "text expressing": 56570, + "develop theory": 14619, + "models remarkable": 35435, + "end requires": 17705, + "correlations data": 11533, + "topics like": 57453, + "lingual setup": 30729, + "data emerged": 12316, + "significant variation": 50929, + "work achieve": 62552, + "evaluation purposes": 18690, + "models manually": 35216, + "gap supervised": 21981, + "code based": 8797, + "retrieval enhanced": 47943, + "structure sequence": 53137, + "models attractive": 34739, + "slows inference": 51456, + "achieve great": 1149, + "approaches pre": 3896, + "flat sequence": 21096, + "finally design": 20850, + "chinese lexical": 8311, + "information evaluate": 25841, + "dependent nature": 14149, + "make text": 31604, + "proposed various": 43924, + "adapt state": 1510, + "large noisy": 28921, + "achieve 10": 1106, + "method user": 32696, + "dataset empirical": 12905, + "crucial problem": 11906, + "data extracting": 12355, + "empirically analyze": 17356, + "leveraging different": 30321, + "inter related": 26585, + "dataset set": 13081, + "build release": 7422, + "access ground": 821, + "techniques method": 56110, + "strategy fine": 52933, + "tasks solve": 55897, + "based theoretical": 6095, + "plain language": 40937, + "sampling model": 48504, + "input level": 26294, + "use end": 59876, + "domains important": 16260, + "roberta deberta": 48219, + "negative impacts": 36621, + "capturing local": 7739, + "parameters fully": 39701, + "performance source": 40569, + "importance carefully": 24677, + "source multi": 51785, + "use complex": 59847, + "finally generate": 20861, + "arduous task": 4135, + "analysis current": 2640, + "control outputs": 10971, + "node graph": 37586, + "solution improving": 51655, + "designed text": 14334, + "identify optimal": 24434, + "units different": 59530, + "properly reflect": 43256, + "problems study": 42732, + "capabilities multi": 7604, + "additionally results": 1734, + "benchmark collection": 6433, + "enhance diversity": 17911, + "novel group": 37836, + "allows develop": 2457, + "problem extend": 42561, + "insights dataset": 26387, + "topic time": 57434, + "supervision limited": 54084, + "models llm": 35194, + "work document": 62640, + "supervised labels": 53991, + "summarization generation": 53885, + "struggle generate": 53201, + "generation enhance": 22453, + "task solving": 55385, + "ability context": 600, + "corpus 10k": 11264, + "languages requires": 28771, + "effort involved": 16928, + "class imbalanced": 8404, + "datasets unified": 13466, + "performance generally": 40363, + "kb text": 27272, + "transformers specifically": 58532, + "minimal data": 33286, + "use powerful": 59974, + "capture correlation": 7657, + "demonstrates feasibility": 14033, + "feasibility automatic": 20468, + "video available": 61579, + "analysis collected": 2628, + "series methods": 50065, + "using historical": 60730, + "terms complexity": 56277, + "performance sequential": 40554, + "settings existing": 50368, + "prediction used": 41750, + "enable large": 17425, + "based prompting": 5957, + "translation pipeline": 58657, + "task instructions": 55143, + "generation word": 22580, + "trained classify": 57689, + "presents alternative": 42070, + "information multimodal": 25979, + "evaluation experimental": 18618, + "dense vectors": 14086, + "approach particular": 3636, + "texts detecting": 56872, + "semeval 2023": 49442, + "2023 task": 308, + "dataset particularly": 13024, + "raises important": 44859, + "focus new": 21187, + "contrast performance": 10880, + "outperforms commonly": 38883, + "token text": 57310, + "testing human": 56406, + "technique achieve": 56024, + "intensive paper": 26560, + "models developing": 34910, + "developing neural": 14660, + "size quality": 51395, + "world social": 62961, + "setup models": 50410, + "annotations specifically": 3001, + "gpt codex": 22975, + "relevance text": 46196, + "producing coherent": 43038, + "levels using": 30250, + "training hypothesize": 58123, + "generation algorithms": 22418, + "style specific": 53499, + "module capture": 35753, + "attention enhanced": 4742, + "predicted token": 41671, + "effective shot": 16695, + "evaluation public": 18689, + "temporal aspects": 56179, + "strategies fine": 52903, + "language pattern": 28376, + "sentences pre": 49768, + "sentences conduct": 49694, + "promising area": 43162, + "discourse unit": 15402, + "training manner": 58169, + "task crucial": 54988, + "different common": 14868, + "crucial challenging": 11896, + "settings generating": 50376, + "model prompt": 34242, + "framework case": 21469, + "techniques identifying": 56095, + "majority work": 31536, + "stage employ": 52428, + "usually suffers": 61070, + "models constructed": 34856, + "text performance": 56697, + "errors human": 18240, + "english systems": 17885, + "hope new": 24010, + "existing lm": 19086, + "continued training": 10833, + "corpora additionally": 11172, + "future opportunities": 21880, + "aware manner": 5459, + "variants transformer": 61238, + "single candidate": 51287, + "relevant news": 46226, + "methods ablation": 32724, + "learning structured": 29898, + "framework end": 21504, + "set online": 50207, + "impact important": 24597, + "equally training": 18192, + "data utilization": 12767, + "mining framework": 33313, + "demonstrated superiority": 14024, + "languages reduce": 28767, + "languages affect": 28596, + "training information": 58131, + "transformers pretrained": 58529, + "translation modern": 58635, + "careful consideration": 7755, + "space furthermore": 51866, + "essential model": 18328, + "knowledge experimental": 27471, + "data likely": 12464, + "rouge meteor": 48351, + "addition lexical": 1624, + "uses domain": 60507, + "forgetting previously": 21308, + "agreement iaa": 2107, + "human analysis": 24094, + "effective alternative": 16628, + "scenarios end": 48695, + "examples investigate": 18913, + "utterances propose": 61151, + "future design": 21867, + "provide higher": 44086, + "task include": 55129, + "transformer effectively": 58481, + "growing size": 23303, + "sample multiple": 48453, + "current popular": 11996, + "works generally": 62893, + "propose guidelines": 43407, + "focus solely": 21202, + "form short": 21335, + "automatic diagnosis": 5079, + "models constantly": 34853, + "word unit": 62327, + "unsolved problem": 59662, + "generation use": 22574, + "works based": 62877, + "methods scale": 33026, + "method trains": 32688, + "spurious biases": 52386, + "remains underexplored": 46353, + "generalize diverse": 22140, + "specific subsets": 52150, + "fairer models": 20359, + "structured overview": 53169, + "hierarchical manner": 23677, + "insufficient information": 26493, + "usually consist": 61041, + "spoken dialogues": 52356, + "time maintaining": 57175, + "sentence pieces": 49619, + "widely acknowledged": 61990, + "analyse impact": 2586, + "model prompting": 34243, + "unlabeled pu": 59575, + "com deeplearnxmu": 9010, + "combine predictions": 9071, + "methods codes": 32785, + "span span": 51933, + "detection key": 14495, + "systems achieving": 54423, + "applications crucial": 3192, + "recognized important": 45554, + "prompts improve": 43224, + "prompting llms": 43218, + "prompting strategies": 43221, + "heavily affected": 23528, + "foundation model": 21417, + "challenging number": 8122, + "textual dataset": 56957, + "000 news": 9, + "extensive dataset": 19861, + "training generate": 58112, + "established method": 18355, + "minimally supervised": 33295, + "research google": 47047, + "set prediction": 50224, + "generating final": 22374, + "performances variety": 40651, + "models hybrid": 35096, + "generate labeled": 22214, + "experiments illustrate": 19444, + "bring performance": 7333, + "settings datasets": 50365, + "cases zero": 7818, + "competitive strong": 9568, + "distribution differences": 15635, + "weights used": 61942, + "suffers severe": 53794, + "randomly masking": 44902, + "translated target": 58559, + "number tokens": 38046, + "information level": 25953, + "tuning entire": 58910, + "demonstrates substantial": 14046, + "datasets far": 13272, + "palm 540b": 39243, + "generation design": 22445, + "using news": 60840, + "pairs resulting": 39213, + "improves effectiveness": 25127, + "data condition": 12234, + "domain experiments": 16065, + "shared network": 50482, + "quantities text": 44636, + "pre order": 41505, + "furthermore using": 21842, + "annotated medical": 2904, + "results lead": 47697, + "solution space": 51661, + "uses automatic": 60493, + "longformer bigbird": 31059, + "models sufficient": 35557, + "method performing": 32612, + "question recent": 44747, + "problem description": 42532, + "retrieves relevant": 47992, + "realistic challenging": 45148, + "scores task": 48924, + "languages findings": 28674, + "outperform vanilla": 38831, + "examples introduce": 18912, + "ability llms": 622, + "availability annotated": 5245, + "create largest": 11707, + "points given": 41074, + "texts tend": 56933, + "observe models": 38138, + "efficient search": 16897, + "domain difficult": 16049, + "require annotated": 46842, + "process task": 42832, + "relevant works": 46247, + "analyze use": 2832, + "dataset effective": 12903, + "process furthermore": 42784, + "models textit": 35598, + "input modality": 26299, + "growing area": 23288, + "research robust": 47118, + "leverage commonsense": 30258, + "proposed unified": 43919, + "syntactic correctness": 54297, + "emnlp 2022": 17281, + "datasets includes": 13299, + "challenge finding": 7981, + "brown et": 7372, + "development multi": 14688, + "quality dimensions": 44509, + "cited papers": 8369, + "way pre": 61826, + "lm parameters": 30909, + "approaches tested": 3939, + "algorithmic approaches": 2314, + "extract sentences": 19993, + "produce large": 42989, + "similar methods": 51053, + "following challenges": 21263, + "training entire": 58088, + "utterances experiments": 61147, + "challenging develop": 8090, + "compare effectiveness": 9339, + "predictions training": 41768, + "debiasing method": 13534, + "study help": 53383, + "efforts paper": 16942, + "document corpora": 15780, + "domain key": 16092, + "using strategies": 60964, + "performance scenario": 40545, + "compare previous": 9359, + "high task": 23805, + "compared naive": 9424, + "quality crucial": 44504, + "words punctuation": 62489, + "important tokens": 24784, + "reduces need": 45694, + "metrics achieve": 33135, + "performance effectiveness": 40313, + "important difference": 24717, + "tasks challenge": 55535, + "embeddings domain": 17116, + "gap languages": 21967, + "resources scarce": 47333, + "llms shown": 30902, + "improved understanding": 24970, + "context similar": 10719, + "agreement based": 2104, + "representation furthermore": 46523, + "easily adaptable": 16534, + "tasks evaluating": 55621, + "work zero": 62861, + "complexity makes": 9681, + "attention current": 4732, + "datasets consistent": 13193, + "domain small": 16161, + "score original": 48862, + "annotation standards": 2970, + "sentiment social": 49859, + "furthermore leverage": 21826, + "settings release": 50394, + "relevance prediction": 46192, + "degradation compared": 13801, + "shot evaluations": 50613, + "offer insight": 38291, + "events like": 18794, + "benchmark knowledge": 6474, + "bridge language": 7321, + "alleviate limitation": 2412, + "essential modern": 18329, + "works achieved": 62873, + "size increases": 51386, + "models rapidly": 35397, + "significant benefits": 50852, + "generation content": 22438, + "information output": 25998, + "reddit twitter": 45645, + "framework self": 21594, + "framework evaluated": 21508, + "model continuous": 33714, + "paper challenge": 39287, + "shows dataset": 50773, + "models broadly": 34795, + "practical way": 41480, + "train standard": 57637, + "loss used": 31107, + "source datasets": 51762, + "highly likely": 23904, + "emerged effective": 17259, + "consist multiple": 10263, + "decoding paper": 13636, + "requires advanced": 46914, + "biased word": 7052, + "concepts work": 9947, + "end evaluation": 17672, + "scenarios automatic": 48691, + "investigate reasons": 26981, + "graph encoder": 23132, + "studies tried": 53304, + "generate variety": 22262, + "self correction": 49193, + "domains previous": 16285, + "fusion approaches": 21852, + "capturing inter": 7736, + "challenging propose": 8131, + "variety benchmarks": 61264, + "step wise": 52837, + "major barrier": 31501, + "alternative traditional": 2509, + "researchers develop": 47150, + "communication based": 9246, + "work employ": 62644, + "performing end": 40676, + "cross utterance": 11871, + "achieves 13": 1285, + "issues remain": 27104, + "work looked": 62714, + "answers address": 3105, + "text outputs": 56683, + "does utilize": 15982, + "sentences large": 49747, + "simultaneously considers": 51270, + "sensitive nature": 49502, + "dataset helps": 12947, + "limited resource": 30609, + "using latest": 60763, + "construct multilingual": 10393, + "distillation scheme": 15577, + "proposes effective": 43932, + "attention regularization": 4819, + "including end": 25253, + "significant data": 50860, + "propose natural": 43486, + "including recurrent": 25293, + "multi component": 35949, + "conventional pipeline": 11010, + "data semi": 12638, + "using adapters": 60551, + "test english": 56347, + "phrases source": 40854, + "trains models": 58325, + "propose representation": 43606, + "predicting topic": 41684, + "accuracy nlp": 1016, + "generative ones": 22601, + "text sample": 56751, + "11 respectively": 91, + "train target": 57643, + "use encoder": 59874, + "recall using": 45248, + "scale annotation": 48553, + "documents prior": 15904, + "summary input": 53915, + "findings conclude": 20905, + "analysis points": 2717, + "demonstrates promising": 14038, + "scientific progress": 48767, + "available furthermore": 5295, + "step used": 52834, + "foundation natural": 21418, + "prominent role": 43152, + "help solve": 23589, + "poor model": 41139, + "data balancing": 12183, + "result able": 47432, + "reduce negative": 45674, + "regressive decoder": 45825, + "recently demonstrated": 45417, + "triplet network": 58809, + "analysis sentences": 2751, + "relatedness scores": 45959, + "translation second": 58672, + "second uses": 49029, + "binary text": 7155, + "modern approaches": 35701, + "2022 workshop": 306, + "ensembling models": 17987, + "transfer shot": 58421, + "candidate outputs": 7573, + "performance sub": 40582, + "par strong": 39619, + "attention specifically": 4831, + "leveraging powerful": 30335, + "performs considerably": 40705, + "negative sentiments": 36637, + "score bert": 48836, + "strategy outperforms": 52945, + "successfully improves": 53747, + "token classifier": 57283, + "sentence transformers": 49662, + "texts data": 56869, + "future word": 21898, + "respectively extensive": 47369, + "distillation technique": 15579, + "paper comprehensively": 39293, + "analyze generalization": 2817, + "schemes proposed": 48734, + "based stage": 6053, + "german based": 22662, + "handle tasks": 23416, + "bipartite matching": 7182, + "competitive scores": 9565, + "low model": 31162, + "maintaining model": 31494, + "architectures explored": 4110, + "attempt answer": 4679, + "corpora important": 11207, + "set automatic": 50109, + "surface text": 54156, + "use resource": 59997, + "jointly encoding": 27196, + "negative consequences": 36616, + "labels target": 27849, + "provide fair": 44071, + "difficult construct": 15161, + "methods contrastive": 32802, + "important work": 24794, + "design constrained": 14269, + "training extensive": 58102, + "performance suggests": 40587, + "f1 accuracy": 20180, + "tasks combined": 55542, + "need effective": 36557, + "emerged important": 17260, + "ability support": 643, + "increasingly necessary": 25475, + "access text": 829, + "prompt models": 43206, + "classification multilingual": 8502, + "expansion task": 19190, + "optimized maximize": 38568, + "just fine": 27249, + "common scenario": 9195, + "crafted templates": 11683, + "labels labels": 27836, + "pair dataset": 39148, + "size work": 51404, + "description task": 14248, + "based hand": 5764, + "train generate": 57592, + "questions best": 44776, + "benchmark analysis": 6424, + "language significant": 28485, + "models accomplish": 34660, + "leverage available": 30256, + "level relevance": 30191, + "document sequence": 15830, + "approach collect": 3448, + "systems capture": 54447, + "studied topic": 53238, + "drastically reducing": 16396, + "models speed": 35529, + "16 datasets": 163, + "study attention": 53331, + "reliable methods": 46252, + "unsupervised automatic": 59683, + "baseline metrics": 6184, + "11 improvement": 86, + "process evaluation": 42777, + "generally requires": 22171, + "exploit recent": 19662, + "results previously": 47777, + "proposed nlp": 43870, + "low source": 31203, + "low semantic": 31202, + "modal multi": 33461, + "self generated": 49197, + "multi topic": 36034, + "knowledge identify": 27519, + "works address": 62874, + "domain does": 16050, + "construct high": 10387, + "quality multi": 44553, + "dataset leveraging": 12984, + "observed different": 38144, + "paradigm paper": 39627, + "need evaluate": 36559, + "multiple stages": 36288, + "datasets generate": 13283, + "study dataset": 53353, + "improvement training": 25036, + "seq2seq paradigm": 49904, + "supervised loss": 54011, + "generation improve": 22475, + "techniques terms": 56142, + "area machine": 4141, + "ubiquitous human": 59175, + "different length": 14975, + "points bleu": 41070, + "problem converting": 42525, + "produces output": 43033, + "annotation based": 2937, + "context simple": 10720, + "form based": 21313, + "data pipeline": 12544, + "training construct": 57958, + "latent state": 29138, + "given access": 22722, + "consisting 100": 10315, + "fail produce": 20343, + "upstream tasks": 59779, + "demonstrated remarkable": 14017, + "gpt 175b": 22971, + "translation generate": 58615, + "compare supervised": 9370, + "testing state": 56413, + "consequently models": 10204, + "significant loss": 50896, + "introducing auxiliary": 26899, + "understanding capability": 59328, + "decoding efficiency": 13629, + "data bilingual": 12191, + "resource problem": 47264, + "llms perform": 30901, + "training subsets": 58277, + "framework zero": 21629, + "flan t5": 21094, + "llms gpt": 30900, + "leading robust": 29297, + "generalizable models": 22111, + "uses single": 60537, + "davinci 003": 13499, + "set expert": 50155, + "like biases": 30464, + "guide development": 23330, + "understand effectiveness": 59293, + "drawbacks existing": 16404, + "generative pretraining": 22605, + "ranging size": 44946, + "new public": 37291, + "efforts address": 16934, + "restrict attention": 47420, + "attention multiple": 4792, + "existing supervised": 19152, + "set demonstrating": 50134, + "models corpora": 34868, + "allows creation": 2456, + "single target": 51341, + "task match": 55208, + "high prevalence": 23764, + "determine appropriate": 14554, + "problem binary": 42512, + "important downstream": 24720, + "using layers": 60765, + "setting text": 50352, + "textual genres": 56967, + "based kg": 5795, + "systems frequently": 54508, + "propose decoding": 43350, + "additional model": 1688, + "dataset comparison": 12851, + "poor interpretability": 41136, + "complex state": 9662, + "research aim": 46979, + "interpret paper": 26711, + "improve conventional": 24835, + "paper cast": 39286, + "gpt generates": 22979, + "search dataset": 48967, + "representation predicting": 46570, + "english literature": 17837, + "classifiers outperform": 8620, + "size experiments": 51383, + "information relationship": 26050, + "datasets setting": 13419, + "translation arabic": 58580, + "building real": 7464, + "style training": 53503, + "efficiently utilize": 16922, + "better original": 6923, + "graph captures": 23114, + "manual intervention": 31744, + "express information": 19793, + "integrating state": 26524, + "effective detection": 16645, + "strong non": 53039, + "output used": 39007, + "like gender": 30470, + "methods support": 33062, + "similar ones": 51056, + "classification considered": 8445, + "method extensive": 32502, + "attempt use": 4693, + "benefit applications": 6558, + "like chatgpt": 30466, + "work collected": 62599, + "dataset structured": 13103, + "gain knowledge": 21911, + "technique learn": 56038, + "multiple teachers": 36300, + "evaluation chatgpt": 18589, + "expert crafted": 19575, + "distance information": 15545, + "verification methods": 61526, + "performance 93": 40173, + "performance observe": 40463, + "linear non": 30662, + "contents paper": 10576, + "depth width": 14191, + "series forecasting": 50064, + "transformer fine": 58487, + "self assessment": 49174, + "extraction training": 20126, + "topic coverage": 57399, + "setup based": 50409, + "progress automatic": 43093, + "strategy generate": 52934, + "adapt target": 1512, + "models taken": 35581, + "available methods": 5326, + "model vietnamese": 34529, + "scaling large": 48649, + "paper learn": 39416, + "new inputs": 37223, + "micro macro": 33227, + "independently using": 25510, + "paper https": 39389, + "https bit": 24054, + "bert lstm": 6677, + "theoretic measures": 57015, + "applications high": 3208, + "models decisions": 34887, + "grand challenge": 23086, + "class balanced": 8391, + "research better": 46994, + "pretrained text": 42185, + "generation shown": 22547, + "providing natural": 44250, + "architecture order": 4072, + "score new": 48861, + "learning good": 29665, + "transfer process": 58415, + "glue datasets": 22865, + "experiments code": 19374, + "systems example": 54492, + "steer model": 52790, + "provide timely": 44146, + "strategies deal": 52898, + "gains terms": 21946, + "datasets needed": 13343, + "combination retrieval": 9049, + "design benchmark": 14266, + "cost manual": 11588, + "selects informative": 49169, + "evaluate algorithm": 18436, + "contextual dependency": 10762, + "unseen topics": 59657, + "process apply": 42758, + "significant limitations": 50895, + "advances area": 1905, + "models comes": 34830, + "learn tasks": 29436, + "distance aware": 15541, + "like multi": 30487, + "results field": 47632, + "models decision": 34886, + "pipeline including": 40901, + "methods findings": 32866, + "metrics finally": 33166, + "thousands tokens": 57080, + "history based": 23966, + "groups based": 23279, + "retrieved information": 47985, + "shot cases": 50603, + "based table": 6080, + "noticeable performance": 37729, + "conversational contexts": 11042, + "bert gpt2": 6666, + "prompting gpt": 43215, + "performance producing": 40501, + "propose ontology": 43577, + "leverage dataset": 30263, + "word provided": 62273, + "predict non": 41649, + "survey reviews": 54220, + "knowledge datasets": 27434, + "available based": 5264, + "overview state": 39118, + "model embed": 33805, + "empirical exploration": 17329, + "half century": 23366, + "feelings emotions": 20727, + "corpus approximately": 11279, + "benchmarks propose": 6537, + "embeddings far": 17135, + "content prior": 10548, + "model difficult": 33768, + "language automatic": 27969, + "gpt chatgpt": 22974, + "measure importance": 32055, + "investigate differences": 26950, + "evaluation showed": 18718, + "interaction systems": 26612, + "exploit unlabeled": 19667, + "performed significantly": 40665, + "text lack": 56641, + "knowledge alleviate": 27393, + "represents important": 46818, + "systematic exploration": 54398, + "training difficult": 58066, + "random selection": 44889, + "generation paradigm": 22515, + "addition highlight": 1619, + "partly lack": 39900, + "drive future": 16416, + "model evaluations": 33845, + "tool open": 57364, + "driven way": 16434, + "way task": 61831, + "reduce domain": 45659, + "suggests natural": 53848, + "provide little": 44100, + "effectiveness incorporating": 16784, + "masked sequence": 31867, + "identification datasets": 24386, + "texts retrieved": 56920, + "quality synthesized": 44584, + "strategy overcome": 52946, + "labeling strategy": 27794, + "adapts pre": 1585, + "particular social": 39861, + "gap existing": 21961, + "score 23": 48785, + "leading development": 29290, + "achieve faster": 1139, + "strict evaluation": 52984, + "especially pre": 18293, + "approaches analyze": 3763, + "distilled version": 15584, + "algorithms performance": 2334, + "make encoder": 31569, + "article generation": 4450, + "summarize main": 53907, + "limitations discuss": 30546, + "tasks understood": 55947, + "process does": 42773, + "algorithm natural": 2287, + "problem generate": 42572, + "form basis": 21314, + "information leakage": 25949, + "suggesting dataset": 53836, + "datasets visual": 13483, + "novel taxonomy": 37935, + "time knowledge": 57169, + "projection layers": 43141, + "60 accuracy": 458, + "provide flexible": 44076, + "baseline novel": 6197, + "features current": 20550, + "ner named": 36680, + "recently end": 45423, + "nlp mainly": 37497, + "scenarios tasks": 48710, + "costly annotate": 11599, + "provide task": 44142, + "english vice": 17899, + "shot classifiers": 50605, + "datasets yields": 13490, + "understanding modeling": 59365, + "accuracy data": 955, + "augmentation proposed": 4965, + "issue data": 27059, + "solving text": 51708, + "improve generalizability": 24858, + "scenarios particular": 48703, + "training design": 58061, + "surrounding text": 54198, + "model corresponding": 33726, + "gan model": 21955, + "relations obtained": 46047, + "aspect language": 4530, + "affective information": 2023, + "work date": 62619, + "chatgpt recently": 8265, + "based variation": 6125, + "complexity tasks": 9690, + "contains modules": 10500, + "module language": 35764, + "500 000": 424, + "set 20": 50100, + "challenges exist": 8045, + "research providing": 47104, + "utilizing unlabeled": 61129, + "limitations study": 30557, + "requires specialized": 46952, + "including user": 25317, + "models heavy": 35082, + "models questions": 35392, + "different decoding": 14892, + "simple interpretable": 51181, + "improve output": 24881, + "version used": 61558, + "tasks considering": 55555, + "ai model": 2118, + "relevant phrases": 46229, + "information incorporate": 25919, + "combining output": 9119, + "transform input": 58440, + "select similar": 49112, + "corpus apply": 11277, + "problem aim": 42500, + "approach especially": 3515, + "apply rule": 3351, + "sound complete": 51736, + "context dependencies": 10608, + "entailment relation": 18005, + "generate expressive": 22199, + "mechanism performance": 32134, + "used performance": 60261, + "demonstrating feasibility": 14053, + "build hierarchical": 7403, + "cost prohibitive": 11592, + "techniques successfully": 56139, + "obtained similar": 38222, + "clue words": 8730, + "ability task": 644, + "compares different": 9475, + "significantly data": 50950, + "tools analyzing": 57376, + "segment boundaries": 49072, + "domain annotated": 16018, + "compared evaluated": 9404, + "integrated framework": 26514, + "proposed stage": 43901, + "disambiguation systems": 15360, + "methods serve": 33034, + "word dictionary": 62137, + "robustness performance": 48292, + "based robust": 6003, + "information certain": 25778, + "framework adopt": 21453, + "language standard": 28502, + "describes data": 14221, + "makes text": 31639, + "task created": 54984, + "combine output": 9070, + "markup language": 31853, + "demonstrate necessity": 13948, + "known method": 27660, + "processing tool": 42959, + "deep linguistic": 13725, + "corpora specific": 11244, + "analysis automatically": 2620, + "based tf": 6093, + "need text": 36594, + "work various": 62858, + "structure modeling": 53119, + "representation structure": 46586, + "evaluation context": 18597, + "presents model": 42091, + "models original": 35282, + "order compare": 38602, + "based component": 5629, + "theoretical models": 57024, + "fully explored paper": 21730, + "human computer interaction": 24125, + "natural language processing": 36440, + "language processing nlp": 28418, + "possible future research": 41326, + "future research directions": 21888, + "natural language understanding": 36457, + "language understanding reasoning": 28558, + "natural language text": 36455, + "paper describes experiments": 39323, + "methods make use": 32940, + "strengths weaknesses different": 52978, + "et al 2007": 18396, + "problem paper present": 42620, + "non native speakers": 37666, + "new method based": 37252, + "non native english": 37665, + "systems make use": 54559, + "gram language models": 23055, + "language models lm": 28279, + "syntactic semantic information": 54325, + "latent semantic analysis": 29134, + "semantic analysis lsa": 49234, + "significant improvements compared": 50885, + "short term memory": 50570, + "languages like english": 28713, + "english french german": 17809, + "using domain specific": 60668, + "language processing domain": 28405, + "paper present model": 39456, + "fundamental problem natural": 21787, + "problem natural language": 42614, + "syntactic semantic properties": 54326, + "propose theoretical framework": 43672, + "corpus extensive experiments": 11341, + "automatic text summarization": 5130, + "subject verb object": 53559, + "tasks natural language": 55760, + "language processing information": 28411, + "processing information retrieval": 42877, + "information retrieval machine": 26064, + "retrieval machine translation": 47951, + "achieves human level": 1339, + "human level performance": 24197, + "corpora different languages": 11194, + "world wide web": 62968, + "machine translation based": 31350, + "gram language model": 23054, + "supervised machine learning": 54014, + "machine learning algorithm": 31307, + "predicate argument structures": 41630, + "multi document summarization": 35953, + "multiword expressions mwes": 36334, + "using large scale": 60759, + "large scale collection": 28965, + "syntactic structure sentence": 54329, + "knowledge plays important": 27570, + "plays important role": 41000, + "non local features": 37662, + "task paper propose": 55267, + "using rule based": 60914, + "paper provide overview": 39557, + "specific language model": 52100, + "text audio video": 56445, + "artificial neural network": 4498, + "real world text": 45142, + "large number languages": 28924, + "paper tackle problem": 39591, + "using natural language": 60829, + "natural language expressions": 36424, + "wide range natural": 61972, + "range natural language": 44925, + "language processing applications": 28397, + "including question answering": 25291, + "question answering summarization": 44710, + "advance state art": 1884, + "current natural language": 11990, + "language processing systems": 28431, + "performance state art": 40576, + "challenge natural language": 7999, + "trained large scale": 57770, + "large scale corpora": 28966, + "order improve performance": 38627, + "domains natural language": 16278, + "mean squared error": 31996, + "hidden markov models": 23643, + "automatic speech recognition": 5125, + "systems paper present": 54581, + "vector space models": 61468, + "open source project": 38455, + "important component natural": 24712, + "component natural language": 9710, + "processing nlp applications": 42899, + "elementary discourse units": 16976, + "paper present simple": 39461, + "present simple approach": 42015, + "multi class classification": 35946, + "used natural language": 60248, + "perform extensive experiments": 40107, + "extensive experiments benchmark": 19881, + "benchmark data sets": 6442, + "various natural language": 61369, + "sentences experimental results": 49716, + "experimental results obtained": 19299, + "results obtained using": 47749, + "natural language based": 36414, + "large scale real": 28998, + "scale real world": 48621, + "real world setting": 45137, + "english machine translation": 17841, + "word sense disambiguation": 62298, + "language generation process": 28087, + "machine translation mt": 31369, + "systems paper presents": 54582, + "closely related languages": 8706, + "recent work focused": 45369, + "open source tool": 38459, + "main contributions work": 31433, + "machine learning systems": 31332, + "proposed semi supervised": 43893, + "semi supervised approaches": 49457, + "paper consider problem": 39303, + "context free grammar": 10642, + "paper presents new": 39478, + "natural language sentences": 36449, + "words phrases sentences": 62480, + "natural language interfaces": 36432, + "understand natural language": 59307, + "given natural language": 22763, + "process natural language": 42809, + "demonstrated promising results": 14015, + "play different roles": 40968, + "paper presents method": 39476, + "sense disambiguation wsd": 49484, + "achieve good results": 1148, + "paper presents comprehensive": 39471, + "presents comprehensive study": 42078, + "previous work using": 42316, + "role natural language": 48317, + "natural language applications": 36413, + "applications information retrieval": 3212, + "english german spanish": 17816, + "paper presents novel": 39479, + "spoken dialogue systems": 52355, + "models trained tested": 35622, + "applications natural language": 3223, + "like machine translation": 30483, + "machine translation speech": 31383, + "information retrieval question": 26066, + "retrieval question answering": 47965, + "using conditional random": 60619, + "conditional random field": 10004, + "random field crf": 44871, + "fold cross validation": 21251, + "paper presents preliminary": 39481, + "paper presents work": 39486, + "representation natural language": 46562, + "using machine learning": 60783, + "machine learning approach": 31310, + "processing natural language": 42895, + "previous work model": 42306, + "significant improvements baseline": 50883, + "speech recognition asr": 52283, + "like natural language": 30491, + "task oriented dialogue": 55255, + "pre processing step": 41510, + "received little attention": 45261, + "resource poor languages": 47262, + "paper gives overview": 39388, + "answering natural language": 3086, + "natural language questions": 36445, + "question answering qa": 44707, + "answering qa systems": 3090, + "information retrieval ir": 26063, + "used evaluate performance": 60170, + "real world applications": 45123, + "low quality paper": 31169, + "real world data": 45124, + "provide new insights": 44105, + "machine learning framework": 31320, + "machine translation systems": 31385, + "construct large scale": 10391, + "semantic similarity words": 49350, + "optical character recognition": 38523, + "written different languages": 62997, + "lexical knowledge base": 30369, + "evaluation metric called": 18645, + "inter annotator agreement": 26576, + "improvement state art": 25027, + "state art propose": 52651, + "world natural language": 62950, + "data paper introduce": 12531, + "paper introduce new": 39402, + "recent years growing": 45386, + "paper present new": 39458, + "data sparseness problem": 12682, + "domain specific terms": 16188, + "paper proposes new": 39550, + "context free grammars": 10643, + "existing natural language": 19115, + "language processing methods": 28415, + "languages english french": 28654, + "latent dirichlet allocation": 29121, + "natural language texts": 36456, + "texts paper presents": 56909, + "presents novel approach": 42097, + "freely available https": 21653, + "available https github": 5309, + "https github com": 24059, + "high dimensional space": 23729, + "left right contexts": 30000, + "noise contrastive estimation": 37596, + "neural language models": 36964, + "obtaining state art": 38239, + "state art results": 52657, + "multi view learning": 36040, + "text classification tasks": 56488, + "shows proposed method": 50799, + "proposed method consistently": 43814, + "method consistently outperforms": 32438, + "methods domain adaptation": 32830, + "domain adaptation methods": 16000, + "processing nlp models": 42907, + "intrinsic extrinsic evaluations": 26771, + "probabilistic generative model": 42460, + "fine tuning step": 21022, + "various state art": 61397, + "state art supervised": 52674, + "supervised learning algorithms": 53995, + "support vector machine": 54132, + "vector machine svm": 61454, + "naive bayes classifier": 36364, + "principal component analysis": 42381, + "present new approach": 41959, + "available natural language": 5331, + "paper focus problem": 39379, + "english french translation": 17811, + "conditional random fields": 10005, + "random fields crf": 44873, + "help better understand": 23554, + "state art approaches": 52581, + "small scale datasets": 51497, + "real world datasets": 45126, + "plays central role": 40990, + "analysis named entity": 2702, + "named entity recognition": 36373, + "paper proposes method": 39546, + "language pairs english": 28368, + "english french english": 17808, + "lexical syntactic semantic": 30392, + "support vector machines": 54133, + "statistical machine translation": 52750, + "machine translation smt": 31382, + "long term goal": 31041, + "rule based approach": 48379, + "shows promising results": 50797, + "language processing tasks": 28433, + "language processing techniques": 28434, + "chinese social media": 8320, + "deep learning models": 13714, + "success natural language": 53713, + "language processing deep": 28404, + "massive text corpora": 31889, + "close state art": 8692, + "state art speech": 52669, + "speech pos tagger": 52278, + "based named entity": 5896, + "named entity recognizer": 36374, + "problems natural language": 42714, + "training data used": 58047, + "natural language generation": 36426, + "time natural language": 57183, + "outperforms existing models": 38900, + "work deep learning": 62622, + "learning neural networks": 29780, + "representations paper propose": 46733, + "paper propose alternative": 39494, + "propose novel model": 43549, + "vector representations words": 61463, + "types neural networks": 59106, + "learn high quality": 29380, + "provide state art": 44134, + "state art performance": 52646, + "human language acquisition": 24192, + "et al 2010": 18397, + "open source tools": 38461, + "development natural language": 14690, + "understanding natural language": 59371, + "natural language interface": 36431, + "paper describes submission": 39330, + "using publicly available": 60885, + "translation model using": 58632, + "english french spanish": 17810, + "machine translation information": 31362, + "translation information retrieval": 58621, + "information retrieval information": 26062, + "retrieval information extraction": 47947, + "information extraction text": 25869, + "machine translation cross": 31352, + "better performance existing": 6931, + "different types language": 15112, + "field natural language": 20763, + "work natural language": 62732, + "resources natural language": 47320, + "processing tasks machine": 42951, + "tasks machine translation": 55735, + "using proposed method": 60881, + "lexical semantic information": 30384, + "work present results": 62758, + "human effort required": 24137, + "multilingual natural language": 36103, + "play crucial role": 40966, + "language model adaptation": 28152, + "training language model": 58145, + "work propose model": 62781, + "play important role": 40972, + "words experimental results": 62414, + "performs better state": 40700, + "better state art": 6970, + "state art task": 52678, + "extract useful information": 20001, + "large collections documents": 28858, + "supervised unsupervised methods": 54068, + "translation natural language": 58643, + "modern standard arabic": 35721, + "standard arabic msa": 52465, + "syntactic semantic features": 54324, + "question answering systems": 44711, + "continuous vector space": 10855, + "machine translation techniques": 31388, + "pre processing steps": 41511, + "lead significant improvements": 29271, + "significant improvements accuracy": 50882, + "amazon mechanical turk": 2522, + "lexical syntactic features": 30390, + "close human performance": 8688, + "pointwise mutual information": 41084, + "mutual information pmi": 36349, + "extensive experiments large": 19891, + "publicly available datasets": 44341, + "level document level": 30104, + "distributed word representations": 15630, + "representations word embeddings": 46789, + "nlp tasks work": 37552, + "train word embeddings": 57660, + "word embeddings using": 62195, + "near state art": 36515, + "state art methods": 52630, + "applications machine translation": 3220, + "text target language": 56804, + "morphologically rich language": 35848, + "important research area": 24761, + "field machine translation": 20760, + "plays significant role": 41006, + "quality machine translation": 44548, + "machine translation paper": 31376, + "evaluation machine translation": 18636, + "automatic evaluation metrics": 5087, + "sentence document level": 49544, + "latent variable model": 29144, + "experimental results demonstrate": 19281, + "results demonstrate effectiveness": 47575, + "demonstrate effectiveness method": 13899, + "gained increasing attention": 21917, + "current state art": 12012, + "state art sequence": 52663, + "sentences natural language": 49757, + "based machine learning": 5827, + "paper describes approach": 39320, + "machine translation tasks": 31387, + "application natural language": 3171, + "large high quality": 28885, + "level word level": 30232, + "language processing including": 28410, + "including machine translation": 25273, + "outperforming existing methods": 38852, + "machine learning techniques": 31335, + "used shared task": 60300, + "natural language tasks": 36454, + "web based application": 61880, + "generates natural language": 22350, + "task machine translation": 55197, + "natural language input": 36429, + "sentiment emotion analysis": 49843, + "vector space model": 61467, + "distributed representation words": 15624, + "non trivial task": 37689, + "learn text representations": 29438, + "multi label classification": 35976, + "parallel training data": 39657, + "training data languages": 58006, + "entity recognition ner": 18129, + "able achieve high": 670, + "information real world": 26041, + "based character based": 5615, + "dataset experimental results": 12917, + "experimental results proposed": 19305, + "paper describes new": 39325, + "new freely available": 37213, + "large scale multilingual": 28989, + "deep neural network": 13739, + "neural network dnn": 37004, + "different natural languages": 15003, + "addition propose new": 1637, + "propose new architecture": 43497, + "state art entity": 52612, + "speech pos tagging": 52279, + "text speech synthesis": 56785, + "machine translation research": 31381, + "using different machine": 60655, + "human evaluation automatic": 24143, + "word level features": 62230, + "best knowledge work": 6775, + "weakly supervised learning": 61864, + "resource rich language": 47269, + "propose new method": 43504, + "english german english": 17813, + "labeled data used": 27752, + "yields significant improvements": 63129, + "significant improvements state": 50889, + "improvements state art": 25101, + "art supervised methods": 4418, + "branch natural language": 7301, + "integer linear programming": 26500, + "information theoretic framework": 26121, + "mutual information maximization": 36347, + "available real world": 5356, + "semantic similarity measures": 49347, + "downstream natural language": 16344, + "applications question answering": 3240, + "paper propose new": 39526, + "propose new task": 43513, + "proposed method effective": 43817, + "competitive state art": 9567, + "mapping natural language": 31805, + "natural language formal": 36425, + "machine translation evaluation": 31357, + "evaluation metrics proposed": 18653, + "human evaluation metrics": 24150, + "machine translation text": 31390, + "layer neural network": 29195, + "training data learning": 58010, + "end end machine": 17649, + "model significantly improves": 34381, + "significantly improves performance": 50976, + "improves performance state": 25143, + "phrase based statistical": 40837, + "based statistical machine": 6059, + "state art technologies": 52682, + "language models shown": 28319, + "variety nlp tasks": 61286, + "similar better performance": 51031, + "performance deep learning": 40281, + "word embeddings provide": 62185, + "problem machine learning": 42601, + "machine learning community": 31316, + "useful nlp tasks": 60379, + "nlp tasks recent": 37546, + "tasks recent work": 55838, + "recent work shown": 45373, + "applications sentiment analysis": 3249, + "word level representations": 62236, + "learning distributed representations": 29597, + "cross lingual document": 11832, + "document classification task": 15771, + "outperform previous state": 38812, + "previous state art": 42283, + "multiple language pairs": 36235, + "recursive neural network": 45637, + "neural network models": 37018, + "natural language nl": 36436, + "natural language query": 36443, + "approach outperforms state": 3630, + "outperforms state art": 38947, + "machine translation task": 31386, + "task recent work": 55322, + "word embeddings different": 62164, + "labeled data available": 27740, + "learn word representations": 29446, + "developing natural language": 14659, + "online news articles": 38376, + "text mining applications": 56661, + "large text corpora": 29025, + "corpus paper presents": 11399, + "model substantially outperforms": 34423, + "languages paper present": 28744, + "text classification problem": 56481, + "text classification algorithms": 56466, + "learning based approach": 29530, + "sufficient training data": 53807, + "previous work shown": 42312, + "multi word expressions": 36043, + "compare performance different": 9354, + "framework experimental results": 21513, + "bring significant improvements": 7336, + "state art model": 52632, + "propose novel method": 43545, + "novel method called": 37865, + "fine grained semantic": 20942, + "results significant improvements": 47845, + "significant improvements previous": 50888, + "improvements previous state": 25094, + "state art tasks": 52679, + "experimental evaluation shows": 19263, + "evaluation shows proposed": 18723, + "proposed method outperforms": 43824, + "paper describes method": 39324, + "experiments conducted using": 19387, + "different language pairs": 14966, + "predicate argument structure": 41629, + "plays critical role": 40992, + "maintaining competitive performance": 31490, + "maximum entropy classifier": 31969, + "achieves state art": 1379, + "language understanding generation": 28550, + "point wise mutual": 41053, + "wise mutual information": 62083, + "standard maximum likelihood": 52503, + "hidden markov model": 23642, + "provide open source": 44109, + "open source implementation": 38451, + "achieve significant improvements": 1193, + "improvements compared previous": 25059, + "compared previous methods": 9435, + "task paper presents": 55266, + "training machine learning": 58163, + "important problem natural": 24755, + "graph based approaches": 23103, + "performance benchmark datasets": 40212, + "data different languages": 12283, + "experimental results shown": 19313, + "information significantly improves": 26087, + "performance propose novel": 40503, + "language independent approach": 28106, + "resource poor language": 47261, + "recent state art": 45348, + "perform extensive evaluation": 40106, + "using different datasets": 60654, + "performs significantly better": 40714, + "machine translation language": 31363, + "state art chinese": 52593, + "approaches machine translation": 3868, + "phrase based smt": 40836, + "preliminary results using": 41806, + "useful natural language": 60377, + "graph based methods": 23106, + "propose novel unsupervised": 43572, + "empirical evaluation shows": 17324, + "use training data": 60056, + "training data language": 58005, + "work explore use": 62659, + "leads significant improvement": 29326, + "significant improvement performance": 50878, + "best reported results": 6813, + "different real world": 15045, + "text speech tts": 56786, + "machine learning classification": 31314, + "achieve comparable performance": 1123, + "meaning natural language": 32006, + "natural language utterances": 36458, + "best performing systems": 6800, + "automatically generate large": 5175, + "et al 2012": 18399, + "term frequency inverse": 56237, + "frequency inverse document": 21675, + "inverse document frequency": 26928, + "document frequency tf": 15796, + "frequency tf idf": 21678, + "processing tasks paper": 42952, + "information retrieval natural": 26065, + "retrieval natural language": 47960, + "natural language modeling": 36434, + "information extraction task": 25867, + "label classification problem": 27696, + "single label multi": 51311, + "label multi label": 27717, + "area natural language": 4143, + "play key role": 40975, + "machine learning approaches": 31311, + "plays crucial role": 40994, + "languages like hindi": 28714, + "perform comparative analysis": 40078, + "convolutional neural network": 11113, + "long range relations": 31022, + "binary multi class": 7152, + "achieves excellent performance": 1324, + "different data sets": 14886, + "plays vital role": 41008, + "metrics like bleu": 33178, + "automatic evaluation results": 5088, + "able answer questions": 675, + "long standing goal": 31032, + "human labeled data": 24187, + "stochastic gradient descent": 52856, + "followed fine tuning": 21258, + "using weak supervision": 61020, + "weakly labeled data": 61858, + "learn semantic representations": 29419, + "models cross lingual": 34876, + "document classification tasks": 15772, + "prior state art": 42414, + "chinese word segmentation": 8327, + "method outperforms existing": 32601, + "recently neural network": 45443, + "neural network based": 36999, + "based language models": 5804, + "learning word embeddings": 29944, + "neural word embeddings": 37113, + "achieve state art": 1204, + "achieves f1 score": 1326, + "f1 score 90": 20213, + "significantly better previous": 50942, + "provide empirical evidence": 44061, + "english second language": 17871, + "published state art": 44374, + "large scale text": 29003, + "paper present novel": 39459, + "present novel approach": 41968, + "new state art": 37324, + "state art performances": 52647, + "social media data": 51573, + "present case study": 41863, + "sentence level text": 49594, + "machine learning algorithms": 31308, + "bag words models": 5507, + "achieve new state": 1175, + "art results text": 4389, + "results text classification": 47884, + "text classification sentiment": 56484, + "classification sentiment analysis": 8544, + "sentiment analysis tasks": 49830, + "tasks paper presents": 55787, + "probabilistic language model": 42464, + "language model approach": 28155, + "word similarity tasks": 62311, + "morphologically rich languages": 35849, + "paper propose unsupervised": 39539, + "propose unsupervised method": 43693, + "text data available": 56523, + "obtain better results": 38164, + "text classification task": 56487, + "large unlabeled corpus": 29039, + "corpus experimental results": 11338, + "performance machine translation": 40428, + "language processing tools": 28436, + "texts multiple languages": 56905, + "yield better results": 63092, + "present novel framework": 41972, + "present novel algorithm": 41967, + "baselines large margin": 6276, + "comparable current state": 9295, + "supervised learning algorithm": 53994, + "time paper propose": 57188, + "paper propose novel": 39528, + "propose novel neural": 43552, + "novel neural network": 37885, + "neural network model": 37017, + "rnn encoder decoder": 48192, + "recurrent neural networks": 45626, + "neural networks rnn": 37070, + "model jointly trained": 34026, + "log linear model": 30974, + "proposed model learns": 43853, + "based machine translation": 5828, + "focus natural language": 21186, + "based information extraction": 5784, + "specific use cases": 52169, + "state art technique": 52680, + "automatically extract information": 5169, + "information natural language": 25984, + "information retrieval text": 26070, + "text classification important": 56474, + "performance text classification": 40600, + "question answering information": 44699, + "text classification paper": 56479, + "study paper presents": 53427, + "paper presents overview": 39480, + "syntactic semantic structures": 54327, + "markov model hmm": 31848, + "language natural language": 28354, + "language processing task": 28432, + "various real world": 61383, + "state art systems": 52676, + "knowledge base population": 27408, + "word embedding method": 62148, + "results proposed method": 47784, + "proposed method achieves": 43807, + "better results compared": 6957, + "word embeddings languages": 62175, + "available public use": 5352, + "approach does require": 3495, + "natural language process": 36439, + "large annotated corpora": 28844, + "information extraction systems": 25866, + "machine learning models": 31326, + "recent studies shown": 45353, + "mechanical turk amt": 32093, + "generate high quality": 22207, + "high quality annotations": 23770, + "develop novel model": 14609, + "using monte carlo": 60814, + "results shed light": 47833, + "state art models": 52633, + "using real world": 60896, + "social media corpus": 51572, + "different semantic spaces": 15062, + "results achieved using": 47487, + "experiments neural machine": 19480, + "neural machine translation": 36968, + "recently proposed approach": 45455, + "machine translation neural": 31372, + "single neural network": 51326, + "neural network jointly": 37010, + "encoder decoder architecture": 17496, + "parts source sentence": 39909, + "existing state art": 19147, + "qualitative analysis reveals": 44473, + "systems paper propose": 54583, + "machine translation model": 31367, + "results significant improvement": 47844, + "machine translation models": 31368, + "machine translation using": 31392, + "translating natural language": 58567, + "models significantly outperform": 35510, + "complex natural language": 9639, + "make use unlabeled": 31610, + "use unlabeled data": 60065, + "extensive empirical analysis": 19863, + "approaches mainly focus": 3870, + "supervised learning based": 53998, + "learning based methods": 29536, + "improve classification performance": 24831, + "processing nlp tools": 42916, + "data used training": 12762, + "terms precision recall": 56310, + "play vital role": 40981, + "labeled unlabeled data": 27771, + "classification natural language": 8507, + "labeled data expensive": 27741, + "processing tasks like": 42950, + "paper explores use": 39370, + "use machine learning": 59942, + "supervised learning methods": 54001, + "remains open question": 46344, + "using neural network": 60834, + "network based models": 36709, + "models ability learn": 34651, + "leads better performance": 29307, + "work paves way": 62746, + "experiment results demonstrate": 19249, + "applying natural language": 3371, + "speech tagging named": 52302, + "tagging named entity": 54745, + "entity recognition entity": 18127, + "number state art": 38039, + "improve state art": 24928, + "machine translation nmt": 31373, + "shown promising results": 50743, + "post processing step": 41352, + "improvement bleu points": 24992, + "based language model": 5803, + "language model based": 28156, + "model based methods": 33605, + "large amounts training": 28841, + "amounts training data": 2561, + "training data experiments": 57994, + "social media texts": 51587, + "tasks named entity": 55758, + "based supervised machine": 6070, + "mikolov et al": 33241, + "attention recent years": 4817, + "various nlp tasks": 61375, + "word embedding models": 62151, + "multiple state art": 36290, + "logistic regression classifier": 30995, + "question answering using": 44715, + "neural network cnn": 37003, + "experiments demonstrate effectiveness": 19403, + "demonstrate effectiveness approach": 13894, + "comparison state art": 9507, + "labeled training data": 27767, + "training data existing": 57991, + "important task natural": 24778, + "task natural language": 55234, + "language processing used": 28439, + "large scale applications": 28962, + "applications previous studies": 3235, + "real world scenarios": 45136, + "based deep neural": 5674, + "compare different approaches": 9336, + "based neural network": 5907, + "neural network outperforms": 37020, + "results recent years": 47801, + "recent years witnessed": 45399, + "based question answering": 5971, + "question answering named": 44704, + "experimental results performance": 19301, + "knowledge base completion": 27405, + "word embeddings trained": 62191, + "present systematic study": 42034, + "dirichlet allocation lda": 15345, + "presents new approach": 42094, + "distributed representations words": 15626, + "performance natural language": 40452, + "model paper present": 34174, + "bag words bow": 5504, + "skip gram model": 51420, + "word vector representations": 62336, + "sentiment classification task": 49834, + "features work propose": 20700, + "work propose new": 62783, + "semantic role labeling": 49338, + "unlike previous approaches": 59599, + "prior linguistic knowledge": 42406, + "language models learn": 28271, + "neural language model": 36962, + "language model embeddings": 28162, + "source target languages": 51803, + "neural translation models": 37109, + "based word representations": 6140, + "propose simple method": 43635, + "zero shot experiments": 63162, + "experiments cross lingual": 19394, + "method does require": 32467, + "training neural networks": 58191, + "gives state art": 22811, + "low dimensional space": 31146, + "time consuming human": 57131, + "task specific embeddings": 55393, + "applications paper propose": 3230, + "propose novel approach": 43521, + "applying machine learning": 3365, + "machine learning method": 31322, + "paper propose method": 39521, + "effectiveness proposed method": 16806, + "computer vision natural": 9896, + "vision natural language": 61642, + "language processing paper": 28419, + "processing paper present": 42919, + "model able generate": 33491, + "trained convolutional neural": 57697, + "achieves comparable results": 1315, + "topic natural language": 57420, + "processing nlp task": 42912, + "achieve good performance": 1147, + "zero shot setup": 63179, + "nearest neighbor search": 36521, + "word sense induction": 62299, + "zero shot learning": 63166, + "shot learning approach": 50625, + "significantly improve performance": 50966, + "information paper present": 26001, + "commonly used word": 9229, + "word level models": 62234, + "experimental results approach": 19270, + "translation mt systems": 58638, + "widely used approach": 62008, + "language model lm": 28174, + "error rate reduction": 18227, + "structure natural language": 53121, + "natural language sentence": 36448, + "recurrent neural network": 45625, + "neural network language": 37011, + "network language models": 36755, + "training set size": 58251, + "word error rates": 62202, + "large scale monolingual": 28987, + "alleviate data sparsity": 2404, + "data sparsity problem": 12685, + "experiments chinese english": 19372, + "language model significantly": 28194, + "model significantly improve": 34380, + "improve translation quality": 24936, + "target language training": 54828, + "high level semantic": 23747, + "uses natural language": 60524, + "supervised learning approaches": 53997, + "language processing research": 28429, + "using recurrent neural": 60901, + "neural networks long": 37055, + "networks long short": 36871, + "long short term": 31028, + "term memory lstm": 56249, + "capture long term": 7696, + "long term memory": 31042, + "weakly supervised manner": 61865, + "significantly outperform existing": 50991, + "outperform existing state": 38797, + "proposed model generates": 43851, + "proposed method paper": 43825, + "extend previous work": 19827, + "finite state transducers": 21060, + "sentence level information": 49588, + "sentence level features": 49587, + "based convolutional neural": 5651, + "neural network learn": 37012, + "feed forward neural": 20714, + "forward neural network": 21406, + "local global information": 30940, + "information large scale": 25944, + "large scale experiments": 28976, + "memory lstm networks": 32266, + "sequence modeling tasks": 49956, + "data paper propose": 12534, + "paper propose non": 39527, + "based neural networks": 5908, + "performance experimental results": 40336, + "machine translation method": 31365, + "paper make attempt": 39422, + "latent variable models": 29145, + "models neural networks": 35255, + "conduct extensive experiments": 10052, + "methods experimental results": 32851, + "results demonstrate proposed": 47582, + "demonstrate proposed methods": 13968, + "present novel method": 41974, + "using state art": 60961, + "state art visual": 52690, + "deep convolutional neural": 13689, + "recently proposed neural": 45458, + "devlin et al": 14731, + "et al 2014": 18401, + "target language model": 54825, + "achieving state art": 1425, + "proposed model achieve": 43844, + "model achieve significant": 33505, + "curriculum learning strategy": 12046, + "sentence level context": 49582, + "results approach significantly": 47507, + "approach significantly outperforms": 3694, + "significantly outperforms baseline": 50997, + "encoder decoder framework": 17500, + "empirical study shows": 17351, + "machine translation question": 31380, + "translation question answering": 58667, + "propose new approach": 43496, + "wang et al": 61766, + "et al 2013": 18400, + "based word embedding": 6136, + "capture long range": 7695, + "long range dependencies": 31020, + "propose convolutional neural": 43341, + "jensen shannon divergence": 27154, + "end end neural": 17656, + "end neural network": 17691, + "availability high quality": 5250, + "low resource language": 31181, + "resource language pair": 47236, + "high resource languages": 23793, + "based finite state": 5738, + "previous work neural": 42307, + "neural network predict": 37021, + "specifically designed task": 52194, + "extensive experiments text": 19902, + "unstructured textual data": 59675, + "word embeddings shown": 62189, + "embedding models trained": 17047, + "deep neural networks": 13740, + "neural networks dnns": 37043, + "significant performance gains": 50908, + "substantial performance improvements": 53628, + "tasks sentiment analysis": 55874, + "sentiment analysis question": 49826, + "outperforms previous state": 38925, + "propose new metric": 43506, + "process experimental results": 42779, + "low dimensional vector": 31147, + "extensive experiments model": 19893, + "experiments model achieves": 19468, + "model achieves significant": 33524, + "achieves significant improvement": 1367, + "power law distribution": 41427, + "language model propose": 28189, + "model propose simple": 34248, + "large amounts data": 28834, + "low resource languages": 31182, + "high quality datasets": 23775, + "state art natural": 52638, + "art natural language": 4307, + "processing nlp systems": 42911, + "unsupervised machine learning": 59707, + "machine learning methods": 31323, + "used large scale": 60225, + "qualitative quantitative analysis": 44480, + "word representations learned": 62289, + "word embedding methods": 62149, + "using word embeddings": 61028, + "achieve competitive results": 1128, + "different word embedding": 15132, + "space word embeddings": 51905, + "word embeddings use": 62193, + "downstream tasks present": 16362, + "present new state": 41965, + "knowledge base kb": 27407, + "paper presents approach": 39469, + "neural network rnn": 37024, + "leveraging pre trained": 30337, + "pre trained embeddings": 41529, + "canonical correlation analysis": 7591, + "task transfer learning": 55446, + "experiments demonstrate proposed": 19406, + "experimental results dataset": 19279, + "methods paper presents": 32974, + "proposed method uses": 43832, + "distributed vector representations": 15628, + "representations natural language": 46723, + "language models task": 28329, + "continuous bag words": 10842, + "mildly context sensitive": 33246, + "f1 score 86": 20210, + "wide range domains": 61966, + "rise social media": 48156, + "sentiment analysis paper": 49825, + "based sentence level": 6021, + "experimental results using": 19319, + "popular social media": 41188, + "systems widely used": 54671, + "paper presents results": 39482, + "correlate human judgments": 11505, + "based dependency parsing": 5677, + "annotated natural language": 2908, + "natural language descriptions": 36419, + "generation previous work": 22523, + "rule based methods": 48383, + "correlates human judgments": 11515, + "overview shared task": 39117, + "shared task consists": 50495, + "deep learning technologies": 13723, + "tackle challenges propose": 54701, + "challenges propose novel": 8072, + "propose novel framework": 43536, + "representations experimental results": 46661, + "results shown proposed": 47841, + "outperform existing methods": 38795, + "uses deep learning": 60504, + "conducted series experiments": 10094, + "bi directional long": 7001, + "directional long short": 15282, + "memory lstm neural": 32267, + "neural networks use": 37077, + "approaches paper propose": 3892, + "end end framework": 17647, + "demonstrate efficacy proposed": 13907, + "outperforming state art": 38861, + "used end end": 60163, + "neural networks model": 37057, + "english chinese english": 17785, + "learning approach based": 29521, + "neural networks dnn": 37042, + "gains natural language": 21939, + "lstm long short": 31272, + "neural models nlp": 36979, + "language understanding tasks": 28563, + "tasks paper introduce": 55784, + "entity recognition sentiment": 18132, + "recognition sentiment analysis": 45536, + "embeddings improve performance": 17148, + "improve performance tasks": 24902, + "tasks speech tagging": 55906, + "results highlight importance": 47658, + "using amazon mechanical": 60557, + "relations empirical results": 46025, + "gained lot attention": 21919, + "evaluation metrics based": 18647, + "results compared state": 47550, + "compared state art": 9459, + "state art word": 52692, + "word embedding techniques": 62154, + "existing methods using": 19104, + "abstract meaning representation": 761, + "meaning representation amr": 32013, + "improve previous state": 24910, + "state art result": 52656, + "end end performance": 17657, + "based large scale": 5808, + "gated recurrent unit": 21997, + "multi task objective": 36025, + "online social media": 38385, + "cover wide range": 11649, + "experimental results models": 19295, + "models outperform state": 35287, + "outperform state art": 38823, + "state art baselines": 52587, + "neural sequence sequence": 37099, + "sequence sequence model": 49990, + "based encoder decoder": 5699, + "encoder decoder model": 17501, + "term memory recurrent": 56252, + "memory recurrent neural": 32280, + "neural networks lstm": 37056, + "natural language instructions": 36430, + "achieves best results": 1306, + "best results reported": 6818, + "semi supervised learning": 49460, + "paper addresses problem": 39257, + "task specific knowledge": 55397, + "neural networks encode": 37045, + "sequence based models": 49913, + "neural sequence models": 37098, + "large training sets": 29032, + "training test sets": 58293, + "methods improve performance": 32895, + "artificial intelligence ai": 4491, + "research paper propose": 47089, + "shown good performance": 50712, + "used machine translation": 60231, + "deep learning approach": 13702, + "information extraction tasks": 25868, + "traditional rule based": 57542, + "higher level abstraction": 23831, + "dataset containing million": 12866, + "multi turn dialogues": 36037, + "based neural language": 5903, + "large amounts unlabeled": 28842, + "amounts unlabeled data": 2563, + "deep reinforcement learning": 13747, + "reinforcement learning framework": 45871, + "learning framework jointly": 29655, + "using bag words": 60581, + "average f1 score": 5407, + "based models neural": 5875, + "models neural models": 35253, + "rnn language model": 48195, + "achieve results comparable": 1187, + "results comparable state": 47543, + "comparable state art": 9312, + "wide variety languages": 61983, + "comprehensive survey existing": 9802, + "neural network approaches": 36996, + "recently achieved state": 45404, + "achieved state art": 1274, + "long distance dependencies": 31008, + "used generate text": 60198, + "source language target": 51779, + "language target language": 28519, + "new natural language": 37267, + "extracting relevant information": 20036, + "tasks demonstrate effectiveness": 55577, + "demonstrate effectiveness proposed": 13902, + "paper focus task": 39380, + "propose semi supervised": 43620, + "semi supervised method": 49462, + "demonstrate proposed method": 13967, + "online discussion forums": 38363, + "community paper propose": 9269, + "propose novel word": 43574, + "question answering task": 44712, + "tasks experimental results": 55630, + "languages english spanish": 28657, + "method outperforms previous": 32602, + "set paper describes": 50212, + "performance model trained": 40439, + "proposed unsupervised method": 43921, + "like word embeddings": 30513, + "text mining tasks": 56664, + "experimental results effectiveness": 19285, + "model state art": 34409, + "text classification performance": 56480, + "high dimensional data": 23728, + "attracting increasing attention": 4894, + "deep learning architectures": 13705, + "convolutional neural networks": 11114, + "machine learning tasks": 31333, + "low dimensional representations": 31145, + "supervised representation learning": 54036, + "representation learning method": 46541, + "low dimensional embedding": 31143, + "question answer pairs": 44687, + "obtains significant improvements": 38257, + "transition based parser": 58539, + "lstm recurrent neural": 31280, + "neural networks learn": 37054, + "neural networks cnn": 37039, + "long distance dependency": 31009, + "pairs paper propose": 39206, + "paper propose simple": 39534, + "based recurrent neural": 5983, + "experiments different datasets": 19415, + "rnn based model": 48187, + "neural network learns": 37013, + "word error rate": 62201, + "error rate wer": 18228, + "paper presents findings": 39474, + "memory lstm based": 32260, + "lstm based models": 31247, + "sequence tagging models": 50004, + "lstm bi lstm": 31251, + "bi lstm crf": 7013, + "lstm crf model": 31258, + "produce state art": 43012, + "online social network": 38386, + "paper investigate effect": 39410, + "sentiment analysis sa": 49828, + "text classification using": 56490, + "na ive bayes": 36359, + "better performance using": 6937, + "word representation models": 62285, + "yield state art": 63102, + "widely used natural": 62019, + "language processing problem": 28423, + "gradient based methods": 23004, + "based methods paper": 5850, + "closely related language": 8705, + "maximum likelihood training": 31973, + "widely used automatic": 62010, + "gram based metrics": 23051, + "previous studies shown": 42291, + "evaluate proposed approach": 18490, + "proposed approach using": 43735, + "domain specific corpus": 16169, + "results sentence level": 47824, + "results state art": 47856, + "neural networks nlp": 37061, + "word embedding model": 62150, + "models language models": 35162, + "improve model performance": 24873, + "tasks including machine": 55679, + "neural networks shown": 37072, + "shown improve performance": 50721, + "paper address issue": 39252, + "models machine translation": 35207, + "models natural language": 35245, + "produce high quality": 42986, + "release source code": 46168, + "capturing semantic information": 7743, + "word embeddings models": 62180, + "provide comprehensive overview": 44039, + "large scale data": 28969, + "data driven methods": 12304, + "methods widely used": 33101, + "discrete latent variables": 15424, + "representation learning methods": 46542, + "translation nmt models": 58647, + "problem previous work": 42629, + "byte pair encoding": 7515, + "propose end end": 43370, + "model achieves best": 33512, + "incorporate prior knowledge": 25362, + "rhetorical structure theory": 48089, + "based sentiment analysis": 6023, + "document level information": 15807, + "respectively experimental results": 47367, + "achieves consistent improvements": 1321, + "state art method": 52628, + "data selection strategies": 12634, + "data improve performance": 12420, + "additional training data": 1707, + "perform detailed analysis": 40088, + "semantic vector space": 49374, + "introduce new evaluation": 26835, + "contrast prior work": 10886, + "extensive experimental evaluation": 19873, + "state art methodologies": 52629, + "quality training data": 44591, + "training data limited": 58011, + "evaluation metrics used": 18655, + "performs slightly better": 40717, + "perform significantly better": 40141, + "bilingual evaluation understudy": 7109, + "evaluation understudy bleu": 18744, + "text processing tasks": 56713, + "training language models": 58146, + "language models powerful": 28299, + "entities knowledge base": 18061, + "propose generative model": 43402, + "different data sources": 14887, + "previous research shown": 42272, + "yielding state art": 63112, + "feed forward networks": 20713, + "best previously published": 6807, + "previously published results": 42344, + "paper introduces novel": 39408, + "sentence experimental results": 49556, + "experimental results large": 19291, + "using pre trained": 60866, + "neural network extract": 37006, + "model achieves performance": 33521, + "learn task specific": 29435, + "specific word embeddings": 52175, + "supervised training data": 54062, + "training data new": 58022, + "learning neural network": 29779, + "word embeddings proposed": 62184, + "word frequency word": 62210, + "using parallel corpus": 60855, + "real world scenario": 45135, + "parallel data available": 39646, + "using parallel data": 60856, + "model achieves state": 33525, + "largest publicly available": 29101, + "different language models": 14965, + "language models including": 28263, + "paper present approach": 39444, + "sequence generation models": 49927, + "trained multiple languages": 57821, + "perform error analysis": 40099, + "held test set": 23546, + "error analysis shows": 18216, + "statistically significant improvement": 52772, + "bidirectional long short": 7076, + "properties natural language": 43269, + "pos tagging task": 41235, + "set state art": 50253, + "rational speech acts": 45027, + "use natural language": 59958, + "best performance achieved": 6790, + "f1 score compared": 20221, + "model f1 score": 33875, + "linear programming ilp": 30665, + "orders magnitude faster": 38671, + "sequence sequence learning": 49989, + "memory lstm network": 32265, + "terms bleu score": 56273, + "achieving competitive performance": 1401, + "tasks paper explore": 55783, + "paper explore different": 39362, + "different neural network": 15007, + "neural network architectures": 36998, + "train deep neural": 57578, + "non negative matrix": 37668, + "training data usually": 58049, + "new approach called": 37133, + "approaches typically use": 3946, + "language models capture": 28238, + "range state art": 44936, + "state art neural": 52639, + "art neural language": 4311, + "art performance paper": 4341, + "performance paper propose": 40478, + "deep learning framework": 13709, + "social media text": 51586, + "character level recurrent": 8216, + "level recurrent neural": 30188, + "available open source": 5338, + "world applications including": 62928, + "word level information": 62231, + "model target language": 34444, + "word vectors word": 62341, + "character level model": 8213, + "word based models": 62119, + "visual question answering": 61664, + "question answering vqa": 44716, + "high level semantics": 23748, + "dataset proposed approach": 13040, + "recent advances neural": 45286, + "deep latent variable": 13699, + "models paper introduce": 35300, + "question answer pair": 44686, + "question answering benchmarks": 44690, + "novel attention based": 37771, + "attention based neural": 4723, + "neural network architecture": 36997, + "little training data": 30887, + "multi task learning": 36021, + "report state art": 46448, + "question answering tasks": 44713, + "et al 2015": 18402, + "capturing fine grained": 7734, + "reduce memory footprint": 45670, + "state art embedding": 52608, + "using large text": 60760, + "received attention lately": 45255, + "numerous natural language": 38068, + "processing nlp tasks": 42913, + "word representation learning": 62284, + "outperforms previously proposed": 38929, + "previously proposed methods": 42342, + "obtained state art": 38225, + "parallel data training": 39647, + "contrast previous work": 10884, + "trained language models": 57761, + "information language model": 25940, + "results fine tuning": 47638, + "fine tuning domain": 20985, + "propose general framework": 43397, + "set training data": 50270, + "training data using": 58048, + "word embeddings used": 62194, + "network based approach": 36704, + "approach natural language": 3610, + "neural networks used": 37078, + "fundamental natural language": 21782, + "consider problem learning": 10218, + "distribution training data": 15655, + "order better understand": 38599, + "natural language data": 36416, + "unsupervised representation learning": 59725, + "sentiment analysis text": 49831, + "neural network lstm": 37015, + "evaluate proposed architecture": 18491, + "achieve excellent performance": 1135, + "unlike previous work": 59603, + "previous work focused": 42303, + "semi supervised framework": 49459, + "demonstrate framework outperforms": 13915, + "outperforms competitive baselines": 38888, + "spoken language translation": 52360, + "language translation slt": 28539, + "speech recognition systems": 52288, + "lstm based model": 31246, + "work address problem": 62556, + "problem machine translation": 42602, + "method significantly outperforms": 32657, + "artificial neural networks": 4499, + "translation language modeling": 58625, + "present novel neural": 41977, + "prediction experimental results": 41706, + "achieves significant improvements": 1368, + "positive negative neutral": 41286, + "paper presents end": 39473, + "presents end end": 42083, + "generative question answering": 22609, + "shows proposed model": 50800, + "demonstrates proposed model": 14040, + "model trained data": 34468, + "context propose novel": 10696, + "semantically related words": 49389, + "human natural language": 24210, + "train language models": 57599, + "domain adaptation techniques": 16005, + "results indicate approach": 47676, + "minimum risk training": 33308, + "training end end": 58086, + "maximum likelihood estimation": 31972, + "experiments approach achieves": 19356, + "approach achieves significant": 3402, + "art neural machine": 4312, + "training neural network": 58190, + "language models large": 28270, + "based semi supervised": 6019, + "supervised learning method": 54000, + "improve performance downstream": 24889, + "performance downstream tasks": 40307, + "tagging dependency parsing": 54739, + "task fine tuning": 55090, + "fine tuning specific": 21019, + "specific linguistic features": 52106, + "wide variety tasks": 61986, + "directed acyclic graph": 15265, + "task artificial intelligence": 54915, + "avenues future research": 5397, + "generation task generating": 22560, + "sequence learning problem": 49946, + "neural encoder decoder": 36951, + "supervised semi supervised": 54040, + "semi supervised settings": 49466, + "neural networks rnns": 37071, + "generate natural language": 22222, + "machine translation summarization": 31384, + "summarization question answering": 53897, + "systems existing methods": 54496, + "existing methods typically": 19102, + "problem paper propose": 42621, + "methods achieve state": 32731, + "art results tasks": 4387, + "tasks sentiment classification": 55875, + "model able capture": 33489, + "model outperforms previous": 34163, + "models source code": 35522, + "sentence level semantics": 49592, + "experimental results model": 19294, + "results model outperforms": 47724, + "model outperforms existing": 34162, + "approaches large margin": 3855, + "sequence sequence neural": 49992, + "information source target": 26096, + "source target sentences": 51805, + "german english french": 22667, + "noisy training data": 37627, + "using different types": 60656, + "performance wide range": 40628, + "word embeddings learned": 62177, + "bengali english hindi": 6594, + "english hindi english": 17822, + "english language pairs": 17834, + "challenge paper propose": 8005, + "tasks language modeling": 55709, + "state art large": 52624, + "art large margin": 4274, + "slot filling task": 51442, + "state art f1": 52615, + "f1 score 95": 20217, + "language training data": 28536, + "significantly better performance": 50941, + "multi source transfer": 36007, + "large number people": 28925, + "propose novel deep": 43531, + "neural network used": 37030, + "proposed approach significantly": 43733, + "approach significantly improves": 3693, + "training semi supervised": 58245, + "performance machine learning": 40427, + "state art nlp": 52641, + "evaluate quality generated": 18496, + "machine learning tools": 31337, + "training data data": 57984, + "paper propose framework": 39511, + "arora et al": 4194, + "et al 2016": 18403, + "gold standard reference": 22920, + "improves translation quality": 25167, + "effectiveness proposed methods": 16807, + "achieved high accuracy": 1240, + "open domain question": 38425, + "domain question answering": 16142, + "probabilistic context free": 42456, + "present new method": 41964, + "using synthetic data": 60975, + "analysis natural language": 2704, + "natural language inference": 36428, + "social networking sites": 51596, + "various machine learning": 61359, + "vocabulary oov words": 61709, + "propose neural network": 43491, + "model large scale": 34042, + "shared embedding space": 50467, + "new evaluation method": 37193, + "downstream tasks text": 16368, + "neural networks natural": 37060, + "networks natural language": 36880, + "achieve similar performance": 1199, + "electronic health records": 16968, + "knowledge graph completion": 27498, + "multiple choice questions": 36182, + "information extraction question": 25865, + "answer given question": 3037, + "outperforms strong baselines": 38950, + "fine grained entity": 20934, + "grained entity typing": 23033, + "existing knowledge bases": 19079, + "define new task": 13777, + "entity mentions text": 18121, + "datasets demonstrate effectiveness": 13212, + "demonstrate effectiveness robustness": 13903, + "network language model": 36754, + "apply natural language": 3341, + "text paper present": 56690, + "nlp tasks word": 37551, + "like question answering": 30495, + "results demonstrate superiority": 47585, + "small labeled data": 51478, + "labeled data unlabeled": 27751, + "data unlabeled data": 12754, + "experimental results datasets": 19280, + "linear discriminant analysis": 30656, + "nlp tasks paper": 37543, + "reach state art": 45054, + "sentiment analysis task": 49829, + "challenges natural language": 8063, + "languages work investigate": 28823, + "tasks information extraction": 55687, + "embeddings trained large": 17234, + "trained large corpus": 57767, + "relative error reduction": 46095, + "performance cross domain": 40271, + "task named entity": 55232, + "languages french german": 28678, + "outperforms previously published": 38930, + "present novel methods": 41975, + "case study use": 7801, + "use multi task": 59954, + "based proposed method": 5960, + "long term dependencies": 31039, + "neural models learn": 36977, + "models learn representations": 35177, + "yield significant improvements": 63099, + "languages paper propose": 28746, + "paper propose neural": 39525, + "using character based": 60602, + "based attention based": 5580, + "attention based bidirectional": 4717, + "bidirectional recurrent neural": 7082, + "neural network proposed": 37023, + "task work present": 55473, + "work present novel": 62757, + "publicly available pre": 44350, + "available pre trained": 5345, + "pre trained word": 41565, + "trained word vectors": 57918, + "word vector space": 62337, + "dialogue state tracking": 14785, + "convolution neural network": 11097, + "word embeddings sentence": 62188, + "consistently outperforms baseline": 10302, + "outperforms baseline models": 38871, + "sequence tagging tasks": 50007, + "classification task using": 8568, + "achieves competitive performance": 1318, + "methods state art": 33052, + "social media posts": 51584, + "present novel unsupervised": 41980, + "model does require": 33786, + "recent research shown": 45342, + "existing methods require": 19100, + "large labeled data": 28894, + "hand crafted features": 23387, + "comparable performance state": 9304, + "languages english chinese": 28653, + "unsupervised domain adaptation": 59695, + "use social media": 60021, + "social media twitter": 51588, + "random forest classifier": 44876, + "state art accuracy": 52576, + "short text classification": 50572, + "art results different": 4376, + "results different datasets": 47593, + "pretrained word embeddings": 42195, + "art performance tasks": 4347, + "models present novel": 35347, + "correlation human ratings": 11525, + "outperforming previous state": 38857, + "previous best model": 42249, + "information propose novel": 26031, + "propose novel end": 43534, + "novel end end": 37815, + "model attention mechanism": 33586, + "words source sentence": 62518, + "dataset demonstrate proposed": 12886, + "demonstrate proposed model": 13969, + "compares favorably state": 9477, + "favorably state art": 20457, + "achieves results comparable": 1360, + "improving state art": 25198, + "recognizing textual entailment": 45560, + "recurrent attention based": 45610, + "based neural models": 5906, + "open source software": 38458, + "set new state": 50200, + "gated recurrent units": 21998, + "model multi task": 34110, + "task cross lingual": 54987, + "art results multiple": 4379, + "training improve performance": 58126, + "problem sequence sequence": 42649, + "sequence sequence seq2seq": 49996, + "propose new model": 43508, + "new model called": 37260, + "encoder decoder structure": 17507, + "world data sets": 62933, + "feature selection methods": 20503, + "state art languages": 52623, + "domain natural language": 16118, + "advanced state art": 1894, + "state art various": 52689, + "tasks paper propose": 55788, + "neural word embedding": 37112, + "present experimental results": 41910, + "state art graph": 52620, + "graph based approach": 23102, + "language generation tasks": 28090, + "natural language structured": 36451, + "wide range applications": 61965, + "machine learning researchers": 31331, + "question generation model": 44730, + "long standing problem": 31033, + "language processing recently": 28427, + "promising results using": 43183, + "sequence learning neural": 49945, + "lack training data": 27923, + "demonstrate state art": 13978, + "semantic textual similarity": 49366, + "textual similarity sts": 56982, + "semantic information encoded": 49285, + "data freely available": 12374, + "different methods proposed": 14990, + "opinions social media": 38507, + "large collection documents": 28856, + "training data scarce": 58033, + "using reinforcement learning": 60904, + "significantly outperforms traditional": 51009, + "logistic regression model": 30996, + "experimental results suggest": 19317, + "deep neural model": 13737, + "model trained using": 34478, + "multilayer perceptron mlp": 36060, + "features fine tuning": 20588, + "fine tuning method": 20998, + "benchmark data set": 6441, + "domain specific language": 16179, + "goal natural language": 22893, + "word level sentence": 62237, + "word embedding representations": 62152, + "limited training data": 30628, + "model sets new": 34367, + "sets new state": 50299, + "trained end end": 57721, + "outperforms prior work": 38934, + "language models lms": 28280, + "neural network nn": 37019, + "introduce new task": 26839, + "wide range nlp": 61973, + "range nlp tasks": 44927, + "network based approaches": 36705, + "address challenge propose": 1745, + "propose new deep": 43501, + "leverage pre trained": 30283, + "trained word embedding": 57916, + "learn high level": 29379, + "results demonstrate approach": 47574, + "demonstrate approach consistently": 13868, + "approach consistently outperforms": 3467, + "consistently outperforms existing": 10304, + "outperforms existing methods": 38899, + "datasets natural language": 13342, + "machine translation methods": 31366, + "present neural network": 41957, + "network attention mechanism": 36702, + "noisy user generated": 37629, + "user generated text": 60419, + "language model method": 28176, + "method achieves state": 32367, + "unsupervised semi supervised": 59729, + "semi supervised methods": 49463, + "supervised methods require": 54019, + "training data achieve": 57969, + "fine grained analysis": 20926, + "modern nlp models": 35717, + "models rely heavily": 35431, + "tasks relation extraction": 55846, + "language generation model": 28084, + "achieves new state": 1350, + "bag words model": 5506, + "real world knowledge": 45131, + "word embedding vectors": 62155, + "data set contains": 12643, + "empirical results approach": 17339, + "results approach outperforms": 47506, + "nlp tasks like": 37538, + "character level word": 8218, + "low dimensional vectors": 31148, + "novel approach automatic": 37758, + "language paper present": 28370, + "character level neural": 8215, + "deep language understanding": 13697, + "baselines state art": 6303, + "language understanding paper": 28556, + "generation natural language": 22504, + "trained large text": 57771, + "effective low resource": 16668, + "resource languages present": 47244, + "transfer learning method": 58384, + "method significantly improves": 32656, + "high resource language": 23792, + "using transfer learning": 60998, + "resource language pairs": 47237, + "performance low resource": 40424, + "low resource machine": 31183, + "resource machine translation": 47253, + "transfer learning model": 58386, + "state art low": 52626, + "word embeddings improve": 62171, + "neural network classifiers": 37002, + "based long short": 5822, + "knowledge graph construction": 27499, + "answering information retrieval": 3076, + "experimental results prove": 19306, + "analysis topic modeling": 2783, + "hindi english code": 23940, + "english code mixed": 17787, + "best knowledge attempt": 6769, + "available research community": 5360, + "novel multi task": 37879, + "model outperforms state": 34166, + "recently deep learning": 45415, + "attention based encoder": 4719, + "encoder decoder models": 17502, + "model sentence level": 34354, + "knowledge graph embedding": 27500, + "semeval 2016 task": 49429, + "neural networks based": 37037, + "performance various natural": 40623, + "processing tasks especially": 42947, + "used deep learning": 60142, + "deep learning methods": 13712, + "end end learning": 17648, + "term memory bi": 56245, + "memory bi lstm": 32244, + "bi lstm model": 7015, + "model obtains state": 34141, + "obtains state art": 38259, + "training data size": 58038, + "performance proposed method": 40507, + "semi supervised approach": 49456, + "deep learning based": 13706, + "results approach achieves": 47503, + "knowledge base construction": 27406, + "learning vector representations": 29937, + "inter rater agreement": 26584, + "memory neural network": 32277, + "sentence pair classification": 49610, + "tasks textual entailment": 55934, + "allocation lda topic": 2432, + "dataset proposed method": 13041, + "method outperforms state": 32605, + "art methods terms": 4289, + "averaged f1 score": 5422, + "memory lstm language": 32262, + "lstm language model": 31266, + "network end end": 36739, + "end end fashion": 17646, + "significantly improving performance": 50981, + "paper propose algorithm": 39493, + "analysis social media": 2762, + "important challenging task": 24708, + "long range context": 31019, + "context experimental results": 10634, + "previous work mainly": 42305, + "work mainly focused": 62719, + "significantly outperforms state": 51007, + "based neural machine": 5904, + "experiments large scale": 19455, + "large scale chinese": 28964, + "model experimental results": 33851, + "results large scale": 47695, + "large scale english": 28974, + "method achieves better": 32362, + "models recurrent neural": 35417, + "models trained predict": 35616, + "extrinsic evaluation downstream": 20171, + "variety downstream tasks": 61270, + "compared previous state": 9437, + "feedforward neural network": 20720, + "semantics natural language": 49409, + "lack large scale": 27902, + "paper present large": 39454, + "present large scale": 41937, + "language model score": 28193, + "beam search decoder": 6367, + "natural language question": 36444, + "strong baselines demonstrate": 53011, + "automatic post editing": 5115, + "machine translation output": 31375, + "submitted shared task": 53588, + "data paper present": 12532, + "based recent advances": 5980, + "recent advances deep": 45282, + "including social media": 25301, + "social media platforms": 51582, + "media platforms twitter": 32178, + "class classification problem": 8396, + "semantic syntactic features": 49359, + "method achieved state": 32359, + "f1 score 70": 20202, + "present novel model": 41976, + "multiple data sets": 36191, + "sentence paper propose": 49614, + "paper propose deep": 39503, + "datasets demonstrate efficacy": 13213, + "processing social media": 42939, + "human annotated data": 24097, + "train machine learning": 57604, + "machine learning classifiers": 31315, + "word2vec word embeddings": 62353, + "experiments proposed method": 19496, + "proposed method achieve": 43805, + "bleu points improvement": 7210, + "forward neural networks": 21407, + "develop novel approach": 14608, + "extraction experimental results": 20066, + "experimental results semeval": 19310, + "yields better performance": 63118, + "model trained end": 34470, + "large scale training": 29004, + "english german translation": 17817, + "different levels granularity": 14978, + "obtain new state": 38182, + "standard test set": 52533, + "accuracy precision recall": 1027, + "precision recall f1": 41617, + "standard evaluation metrics": 52491, + "neural networks models": 37058, + "models trained using": 35624, + "machine translation image": 31360, + "evaluation metrics bleu": 18648, + "metrics bleu meteor": 33145, + "experimental results indicate": 19289, + "results indicate model": 47677, + "paper study problem": 39580, + "novel attention mechanism": 37772, + "attention mechanism based": 4774, + "art results benchmarks": 4372, + "cnn daily mail": 8766, + "code available https": 8794, + "tasks best knowledge": 55525, + "best knowledge time": 6774, + "stanford natural language": 52557, + "language inference snli": 28112, + "obtain state art": 38194, + "order magnitude fewer": 38634, + "knowledge base propose": 27409, + "deep recurrent neural": 13745, + "outperforms current state": 38892, + "low frequency words": 31152, + "size training data": 51400, + "training data english": 57989, + "verify effectiveness proposed": 61538, + "play significant role": 40979, + "state art wide": 52691, + "report new state": 46440, + "automatic manual evaluation": 5104, + "training test data": 58291, + "study different types": 53362, + "sequence labeling tasks": 49940, + "tasks propose new": 55819, + "tasks sequence labeling": 55878, + "spoken language understanding": 52361, + "penn treebank ptb": 40025, + "word embeddings model": 62179, + "character level models": 8214, + "correlate better human": 11502, + "better human judgments": 6900, + "domain test set": 16206, + "real world deployment": 45127, + "recently achieved impressive": 45403, + "achieved impressive results": 1246, + "attentional encoder decoder": 4858, + "deep neural language": 13736, + "neural language modeling": 36963, + "attention based seq2seq": 4724, + "sequence sequence tasks": 49998, + "paper propose use": 39540, + "reinforcement learning rl": 45877, + "context aware word": 10592, + "remains challenging task": 46329, + "pre trained models": 41545, + "f1 score 97": 20218, + "term memory network": 56250, + "language specific features": 28496, + "hand crafted rules": 23389, + "neural models task": 36982, + "model training data": 34480, + "train sequence sequence": 57631, + "semantic parsing datasets": 49311, + "art performance standard": 4345, + "task reinforcement learning": 55327, + "bi directional lstm": 7002, + "outperforms previous work": 38927, + "language modeling task": 28219, + "aspect sentiment analysis": 4534, + "manually annotated test": 31761, + "translation model trained": 58631, + "active learning al": 1474, + "neural networks cnns": 37040, + "propose simple approach": 43632, + "empirical results method": 17343, + "results method outperforms": 47717, + "method outperforms baseline": 32598, + "models text classification": 35596, + "produces state art": 43036, + "especially low resource": 18286, + "source target target": 51806, + "target target source": 54846, + "target language source": 54827, + "language source language": 28491, + "dataset approach achieves": 12813, + "model natural language": 34118, + "language generation nlg": 28086, + "encoder decoder architectures": 17497, + "training data available": 57978, + "data available training": 12180, + "available training data": 5380, + "end end method": 17651, + "art large scale": 4275, + "deep learning architecture": 13704, + "introduce new method": 26837, + "user generated content": 60416, + "labeled data paper": 27743, + "data paper presents": 12533, + "trained neural language": 57826, + "language models generate": 28257, + "model achieves f1": 33516, + "features neural network": 20629, + "wide range linguistic": 61971, + "state art language": 52622, + "art language models": 4270, + "context natural language": 10679, + "present generative model": 41921, + "semantic parsing approach": 49309, + "generalize new domains": 22145, + "word level embeddings": 62229, + "multi class multi": 35947, + "class multi label": 8408, + "work propose use": 62790, + "end end speech": 17662, + "wall street journal": 61763, + "street journal corpus": 52969, + "previous work relied": 42311, + "recurrent convolutional neural": 45612, + "bi directional recurrent": 7004, + "question answering model": 44702, + "propose novel semi": 43560, + "novel semi supervised": 37916, + "model trained large": 34473, + "learning based models": 29538, + "based models proposed": 5879, + "despite recent progress": 14384, + "large data set": 28866, + "model takes input": 34441, + "model generalization ability": 33922, + "achieving competitive results": 1402, + "structures natural language": 53190, + "supervised learning unsupervised": 54010, + "learning unsupervised learning": 29926, + "art results various": 4391, + "novel unsupervised framework": 37947, + "chen et al": 8285, + "et al 2011": 18398, + "baseline model trained": 6186, + "times fewer parameters": 57251, + "small training dataset": 51508, + "feedforward neural networks": 20721, + "dense vector representations": 14085, + "use language models": 59924, + "language models model": 28284, + "results indicate proposed": 47679, + "indicate proposed model": 25534, + "outperforming current state": 38850, + "large margin paper": 28906, + "models state art": 35534, + "introduce neural network": 26830, + "best results obtained": 6817, + "hand crafted feature": 23386, + "design end end": 14280, + "outperforms previous approaches": 38920, + "manual feature engineering": 31742, + "evaluate performance proposed": 18484, + "performance proposed model": 40508, + "network rnn models": 36799, + "pre defined categories": 41500, + "simple neural network": 51200, + "rule based approaches": 48380, + "large body work": 28853, + "providing fine grained": 44245, + "task propose novel": 55302, + "novel deep learning": 37803, + "deep learning model": 13713, + "domain adaptation method": 15999, + "supervised domain adaptation": 53981, + "performance target domain": 40592, + "source domain target": 51767, + "domain target domain": 16200, + "neural networks trained": 37075, + "trained cross entropy": 57701, + "cross entropy loss": 11824, + "methods paper describes": 32973, + "semantically similar words": 49394, + "bag words representation": 5508, + "paper introduce novel": 39403, + "introduce novel approach": 26843, + "exceeds state art": 18951, + "low resource settings": 31191, + "large training data": 29030, + "large scale knowledge": 28981, + "scale knowledge bases": 48582, + "knowledge bases kbs": 27415, + "state art code": 52597, + "code data available": 8799, + "data available https": 12177, + "paper propose effective": 39506, + "translation nmt model": 58646, + "translation nmt systems": 58648, + "developed machine learning": 14632, + "reading comprehension datasets": 45082, + "reading comprehension task": 45086, + "training data required": 58031, + "enable zero shot": 17433, + "words training data": 62536, + "social media messages": 51579, + "achieving new state": 1416, + "tasks word embeddings": 55966, + "systems natural language": 54568, + "active learning framework": 1475, + "results demonstrate significant": 47583, + "demonstrate significant improvements": 13974, + "recently proposed methods": 45457, + "significantly improved performance": 50971, + "recently neural networks": 45444, + "neural networks proven": 37066, + "effective natural language": 16679, + "analysis question answering": 2737, + "question answering machine": 44701, + "answering machine translation": 3080, + "attention mechanism applied": 4772, + "model achieved state": 33509, + "art performance different": 4333, + "different nlp tasks": 15010, + "state art recurrent": 52653, + "recursive neural networks": 45638, + "words paper propose": 62476, + "new approach based": 37132, + "appear training data": 3140, + "training data evaluate": 57990, + "promising research directions": 43177, + "bilingual word embeddings": 7116, + "imitation learning framework": 24578, + "small number parameters": 51490, + "model jointly learns": 34025, + "task multi label": 55226, + "results natural language": 47737, + "results demonstrate efficacy": 47576, + "free form text": 21641, + "data driven approach": 12300, + "publicly available present": 44351, + "novel method generating": 37866, + "using character level": 60603, + "lstm encoder decoder": 31261, + "languages paper presents": 28745, + "proposed method evaluated": 43819, + "data sets results": 12652, + "development machine learning": 14684, + "achieve human like": 1160, + "present preliminary results": 41988, + "state art traditional": 52685, + "current language models": 11982, + "knowledge language model": 27541, + "state art cnn": 52596, + "sentence classification tasks": 49527, + "benchmark datasets experimental": 6452, + "datasets experimental results": 13264, + "nlp natural language": 37505, + "corpus available research": 11285, + "recently gained popularity": 45428, + "word embeddings directly": 62165, + "dependency parse tree": 14126, + "approach compared state": 3456, + "state art statistical": 52671, + "used nlp tasks": 60253, + "high computational cost": 23715, + "high quality human": 23779, + "nlp research community": 37522, + "language models paper": 28292, + "models paper presents": 35303, + "high dimensional spaces": 23730, + "source target language": 51802, + "target language paper": 54826, + "language paper propose": 28371, + "paper propose approach": 39495, + "based word embeddings": 6137, + "word embedding based": 62144, + "embedding based approach": 17015, + "word mover distance": 62250, + "mean average precision": 31990, + "real world dataset": 45125, + "conduct experiments english": 10046, + "experiments english chinese": 19428, + "proposed model achieves": 43846, + "existing large scale": 19084, + "native english speakers": 36401, + "gap human performance": 21964, + "representation learning models": 46544, + "character recognition ocr": 8222, + "domain adaptation approaches": 15996, + "transfer knowledge learned": 58370, + "task paper explore": 55263, + "explore multi task": 19718, + "learning domain adaptation": 29605, + "neural network framework": 37007, + "language understanding work": 28564, + "word vectors trained": 62340, + "trained source language": 57879, + "trained target language": 57890, + "target language using": 54829, + "large scale natural": 28991, + "scale natural language": 48604, + "language understanding task": 28562, + "task publicly available": 55312, + "publicly available dataset": 44340, + "structured knowledge base": 53160, + "end end models": 17654, + "models deep neural": 34890, + "extraction question answering": 20101, + "question answering models": 44703, + "best performing model": 6797, + "model word level": 34541, + "word level sequence": 62238, + "state art automatic": 52584, + "propose novel strategy": 43564, + "language processing analyze": 28396, + "fixed length vectors": 21077, + "averaging word vectors": 5429, + "tasks pre training": 55804, + "analysis sheds light": 2756, + "human language processing": 24193, + "neural network trained": 37029, + "paper propose efficient": 39507, + "character based models": 8196, + "based semantic similarity": 6016, + "semantic similarity metrics": 49348, + "open information extraction": 38434, + "train test models": 57647, + "introduce simple effective": 26861, + "simple effective framework": 51153, + "beam search algorithm": 6366, + "reinforcement learning model": 45874, + "language processing existing": 28407, + "experiment results model": 19251, + "difficult time consuming": 15190, + "propose attention based": 43302, + "attention based multi": 4722, + "word level attention": 62226, + "sentence level attention": 49580, + "language modeling approach": 28206, + "nlp models trained": 37502, + "svm logistic regression": 54236, + "conduct case study": 10030, + "capture semantic meaning": 7708, + "task automatically generating": 54924, + "using sequence sequence": 60932, + "correlation human judgments": 11524, + "models pre trained": 35339, + "competitive performance compared": 9554, + "performance compared state": 40255, + "significantly outperforms baselines": 50998, + "level convolutional neural": 30090, + "conneau et al": 10171, + "language models based": 28234, + "processing tasks word": 42954, + "experimental results popular": 19302, + "language processing work": 28441, + "aspect based sentiment": 4528, + "results competitive state": 47554, + "state art multilingual": 52636, + "multi domain datasets": 35955, + "hand engineered features": 23393, + "error analysis reveals": 18215, + "order address issues": 38592, + "address issues paper": 1771, + "achieves competitive results": 1319, + "supervised text classification": 54059, + "text classification methods": 56475, + "lack parallel data": 27906, + "limited parallel data": 30603, + "using attention based": 60567, + "neural network encoder": 37005, + "encoder decoder attention": 17498, + "proposed encoder decoder": 43763, + "model able achieve": 33488, + "achieve significantly higher": 1197, + "train neural network": 57618, + "github com facebookresearch": 22700, + "reduce training time": 45683, + "supervised learning setting": 54005, + "new york times": 37365, + "based transfer learning": 6105, + "work present new": 62756, + "systems built using": 54445, + "data make use": 12478, + "neural end end": 36953, + "models paper present": 35302, + "encoder decoder neural": 17505, + "decoder neural network": 13606, + "sentences paper propose": 49763, + "sentence extensive experiments": 49559, + "extensive experiments demonstrate": 19885, + "experiments demonstrate method": 19404, + "demonstrate method significantly": 13937, + "method significantly improve": 32655, + "existing approaches propose": 19029, + "fine grained word": 20945, + "significantly outperforms prior": 51006, + "results demonstrate method": 47578, + "demonstrate method outperforms": 13936, + "method outperforms baselines": 32599, + "propose new evaluation": 43502, + "new dataset containing": 37166, + "rely hand crafted": 46284, + "crafted features domain": 11680, + "features domain specific": 20564, + "domain specific resources": 16183, + "text experimental results": 56565, + "extremely low resource": 20164, + "low resource scenario": 31188, + "model performs significantly": 34206, + "utilize pre trained": 61101, + "trained word embeddings": 57917, + "usage social media": 59808, + "task sentiment analysis": 55361, + "training development test": 58064, + "report baseline results": 46428, + "precision recall measure": 41618, + "art results natural": 4380, + "textual visual information": 56987, + "joint representation learning": 27188, + "fine tuning parameters": 21007, + "model evaluate approach": 33841, + "approach outperforms existing": 3626, + "existing generative models": 19073, + "approach makes use": 3597, + "neural networks achieved": 37034, + "present empirical study": 41899, + "sentence classification task": 49526, + "classification task experimental": 8563, + "task experimental results": 55066, + "achieves best performance": 1305, + "end propose novel": 17702, + "propose novel attention": 43524, + "proposed methods significantly": 43838, + "methods significantly improve": 33040, + "significantly improve state": 50969, + "state art sentence": 52661, + "based generative model": 5751, + "using parallel corpora": 60854, + "models shown great": 35498, + "shown great success": 50716, + "sequence generation tasks": 49929, + "tasks previous work": 55811, + "paper propose methods": 39522, + "experiments state art": 19533, + "model performs better": 34203, + "development test sets": 14707, + "large unlabeled data": 29040, + "proposed method performs": 43826, + "method performs better": 32614, + "using unlabeled data": 61010, + "decision making processes": 13565, + "advances natural language": 1917, + "challenging task paper": 8151, + "use deep neural": 59865, + "task propose new": 55301, + "classification social media": 8555, + "data sets demonstrate": 12647, + "word embeddings based": 62158, + "task information extraction": 55138, + "state art average": 52585, + "make use data": 31607, + "systems recent years": 54614, + "standard machine learning": 52500, + "augmenting training data": 4990, + "neural network approach": 36995, + "feature based models": 20478, + "selection experimental results": 49138, + "baselines achieves state": 6229, + "task automatically identifying": 54925, + "language processing models": 28417, + "task multi task": 55227, + "models significantly better": 35507, + "models trained text": 35623, + "grounded language learning": 23262, + "research natural language": 47078, + "glove word embeddings": 22861, + "recurrent unit gru": 45628, + "network based model": 36708, + "using external resources": 60689, + "state art parsers": 52645, + "models recently shown": 35413, + "end end trainable": 17666, + "learned end end": 29459, + "using open source": 60849, + "achieved great success": 1238, + "address problem present": 1788, + "models publicly available": 35387, + "sentiment analysis datasets": 49820, + "large number training": 28927, + "work present approach": 62753, + "open source framework": 38450, + "language speech processing": 28500, + "gaussian mixture models": 22015, + "fewer model parameters": 20738, + "using small number": 60948, + "number model parameters": 38019, + "training data paper": 58023, + "memory augmented neural": 32241, + "models achieve state": 34673, + "open ended questions": 38431, + "remains challenging problem": 46328, + "linguistics cognitive science": 30821, + "significantly improves baseline": 50974, + "deep learning approaches": 13703, + "lack annotated data": 27873, + "data improve model": 12419, + "model performance paper": 34192, + "performance paper presents": 40477, + "english indian languages": 17825, + "results proposed models": 47786, + "proposed models outperform": 43863, + "models outperform existing": 35285, + "achieve fine grained": 1141, + "make code publicly": 31552, + "code publicly available": 8851, + "data text generation": 12731, + "2016 shared task": 260, + "results shared task": 47831, + "entity relation extraction": 18140, + "poses unique challenges": 41256, + "experiments public datasets": 19503, + "public datasets demonstrate": 44315, + "improvement f1 score": 25006, + "develop simple effective": 14613, + "simple effective model": 51156, + "outperform current state": 38790, + "sequence sequence models": 49991, + "improves state art": 25164, + "log linear models": 30975, + "solve problem propose": 51686, + "end end approach": 17636, + "multiple choice question": 36181, + "template based method": 56175, + "attains state art": 4676, + "character level language": 8212, + "outperforms previous best": 38921, + "previous best results": 42252, + "machine translation english": 31356, + "models based recurrent": 34758, + "domain specific knowledge": 16178, + "released open source": 46178, + "using policy gradient": 60862, + "policy gradient methods": 41096, + "sub word level": 53538, + "detection natural language": 14506, + "problem propose new": 42632, + "fixed size representation": 21081, + "paper presents empirical": 39472, + "experimental results neural": 19298, + "transfer learning settings": 58395, + "gold standard dataset": 22918, + "nlp tasks including": 37535, + "paper present general": 39451, + "using convolutional neural": 60629, + "loss function based": 31094, + "based models outperform": 5876, + "tasks work propose": 55972, + "work propose novel": 62784, + "novel framework called": 37827, + "results demonstrate model": 47579, + "demonstrate model achieves": 13941, + "tasks including sentiment": 55682, + "including sentiment analysis": 25298, + "supervised learning framework": 53999, + "learning framework called": 29654, + "make publicly available": 31591, + "bi directional lstms": 7003, + "sentence paper present": 49613, + "achieve competitive accuracy": 1126, + "accuracy state art": 1053, + "natural language task": 36453, + "tasks work present": 55971, + "term memory networks": 56251, + "sentence level tasks": 49593, + "cnn based models": 8761, + "approach achieving state": 3405, + "previously reported results": 42347, + "use recurrent neural": 59991, + "decoder experimental results": 13592, + "outperforms prior methods": 38932, + "state art pre": 52649, + "pre training methods": 41583, + "yields better results": 63119, + "better results using": 6960, + "manually annotated data": 31758, + "models widely used": 35682, + "accuracy paper describes": 1020, + "entity linking el": 18116, + "achieved f1 score": 1232, + "data significantly outperforms": 12663, + "gold standard datasets": 22919, + "obtain competitive results": 38168, + "training data approach": 57975, + "approach cross lingual": 3474, + "joint learning model": 27177, + "results model significantly": 47725, + "consistently improve performance": 10295, + "neural models achieved": 36974, + "work introduce new": 62692, + "structured prediction tasks": 53171, + "experiments model outperforms": 19469, + "use word embeddings": 60075, + "models propose novel": 35371, + "propose novel architecture": 43523, + "using attention mechanism": 60568, + "attention mechanism model": 4778, + "word character level": 62125, + "number trainable parameters": 38048, + "propose knowledge enhanced": 43430, + "hybrid neural network": 24322, + "recurrent units gru": 45630, + "neural network generate": 37008, + "high level features": 23746, + "extensive experiments public": 19898, + "public data sets": 44312, + "significantly outperform state": 50992, + "advances representation learning": 1924, + "propose neural architecture": 43489, + "lstm attention mechanism": 31242, + "model end end": 33822, + "end end differentiable": 17643, + "dataset proposed model": 13042, + "model achieves better": 33513, + "question answering paper": 44706, + "research social media": 47122, + "previous works mainly": 42320, + "models paper propose": 35304, + "paper propose end": 39508, + "error propagation problem": 18225, + "utilize contextual information": 61088, + "information experimental results": 25848, + "results effectiveness proposed": 47604, + "effectiveness proposed model": 16808, + "language state art": 28504, + "achieving best performance": 1395, + "rapid development deep": 44989, + "development deep learning": 14675, + "experiments sentiment analysis": 19520, + "existing models usually": 19109, + "models achieve excellent": 34670, + "fine grained classification": 20930, + "news social media": 37415, + "tasks existing methods": 55627, + "gives best results": 22804, + "highly competitive results": 23885, + "widely used various": 62023, + "address issues present": 1772, + "new encoder decoder": 37184, + "using long short": 60779, + "hierarchical encoder decoder": 23668, + "experimental results benchmark": 19272, + "results benchmark datasets": 47522, + "demonstrate superiority proposed": 13988, + "superiority proposed model": 53956, + "models outperform baseline": 35284, + "received increasing attention": 45259, + "increasing attention recent": 25445, + "recent years number": 45390, + "time consuming expensive": 57130, + "utilize unlabeled data": 61106, + "binary classification problem": 7146, + "introduce attention based": 26783, + "capture long distance": 7694, + "experiments proposed approach": 19494, + "use reinforcement learning": 59993, + "performance downstream task": 40306, + "previous sequence sequence": 42277, + "sequence seq2seq models": 49977, + "data conduct extensive": 12237, + "connectionist temporal classification": 10183, + "achieve high accuracy": 1151, + "semantic representation text": 49334, + "long range dependency": 31021, + "evaluate effectiveness proposed": 18455, + "large amounts annotated": 28833, + "labeled data training": 27750, + "entity recognition models": 18128, + "annotated training data": 2926, + "capture domain specific": 7667, + "use domain specific": 59871, + "models outperform previous": 35286, + "rule based statistical": 48386, + "corpus paper propose": 11400, + "propose new neural": 43510, + "new neural network": 37270, + "tasks previous works": 55812, + "semantic parsing dataset": 49310, + "compared previous approaches": 9434, + "challenging task requires": 8154, + "structured knowledge bases": 53161, + "proposed model outperforms": 43855, + "models improved performance": 35113, + "using bidirectional encoder": 60595, + "rich languages english": 48108, + "representation paper propose": 46565, + "novel neural architecture": 37883, + "based active learning": 5557, + "active learning methods": 1476, + "models propose new": 35370, + "present training data": 42044, + "text classification model": 56476, + "methods text classification": 33073, + "machine learning language": 31321, + "community question answering": 9273, + "paper address problem": 39253, + "multi turn dialogue": 36036, + "classification real world": 8530, + "demonstrate method achieves": 13935, + "method achieves best": 32361, + "specific linguistic phenomena": 52107, + "pair encoding bpe": 39150, + "compared previous models": 9436, + "english social media": 17875, + "social media websites": 51592, + "plays key role": 41002, + "achieve high performance": 1152, + "tasks results indicate": 55864, + "neural networks attention": 37036, + "use external resources": 59888, + "set linguistic features": 50186, + "web based tool": 61881, + "social media users": 51590, + "sequence labeling framework": 49934, + "proposed model obtains": 43854, + "natural language systems": 36452, + "large amounts text": 28840, + "models successfully applied": 35554, + "non parallel data": 37675, + "models better capture": 34782, + "better capture semantic": 6859, + "capture semantic information": 7707, + "semantic information word": 49288, + "pairs experimental results": 39188, + "character based neural": 8197, + "language model used": 28202, + "multilingual language models": 36090, + "language models language": 28268, + "generating natural language": 22384, + "received lot attention": 45263, + "used social media": 60307, + "social media content": 51571, + "paper propose general": 39512, + "multiple nlp tasks": 36257, + "neural models paper": 36980, + "coarse grained fine": 8785, + "grained fine grained": 23036, + "f1 score 76": 20207, + "demonstrate approach outperforms": 13870, + "growth social media": 23311, + "new loss function": 37242, + "natural language paper": 36437, + "investigate state art": 26987, + "based sequence sequence": 6028, + "approach achieves good": 3399, + "achieves good results": 1329, + "machine learning based": 31313, + "annotated data set": 2886, + "task learning mtl": 55177, + "propose novel decoding": 43530, + "using gradient based": 60712, + "left right right": 30001, + "right right left": 48143, + "produced state art": 43022, + "entity recognition speech": 18133, + "based skip gram": 6040, + "language model using": 28203, + "neural network long": 37014, + "network long short": 36762, + "general language model": 22065, + "transfer learning methods": 58385, + "model neural network": 34124, + "based models different": 5870, + "language understanding nlu": 28555, + "understanding nlu tasks": 59377, + "sequence labeling problem": 49937, + "propose alternative approach": 43291, + "art performance text": 4349, + "large parallel corpus": 28934, + "results language pairs": 47692, + "paper present comprehensive": 39446, + "address problem propose": 1789, + "knowledge target language": 27625, + "models significantly improve": 35508, + "open domain dialogue": 38422, + "summarization machine translation": 53889, + "hate speech detection": 23481, + "fields natural language": 20784, + "language processing recent": 28426, + "recent years research": 45394, + "apply state art": 3354, + "state art techniques": 52681, + "supervised learning ssl": 54006, + "language understanding slu": 28561, + "demonstrate proposed approach": 13964, + "supervised learning techniques": 54009, + "gradient descent sgd": 23007, + "machine learning technique": 31334, + "social media language": 51577, + "popular natural language": 41175, + "based conditional random": 5636, + "hindi english bengali": 23939, + "english bengali english": 17780, + "different social media": 15072, + "coarse fine grained": 8783, + "language pairs domains": 28367, + "multilingual cross lingual": 36075, + "cross lingual data": 11831, + "machine learning ml": 31324, + "ml natural language": 33433, + "new deep learning": 37171, + "networks proven effective": 36900, + "end end training": 17668, + "linear chain conditional": 30650, + "chain conditional random": 7958, + "question answering natural": 44705, + "language inference models": 28110, + "multi modal data": 35989, + "state art benchmark": 52588, + "multiple data sources": 36192, + "paper presents simple": 39483, + "annotated corpora available": 2879, + "sentence word word": 49672, + "german english chinese": 22666, + "data driven models": 12305, + "reaches state art": 45058, + "art nlp tasks": 4318, + "representative nlp tasks": 46801, + "data set used": 12645, + "best overall performance": 6788, + "proposed approach yields": 43736, + "widespread use social": 62034, + "health related information": 23519, + "information social media": 26092, + "simple rule based": 51207, + "end end model": 17653, + "based nmt models": 5916, + "model significantly outperforms": 34383, + "significantly outperforms previous": 51005, + "previous best models": 42250, + "constructed large scale": 10413, + "grained entity types": 23032, + "make datasets publicly": 31563, + "datasets publicly available": 13389, + "models proven effective": 35378, + "memory networks lstms": 32275, + "end end manner": 17650, + "significantly outperform baseline": 50989, + "overall f1 score": 39040, + "common sense knowledge": 9198, + "recent work shows": 45374, + "paper investigate possibility": 39412, + "method achieve better": 32356, + "achieve better accuracy": 1116, + "using deep learning": 60647, + "different machine learning": 14983, + "specific natural language": 52116, + "use deep learning": 59864, + "higher state art": 23847, + "relatively little attention": 46122, + "paper propose hybrid": 39516, + "machine translation approach": 31347, + "experiments language pairs": 19452, + "approaches state art": 3926, + "languages english german": 28655, + "based cross lingual": 5661, + "trained semi supervised": 57861, + "semi supervised manner": 49461, + "low medium high": 31160, + "using neural networks": 60835, + "rely large scale": 46293, + "low resource datasets": 31176, + "training data propose": 58028, + "propose transfer learning": 43680, + "model low resource": 34077, + "low resource data": 31175, + "data experiment results": 12343, + "test set accuracy": 56370, + "experiments natural language": 19478, + "language inference nli": 28111, + "neural attention models": 36937, + "models achieved great": 34676, + "data sparsity issue": 12684, + "methods achieve significant": 32730, + "achieve significant improvement": 1192, + "propose deep learning": 43352, + "model recurrent neural": 34289, + "network rnn long": 36798, + "rnn long short": 48200, + "network based methods": 36707, + "based methods proposed": 5851, + "semantic relationships words": 49330, + "meta learning approach": 32339, + "using social media": 60952, + "train recurrent neural": 57625, + "achieves bleu score": 1311, + "temporal classification ctc": 56182, + "speech recognition models": 52286, + "used feature extraction": 60187, + "near real time": 36512, + "models perform better": 35311, + "generate training data": 22260, + "training data task": 58042, + "generated training data": 22329, + "use hand crafted": 59905, + "experiments proposed model": 19497, + "micro f1 score": 33225, + "extend state art": 19832, + "results obtained different": 47748, + "languages low resource": 28718, + "based word occurrence": 6139, + "lead substantial improvements": 29278, + "decision making process": 13564, + "approaches widely used": 3955, + "information paper propose": 26002, + "widely used neural": 62020, + "used neural network": 60250, + "automatic manual evaluations": 5105, + "build large scale": 7409, + "large scale datasets": 28971, + "training data improve": 58002, + "learning models using": 29764, + "training deep neural": 58058, + "fine tuning approach": 20978, + "learning experimental results": 29635, + "approach yields better": 3745, + "machine learning research": 31330, + "reading comprehension tasks": 45087, + "use pre trained": 59976, + "detailed error analysis": 14423, + "paper present results": 39460, + "automatic text generation": 5129, + "text generation process": 56603, + "external linguistic knowledge": 19949, + "test set using": 56376, + "using ground truth": 60719, + "contrast state art": 10890, + "experiments publicly available": 19505, + "rule based machine": 48382, + "rule based systems": 48387, + "work present large": 62754, + "large scale analysis": 28959, + "report empirical results": 46432, + "task question answering": 55315, + "state art end": 52610, + "art end end": 4256, + "corpus large scale": 11370, + "large scale study": 29002, + "study propose novel": 53444, + "generates high quality": 22343, + "demonstrate proposed framework": 13966, + "word embeddings semantic": 62187, + "models best model": 34780, + "best model achieves": 6782, + "model achieves accuracy": 33511, + "study natural language": 53417, + "present natural language": 41954, + "sequence sequence attention": 49983, + "task learning approach": 55169, + "tasks paper describes": 55782, + "answering qa datasets": 3089, + "work propose simple": 62786, + "achieve competitive performance": 1127, + "performance compared existing": 40252, + "compared existing models": 9409, + "results proposed model": 47785, + "achieves competitive accuracy": 1317, + "art models paper": 4297, + "models paper proposes": 35305, + "model consistently outperforms": 33703, + "state art transformer": 52686, + "machine learning model": 31325, + "experimental results showed": 19312, + "results showed proposed": 47838, + "showed proposed method": 50672, + "proposed method achieved": 43806, + "achieved best performance": 1220, + "based social media": 6044, + "data propose novel": 12571, + "ground truth data": 23252, + "knowledge natural language": 27557, + "popular research topic": 41184, + "supervised learning problem": 54004, + "end end architectures": 17639, + "reinforcement learning method": 45872, + "task oriented dialogues": 55256, + "policy gradient algorithm": 41095, + "cross lingual semantic": 11850, + "dataset best knowledge": 12829, + "best knowledge dataset": 6770, + "data paper explore": 12530, + "paper explore problem": 39364, + "used improve performance": 60209, + "improvements current state": 25066, + "model trained proposed": 34475, + "transfer learning strategy": 58396, + "minimum bayes risk": 33304, + "achieve better performance": 1117, + "attention based model": 4720, + "significantly outperforms non": 51004, + "neural networks propose": 37064, + "attention mechanism used": 4782, + "like social media": 30505, + "topics natural language": 57456, + "expensive time consuming": 19221, + "additional linguistic features": 1686, + "language processing pipeline": 28420, + "french german spanish": 21662, + "areas natural language": 4157, + "code mixed data": 8824, + "significantly better results": 50943, + "code mixed tweets": 8830, + "shown great promise": 50715, + "novel cross lingual": 37795, + "cross lingual transfer": 11856, + "state art monolingual": 52634, + "data high resource": 12403, + "different language families": 14964, + "zero shot shot": 63180, + "shot shot learning": 50646, + "large annotated corpus": 28845, + "recent works shown": 45380, + "data automatically generated": 12173, + "pseudo parallel data": 44281, + "word embeddings evaluated": 62168, + "proposed methods outperform": 43837, + "large training corpus": 29029, + "performance domain specific": 40303, + "based bidirectional lstms": 5609, + "like sentiment analysis": 30503, + "combination convolutional neural": 9036, + "training word embeddings": 58315, + "embedding models using": 17048, + "paper propose multi": 39524, + "use neural networks": 59962, + "continuous latent variables": 10848, + "model outperforms single": 34165, + "word embeddings capture": 62160, + "social media using": 51591, + "et al 2018": 18405, + "et al 2017": 18404, + "liu et al": 30893, + "et al 2021": 18410, + "et al 2022": 18411, + "new annotated dataset": 37127, + "typologically different languages": 59167, + "expressed natural language": 19800, + "state art attention": 52583, + "generated content ugc": 22277, + "word embeddings approach": 62157, + "sentiment analysis models": 49823, + "trained models using": 57811, + "end end architecture": 17638, + "semeval 2017 task": 49431, + "cross lingual language": 11840, + "referring expression generation": 45760, + "work present simple": 62759, + "training data sets": 58036, + "evaluation shows method": 18720, + "previous work paper": 42308, + "close embedding space": 8685, + "language processing field": 28408, + "need feature engineering": 36565, + "proposed model significantly": 43858, + "significantly outperforms existing": 51002, + "achieved promising results": 1260, + "despite recent success": 14385, + "present simple effective": 42016, + "simple effective approach": 51151, + "graph convolutional networks": 23125, + "convolutional networks gcns": 11111, + "neural networks evaluate": 37046, + "experiments text classification": 19546, + "classification task demonstrate": 8562, + "task fine grained": 55087, + "fine grained sentiment": 20944, + "release new large": 46160, + "new large scale": 37235, + "large scale dataset": 28970, + "comprehension question answering": 9774, + "conduct human evaluation": 10054, + "recent years seen": 45396, + "large scale evaluation": 28975, + "english paper propose": 17854, + "cross lingual word": 11858, + "lingual word embeddings": 30739, + "embeddings machine translation": 17170, + "models shown promising": 35500, + "task specific features": 55394, + "paper propose adversarial": 39492, + "adversarial multi task": 1977, + "task learning framework": 55172, + "different text classification": 15098, + "publicly available url": 44356, + "available url http": 5386, + "variety natural language": 61283, + "sparse attention mechanism": 51965, + "detection challenging task": 14466, + "extensive experiments real": 19900, + "collected social media": 8968, + "sequence tagging problem": 50005, + "task learning setup": 55181, + "knowledge base question": 27410, + "base question answering": 5551, + "paper propose hierarchical": 39515, + "language processing computational": 28402, + "processing computational linguistics": 42861, + "applications paper present": 3229, + "multi layer perceptron": 35981, + "state art semantic": 52660, + "approaches improve performance": 3843, + "classification machine translation": 8491, + "machine translation automatic": 31349, + "method train model": 32685, + "sentiment analysis news": 49824, + "present qualitative analysis": 41993, + "inspired recent success": 26415, + "generative adversarial networks": 22586, + "adversarial networks gans": 1982, + "experimental results english": 19286, + "achieve significantly better": 1196, + "significant performance improvements": 50911, + "baseline state art": 6212, + "attracted lot attention": 4885, + "extensive empirical studies": 19866, + "publicly available http": 44343, + "available http www": 5306, + "provide fine grained": 44075, + "training data demonstrate": 57985, + "lexically constrained decoding": 30400, + "domain adaptation neural": 16001, + "performance domain adaptation": 40302, + "played important role": 40984, + "language modeling objective": 28212, + "consistent performance improvements": 10283, + "paper presents attempt": 39470, + "paper proposes simple": 39552, + "simple machine learning": 51189, + "suggest directions future": 53817, + "directions future work": 15293, + "present multi task": 41950, + "task learning model": 55175, + "state art standard": 52670, + "automatic human evaluations": 5096, + "pos tagging dependency": 41234, + "problems paper propose": 42719, + "outperforms previous methods": 38922, + "propose simple effective": 43633, + "experiments demonstrate approach": 19402, + "demonstrate approach significantly": 13871, + "based attention model": 5583, + "based models achieve": 5866, + "feature extraction techniques": 20488, + "art neural network": 4314, + "little labeled data": 30881, + "additional labeled data": 1680, + "cross lingual model": 11842, + "corpora low resource": 11218, + "propose novel data": 43529, + "novel data augmentation": 37797, + "data augmentation approach": 12150, + "nlp tasks existing": 37534, + "statistically significant improvements": 52773, + "task paper describes": 55262, + "paper describes participation": 39327, + "term memory blstm": 56247, + "different evaluation metrics": 14921, + "recent years automatic": 45382, + "english language datasets": 17832, + "paper introduces new": 39407, + "outperforms baseline model": 38870, + "performance paper describes": 40475, + "bayes logistic regression": 6350, + "label classification task": 27697, + "propose simple technique": 43637, + "finite state machines": 21059, + "attentional sequence sequence": 4860, + "standard machine translation": 52501, + "fully connected layers": 21718, + "uses convolutional neural": 60502, + "fully connected layer": 21717, + "cross lingual text": 11854, + "english source language": 17877, + "art methods paper": 4287, + "methods paper propose": 32975, + "evaluate approach task": 18438, + "improve classification accuracy": 24830, + "train model using": 57609, + "factoid question answering": 20297, + "code trained models": 8863, + "trained models publicly": 57807, + "present method automatically": 41944, + "method automatically generate": 32396, + "wu et al": 63023, + "order magnitude faster": 38633, + "deep neural models": 13738, + "shown great potential": 50714, + "paradigm natural language": 39625, + "based seq2seq model": 6025, + "models capable learning": 34804, + "high quality data": 23773, + "language processing computer": 28403, + "processing computer vision": 42863, + "learning models deep": 29754, + "models deep learning": 34889, + "noise training data": 37605, + "learning based method": 29535, + "approach wide range": 3739, + "results approach consistently": 47504, + "approach consistently improves": 3466, + "strong baseline future": 53003, + "address issue introducing": 1764, + "achieved good performance": 1235, + "input output sequences": 26310, + "improvement previous state": 25018, + "human evaluation shows": 24156, + "evaluation shows model": 18721, + "produces higher quality": 43031, + "vector space embeddings": 61466, + "address problem proposing": 1790, + "release new dataset": 46159, + "significantly outperforms competitive": 51000, + "according automatic human": 854, + "automatic human evaluation": 5095, + "trained models https": 57800, + "models https github": 35093, + "multi task training": 36030, + "limited labeled data": 30593, + "improve performance nlp": 24897, + "computational social science": 9863, + "model using combination": 34515, + "models experimental results": 34994, + "reduce computational cost": 45654, + "data publicly available": 12580, + "different natural language": 15002, + "proposed model performs": 43856, + "et al 2003": 18394, + "features pre trained": 20644, + "pre trained contextual": 41523, + "contextual word embeddings": 10789, + "deep natural language": 13732, + "models named entity": 35243, + "improves model performance": 25136, + "achieves better performance": 1308, + "better performance compared": 6930, + "textual entailment task": 56963, + "processing nlp machine": 42906, + "nlp machine learning": 37496, + "social media user": 51589, + "improves overall performance": 25140, + "automatic semi automatic": 5122, + "discrete latent variable": 15423, + "based human evaluation": 5773, + "observation propose novel": 38124, + "model achieves improvement": 33519, + "improvement bleu score": 24993, + "score state art": 48875, + "state art baseline": 52586, + "dialogue systems paper": 14789, + "proposed model generate": 43850, + "extensive experiments multiple": 19894, + "multiple deep learning": 36197, + "experiments benchmark dataset": 19363, + "tasks paper present": 55786, + "sequence sequence architectures": 49982, + "performance text based": 40599, + "machine translation present": 31377, + "tasks task specific": 55926, + "despite great success": 14363, + "important natural language": 24746, + "trained domain specific": 57714, + "improve performance word": 24903, + "task recent years": 55323, + "neural natural language": 36990, + "analysis shows model": 2759, + "amounts annotated data": 2545, + "consistently outperform baseline": 10299, + "reduces number parameters": 45696, + "child directed speech": 8290, + "corpus freely available": 11346, + "orders magnitude larger": 38672, + "perform large scale": 40119, + "freely available online": 21654, + "deep learning dl": 13707, + "large open domain": 28930, + "open domain dataset": 38421, + "transfer learning techniques": 58398, + "based state art": 6056, + "best performing models": 6798, + "experiments machine translation": 19461, + "model establishes new": 33836, + "state art bleu": 52591, + "empirical results demonstrate": 17341, + "different types knowledge": 15111, + "fine grained level": 20939, + "propose simple novel": 43636, + "advances state art": 1928, + "state art text": 52684, + "text natural language": 56673, + "outperforms previous models": 38923, + "tasks propose novel": 55820, + "evaluation results model": 18703, + "natural language datasets": 36417, + "neural networks extract": 37047, + "open source toolkit": 38460, + "semi supervised training": 49468, + "state art generative": 52619, + "layer wise relevance": 29214, + "wise relevance propagation": 62085, + "based bi directional": 5604, + "used previous work": 60272, + "model solve problem": 34397, + "solve problem using": 51687, + "statistical language model": 52745, + "cross domain knowledge": 11817, + "domain specific text": 16189, + "effectiveness proposed framework": 16805, + "recent works proposed": 45379, + "pre trained model": 41544, + "large scale open": 28992, + "scale open domain": 48608, + "open domain qa": 38424, + "approach achieve state": 3391, + "language generation systems": 28088, + "poses new challenges": 41251, + "statistical machine learning": 52749, + "approaches natural language": 3880, + "scarcity high quality": 48666, + "current evaluation metrics": 11976, + "high quality corpus": 23772, + "representation learning model": 46543, + "building high quality": 7447, + "shared task systems": 50506, + "deployed real world": 14173, + "real world use": 45143, + "generate synthetic data": 22252, + "language proposed approach": 28451, + "report experimental results": 46435, + "based attention mechanism": 5581, + "used sequence sequence": 60298, + "hierarchical attention networks": 23660, + "achieved remarkable performance": 1262, + "using multi task": 60818, + "sequence sequence s2s": 49995, + "superior state art": 53945, + "different tasks paper": 15095, + "using deep neural": 60648, + "art neural models": 4313, + "natural language explanations": 36422, + "short term memories": 50569, + "large real world": 28954, + "domain specific vocabulary": 16191, + "sentiment analysis experiments": 49821, + "representation learning framework": 46539, + "word embeddings word2vec": 62198, + "generating high quality": 22377, + "time consuming process": 57136, + "domain cross domain": 16036, + "evaluate proposed model": 18494, + "sentiment analysis model": 49822, + "recent years paper": 45392, + "progress natural language": 43106, + "data driven approaches": 12301, + "attention based sequence": 4725, + "sequence neural network": 49963, + "large document collections": 28872, + "information retrieval systems": 26067, + "use state art": 60028, + "speech recognition natural": 52287, + "recognition natural language": 45517, + "experimental results demonstrated": 19282, + "establish strong baseline": 18349, + "improvement classification accuracy": 24995, + "propose novel simple": 43563, + "conduct experiments datasets": 10044, + "publicly available models": 44346, + "model based deep": 33603, + "based deep learning": 5673, + "results proposed approach": 47782, + "trained labeled data": 57757, + "data state art": 12693, + "time consuming costly": 57128, + "real life tasks": 45107, + "end end e2e": 17644, + "use token level": 60049, + "vocabulary oov problem": 61708, + "results reveal proposed": 47816, + "leverage large amounts": 30274, + "word embedding layer": 62146, + "task specific attention": 55389, + "particularly low resource": 39886, + "introduce new approach": 26832, + "learning language models": 29696, + "language models training": 28335, + "learn word embeddings": 29445, + "methods large scale": 32919, + "establish new state": 18345, + "recent advances natural": 45285, + "benchmark datasets paper": 6458, + "datasets paper presents": 13364, + "presents novel method": 42098, + "neural networks demonstrate": 37041, + "achieves good performance": 1328, + "improved performance text": 24959, + "character word level": 8227, + "novel approach based": 37760, + "achieving performance comparable": 1418, + "knowledge graphs kgs": 27506, + "novel reinforcement learning": 37907, + "based knowledge graph": 5798, + "knowledge graph embeddings": 27501, + "pre training approach": 41567, + "embeddings pre trained": 17191, + "pre trained large": 41539, + "trained large corpora": 57766, + "best knowledge study": 6773, + "proposed cross lingual": 43749, + "conventional machine learning": 11005, + "classical machine learning": 8424, + "features paper propose": 20638, + "downstream nlp applications": 16346, + "novel neural model": 37884, + "state art unsupervised": 52687, + "features state art": 20674, + "learning human feedback": 29674, + "reinforcement learning algorithm": 45866, + "large action space": 28831, + "level machine translation": 30156, + "character level convolutional": 8207, + "paper propose model": 39523, + "evaluate proposed method": 18493, + "tasks text classification": 55932, + "improves performance text": 25146, + "classification relation extraction": 8533, + "recent neural models": 45326, + "models shown significant": 35502, + "text generation task": 56604, + "large scale corpus": 28967, + "2017 shared task": 268, + "previous work used": 42315, + "state art nli": 52640, + "despite recent advances": 14383, + "macro f1 score": 31407, + "f1 score 87": 20211, + "f1 score 93": 20216, + "best performing baseline": 6795, + "text state art": 56789, + "ground truth dataset": 23253, + "dataset publicly available": 13049, + "language model training": 28200, + "model training process": 34482, + "comparable better performance": 9291, + "network state art": 36807, + "evaluation metrics results": 18654, + "network cnn model": 36722, + "pairs training data": 39224, + "experimental results confirm": 19278, + "proposed method able": 43804, + "especially morphologically rich": 18288, + "character level representations": 8217, + "obtains significant improvement": 38256, + "domain training data": 16216, + "common nlp tasks": 9188, + "nlp tasks sentiment": 37549, + "performance baseline models": 40209, + "model outperforms prior": 34164, + "learning based framework": 29533, + "experiments benchmark datasets": 19364, + "benchmark datasets different": 6451, + "datasets different domains": 13229, + "state art data": 52603, + "training data different": 57986, + "best results achieved": 6816, + "speech pos tags": 52280, + "information improve performance": 25914, + "domain adaptation using": 16007, + "shared task aims": 50493, + "language understanding models": 28553, + "language inference task": 28113, + "model shared task": 34369, + "popularity social media": 41206, + "mining social media": 33324, + "models results suggest": 35458, + "available social media": 5366, + "crucial natural language": 11904, + "language models word": 28348, + "reinforcement learning directly": 45869, + "automatic metrics human": 5111, + "metrics human evaluation": 33170, + "datasets propose novel": 13380, + "model achieves new": 33520, + "text classification datasets": 56471, + "deep learning nlp": 13716, + "traditional natural language": 57536, + "using named entity": 60827, + "task make use": 55202, + "recurrent neural nets": 45624, + "human evaluations model": 24160, + "machine translation quality": 31379, + "information input text": 25925, + "neural network methods": 37016, + "embedding based models": 17018, + "explore state art": 19738, + "single task learning": 51344, + "traditional machine learning": 57527, + "existing deep learning": 19057, + "outperforms deep learning": 38894, + "large text corpus": 29026, + "news articles wikipedia": 37388, + "performing natural language": 40686, + "experiments real world": 19509, + "tasks paper proposes": 55789, + "demonstrates effectiveness proposed": 14031, + "effectiveness proposed approach": 16804, + "source code available": 51744, + "improve performance existing": 24890, + "performance existing methods": 40331, + "propose new methods": 43505, + "short term long": 50568, + "term long term": 56241, + "proposed method using": 43833, + "processing nlp recently": 42909, + "training evaluation data": 58092, + "text social media": 56777, + "using support vector": 60972, + "language identification task": 28099, + "leveraging unlabeled data": 30343, + "leverages pre trained": 30310, + "pre trained language": 41538, + "trained language model": 57759, + "processing tasks including": 42948, + "word embeddings input": 62172, + "attention natural language": 4794, + "language processing community": 28401, + "fundamental nlp tasks": 21785, + "benchmarks state art": 6544, + "previous work suggests": 42313, + "language machine translation": 28145, + "fine tuning model": 21000, + "fine tuning data": 20982, + "domain data fine": 16039, + "data fine tuning": 12366, + "tedious time consuming": 56163, + "achieves substantial improvements": 1383, + "paper propose different": 39504, + "content social media": 10559, + "learning fine grained": 29646, + "question answering text": 44714, + "compare different methods": 9337, + "significant improvement baselines": 50875, + "fine grained domain": 20933, + "recent years researchers": 45395, + "requires large scale": 46938, + "performance paper present": 40476, + "second propose novel": 49020, + "based reinforcement learning": 5988, + "extensive experimental results": 19874, + "significantly improves state": 50977, + "state art multiple": 52637, + "models nlp tasks": 35264, + "semantic syntactic information": 49360, + "remains open problem": 46343, + "techniques improve performance": 56097, + "using labeled data": 60750, + "task real world": 55319, + "end end solution": 17661, + "plays pivotal role": 41004, + "sub word units": 53539, + "surpass state art": 54167, + "network models using": 36769, + "nlp tasks models": 37541, + "neural models natural": 36978, + "apply machine learning": 3332, + "including deep learning": 25248, + "machine learning nlp": 31328, + "languages machine translation": 28721, + "document level features": 15805, + "despite simplicity approach": 14391, + "written natural language": 63005, + "language models proposed": 28308, + "models proposed method": 35375, + "word embeddings training": 62192, + "machine translation translate": 31391, + "language models trained": 28334, + "domain domain data": 16053, + "stanford question answering": 52559, + "question answering dataset": 44692, + "introduce data augmentation": 26795, + "data augmentation method": 12158, + "method improves performance": 32535, + "propose novel way": 43573, + "datasets evaluation metrics": 13256, + "random fields crfs": 44874, + "substantial improvements strong": 53624, + "using monolingual data": 60812, + "introduce open source": 26853, + "tasks information retrieval": 55688, + "task empirical results": 55040, + "performance models trained": 40441, + "wide range tasks": 61974, + "multi party conversations": 35997, + "tackle challenge propose": 54699, + "problem experimental results": 42558, + "significantly improves accuracy": 50973, + "sequence labeling task": 49939, + "word embeddings character": 62161, + "learning methods proposed": 29734, + "benchmark datasets demonstrate": 6450, + "f1 score 91": 20214, + "fully data driven": 21721, + "outperforms baseline methods": 38869, + "human evaluation experiments": 24147, + "widely used nlp": 62021, + "attention mechanism attention": 4773, + "self attention network": 49185, + "shows state art": 50806, + "natural language generator": 36427, + "novel approach automatically": 37759, + "external knowledge base": 19939, + "achieves performance comparable": 1353, + "simple effective solution": 51157, + "data social media": 12672, + "field computational linguistics": 20753, + "based distributional semantics": 5688, + "discrepancy training inference": 15418, + "models improve performance": 35111, + "points f1 score": 41073, + "information training data": 26131, + "training data insufficient": 58003, + "information extensive experiments": 25854, + "world datasets demonstrate": 62936, + "datasets demonstrate model": 13215, + "model achieves comparable": 33514, + "success deep learning": 53698, + "traditional language models": 57524, + "order magnitude smaller": 38636, + "state art non": 52642, + "incorporating pre trained": 25392, + "significantly outperforms strong": 51008, + "semi supervised setting": 49465, + "paper present method": 39455, + "datasets work propose": 13488, + "text paper introduce": 56688, + "end end asr": 17640, + "experimental results reveal": 19309, + "training data machine": 58014, + "state art retrieval": 52658, + "training data finally": 57996, + "despite success existing": 14395, + "best f1 score": 6764, + "increase f1 score": 25415, + "character level encoder": 8209, + "different people different": 15023, + "large scale multi": 28988, + "processing nlp techniques": 42914, + "state art works": 52694, + "previous studies demonstrated": 42287, + "paper investigate problem": 39413, + "end end deep": 17642, + "end deep neural": 17627, + "contrast previous approaches": 10882, + "task paper investigate": 55264, + "sentence level classification": 49581, + "tasks language understanding": 55711, + "classification slot filling": 8553, + "classification named entity": 8505, + "datasets paper introduce": 13362, + "fine grained information": 20936, + "human human dialogue": 24170, + "sentence representation learning": 49632, + "automatic evaluation method": 5084, + "long standing challenge": 31031, + "state art english": 52611, + "using logistic regression": 60777, + "domain specific embeddings": 16174, + "improve performance models": 24893, + "commonly used methods": 9225, + "little attention paid": 30871, + "using sentence level": 60925, + "existing evaluation metrics": 19068, + "provide detailed analysis": 44050, + "results models trained": 47729, + "models trained smaller": 35619, + "perform extensive analysis": 40104, + "experiment benchmark datasets": 19232, + "metrics shared task": 33200, + "perform series experiments": 40139, + "convolutional recurrent neural": 11116, + "par state art": 39618, + "using fine grained": 60696, + "freely available research": 21655, + "available research purposes": 5361, + "work focus task": 62669, + "low resource setting": 31190, + "fraction model parameters": 21430, + "paper introduce simple": 39404, + "available non commercial": 5334, + "online social networks": 38387, + "learning model using": 29749, + "achieve f1 score": 1138, + "extensive feature engineering": 19907, + "present new dataset": 41961, + "proposed model based": 43847, + "important source information": 24774, + "learning using large": 29932, + "languages training data": 28809, + "baseline large margin": 6179, + "word embedding learning": 62147, + "experimental results framework": 19287, + "framework achieves state": 21450, + "paper present work": 39466, + "downstream tasks paper": 16361, + "results experiments demonstrate": 47628, + "neural network classifier": 37001, + "significant performance gain": 50907, + "state art classification": 52594, + "new data set": 37162, + "large parallel corpora": 28933, + "humans learn language": 24279, + "does require parallel": 15975, + "level sentence level": 30208, + "based natural language": 5898, + "domain specific words": 16193, + "paper describes systems": 39333, + "model best performing": 33623, + "micro average f1": 33219, + "parsing natural language": 39789, + "natural language semantic": 36447, + "end end using": 17670, + "based multi task": 5889, + "best baseline model": 6752, + "predict fine grained": 41640, + "present deep learning": 41886, + "end end trained": 17667, + "trained sequence sequence": 57869, + "propose novel technique": 43567, + "baseline neural network": 6195, + "deep learning systems": 13720, + "social network data": 51594, + "quality automatically generated": 44495, + "time consuming laborious": 57133, + "10 absolute improvement": 36, + "training set training": 58252, + "achieved better results": 1223, + "work propose unsupervised": 62789, + "evaluated proposed method": 18544, + "using hand crafted": 60721, + "art results compared": 4373, + "strong cross lingual": 53025, + "builds recent work": 7480, + "recent work unsupervised": 45375, + "advances deep learning": 1910, + "robust machine learning": 48254, + "memory lstm models": 32264, + "manner experimental results": 31716, + "based model learns": 5860, + "data experimental results": 12345, + "achieved impressive performance": 1245, + "availability large scale": 5254, + "large scale parallel": 28993, + "model widely used": 34537, + "widely used datasets": 62014, + "rich source information": 48124, + "standard training data": 52537, + "using cross entropy": 60636, + "inspired recent work": 26416, + "dataset model achieves": 12997, + "exact match accuracy": 18851, + "language recent years": 28462, + "achieved significant success": 1272, + "machine reading comprehension": 31341, + "framework significantly improves": 21600, + "multiple strong baselines": 36294, + "paper investigates use": 39415, + "zero shot generalization": 63164, + "sentences training data": 49798, + "recent neural approaches": 45325, + "state art feature": 52616, + "scarcity labeled training": 48670, + "training data address": 57972, + "data address issue": 12124, + "address issue using": 1768, + "role labeling srl": 48312, + "single task model": 51345, + "model makes predictions": 34087, + "tasks proposed method": 55825, + "proposed method improve": 43820, + "method improve model": 32531, + "language model called": 28159, + "language model model": 28178, + "language model loss": 28175, + "level language model": 30145, + "non autoregressive model": 37636, + "large scale human": 28980, + "language model trained": 28199, + "present end end": 41901, + "active area research": 1472, + "english code switched": 17788, + "significant improvements existing": 50886, + "proposed method improves": 43821, + "language processing text": 28435, + "neural networks proposed": 37065, + "proposed model compared": 43848, + "successful natural language": 53737, + "art results number": 4382, + "locality sensitive hashing": 30956, + "state art classifiers": 52595, + "proposed deep learning": 43756, + "existing approaches require": 19030, + "queries natural language": 44656, + "paper conduct extensive": 39299, + "discuss advantages disadvantages": 15459, + "model performs best": 34202, + "processing paper propose": 42920, + "reading comprehension mrc": 45084, + "art models use": 4301, + "multi step reasoning": 36012, + "performance current state": 40274, + "significant improvements baselines": 50884, + "reading comprehension models": 45083, + "existing methods usually": 19105, + "models achieved state": 34682, + "meta learning framework": 32342, + "introduce novel method": 26847, + "extensive experiments investigate": 19890, + "models achieves state": 34684, + "available paper propose": 5341, + "memory lstm units": 32268, + "end propose simple": 17703, + "problem data scarcity": 42529, + "improves current state": 25123, + "future research dataset": 21886, + "knowledge bases kb": 27414, + "propose new algorithm": 43495, + "reinforcement learning approach": 45867, + "art results datasets": 4374, + "sequence seq2seq model": 49976, + "previous methods large": 42262, + "methods large margin": 32917, + "great success natural": 23218, + "existing end end": 19064, + "entity recognition relation": 18131, + "recognition relation extraction": 45531, + "build high quality": 7405, + "high quality dataset": 23774, + "research dataset available": 47011, + "dataset available https": 12820, + "github com lancopku": 22705, + "evaluation state art": 18726, + "fine grained control": 20932, + "extensive experiments proposed": 19897, + "results method achieves": 47715, + "existing neural models": 19117, + "new method called": 37253, + "resource poor target": 47263, + "source target domains": 51801, + "model extensive experiments": 33867, + "models transfer learning": 35627, + "performance nlp tasks": 40459, + "machine translation approaches": 31348, + "black box neural": 7193, + "contribute better understanding": 10928, + "develop machine learning": 14595, + "natural language model": 36433, + "based neural model": 5905, + "achieves significant performance": 1369, + "language model adapted": 28153, + "text mining techniques": 56665, + "achieve best results": 1114, + "orders magnitude smaller": 38673, + "expert domain knowledge": 19579, + "word embeddings word": 62197, + "propose novel methodology": 43546, + "real life applications": 45105, + "indo european languages": 25597, + "best worst scaling": 6840, + "learning model called": 29745, + "vaswani et al": 61446, + "based self attention": 6012, + "neural network called": 37000, + "supervised sequence labeling": 54043, + "compare state art": 9368, + "text classification approaches": 56468, + "outperformed previous state": 38840, + "state art benchmarks": 52589, + "training data source": 58039, + "data source code": 12675, + "source code released": 51752, + "models freely available": 35044, + "fully end end": 21724, + "additionally propose new": 1730, + "work study problem": 62831, + "proposed method significantly": 43829, + "maintaining comparable performance": 31488, + "compared previous work": 9438, + "achieves results competitive": 1361, + "answering dataset squad": 3068, + "reading comprehension dataset": 45081, + "work propose method": 62780, + "learning rl framework": 29853, + "deep learning natural": 13715, + "learning natural language": 29775, + "propose zero shot": 43708, + "accuracy test set": 1060, + "blind test set": 7218, + "recently released dataset": 45462, + "train neural networks": 57619, + "language processing propose": 28425, + "paper proposes novel": 39551, + "proposes novel approach": 43941, + "able achieve state": 671, + "propose unsupervised approach": 43692, + "monolingual word embeddings": 35815, + "embeddings proposed method": 17198, + "tasks sequence sequence": 55879, + "learning state art": 29895, + "state art machine": 52627, + "leveraging external knowledge": 30324, + "external knowledge bases": 19940, + "performance comparable state": 40247, + "machine learning applications": 31309, + "publicly available data": 44339, + "language models approach": 28231, + "features train model": 20685, + "existing methods suffer": 19101, + "question answering forums": 44697, + "conduct large scale": 10056, + "problems propose novel": 42724, + "model outperforms current": 34161, + "present best knowledge": 41859, + "attracted considerable attention": 4877, + "words proper nouns": 62485, + "semi automatic method": 49448, + "method makes use": 32571, + "different word embeddings": 15133, + "models real world": 35402, + "data sets different": 12648, + "rely pre trained": 46298, + "domain specific information": 16177, + "introduce novel neural": 26850, + "models proposed model": 35376, + "demonstrated effectiveness proposed": 14006, + "compared existing state": 9410, + "art performance benchmark": 4327, + "neural semantic parsing": 37093, + "like named entity": 30489, + "state art sota": 52667, + "art sota results": 4409, + "pointer network based": 41059, + "sequence sequence approach": 49979, + "extensive set experiments": 19913, + "language processing machine": 28414, + "processing machine translation": 42887, + "promising research direction": 43176, + "question answering dialogue": 44694, + "experimental results chinese": 19275, + "tasks proposed approach": 55823, + "question answering datasets": 44693, + "models convolutional neural": 34866, + "entities paper propose": 18071, + "conduct experiments widely": 10048, + "experiments widely used": 19563, + "used real world": 60285, + "model make use": 34085, + "simple effective mechanism": 51154, + "language processing involves": 28412, + "model propose novel": 34247, + "challenging real world": 8133, + "publicly available corpus": 44338, + "state art solutions": 52666, + "state art algorithms": 52578, + "used text classification": 60329, + "approach achieves best": 3395, + "tasks demonstrate proposed": 55579, + "domains experimental results": 16254, + "strong baseline systems": 53007, + "require task specific": 46893, + "language model fine": 28165, + "model fine tuning": 33895, + "fine tuning language": 20995, + "tuning language model": 58920, + "art text classification": 4429, + "embedding model trained": 17044, + "better performance state": 6936, + "art recurrent neural": 4364, + "introduce new dataset": 26834, + "new dataset called": 37164, + "features significantly improve": 20669, + "significantly improve accuracy": 50963, + "trained reinforcement learning": 57851, + "tasks paper investigate": 55785, + "external knowledge sources": 19945, + "neural non neural": 37083, + "self attention based": 49176, + "language models demonstrate": 28242, + "systems low resource": 54554, + "dimensional vector space": 15238, + "paper propose leverage": 39520, + "learning paper proposes": 29795, + "outperforms existing state": 38901, + "trained natural language": 57823, + "natural language corpora": 36415, + "traditional deep learning": 57516, + "based word level": 6138, + "multiple sources information": 36286, + "knowledge graphs kg": 27505, + "complex real world": 9654, + "real world settings": 45138, + "multilingual machine translation": 36095, + "neural networks recently": 37068, + "highly competitive performance": 23884, + "computer vision tasks": 9898, + "propose novel multi": 43550, + "model benchmark datasets": 33615, + "recent deep learning": 45301, + "existing text generation": 19159, + "text generation methods": 56599, + "tackle problem propose": 54710, + "text generation model": 56600, + "generative adversarial network": 22585, + "text propose novel": 56716, + "dialogue generation tasks": 14774, + "generation tasks demonstrate": 22563, + "tasks demonstrate model": 55578, + "baselines code available": 6244, + "models word embeddings": 35684, + "text training data": 56819, + "build end end": 7397, + "training machine translation": 58164, + "test set paper": 56375, + "cross lingual learning": 11841, + "language independent features": 28107, + "using word level": 61029, + "work tackle problem": 62837, + "sentence boundary detection": 49524, + "binary classification task": 7147, + "deep contextualized word": 13687, + "contextualized word representation": 10815, + "bidirectional language model": 7074, + "transfer learning approach": 58376, + "sentence level representations": 49590, + "multiple source languages": 36284, + "approach able achieve": 3388, + "proposed approach achieve": 43722, + "dataset fine tuning": 12933, + "fine tuning pre": 21011, + "tuning pre trained": 58944, + "lingual zero shot": 30741, + "zero shot setting": 63177, + "present novel deep": 41970, + "task existing approaches": 55062, + "best single model": 6823, + "state art scores": 52659, + "deep generative model": 13693, + "including natural language": 25281, + "models end end": 34966, + "experiment results proposed": 19252, + "used machine learning": 60230, + "present neural model": 41956, + "seen training time": 49067, + "human evaluation model": 24151, + "state art zero": 52695, + "art zero shot": 4444, + "leading state art": 29300, + "trained models downstream": 57797, + "models downstream tasks": 34941, + "data train models": 12739, + "evaluate pre trained": 18488, + "performance compared previous": 40254, + "tasks existing approaches": 55626, + "graph convolutional network": 23124, + "methods natural language": 32953, + "http github com": 24049, + "new task called": 37334, + "user generated data": 60417, + "create new dataset": 11712, + "gained significant attention": 21923, + "model improve performance": 33978, + "word level model": 62233, + "model large margin": 34041, + "paper present study": 39462, + "study end end": 53370, + "experiments effectiveness proposed": 19426, + "social media sites": 51585, + "word embedding space": 62153, + "unlike previously proposed": 59606, + "supervised deep learning": 53979, + "deep learning techniques": 13722, + "quantitative qualitative analyses": 44624, + "single multi task": 51322, + "abstractive text summarization": 775, + "research topic natural": 47132, + "character based word": 8198, + "based word based": 6135, + "widely used dataset": 62013, + "learning model jointly": 29746, + "language best knowledge": 27978, + "standard benchmark datasets": 52472, + "multi instance learning": 35974, + "labeled data experiments": 27742, + "datasets achieve state": 13142, + "order magnitude larger": 38635, + "recent years deep": 45383, + "dataset extensive experiments": 12921, + "learning models based": 29753, + "network cnn long": 36721, + "cnn long short": 8771, + "github com neulab": 22708, + "paper describes semeval": 39329, + "semeval 2018 task": 49433, + "incorporate commonsense knowledge": 25346, + "data code publicly": 12214, + "publicly available https": 44344, + "provide high quality": 44085, + "important role understanding": 24768, + "fine grained semantics": 20943, + "syntactic semantic analysis": 54323, + "end neural model": 17689, + "training data model": 58017, + "word vector representation": 62335, + "attention mechanism transformer": 4781, + "results machine translation": 47708, + "self attention mechanism": 49180, + "aware self attention": 5470, + "self attention mechanisms": 49181, + "problem field natural": 42566, + "end end text": 17665, + "question answering cqa": 44691, + "use neural network": 59961, + "prior work used": 42426, + "processing tasks language": 42949, + "poses significant challenges": 41254, + "supervised learning models": 54003, + "learning models used": 29763, + "hope work help": 24018, + "newly collected dataset": 37372, + "gold standard corpus": 22916, + "propose method based": 43453, + "paper introduce framework": 39401, + "data code https": 12212, + "code https github": 8819, + "feed forward network": 20712, + "art performance task": 4346, + "tasks question answering": 55832, + "paper present neural": 39457, + "predictions experimental results": 41759, + "substantial improvements baseline": 53623, + "shared task semantic": 50505, + "skip gram models": 51421, + "text classification results": 56483, + "semantic parsing models": 49313, + "novel graph based": 37834, + "art performance datasets": 4332, + "neural text generation": 37105, + "text generation models": 56601, + "source publicly available": 51792, + "publicly available parallel": 44349, + "available parallel corpora": 5343, + "low level features": 31157, + "et al 2005": 18395, + "important task nlp": 24779, + "task nlp applications": 55241, + "demonstrates superior performance": 14048, + "require large training": 46874, + "large training datasets": 29031, + "unsupervised state art": 59735, + "lingual cross lingual": 30695, + "require large number": 46872, + "trained single language": 57873, + "publicly available research": 44353, + "ones experimental results": 38337, + "applications machine learning": 3219, + "language understanding previous": 28557, + "indo aryan languages": 25595, + "self attention heads": 49177, + "results variety tasks": 47902, + "new end end": 37186, + "compared strong baselines": 9462, + "strong baselines including": 53014, + "proposed approach outperforms": 43729, + "significantly improve model": 50965, + "novel approach called": 37761, + "open domain setting": 38426, + "word level language": 62232, + "level language modeling": 30146, + "language modeling problem": 28217, + "transfer learning nlp": 58390, + "task training data": 55444, + "transfer learning pretrained": 58394, + "use transfer learning": 60058, + "learning transfer learning": 29920, + "transfer learning using": 58400, + "using sentence embeddings": 60924, + "surprisingly good performance": 54188, + "pre trained sentence": 41555, + "overall f1 scores": 39041, + "f1 scores compared": 20229, + "scarcity training data": 48677, + "accuracy f1 score": 976, + "network trained end": 36816, + "trained large amounts": 57765, + "problem recent work": 42643, + "recent work explored": 45368, + "learning low resource": 29715, + "potential directions future": 41387, + "directions future research": 15292, + "real time applications": 45115, + "code mixed text": 8829, + "sentiment positive negative": 49856, + "using contrastive learning": 60625, + "based text classification": 6091, + "model word embeddings": 34540, + "text classification proposed": 56482, + "sequence neural models": 49962, + "released pre trained": 46180, + "datasets source code": 13438, + "availability large amounts": 5253, + "low resourced languages": 31199, + "leveraging transfer learning": 30341, + "different languages english": 14970, + "english german french": 17814, + "baseline neural models": 6194, + "retrieval based method": 47942, + "annotated gold standard": 2899, + "challenging problem natural": 8127, + "problem social media": 42659, + "15 times faster": 153, + "downstream nlp tasks": 16347, + "graph based method": 23105, + "datasets proposed method": 13383, + "social media increasingly": 51576, + "mining natural language": 33320, + "discuss future directions": 15467, + "language generation models": 28085, + "models able generate": 34655, + "models consistently outperform": 34852, + "results publicly available": 47794, + "consistently outperforms state": 10306, + "art methods tasks": 4288, + "embedding methods word2vec": 17042, + "boost model performance": 7254, + "state art deep": 52605, + "art deep learning": 4247, + "use commonsense knowledge": 59846, + "multi modal fusion": 35990, + "cross modal attention": 11864, + "art results widely": 4393, + "results widely used": 47910, + "word embedding algorithms": 62143, + "word embedding features": 62145, + "publicly available non": 44347, + "address challenges propose": 1749, + "learning method learn": 29728, + "self training strategy": 49226, + "approach outperforms baseline": 3623, + "f1 score 75": 20206, + "serve starting point": 50081, + "model improves performance": 33984, + "present real world": 41997, + "real world application": 45122, + "building state art": 7472, + "semantic parsing semantic": 49315, + "high resource settings": 23795, + "obtain competitive performance": 38167, + "data augmentation methods": 12159, + "multi lingual model": 35985, + "space paper propose": 51880, + "select high quality": 49105, + "low resource scenarios": 31189, + "obtained pre trained": 38218, + "method achieves new": 32365, + "including human evaluation": 25262, + "based adversarial training": 5560, + "training data augmentation": 57976, + "monolingual cross lingual": 35797, + "cross lingual multilingual": 11845, + "monolingual multilingual models": 35807, + "performance cross lingual": 40272, + "combining machine learning": 9114, + "introduce new benchmark": 26833, + "dataset code available": 12840, + "relation extraction problem": 45979, + "directional gated recurrent": 15278, + "multiple real world": 36270, + "recent years neural": 45389, + "empirical results proposed": 17345, + "outperforms existing baselines": 38898, + "given piece text": 22770, + "significant improvement state": 50880, + "leads state art": 29330, + "state art single": 52665, + "art single model": 4402, + "inference paper propose": 25679, + "paper propose unified": 39538, + "achieve near human": 1173, + "near human level": 36506, + "large amounts parallel": 28839, + "resource languages like": 47240, + "achieve better results": 1118, + "neural models trained": 36983, + "trained maximum likelihood": 57788, + "fine tune models": 20951, + "fine tuned models": 20964, + "maximizing mutual information": 31965, + "task aims generate": 54897, + "approach tackle problem": 3715, + "knowledge graph based": 27497, + "new benchmark dataset": 37143, + "various evaluation metrics": 61337, + "language processing previous": 28422, + "conducting extensive experiments": 10103, + "extensive experiments analysis": 19879, + "tasks multi task": 55753, + "compared single task": 9454, + "contextualized word representations": 10816, + "performance transfer learning": 40608, + "tasks like machine": 55724, + "yields substantial improvements": 63136, + "near human performance": 36507, + "performance f1 score": 40342, + "f1 score 37": 20195, + "vector space using": 61469, + "model outperforms strong": 34167, + "similarity based approaches": 51084, + "context paper propose": 10685, + "models recently proposed": 35412, + "relation extraction task": 45981, + "better zero shot": 6995, + "zero shot performance": 63171, + "task specific training": 55404, + "zero shot translation": 63184, + "language model generating": 28168, + "establish state art": 18347, + "results multiple datasets": 47735, + "uses pre trained": 60529, + "paper describes submitted": 39332, + "relation extraction tasks": 45982, + "end end way": 17671, + "entity mention detection": 18119, + "approach outperforms previous": 3627, + "choice question answering": 8336, + "state art pretrained": 52650, + "evaluating language models": 18560, + "unsupervised neural machine": 59718, + "shared latent space": 50478, + "address issue introduce": 1763, + "shared task organized": 50502, + "time consuming task": 57137, + "lead better performance": 29257, + "impressive results achieved": 24816, + "reinforcement learning methods": 45873, + "art sota methods": 4406, + "like deep learning": 30468, + "real world large": 45132, + "learned word embeddings": 29493, + "propose solution problem": 43640, + "task proposed method": 55305, + "test set available": 56371, + "github com ibm": 22703, + "baseline models task": 6191, + "model used generate": 34509, + "provides state art": 44229, + "modeling machine translation": 34596, + "rnn based approaches": 48184, + "high quality sentence": 23785, + "corpora different domains": 11193, + "different domains demonstrate": 14906, + "domains demonstrate effectiveness": 16246, + "demonstrate effectiveness generality": 13898, + "state art open": 52643, + "training data introduce": 58004, + "reinforcement learning based": 45868, + "learning based model": 29537, + "markov decision process": 31846, + "sequence tagging task": 50006, + "experimental results based": 19271, + "outperformed state art": 38843, + "model pre trained": 34218, + "low resource domain": 31177, + "benchmark datasets proposed": 6459, + "paper propose cross": 39501, + "shed new light": 50528, + "achieving f1 score": 1404, + "f1 score 73": 20204, + "exposure bias problem": 19790, + "address problems paper": 1792, + "inverse reinforcement learning": 26930, + "generate higher quality": 22209, + "lack labeled data": 27898, + "present new framework": 41962, + "cross lingual information": 11838, + "lingual information retrieval": 30705, + "lingual word embedding": 30738, + "unsupervised cross lingual": 59690, + "cross lingual embeddings": 11834, + "attention based models": 4721, + "state art competitive": 52599, + "based neural architectures": 5902, + "sequence sequence language": 49988, + "models typically trained": 35640, + "approach low resource": 3593, + "training deep learning": 58057, + "set pre defined": 50222, + "propose neural model": 43490, + "benchmarks proposed model": 6539, + "approach fine tuning": 3539, + "word vector spaces": 62338, + "models achieve competitive": 34669, + "available low resource": 5325, + "resource languages paper": 47243, + "contrast existing approaches": 10876, + "provide valuable information": 44152, + "process extensive experiments": 42782, + "extensive experiments conducted": 19883, + "experiments conducted large": 19384, + "positive negative samples": 41288, + "reference based metrics": 45736, + "information word embeddings": 26161, + "models paper investigate": 35301, + "impact different types": 24593, + "data used train": 12761, + "propose novel task": 43566, + "build new dataset": 7418, + "evaluation results proposed": 18704, + "model outperforms baselines": 34158, + "outperforms baselines large": 38874, + "code available url": 8795, + "available url https": 5387, + "url https github": 59794, + "level fine grained": 30122, + "proposed framework outperforms": 43785, + "framework outperforms state": 21579, + "art methods large": 4286, + "requires large amounts": 46937, + "language models investigate": 28266, + "error propagation paper": 18224, + "proposed methods achieve": 43836, + "mean reciprocal rank": 31994, + "domain specific features": 16175, + "domain specific word": 16192, + "resource languages lrls": 47242, + "cross lingual training": 11855, + "method cross lingual": 32448, + "proposed model uses": 43861, + "sentiment classification tasks": 49835, + "learning pre trained": 29811, + "new publicly available": 37293, + "parsing semantic parsing": 39797, + "benchmarks demonstrate proposed": 6516, + "art results task": 4386, + "language propose novel": 28449, + "significantly improves translation": 50978, + "attention based bi": 4716, + "bidirectional gated recurrent": 7071, + "propose novel effective": 43533, + "entity type information": 18153, + "encourage model generate": 17596, + "new open source": 37275, + "proposed method substantially": 43830, + "method substantially improves": 32672, + "paper present end": 39450, + "experimental results public": 19307, + "results public datasets": 47792, + "datasets demonstrate method": 13214, + "neural network using": 37031, + "training data results": 58032, + "data results suggest": 12616, + "scale language model": 48587, + "prevent catastrophic forgetting": 42230, + "model performance different": 34188, + "setting new state": 50335, + "improved state art": 24967, + "present case studies": 41862, + "hybrid method combines": 24318, + "commonsense knowledge base": 9235, + "language specific models": 28497, + "use machine translation": 59943, + "experimental results significant": 19314, + "machine translation languages": 31364, + "work presents new": 62763, + "deep learning research": 13719, + "trained neural network": 57828, + "artificial intelligence systems": 4493, + "machine generated text": 31301, + "vectors pre trained": 61495, + "low resourced language": 31198, + "shown significant improvements": 50750, + "human evaluation demonstrate": 24145, + "downstream tasks recent": 16364, + "translation experimental results": 58612, + "optimization based meta": 38546, + "based meta learning": 5845, + "natural language domain": 36421, + "alleviate problem propose": 2417, + "classification datasets demonstrate": 8453, + "datasets demonstrate proposed": 13216, + "state art shot": 52664, + "make code data": 31550, + "feature extraction methods": 20487, + "corpus publicly available": 11415, + "training data major": 58015, + "synthetic training data": 54386, + "shared task paper": 50503, + "art baseline models": 4221, + "explore different strategies": 19700, + "challenging task especially": 8146, + "training method called": 58174, + "high low resource": 23752, + "tasks paper study": 55790, + "language modeling machine": 28210, + "social media datasets": 51574, + "cross lingual resources": 11849, + "model outperforms baseline": 34157, + "models existing methods": 34989, + "results provide insights": 47789, + "evaluation real world": 18693, + "specific word representations": 52176, + "quality labeled data": 44540, + "sequence models attention": 49960, + "work cross lingual": 62615, + "german french spanish": 22670, + "word sentence embeddings": 62302, + "tasks including classification": 55678, + "obtained https github": 38212, + "paper describes submissions": 39331, + "machine translation improve": 31361, + "learning high quality": 29672, + "achieving good performance": 1407, + "domain specific applications": 16166, + "domain specific tasks": 16187, + "tasks neural machine": 55764, + "high frequency words": 23740, + "training data set": 58035, + "method achieves comparable": 32363, + "quality state art": 44583, + "machine learning architecture": 31312, + "high quality training": 23787, + "amounts parallel data": 2556, + "data target language": 12722, + "experiments zero shot": 19567, + "data zero shot": 12781, + "zero shot language": 63165, + "models built using": 34800, + "range tasks including": 44938, + "create large scale": 11706, + "comparing state art": 9487, + "address issues introduce": 1770, + "competitive results compared": 9562, + "code pre trained": 8844, + "trained models available": 57795, + "systems experimental results": 54498, + "experimental results validate": 19320, + "validate effectiveness proposed": 61177, + "related social media": 45937, + "modalities text images": 33473, + "models trained different": 35606, + "deep learning frameworks": 13710, + "outperform strong baselines": 38826, + "unlike previous works": 59604, + "previous works propose": 42321, + "end deep learning": 17626, + "unified end end": 59470, + "machine learning deep": 31319, + "learning deep learning": 29583, + "qualitative analysis shows": 44474, + "language models perform": 28293, + "pre trained state": 41558, + "trained state art": 57883, + "language model bert": 28157, + "embeddings language models": 17158, + "propose method automatically": 43452, + "analysis state art": 2767, + "natural language expression": 36423, + "2018 shared task": 276, + "number high quality": 38008, + "fine tuned using": 20973, + "yields state art": 63132, + "crucial real world": 11909, + "domain specific corpora": 16168, + "state art domain": 52607, + "achieved remarkable progress": 1263, + "experiments multiple datasets": 19474, + "models able predict": 34658, + "transfer learning based": 58378, + "model transfer learning": 34487, + "f1 score 69": 20201, + "f1 score 78": 20208, + "source code publicly": 51751, + "closed world assumption": 8701, + "propose joint model": 43426, + "unlike existing methods": 59596, + "experimental results text": 19318, + "tasks method outperforms": 55746, + "language models results": 28316, + "build machine learning": 7411, + "art performance benchmarks": 4328, + "proposed model evaluated": 43849, + "language models using": 28342, + "improve prediction accuracy": 24907, + "state art overall": 52644, + "challenging task involves": 8148, + "demonstrate approach achieves": 13867, + "approach achieves state": 3403, + "art performance present": 4343, + "lstm based language": 31245, + "network based method": 36706, + "data recent work": 12588, + "inspired recent advances": 26413, + "domain adaptation problem": 16002, + "perform domain adaptation": 40092, + "source target domain": 51800, + "datasets analysis shows": 13150, + "memory lstm architecture": 32259, + "neural sequence labeling": 37097, + "sequence labeling models": 49936, + "attention based deep": 4718, + "annotated data language": 2883, + "time consuming labor": 57132, + "consuming labor intensive": 10448, + "single model trained": 51319, + "nlp tasks require": 37548, + "systems propose novel": 54605, + "evaluation metrics datasets": 18649, + "propose new paradigm": 43511, + "provide complementary information": 44034, + "results multiple benchmarks": 47734, + "rnn based models": 48188, + "area curve auc": 4139, + "encoder decoder based": 17499, + "progress recent years": 43114, + "self attention networks": 49186, + "uses self attention": 60532, + "self attention layers": 49179, + "open source python": 38457, + "transformer models perform": 58501, + "models low resource": 35203, + "language models learning": 28272, + "language processing algorithms": 28395, + "manually annotated corpus": 31757, + "methods perform better": 32979, + "significant improvements performance": 50887, + "english hindi code": 17821, + "hindi code mixed": 23937, + "datasets social media": 13436, + "nlp tasks recently": 37547, + "information multiple sources": 25981, + "capture syntactic semantic": 7715, + "study state art": 53463, + "experimental results method": 19292, + "results method significantly": 47718, + "simple effective way": 51160, + "proposed method results": 43827, + "local contextual information": 30934, + "benchmarks verify effectiveness": 6551, + "fundamental task natural": 21793, + "relations natural language": 46046, + "training data typically": 58045, + "tasks deep learning": 55572, + "machine translation named": 31371, + "zero shot capabilities": 63156, + "semantic parsing task": 49316, + "character level features": 8210, + "novel method using": 37867, + "proposed method does": 43816, + "rnn based methods": 48186, + "learning framework allows": 29652, + "maintaining high accuracy": 31493, + "multi head attention": 35964, + "art performances datasets": 4355, + "datasets proposed approach": 13382, + "statistical language models": 52746, + "question answering knowledge": 44700, + "model trained evaluated": 34471, + "conduct qualitative analysis": 10059, + "typologically diverse languages": 59169, + "cross lingual settings": 11852, + "performance widely used": 40630, + "framework propose novel": 21586, + "compare model performance": 9348, + "propose novel techniques": 43568, + "task learning models": 55176, + "novel multi modal": 37878, + "random forest model": 44877, + "resource indian languages": 47230, + "automatic metrics bleu": 5110, + "correlate poorly human": 11507, + "machine translation data": 31353, + "better performance standard": 6935, + "conducted real world": 10092, + "named entity linking": 36372, + "entity linking relation": 18117, + "relation extraction knowledge": 45975, + "nlp tasks machine": 37540, + "state art research": 52655, + "challenges future research": 8052, + "languages cross lingual": 28627, + "shared task multilingual": 50501, + "contextualized word embeddings": 10814, + "explore different ways": 19701, + "systems large margin": 54544, + "expectation maximization em": 19195, + "deep learning method": 13711, + "method solve problem": 32663, + "reading comprehension questions": 45085, + "art language model": 4269, + "complex deep learning": 9622, + "model language modeling": 34038, + "language modeling paper": 28214, + "achieve superior results": 1211, + "understanding tasks including": 59409, + "ground truth labels": 23256, + "combines state art": 9102, + "results datasets different": 47571, + "datasets different languages": 13230, + "results end end": 47608, + "human annotated datasets": 24099, + "large text collections": 29024, + "systems require large": 54620, + "require large annotated": 46870, + "large annotated data": 28846, + "domain invariant features": 16091, + "leads significant improvements": 29327, + "resource languages using": 47247, + "experiments low resource": 19459, + "learning based approaches": 29531, + "using machine translation": 60784, + "understanding nlu models": 59374, + "large scale social": 29001, + "scale social media": 48625, + "memory network lstm": 32273, + "model based approaches": 33600, + "recurrent encoder decoder": 45614, + "encoder decoder network": 17503, + "standard cross entropy": 52480, + "class imbalance problem": 8403, + "based seq2seq models": 6026, + "train evaluate models": 57589, + "mutual information input": 36346, + "issues paper propose": 27097, + "used pre trained": 60264, + "set test set": 50262, + "resource language like": 47234, + "establishing new state": 18366, + "open source dataset": 38449, + "various linguistic features": 61357, + "architecture achieves state": 4023, + "art results benchmark": 4371, + "better previous state": 6946, + "recommendations future research": 45569, + "future research field": 21890, + "data sets proposed": 12651, + "dataset freely available": 12937, + "using lexical features": 60768, + "rnn based language": 48185, + "indian languages hindi": 25520, + "fixed size vector": 21082, + "tasks despite success": 55585, + "learning long term": 29713, + "data source domain": 12676, + "labeled data target": 27748, + "data target domain": 12721, + "model trained source": 34477, + "trained source domain": 57878, + "source domain data": 51766, + "fine tuned small": 20968, + "knowledge source domain": 27613, + "representation experimental results": 46513, + "model achieves competitive": 33515, + "competitive performance state": 9555, + "surpasses state art": 54177, + "word embeddings widely": 62196, + "embeddings widely used": 17245, + "bridge gap propose": 7320, + "proposed model trained": 43860, + "results cross lingual": 47565, + "propose novel hierarchical": 43539, + "hierarchical attention mechanism": 23657, + "vinyals et al": 61613, + "dataset demonstrate effectiveness": 12884, + "demonstrate effectiveness methods": 13900, + "sequence labeling model": 49935, + "address issue propose": 1767, + "issue propose novel": 27077, + "kullback leibler divergence": 27680, + "construct new dataset": 10396, + "generation propose novel": 22532, + "generate human like": 22212, + "significantly outperform baselines": 50990, + "extensive empirical results": 19865, + "manually annotated dataset": 31759, + "correlation human judgements": 11522, + "semi markov conditional": 49452, + "markov conditional random": 31844, + "models using large": 35658, + "experiment results method": 19250, + "challenging task nlp": 8150, + "recent neural network": 45327, + "represent state art": 46482, + "issue propose new": 27076, + "propose new training": 43515, + "relation extraction model": 45977, + "paper introduce task": 39405, + "provide comprehensive analysis": 44037, + "nlp tasks text": 37550, + "answering named entity": 3084, + "model computationally efficient": 33690, + "propose framework named": 43393, + "significant improvement compared": 50876, + "conduct experiments real": 10047, + "poses significant challenge": 41253, + "github com google": 22701, + "lexical syntactic information": 30391, + "task oriented dialog": 55254, + "additional annotated data": 1652, + "expensive human annotation": 19210, + "achieve promising results": 1182, + "language understanding recent": 28559, + "paper design novel": 39336, + "labeled data propose": 27744, + "training data second": 58034, + "learning adversarial training": 29507, + "hierarchical reinforcement learning": 23687, + "results paper present": 47758, + "address issues propose": 1773, + "issues propose novel": 27101, + "demonstrate model significantly": 13943, + "model loss function": 34075, + "conversational question answering": 11049, + "obtains f1 score": 38249, + "f1 score 65": 20200, + "ample room improvement": 2567, + "work introduce novel": 62693, + "introduce novel task": 26851, + "shared task data": 50497, + "improves prediction accuracy": 25150, + "superiority proposed method": 53955, + "achieve higher accuracy": 1155, + "outperform previous methods": 38811, + "models based sequence": 34759, + "demonstrate model outperforms": 13942, + "methods data augmentation": 32810, + "data augmentation text": 12164, + "simple data augmentation": 51146, + "data augmentation strategy": 12161, + "machine translation propose": 31378, + "problem propose novel": 42633, + "proposed method consists": 43815, + "graph based neural": 23109, + "models existing approaches": 34988, + "propose novel approaches": 43522, + "data collection annotation": 12219, + "tasks state art": 55910, + "compare different models": 9338, + "sequence generation task": 49928, + "proximal policy optimization": 44262, + "generation tasks including": 22564, + "models using different": 35657, + "various text classification": 61406, + "self training method": 49225, + "previous work focuses": 42304, + "detection social media": 14529, + "learning paper propose": 29794, + "learning deep neural": 29584, + "perform zero shot": 40163, + "research recent years": 47112, + "years deep learning": 63055, + "better cross lingual": 6872, + "address shortcoming propose": 1800, + "substantially outperforms previous": 53647, + "translation cross lingual": 58593, + "trained cross lingual": 57702, + "contextual word representations": 10790, + "pre trained bidirectional": 41522, + "synthetic data generated": 54371, + "work explore different": 62658, + "improve performance neural": 24896, + "competitive baseline models": 9541, + "previous best result": 42251, + "propose new dataset": 43500, + "dependency parse trees": 14127, + "resource rich languages": 47270, + "demonstrate effectiveness approaches": 13895, + "text generation including": 56598, + "improvements strong baselines": 25104, + "introduce multi task": 26826, + "multi task setup": 36029, + "multi task model": 36022, + "task model outperforms": 55218, + "received considerable attention": 45257, + "evaluate proposed framework": 18492, + "downstream tasks including": 16357, + "results downstream tasks": 47600, + "downstream tasks existing": 16356, + "gives better results": 22806, + "sentiment analysis using": 49832, + "knowledge graph kg": 27503, + "learning reinforcement learning": 29837, + "prior work shown": 42425, + "models attention mechanism": 34736, + "attention mechanism learn": 4777, + "tasks image captioning": 55669, + "achieves strong performance": 1381, + "recent transformer based": 45362, + "art results wide": 4392, + "results wide range": 47908, + "experimental results different": 19284, + "collect new dataset": 8949, + "approach improves performance": 3565, + "word embeddings target": 62190, + "propose structure aware": 43650, + "based transformer model": 6109, + "rnn language models": 48196, + "language models achieved": 28227, + "demonstrates state art": 14043, + "models able learn": 34656, + "quality estimation qe": 44517, + "global contextual information": 22826, + "translation models trained": 58634, + "subject predicate object": 53556, + "training data experimental": 57993, + "significantly improve results": 50968, + "achieves superior performance": 1385, + "superior performance compared": 53936, + "different benchmark datasets": 14851, + "model significantly outperform": 34382, + "outperform baseline models": 38782, + "problem work propose": 42690, + "work propose task": 62787, + "low resource conditions": 31173, + "datasets paper propose": 13365, + "small training set": 51510, + "cs cmu edu": 11921, + "learned source domain": 29483, + "leverage unlabeled data": 30296, + "unlabeled data target": 59566, + "substantially improves performance": 53640, + "improves performance compared": 25142, + "common sense reasoning": 9199, + "model improves state": 33985, + "embeddings sentence embeddings": 17212, + "approach end end": 3512, + "nlp tasks demonstrate": 37533, + "propose new type": 43516, + "challenge propose simple": 8011, + "using automatic metrics": 60575, + "metrics human judgments": 33172, + "weakly supervised approach": 61860, + "high correlation human": 23720, + "specific training data": 52165, + "training data use": 58046, + "task paper present": 55265, + "shared task task": 50507, + "task pre trained": 55280, + "achieve macro f1": 1170, + "real world question": 45134, + "based real world": 5977, + "propose reinforcement learning": 43602, + "recent work demonstrated": 45367, + "performance large margin": 40412, + "results demonstrate potential": 47581, + "use sequence sequence": 60012, + "word embeddings pre": 62182, + "shared task dataset": 50498, + "models sequence sequence": 35485, + "model multi hop": 34109, + "multiple pieces evidence": 36262, + "graph neural networks": 23154, + "convolutional network gcn": 11108, + "experiments standard datasets": 19531, + "model convolutional neural": 33721, + "dataset low resource": 12986, + "performance sentence level": 40551, + "data pre trained": 12553, + "word embeddings language": 62174, + "experiments demonstrate model": 19405, + "smaller model size": 51522, + "training inference time": 58130, + "improve quality generated": 24912, + "models attention mechanisms": 34737, + "approach improve performance": 3562, + "character level information": 8211, + "multi hop reasoning": 35972, + "vectors word embeddings": 61502, + "basic building block": 6328, + "results suggest approach": 47866, + "shared task automatic": 50494, + "f1 score 58": 20197, + "results english chinese": 47610, + "competitively state art": 9572, + "models usually trained": 35661, + "sequence level training": 49951, + "reinforcement learning experiments": 45870, + "f1 score achieved": 20219, + "lexico syntactic patterns": 30405, + "propose novel methods": 43547, + "simple effective method": 51155, + "external commonsense knowledge": 19929, + "existing neural network": 19118, + "social media post": 51583, + "qualitative analysis model": 44472, + "human written references": 24261, + "novel model called": 37874, + "help model learn": 23579, + "model outperforms competitive": 34160, + "propose cross lingual": 43343, + "techniques deep learning": 56074, + "additional contextual information": 1661, + "embeddings word embeddings": 17247, + "make source code": 31599, + "source code models": 51749, + "code models available": 8835, + "sequence sequence framework": 49986, + "human evaluation results": 24154, + "evaluation results indicate": 18702, + "proposed approach achieves": 43723, + "92 f1 score": 560, + "fields computer vision": 20777, + "neural mt nmt": 36987, + "performing zero shot": 40695, + "lingual transfer learning": 30735, + "zero shot classification": 63157, + "neural models generate": 36976, + "better performance previous": 6934, + "level language models": 30147, + "tf idf features": 56992, + "develop deep learning": 14582, + "based pre trained": 5942, + "performs substantially better": 40719, + "state art span": 52668, + "model performance significantly": 34195, + "based supervised learning": 6069, + "obtain high quality": 38176, + "depth error analysis": 14186, + "propose multi task": 43479, + "applied wide range": 3312, + "representations pre trained": 46738, + "language models elmo": 28248, + "release code https": 46146, + "results model achieves": 47722, + "public benchmark datasets": 44307, + "using proposed approach": 60880, + "training low resource": 58161, + "domain semantic parsing": 16153, + "number training examples": 38050, + "lstm neural network": 31278, + "time experimental results": 57155, + "art performance terms": 4348, + "recall f1 score": 45241, + "art performance model": 4336, + "training data better": 57980, + "amounts unlabeled text": 2564, + "task specific labeled": 55398, + "specific labeled data": 52097, + "models trained single": 35617, + "knowledge bases generate": 27413, + "social media platform": 51581, + "evaluate performance model": 18482, + "new dataset consisting": 37165, + "demonstrate effectiveness framework": 13897, + "relation extraction models": 45978, + "performance large scale": 40413, + "language models nlms": 28290, + "significantly fewer parameters": 50959, + "models trained dataset": 35604, + "language generation task": 28089, + "recognition ner task": 45520, + "given input sentence": 22749, + "shows superior performance": 50809, + "large amounts labelled": 28838, + "amounts labelled data": 2554, + "models open domain": 35278, + "dialogue response generation": 14783, + "demonstrate strong performance": 13980, + "annotated data available": 2882, + "learning extensive experiments": 29641, + "extensive experiments analyses": 19878, + "new self supervised": 37309, + "self supervised learning": 49210, + "propose novel text": 43569, + "conditional generative adversarial": 9994, + "adversarial network gan": 1979, + "limited labelled data": 30595, + "winograd schema challenge": 62074, + "paper present deep": 39448, + "domain adaptation tasks": 16004, + "english chinese corpora": 17783, + "learn cross lingual": 29355, + "settings cross lingual": 50363, + "terms automatic evaluation": 56266, + "context aware models": 10590, + "large scale multimodal": 28990, + "improve overall performance": 24883, + "source language english": 51778, + "target language work": 54830, + "dataset model outperforms": 12998, + "10 percentage points": 49, + "naive bayes model": 36365, + "document level context": 15804, + "features extensive experiments": 20579, + "model proposed model": 34251, + "language representation model": 28469, + "bidirectional encoder representations": 7069, + "encoder representations transformers": 17540, + "language representation models": 28470, + "representation models bert": 46557, + "left right context": 29999, + "pre trained bert": 41521, + "trained bert model": 57682, + "bert model fine": 6684, + "model fine tuned": 33894, + "models wide range": 35680, + "conceptually simple empirically": 9956, + "obtains new state": 38253, + "generation paper propose": 22514, + "using neural sequence": 60836, + "perform ablation study": 40065, + "automated metrics human": 5055, + "human evaluation generated": 24148, + "state art fully": 52617, + "using semi supervised": 60922, + "low resource indian": 31180, + "language low resource": 28143, + "challenging task natural": 8149, + "different state art": 15079, + "fashion experimental results": 20413, + "paper explore use": 39366, + "experiments method significantly": 19465, + "paper proposes neural": 39549, + "variational autoencoder vae": 61245, + "gaussian mixture model": 22014, + "incorporate contextual information": 25349, + "sequence sequence approaches": 49980, + "previous work propose": 42309, + "empirical results model": 17344, + "results model able": 47721, + "model achieved accuracy": 33508, + "level attention mechanism": 30067, + "task low resource": 55195, + "rich resource language": 48117, + "resource language english": 47233, + "demonstrate superiority method": 13986, + "method state art": 32668, + "existing word embeddings": 19171, + "advanced deep learning": 1887, + "methods pre trained": 32986, + "accuracy paper introduces": 1021, + "cross domain cross": 11814, + "domain cross lingual": 16037, + "experiments available https": 19360, + "different linguistic features": 14981, + "data augmentation techniques": 12163, + "performance different tasks": 40295, + "neural networks state": 37073, + "networks state art": 36913, + "shown state art": 50753, + "task neural network": 55238, + "language inference dataset": 28109, + "models different datasets": 34915, + "using large pre": 60758, + "large pre trained": 28940, + "language modeling lm": 28209, + "publicly available large": 44345, + "training time compared": 58300, + "experimental results widely": 19323, + "data demonstrate effectiveness": 12272, + "improvements nlp tasks": 25087, + "building large scale": 7452, + "model trained dataset": 34469, + "experimental results various": 19321, + "able outperform state": 710, + "achieved good results": 1236, + "labeled data supervised": 27747, + "data supervised learning": 12711, + "tf idf based": 56991, + "state art seq2seq": 52662, + "multi label text": 35978, + "attention mechanism allows": 4771, + "mechanism allows model": 32098, + "substantially outperforms strong": 53649, + "strong baselines terms": 53018, + "language models work": 28349, + "fine tune language": 20949, + "language models improve": 28262, + "language models experiments": 28253, + "source code data": 51745, + "github com thunlp": 22713, + "word sentence representations": 62305, + "including named entity": 25279, + "recognition speech tagging": 45539, + "external knowledge resources": 19944, + "resources paper propose": 47324, + "achieved remarkable results": 1264, + "state art architectures": 52582, + "approach achieves new": 3401, + "unsupervised machine translation": 59708, + "term memory bilstm": 56246, + "unsupervised pre training": 59722, + "pre training large": 41581, + "approach benchmark datasets": 3430, + "outperform previous best": 38810, + "terms f1 score": 56290, + "sequence tagging model": 50003, + "comparable results state": 9309, + "propose data driven": 43348, + "multi headed attention": 35968, + "task demonstrate proposed": 55003, + "approach outperforms strong": 3631, + "model does use": 33787, + "available state art": 5371, + "quality generated texts": 44526, + "alternative approach based": 2498, + "conduct experiments different": 10045, + "model new domain": 34127, + "number parameters model": 38025, + "achieves significantly better": 1371, + "performance fine tuning": 40351, + "elastic weight consolidation": 16959, + "experiments proposed framework": 19495, + "low resource translation": 31196, + "information source sentence": 26095, + "hybrid deep learning": 24315, + "proposed approach shows": 43732, + "different datasets paper": 14891, + "used transfer learning": 60340, + "different languages different": 14968, + "cross lingual transferability": 11857, + "languages high resource": 28685, + "multiple natural language": 36254, + "effective method improve": 16672, + "state art approach": 52580, + "long tail distribution": 31036, + "extensive experiments datasets": 19884, + "models achieve better": 34667, + "proposed model able": 43843, + "bert devlin et": 6641, + "peters et al": 40801, + "radford et al": 44848, + "training data training": 58044, + "map natural language": 31796, + "using beam search": 60586, + "challenge work propose": 8024, + "improvements natural language": 25085, + "previous approaches model": 42240, + "approach outperforms prior": 3628, + "train end end": 57586, + "end end automatic": 17641, + "end automatic speech": 17618, + "cross domain settings": 11819, + "task text classification": 55435, + "state art datasets": 52604, + "methods low resource": 32932, + "truly low resource": 58825, + "learning based nlp": 29540, + "based nlp tasks": 5914, + "text understanding tasks": 56830, + "space experimental results": 51861, + "experimental results methods": 19293, + "word sentence level": 62303, + "task zero shot": 55476, + "poses great challenge": 41248, + "model zero shot": 34553, + "validate effectiveness method": 61175, + "make good use": 31575, + "paper presents neural": 39477, + "novel approach multi": 37764, + "achieve significant performance": 1194, + "significant performance improvement": 50910, + "non autoregressive models": 37637, + "models attention based": 34735, + "test set best": 56372, + "machine translation abstractive": 31346, + "modeling experimental results": 34574, + "non expert users": 37654, + "using multi layer": 60817, + "human robot interaction": 24238, + "based end end": 5702, + "art performance english": 4335, + "models recently achieved": 35411, + "pre trained weights": 41564, + "pre training data": 41574, + "results f1 score": 47631, + "text summarization question": 56798, + "techniques machine learning": 56108, + "problem short text": 42653, + "domain paper explore": 16129, + "approach achieves competitive": 3398, + "outperforming baseline models": 38846, + "graph based models": 23108, + "faster inference time": 20439, + "outperforms previous unsupervised": 38926, + "based models paper": 5877, + "english low resource": 17839, + "end end systems": 17663, + "classification sequence labeling": 8546, + "suffer error propagation": 53764, + "propose new framework": 43503, + "new framework named": 37211, + "demonstrate effectiveness efficiency": 13896, + "neural networks hierarchical": 37051, + "outperform existing approaches": 38794, + "language modeling using": 28222, + "open domain multi": 38423, + "dataset demonstrate model": 12885, + "evaluation natural language": 18661, + "likelihood estimation mle": 30519, + "neural networks gnns": 37050, + "attention paper propose": 4805, + "task existing methods": 55063, + "existing methods rely": 19099, + "conduct extensive analysis": 10050, + "processing deep learning": 42867, + "approaches cross lingual": 3790, + "propose novel problem": 43556, + "large scale labeled": 28982, + "future research task": 21893, + "task sequence tagging": 55364, + "resource language low": 47235, + "datasets results demonstrate": 13410, + "learning based natural": 29539, + "language processing model": 28416, + "need large scale": 36577, + "real world tasks": 45141, + "introduce large scale": 26818, + "text classification sequence": 56485, + "approach multi task": 3606, + "outperform competitive baselines": 38787, + "present rule based": 42002, + "settings paper propose": 50388, + "cross lingual entity": 11835, + "low resource target": 31193, + "large scale generative": 28978, + "supervised fine tuning": 53986, + "compare performance model": 9355, + "competitive baselines large": 9543, + "approaches benchmark datasets": 3775, + "training data learn": 58009, + "language understanding recently": 28560, + "end end methods": 17652, + "weakly supervised model": 61867, + "publicly available annotated": 44335, + "evaluate performance models": 18483, + "results model trained": 47726, + "models large margin": 35165, + "question natural language": 44740, + "require large labeled": 46871, + "labeled data train": 27749, + "costly time consuming": 11605, + "time consuming paper": 57135, + "performance fully supervised": 40354, + "available labeled data": 5316, + "demonstrate large scale": 13930, + "large scale unsupervised": 29007, + "attention based transformer": 4726, + "fine tuning training": 21030, + "high quality results": 23784, + "results real world": 47799, + "processing nlp field": 42904, + "real world problems": 45133, + "proposed method conduct": 43813, + "interpretable model agnostic": 26726, + "model agnostic explanations": 33552, + "achieved remarkable success": 1265, + "dataset results method": 13070, + "used data augmentation": 60136, + "approach outperforms baselines": 3624, + "understanding paper propose": 59379, + "attention self attention": 4828, + "extract relevant information": 19989, + "model empirical results": 33810, + "outperforming previous best": 38856, + "models based deep": 34755, + "using encoder decoder": 60674, + "conditional text generation": 10007, + "zhang et al": 63189, + "large memory footprint": 28909, + "art models terms": 4299, + "existing training data": 19164, + "natural language description": 36418, + "approach achieves better": 3396, + "acyclic graph dag": 1493, + "benchmark datasets furthermore": 6454, + "approach achieves high": 3400, + "achieves high accuracy": 1332, + "compared previous works": 9439, + "present state art": 42023, + "applied low resource": 3281, + "data training data": 12742, + "makes predictions based": 31633, + "cross lingual monolingual": 11844, + "model based convolutional": 33602, + "task previous works": 55292, + "provide large scale": 44097, + "active research area": 1479, + "method widely used": 32707, + "commonly used text": 9228, + "multi task settings": 36028, + "multilingual pre training": 36111, + "pre training fine": 41577, + "training fine tuning": 58107, + "domain specific datasets": 16172, + "method natural language": 32586, + "work propose deep": 62773, + "end neural models": 17690, + "document work propose": 15847, + "new question answering": 37296, + "test set outperforming": 56374, + "paper explore task": 39365, + "user generated reviews": 60418, + "analysis paper propose": 2712, + "art machine learning": 4279, + "document level representations": 15808, + "information retrieval task": 26068, + "results transfer learning": 47890, + "domain specific dataset": 16171, + "previously proposed approaches": 42341, + "downstream tasks like": 16359, + "tasks like speech": 55728, + "unlike previous models": 59601, + "based bi lstm": 5606, + "word embeddings glove": 62170, + "pretrained language modeling": 42159, + "data target task": 12723, + "based semantic parsing": 6015, + "experimental results outperforms": 19300, + "models trained large": 35614, + "decision making tasks": 13566, + "new task named": 37335, + "pointer generator network": 41057, + "automatic evaluation human": 5082, + "majority class baseline": 31529, + "understanding question answering": 59388, + "text classification models": 56477, + "machine learning natural": 31327, + "human like language": 24200, + "wide variety nlp": 61985, + "features best performing": 20532, + "standard sequence sequence": 52528, + "conditional variational autoencoder": 10010, + "variational autoencoder based": 61244, + "using automatic human": 60574, + "proposed framework able": 43779, + "quantitative qualitative analysis": 44625, + "semi supervised text": 49467, + "self training framework": 49224, + "large scale unlabeled": 29006, + "existing question answering": 19135, + "key value memory": 27341, + "conducted large scale": 10088, + "terms automatic metrics": 56268, + "metrics human evaluations": 33171, + "reasoning natural language": 45209, + "achieved new state": 1254, + "consistently improves performance": 10297, + "classification model based": 8495, + "automated machine learning": 5049, + "transfer learning paradigm": 58392, + "effectiveness transfer learning": 16820, + "transfer learning natural": 58389, + "present open source": 41983, + "model based bert": 33601, + "model available https": 33594, + "com google research": 9013, + "tasks real world": 55835, + "automatic evaluation metric": 5086, + "large scale machine": 28984, + "self attention models": 49183, + "supervised classification task": 53968, + "information language models": 25941, + "language models existing": 28251, + "language model named": 28179, + "benchmark demonstrate effectiveness": 6462, + "beam search decoding": 6368, + "high computational complexity": 23714, + "data code available": 12211, + "multi task deep": 36018, + "task deep neural": 54996, + "transformer language model": 58491, + "challenging problem requires": 8129, + "structures paper propose": 53192, + "application deep learning": 3163, + "quality experimental results": 44521, + "paper propose generate": 39513, + "human annotated dataset": 24098, + "datasets model achieves": 13335, + "bert pre training": 6705, + "pre training domain": 41575, + "training domain specific": 58073, + "domain specific fine": 16176, + "specific fine tuning": 52086, + "approach highly effective": 3555, + "performance commonly used": 40245, + "pre trained self": 41554, + "sequence generation model": 49926, + "competitive better performance": 9546, + "demonstrate current state": 13887, + "methods perform poorly": 32980, + "super characters method": 53922, + "paper propose semantic": 39532, + "used wide range": 60351, + "neural networks language": 37053, + "improvements low resource": 25079, + "latent embedding space": 29125, + "self attention model": 49182, + "transfer learning large": 58383, + "language models used": 28341, + "training testing data": 58296, + "extensive empirical study": 19867, + "neural network structure": 37026, + "network model trained": 36766, + "public datasets different": 44316, + "demonstrate superior performance": 13983, + "superior performance model": 53937, + "training data recent": 58030, + "achieves significant consistent": 1365, + "significant consistent improvements": 50859, + "model able predict": 33494, + "word overlap metrics": 62258, + "evaluation paper propose": 18667, + "approach code available": 3447, + "training data human": 58001, + "new training data": 37350, + "training data study": 58040, + "data study propose": 12703, + "supervised learning approach": 53996, + "manually annotated training": 31762, + "multinomial naive bayes": 36162, + "random forest rf": 44878, + "sequence sequence generation": 49987, + "apply proposed model": 3349, + "employs attention mechanism": 17405, + "extraction event extraction": 20063, + "pre training framework": 41578, + "conducted extensive experiments": 10085, + "task specific data": 55390, + "data large scale": 12455, + "domain specific models": 16181, + "near perfect accuracy": 36510, + "embeddings experimental results": 17132, + "approach zero shot": 3747, + "consistently outperforms previous": 10305, + "pre trained neural": 41549, + "language model text": 28198, + "languages propose method": 28758, + "approaches end end": 3808, + "achieve sota results": 1202, + "used state art": 60313, + "used prior work": 60275, + "large scale high": 28979, + "scale high quality": 48577, + "neural networks widely": 37080, + "propose new metrics": 43507, + "extensive experiments various": 19904, + "datasets proposed model": 13384, + "shot text classification": 50652, + "transfer learning experiments": 58379, + "language models pretrained": 28303, + "models pretrained large": 35352, + "language model objective": 28180, + "learning models proposed": 29759, + "graph attention network": 23097, + "results datasets demonstrate": 47570, + "static word embeddings": 52728, + "training data domain": 57988, + "scaled dot product": 48643, + "dot product attention": 16319, + "recent natural language": 45323, + "various downstream tasks": 61334, + "techniques natural language": 56115, + "processing machine learning": 42886, + "existing methods focus": 19095, + "datasets english chinese": 13249, + "effectively improve performance": 16740, + "embedding based methods": 17017, + "based methods knowledge": 5848, + "based methods use": 5852, + "based model trained": 5862, + "step natural language": 52817, + "language processing problems": 28424, + "2019 shared task": 285, + "training state art": 58271, + "sentence level cross": 49583, + "information pre trained": 26014, + "perform fine grained": 40110, + "inter sentence dependencies": 26587, + "approach pre trained": 3645, + "tasks source code": 55900, + "data driven method": 12303, + "converting natural language": 11079, + "transfer learning framework": 58381, + "domain pre training": 16135, + "domain fine tuning": 16072, + "fine tuning target": 21025, + "able improve performance": 701, + "propose novel learning": 43544, + "small training datasets": 51509, + "sequence sequence based": 49984, + "training data target": 58041, + "topological data analysis": 57468, + "extensive experiments using": 19903, + "systems perform poorly": 54587, + "new evaluation framework": 37192, + "cold start problem": 8929, + "macro averaged f1": 31405, + "task previous work": 55291, + "directly fine tuning": 15317, + "fine tuning pretrained": 21012, + "diverse nlp tasks": 15710, + "fine grained evaluation": 20935, + "released https github": 46176, + "paper presents unsupervised": 39485, + "data driven manner": 12302, + "comparison different approaches": 9494, + "provides better performance": 44185, + "domain specific task": 16186, + "require large amounts": 46869, + "context document level": 10616, + "encoder decoder transformer": 17508, + "decoder transformer model": 13618, + "pre processing techniques": 41512, + "language models successful": 28326, + "set nlp tasks": 50202, + "task specific models": 55402, + "language model pretraining": 28188, + "performance language model": 40407, + "results main findings": 47711, + "sub tasks sub": 53535, + "tasks sub task": 55916, + "widely used benchmarks": 62012, + "imdb movie reviews": 24571, + "downstream task performance": 16351, + "existing studies focus": 19151, + "downstream tasks especially": 16355, + "systems achieve high": 54421, + "text classification natural": 56478, + "types word embeddings": 59127, + "propose method generating": 43455, + "large scale empirical": 28973, + "including state art": 25304, + "model achieves good": 33517, + "large scale annotated": 28960, + "pretrained language model": 42158, + "lack high quality": 27892, + "high quality large": 23781, + "quality large scale": 44544, + "code pretrained models": 8846, + "pretrained models available": 42170, + "models available https": 34748, + "github com allenai": 22695, + "pre trained monolingual": 41546, + "model achieves results": 33523, + "text generation problem": 56602, + "state art points": 52648, + "based observation propose": 5922, + "training data low": 58013, + "data low resource": 12471, + "bert state art": 6722, + "tasks multiple datasets": 55756, + "achieve comparable results": 1124, + "achieve strong results": 1207, + "model outperform state": 34153, + "existing methods mainly": 19098, + "using self attention": 60917, + "tasks including text": 55683, + "including text classification": 25310, + "zero shot text": 63182, + "effectively experimental results": 16733, + "zero shot scenario": 63174, + "specific language models": 52101, + "described natural language": 14213, + "establishes new state": 18361, + "knowledge distillation method": 27442, + "text generation tasks": 56605, + "https www youtube": 24063, + "www youtube com": 63026, + "semantic similarity tasks": 49349, + "attention mechanism experiments": 4776, + "present new corpus": 41960, + "heuristic rule based": 23630, + "important challenging problem": 24707, + "previous studies mainly": 42289, + "process paper propose": 42815, + "propose multi modal": 43477, + "model real world": 34276, + "significant improvement previous": 50879, + "different datasets different": 14890, + "multilingual multi task": 36101, + "languages fine tuning": 28676, + "fine tuning datasets": 20983, + "best knowledge existing": 6771, + "fine tuning bert": 20981, + "analysis experimental results": 2663, + "datasets code available": 13174, + "available https www": 5310, + "model test time": 34453, + "work publicly available": 62798, + "models trained existing": 35610, + "methods significantly outperform": 33041, + "downstream text classification": 16371, + "traditional word embeddings": 57557, + "bi directional language": 7000, + "directional language model": 15280, + "language model elmo": 28161, + "et al proposed": 18412, + "text knowledge graph": 56637, + "sequence sequence problem": 49994, + "different types information": 15110, + "large room improvement": 28957, + "promising directions future": 43165, + "improve model robustness": 24874, + "received relatively little": 45265, + "data collection pipeline": 12220, + "analysis widely used": 2793, + "learning ml models": 29741, + "models results indicate": 35457, + "framework significantly outperforms": 21601, + "f1 score improvement": 20222, + "multiple pre trained": 36266, + "pre trained deep": 41526, + "domain adversarial training": 16015, + "neural networks paper": 37062, + "networks paper propose": 36889, + "language models nlm": 28289, + "methods require large": 33018, + "semi supervised model": 49464, + "experiments method achieves": 19463, + "standard nlp tasks": 52515, + "present unified framework": 42048, + "novel loss function": 37858, + "methods achieve new": 32729, + "state art adversarial": 52577, + "modern natural language": 35714, + "performances state art": 40648, + "sequential decision making": 50038, + "new research directions": 37303, + "results suggest method": 47867, + "et al 2019": 18406, + "et al 2019b": 18408, + "word sentence levels": 62304, + "improved performance downstream": 24958, + "tasks code available": 55541, + "minimum description length": 33306, + "english spanish french": 17880, + "classification task experiments": 8564, + "language cross lingual": 28013, + "cross lingual setting": 11851, + "neural networks multi": 37059, + "networks multi task": 36878, + "strong baselines future": 53013, + "use long short": 59938, + "penn discourse treebank": 40023, + "labels distant supervision": 27816, + "data proposed method": 12573, + "performance extensive experiments": 40340, + "extensive experiments standard": 19901, + "knowledge high resource": 27517, + "propose method learn": 43456, + "standard benchmark dataset": 52471, + "data augmentation technique": 12162, + "using distant supervision": 60660, + "positive negative examples": 41285, + "deep language models": 13696, + "language models struggle": 28324, + "underlying language model": 59269, + "ability language models": 616, + "task specific fine": 55395, + "fine tuning using": 21032, + "availability training data": 5257, + "multilabel text classification": 36056, + "model based approach": 33599, + "state art alternatives": 52579, + "posts social media": 41373, + "achieved competitive performance": 1228, + "learn domain invariant": 29363, + "introduce novel model": 26848, + "information propose new": 26030, + "propose new automatic": 43498, + "diverse natural language": 15708, + "investigate data augmentation": 26948, + "extensive experiments method": 19892, + "method achieves consistent": 32364, + "improvements existing approaches": 25073, + "present novel language": 41973, + "chinese natural language": 8313, + "masked language modeling": 31864, + "tasks require reasoning": 55855, + "require reasoning multiple": 46886, + "models code available": 34821, + "leading poor performance": 29296, + "propose weakly supervised": 43705, + "adversarial training method": 1992, + "different types models": 15113, + "code url https": 8866, + "github com microsoft": 22706, + "conduct detailed analysis": 10037, + "fine tuning models": 21001, + "models like bert": 35187, + "empirical results benchmark": 17340, + "based pretrained language": 5949, + "pretrained language models": 42160, + "compared human performance": 9414, + "art performance multiple": 4337, + "purely data driven": 44396, + "achieves high performance": 1333, + "paper address task": 39254, + "world low resource": 62948, + "task models trained": 55221, + "propose multitask learning": 43485, + "art nlp models": 4317, + "high quality text": 23786, + "graph based representation": 23111, + "achieve bleu score": 1120, + "multi hop question": 35970, + "hop question answering": 24002, + "task binary classification": 54941, + "loss function training": 31095, + "human performance tasks": 24215, + "using supervised learning": 60970, + "classifier trained using": 8606, + "bert fine tuning": 6662, + "english spanish english": 17879, + "create training data": 11719, + "shared task evaluation": 50499, + "variational auto encoder": 61242, + "auto encoder based": 5015, + "state art relation": 52654, + "representations transformers bert": 46777, + "transformers bert model": 58524, + "different bert models": 14855, + "world use case": 62966, + "based fine tuning": 5736, + "fine tuning approaches": 20979, + "machine learning components": 31317, + "large scale semantic": 28999, + "models based encoder": 34756, + "proposed method benchmark": 43810, + "method benchmark datasets": 32403, + "proposed approach able": 43721, + "zero shot transfer": 63183, + "shot transfer learning": 50655, + "understanding generation tasks": 59348, + "pre trained using": 41563, + "language modeling tasks": 28220, + "transformer language models": 58492, + "language models require": 28315, + "specifically propose new": 52223, + "significant improvements strong": 50890, + "art performance chinese": 4329, + "proposed method applied": 43808, + "classification textual entailment": 8575, + "art baselines paper": 4223, + "graph neural network": 23153, + "existing methods limited": 19097, + "attracted wide attention": 4890, + "diverse real world": 15714, + "teacher student framework": 55996, + "language processing researchers": 28430, + "fine grained knowledge": 20937, + "provided natural language": 44168, + "models able capture": 34654, + "curriculum learning approach": 12044, + "propose sequence sequence": 43625, + "significantly outperforms current": 51001, + "pre trained text": 41559, + "understanding nlu natural": 59375, + "nlu natural language": 37566, + "improve generalization ability": 24860, + "benchmarks natural language": 6535, + "language understanding commonsense": 28547, + "performance language models": 40408, + "bert language model": 6668, + "level contextual representations": 30084, + "models trained language": 35613, + "trained language modeling": 57760, + "art results standard": 4385, + "best knowledge paper": 6772, + "release pre trained": 46164, + "trained language representation": 57762, + "network cnn based": 36720, + "based model called": 5858, + "using neural language": 60833, + "bert pre trained": 6704, + "performance various nlp": 40624, + "existing pre trained": 19128, + "external knowledge paper": 19943, + "source code paper": 51750, + "code paper obtained": 8842, + "paper obtained https": 39431, + "masked language models": 31866, + "language models specific": 28321, + "language pre trained": 28383, + "art performance wide": 4353, + "comprehension natural language": 9772, + "detection sentiment analysis": 14523, + "reduces training time": 45700, + "time pre trained": 57196, + "improve prediction performance": 24908, + "performance introduce novel": 40399, + "datasets end end": 13247, + "address data sparsity": 1754, + "sentence level semantic": 49591, + "modern nlp systems": 35718, + "require high quality": 46861, + "high quality annotated": 23769, + "trained domain data": 57713, + "supervised learning task": 54007, + "human written ones": 24260, + "low resource neural": 31185, + "resource neural machine": 47256, + "provide valuable insights": 44153, + "conduct user study": 10069, + "using semantic similarity": 60920, + "introduce novel framework": 26845, + "neural network propose": 37022, + "target domain data": 54813, + "multi head self": 35966, + "head self attention": 23499, + "received significant attention": 45267, + "task specific model": 55401, + "pre train language": 41517, + "train language model": 57598, + "downstream tasks work": 16369, + "better task specific": 6976, + "language modeling pre": 28216, + "large batch sizes": 28850, + "model agnostic meta": 33554, + "agnostic meta learning": 2092, + "meta learning maml": 32343, + "machine translation datasets": 31354, + "datasets demonstrate superiority": 13218, + "available http github": 5305, + "compare proposed method": 9361, + "unlike prior work": 59608, + "lstm based approach": 31244, + "new research direction": 37302, + "effective cross lingual": 16640, + "text style transfer": 56794, + "modern machine learning": 35711, + "challenging work propose": 8164, + "solve problems propose": 51689, + "datasets paper present": 13363, + "best published results": 6811, + "using training data": 60996, + "speech tagging pos": 52303, + "controllable text generation": 10980, + "neural network sentence": 37025, + "pre training language": 41580, + "applying transfer learning": 3381, + "learning large scale": 29700, + "datasets empirical results": 13243, + "superiority proposed approach": 53954, + "datasets paper describes": 13361, + "english german language": 17815, + "powerful pre trained": 41442, + "quantitative qualitative results": 44627, + "method substantially outperforms": 32673, + "substantially outperforms existing": 53646, + "model able learn": 33492, + "research cross lingual": 47008, + "inference experimental results": 25655, + "time series data": 57211, + "experimental results multiple": 19297, + "benchmark datasets method": 6456, + "consistent improvements compared": 10279, + "compared baseline methods": 9384, + "joint learning framework": 27176, + "language modeling performance": 28215, + "used computer vision": 60123, + "training zero shot": 58321, + "decoder pre training": 13611, + "results proposed framework": 47783, + "release large scale": 46156, + "extensive experiment results": 19871, + "compared strong baseline": 9461, + "strong baseline models": 53006, + "models shown remarkable": 35501, + "human machine interaction": 24206, + "compared models trained": 9422, + "applications real world": 3243, + "rule based model": 48384, + "quantitative qualitative experiments": 44626, + "typically require large": 59155, + "domain specific data": 16170, + "approach state art": 3704, + "language model pre": 28185, + "zero shot cross": 63158, + "shot cross lingual": 50609, + "used fine tune": 60192, + "fine tune model": 20950, + "detailed analysis reveals": 14414, + "commonly used neural": 9227, + "transformer based seq2seq": 58473, + "learning zero shot": 29949, + "provide better understanding": 44021, + "language models typically": 28338, + "language models different": 28245, + "current best performing": 11964, + "existing benchmark datasets": 19041, + "shows significant performance": 50804, + "employ state art": 17392, + "shows promising performance": 50796, + "tasks single model": 55894, + "accordingly propose novel": 875, + "kullback leibler kl": 27681, + "leibler kl divergence": 30015, + "models evaluation metrics": 34980, + "improves previous state": 25152, + "hierarchical neural network": 23684, + "propose coarse fine": 43321, + "present novel end": 41971, + "sentence level word": 49597, + "improvement current state": 25001, + "deep learning paper": 13717, + "based model pre": 5861, + "domain unlabeled data": 16222, + "baselines low resource": 6278, + "state art terms": 52683, + "tasks demonstrate approach": 55576, + "recently state art": 45468, + "previous work proposed": 42310, + "methods cross lingual": 32806, + "tasks speech pos": 55905, + "state art discriminative": 52606, + "adapt pre trained": 1508, + "propose effective approach": 43361, + "label text classification": 27732, + "classification question answering": 8527, + "results low resource": 47705, + "large collections text": 28859, + "using adversarial learning": 60555, + "datasets state art": 13443, + "significant performance boost": 50903, + "corpus news articles": 11390, + "build general purpose": 7401, + "training data models": 58018, + "domains paper propose": 16282, + "low resource cross": 31174, + "resource cross lingual": 47218, + "information retrieval tasks": 26069, + "content paper propose": 10545, + "paper propose study": 39536, + "correlations human judgments": 11537, + "provide thorough analysis": 44145, + "built state art": 7491, + "art nlp techniques": 4319, + "task requires model": 55340, + "downstream classification tasks": 16336, + "form knowledge graph": 21324, + "existing approaches typically": 19032, + "space pre trained": 51884, + "learn better representations": 29349, + "framework extensive experiments": 21519, + "feature based neural": 20479, + "pairs english german": 39184, + "open source publicly": 38456, + "model cross lingual": 33732, + "paper provides comprehensive": 39559, + "provides comprehensive overview": 44189, + "achieve impressive performance": 1162, + "address problems propose": 1793, + "curriculum learning cl": 12045, + "trained neural networks": 57829, + "domain transfer learning": 16218, + "transfer learning fine": 58380, + "learning fine tuning": 29649, + "teacher student models": 55997, + "training data automatically": 57977, + "make better use": 31545, + "training data applying": 57974, + "ablation studies demonstrate": 658, + "shown promising performance": 50742, + "neural architecture search": 36932, + "task learning based": 55171, + "bert based baseline": 6618, + "experiments https github": 19442, + "pipeline based approach": 40894, + "universal sentence encoder": 59547, + "task specific information": 55396, + "model proposed paper": 34252, + "learning method based": 29727, + "supervised learning model": 54002, + "features experimental results": 20576, + "suitable real time": 53859, + "cross lingual models": 11843, + "problem paper proposes": 42622, + "challenging task work": 8155, + "task work propose": 55474, + "propose data augmentation": 43347, + "bert base model": 6614, + "use self attention": 60004, + "performance strong baseline": 40578, + "strong baseline model": 53005, + "knowledge distillation kd": 27441, + "leverages state art": 30316, + "using weakly supervised": 61022, + "perform systematic comparison": 40151, + "different types errors": 15109, + "absolute improvement state": 745, + "svm random forest": 54239, + "model makes use": 34088, + "question answering requires": 44708, + "yang et al": 63045, + "language models capable": 28237, + "large language models": 28899, + "synthetic data generation": 54372, + "trained publicly available": 57847, + "deep pre trained": 13743, + "representations learned large": 46706, + "unlabeled text data": 59583, + "time consuming manual": 57134, + "large scale sentence": 29000, + "effectiveness method conduct": 16791, + "improving model performance": 25186, + "performance pre trained": 40488, + "chinese pre trained": 8318, + "language models propose": 28307, + "models propose simple": 35372, + "baselines including bert": 6272, + "performances nlp tasks": 40644, + "resources available https": 47293, + "autoregressive language modeling": 5218, + "similarity based methods": 51085, + "fine grained labels": 20938, + "model based transformer": 33607, + "language agnostic sentence": 27957, + "mitigate problem propose": 33390, + "entity recognition task": 18134, + "experimental evaluations proposed": 19265, + "proposed approach performs": 43730, + "memory lstm gated": 32261, + "lstm gated recurrent": 31263, + "achieved significant improvements": 1269, + "alleviate problems propose": 2419, + "novel framework named": 37828, + "paper describes novel": 39326, + "describes novel approach": 14229, + "processing nlp text": 42915, + "translation text summarization": 58691, + "alleviate issues propose": 2411, + "head attention mechanism": 23496, + "propose novel self": 43559, + "models trained data": 35603, + "computationally expensive paper": 9875, + "expensive paper propose": 19215, + "shows competitive performance": 50770, + "code available github": 8793, + "language model finetuning": 28166, + "dataset state art": 13101, + "attain state art": 4669, + "release code models": 46147, + "code models https": 8836, + "information extraction methods": 25862, + "text experiments demonstrate": 56567, + "learning models including": 29755, + "linear support vector": 30671, + "language understanding evaluation": 28549, + "performance work propose": 40633, + "work propose effective": 62774, + "language models various": 28345, + "fine tuning process": 21014, + "nlp computer vision": 37476, + "application machine learning": 3167, + "learning ml techniques": 29742, + "vector machines svm": 61456, + "models perform best": 35310, + "results effectiveness approach": 47603, + "require large scale": 46873, + "large scale manually": 28985, + "scale manually annotated": 48595, + "paper propose using": 39541, + "self attention module": 49184, + "f1 score 81": 20209, + "human human conversations": 24169, + "domain data propose": 16040, + "resources low resource": 47314, + "pre trained cross": 41525, + "performance training data": 40606, + "proposed model achieved": 43845, + "fine grained coarse": 20931, + "grained coarse grained": 23028, + "outperforms zero shot": 38963, + "zero shot fine": 63163, + "systems state art": 54640, + "additionally introduce novel": 1723, + "social media paper": 51580, + "transfer learning tasks": 58397, + "sentence level embeddings": 49585, + "case studies demonstrate": 7797, + "models trained end": 35607, + "models neural network": 35254, + "generative language models": 22593, + "language models like": 28275, + "language model generate": 28167, + "relation extraction aims": 45973, + "task knowledge graph": 55154, + "help improve performance": 23571, + "benchmark datasets including": 6455, + "approach effectively improve": 3502, + "language models evaluate": 28250, + "language directions english": 28030, + "lingual language model": 30708, + "model pre training": 34219, + "propose novel graph": 43538, + "novel graph neural": 37835, + "social media like": 51578, + "improving cross lingual": 25174, + "languages demonstrate proposed": 28636, + "morpho syntactic features": 35837, + "greatly improve performance": 23230, + "work propose end": 62776, + "neural networks recurrent": 37069, + "networks recurrent neural": 36904, + "propose sentence level": 43622, + "training data multi": 58019, + "data multi task": 12498, + "propose novel transformer": 43571, + "novel transformer based": 37943, + "transformer based architecture": 58455, + "model achieves higher": 33518, + "achieves higher accuracy": 1335, + "using bert based": 60589, + "training data generated": 57998, + "compared existing datasets": 9407, + "trained multi task": 57815, + "far human performance": 20401, + "making informed decisions": 31659, + "recently attracted lot": 45410, + "attention mechanism proposed": 4780, + "prior work focused": 42421, + "based prior work": 5953, + "transfer learning approaches": 58377, + "comments social media": 9148, + "generation nlg tasks": 22508, + "social media based": 51568, + "generation experimental results": 22457, + "model generate natural": 33931, + "pre training method": 41582, + "data model size": 12490, + "different types noise": 15114, + "semantic structure text": 49356, + "multiple instance learning": 36230, + "instance learning mil": 26426, + "datasets different sizes": 13231, + "codes publicly available": 8880, + "recently pre trained": 45449, + "training large scale": 58150, + "current pre training": 12001, + "pre training tasks": 41598, + "trained models released": 57808, + "models released https": 35426, + "large neural models": 28918, + "transformer based sequence": 58474, + "cross lingual embedding": 11833, + "end propose new": 17701, + "annotated data training": 2887, + "extraction knowledge graph": 20075, + "state art work": 52693, + "automatic natural language": 5113, + "uses attention mechanism": 60492, + "trained fine tuning": 57733, + "paper available https": 39278, + "neural networks effective": 37044, + "data natural language": 12504, + "paper present dataset": 39447, + "nlp deep learning": 37481, + "models requires large": 35445, + "general purpose language": 22086, + "purpose language models": 44404, + "fine tuned pre": 20966, + "tuned pre trained": 58883, + "trained neural models": 57827, + "rich semantic information": 48120, + "learning based language": 29534, + "level natural language": 30166, + "layer recurrent neural": 29206, + "incorporate domain knowledge": 25352, + "model performs competitively": 34204, + "current deep learning": 11970, + "sequence sequence deep": 49985, + "learning models perform": 29758, + "models perform task": 35317, + "used train models": 60335, + "specifically pre train": 52220, + "real world situations": 45139, + "demonstrate approach achieve": 13866, + "hand crafted linguistic": 23388, + "model fine grained": 33892, + "word representations bert": 62288, + "requires reasoning multiple": 46949, + "source codes available": 51755, + "establish strong baselines": 18350, + "provided https github": 44163, + "requires deep understanding": 46923, + "prior work proposed": 42424, + "transformer based encoder": 58459, + "extensive experiments popular": 19896, + "improvement strong baselines": 25029, + "collect large scale": 8947, + "data expensive obtain": 12340, + "self supervised tasks": 49218, + "pseudo training data": 44283, + "propose self supervised": 43615, + "self supervised pre": 49215, + "supervised pre training": 54033, + "network pre trained": 36785, + "experimental results commonly": 19276, + "results commonly used": 47541, + "training data method": 58016, + "model combines multi": 33668, + "hidden test set": 23650, + "footnote url https": 21282, + "language model use": 28201, + "state art multi": 52635, + "language models fail": 28254, + "demonstrated state art": 14020, + "art performance various": 4352, + "fine tuned bert": 20958, + "tasks fine tuned": 55644, + "tuned bert model": 58868, + "improve performance low": 24891, + "models outperforms state": 35292, + "hierarchical attention network": 23659, + "used train evaluate": 60333, + "level f1 score": 30118, + "previous works usually": 42323, + "solve low resource": 51682, + "accuracy low resource": 1002, + "conduct empirical study": 10039, + "inspire future research": 26403, + "existing methods generate": 19096, + "terms automatic human": 56267, + "human like responses": 24201, + "language model proposed": 28190, + "various nlp applications": 61373, + "applications existing methods": 3206, + "results experimental results": 47626, + "compared rule based": 9449, + "gpt language model": 22982, + "adversarial networks gan": 1981, + "based graph based": 5761, + "contextualized word embedding": 10813, + "recent years existing": 45385, + "pre trained english": 41532, + "model generate high": 33930, + "processing nlp research": 42910, + "weighted f1 score": 61929, + "datasets recent years": 13395, + "task deep learning": 54995, + "masked language model": 31863, + "results demonstrate models": 47580, + "pre training approaches": 41568, + "fine tuning stage": 21020, + "performance strong baselines": 40579, + "using task specific": 60979, + "popular sequence sequence": 41186, + "provide depth analysis": 44048, + "using transformer based": 61000, + "transformer based language": 58461, + "case study shows": 7800, + "proposed model improves": 43852, + "currently state art": 12039, + "proposed end end": 43765, + "end end approaches": 17637, + "pipeline end end": 40899, + "publicly available paper": 44348, + "dataset source code": 13095, + "proposed method compared": 43812, + "language models use": 28340, + "second language learners": 49010, + "method achieves significant": 32366, + "language models bert": 28235, + "knowledge distillation approach": 27440, + "knowledge distillation methods": 27443, + "language models specifically": 28322, + "fine tuning technique": 21028, + "fine tuning methods": 20999, + "experimental results sentiment": 19311, + "results sentiment analysis": 47826, + "neural attention mechanism": 36935, + "news articles manually": 37386, + "recent developments neural": 45305, + "human written text": 24263, + "human written texts": 24264, + "model https github": 33964, + "propose new data": 43499, + "deep generative models": 13694, + "syntactic structures sentences": 54331, + "correlation human judgment": 11523, + "state art summarization": 52673, + "data https github": 12409, + "using cross lingual": 60637, + "character level embeddings": 8208, + "levels experimental results": 30239, + "existing approaches consider": 19026, + "gains state art": 21944, + "models work propose": 35689, + "network rnn based": 36797, + "auto encoder vae": 5016, + "consistently outperform existing": 10300, + "large scale language": 28983, + "based models perform": 5878, + "models perform significantly": 35316, + "zero shot semantic": 63176, + "fine coarse grained": 20923, + "based models shown": 5880, + "models demonstrate effectiveness": 34893, + "sample training data": 48459, + "tasks finally discuss": 55641, + "series state art": 50070, + "results standard benchmark": 47854, + "graph representation learning": 23162, + "accuracy natural language": 1013, + "incorporating external knowledge": 25386, + "attention mechanism using": 4783, + "language modeling techniques": 28221, + "context language models": 10665, + "automatic evaluation measures": 5083, + "representations work propose": 46793, + "art pretrained language": 4360, + "entity recognition tasks": 18135, + "models trained limited": 35615, + "code data https": 8800, + "cross lingual pre": 11846, + "lingual pre training": 30719, + "propose new pre": 43512, + "new pre training": 37287, + "pre training model": 41584, + "time step experiments": 57225, + "models encoder decoder": 34964, + "language models named": 28287, + "tasks including named": 55680, + "achieved strong performance": 1276, + "dataset document level": 12901, + "model document level": 33783, + "document level graph": 15806, + "based reading comprehension": 5975, + "art neural architectures": 4309, + "proposed approach consistently": 43725, + "model based graph": 33604, + "graph attention networks": 23098, + "tackle challenge paper": 54698, + "conduct comprehensive experiments": 10033, + "dataset results demonstrate": 13069, + "multilingual bert mbert": 36066, + "different cross lingual": 14883, + "small number labeled": 51489, + "recent years studies": 45397, + "new evaluation metric": 37194, + "propose novel automatic": 43525, + "deep learning algorithms": 13701, + "pretraining language models": 42207, + "language models fine": 28255, + "models fine tuning": 35032, + "improvements strong baseline": 25103, + "expert annotated dataset": 19570, + "pre training step": 41593, + "pre training phase": 41588, + "language model gpt": 28169, + "original training data": 38734, + "recent work neural": 45371, + "downstream applications existing": 16333, + "standard encoder decoder": 52488, + "models achieved impressive": 34678, + "automatic evaluation shows": 5089, + "language models generally": 28256, + "models generally trained": 35056, + "wide range topics": 61976, + "multi task framework": 36020, + "vision language tasks": 61639, + "transfer learning paper": 58391, + "external knowledge graphs": 19941, + "novel knowledge aware": 37847, + "based graph convolutional": 5762, + "bert based models": 6624, + "based models achieved": 5867, + "semantically similar sentences": 49393, + "used datasets demonstrate": 60139, + "language inference tasks": 28114, + "large scale pretrained": 28995, + "scale pretrained language": 48615, + "trained supervised manner": 57887, + "wide range downstream": 61967, + "language modeling objectives": 28213, + "task learning setting": 55180, + "bert masked language": 6680, + "state art using": 52688, + "propose simple efficient": 43634, + "achieving comparable performance": 1399, + "developments natural language": 14714, + "pre trained transformer": 41560, + "pre trained transformers": 41561, + "evaluate state art": 18508, + "transformer models bert": 58499, + "models bert led": 34774, + "outperforming strong baselines": 38863, + "preliminary results suggest": 41805, + "publicly release code": 44360, + "prior work largely": 42423, + "work largely focused": 62706, + "used pre training": 60265, + "tasks recent years": 55840, + "aware attention mechanism": 5442, + "better overall performance": 6925, + "investigate different approaches": 26952, + "performing model achieves": 40682, + "achieves macro f1": 1344, + "supervised models trained": 54022, + "speech tagging dependency": 52301, + "essential natural language": 18331, + "sentence prediction nsp": 49622, + "method consistently improves": 32437, + "achieve best performance": 1113, + "augmenting training set": 4991, + "performance previous work": 40498, + "bert bidirectional encoder": 6631, + "tasks masked language": 55742, + "pre training task": 41597, + "bert base bert": 6613, + "base bert large": 5539, + "specifically proposed model": 52226, + "language modeling language": 28208, + "pre trained domain": 41528, + "training pre trained": 58211, + "large scale models": 28986, + "self supervised language": 49209, + "models trained standard": 35621, + "entropy loss function": 18163, + "meta learning method": 32344, + "results fine grained": 47636, + "detection shared task": 14525, + "models based pre": 34757, + "fine tune pre": 20953, + "tune pre trained": 58862, + "bert models trained": 6689, + "models generalization ability": 35053, + "generalization ability models": 22116, + "benchmark future research": 6470, + "paper propose fine": 39510, + "propose fine grained": 43389, + "ground truth label": 23255, + "language models promising": 28306, + "pretrained cross lingual": 42151, + "resource languages propose": 47245, + "train fine tune": 57591, + "model language model": 34037, + "using language model": 60753, + "language model experimental": 28164, + "transformer based models": 58465, + "models fine tuned": 35031, + "fine tuned task": 20970, + "text mining information": 56662, + "approach transfer learning": 3727, + "detection low resource": 14498, + "applications work propose": 3261, + "methods zero shot": 33107, + "data existing methods": 12337, + "sources knowledge bases": 51833, + "task f1 score": 55076, + "proposed data augmentation": 43751, + "et al 2019a": 18407, + "significant performance degradation": 50905, + "challenge propose novel": 8010, + "score test set": 48879, + "set low resource": 50188, + "does require labeled": 15974, + "language processing present": 28421, + "speech translation st": 52312, + "general world knowledge": 22099, + "effective way improve": 16713, + "improve neural machine": 24878, + "pre training transformer": 41601, + "using maximum likelihood": 60793, + "training transformer based": 58306, + "cross lingual representations": 11848, + "simple efficient approach": 51162, + "based publicly available": 5965, + "trained bert models": 57683, + "competitive results state": 9563, + "comprehension mrc task": 9770, + "monte carlo dropout": 35827, + "evaluate approach using": 18439, + "shared semantic space": 50488, + "neural networks transformer": 37076, + "improvements compared state": 25060, + "representation model bert": 46553, + "parameters pre trained": 39717, + "significantly outperforms bert": 50999, + "large amounts high": 28836, + "amounts high quality": 2549, + "parallel corpora training": 39643, + "conditional language model": 9997, + "dataset introduce novel": 12970, + "inference nli models": 25677, + "datasets diverse domains": 13235, + "natural language present": 36438, + "multiple downstream tasks": 36206, + "aware language models": 5455, + "trained models experiments": 57798, + "data model achieves": 12489, + "code switched language": 8859, + "based experimental results": 5716, + "experimental results language": 19290, + "propose novel context": 43527, + "novel context aware": 37789, + "results approach improves": 47505, + "strong inductive bias": 53035, + "natural language queries": 36442, + "previous works focus": 42319, + "word representations based": 62287, + "statistical significance testing": 52763, + "facilitate cross lingual": 20264, + "training significantly improves": 58260, + "training paper propose": 58202, + "obtain better performance": 38163, + "words word embedding": 62548, + "fully connected neural": 21719, + "connected neural network": 10178, + "important research problem": 24763, + "progress machine learning": 43104, + "models different domains": 34916, + "self attention transformer": 49187, + "different neural architectures": 15006, + "produces high quality": 43029, + "performance existing models": 40332, + "li et al": 30420, + "generation aims generate": 22414, + "results automatic human": 47514, + "entities natural language": 18069, + "recent advances language": 45283, + "labeled data scarce": 27745, + "graph attention mechanism": 23096, + "large neural language": 28917, + "models generate better": 35058, + "github com miulab": 22707, + "training knowledge distillation": 58141, + "single model achieves": 51318, + "recent progress neural": 45339, + "model paper propose": 34176, + "transformer model trained": 58497, + "relations paper propose": 46050, + "novel generative model": 37831, + "experiments conducted datasets": 19383, + "datasets real world": 13393, + "generated state art": 22322, + "study propose new": 53443, + "based approach automatically": 5571, + "text classification systems": 56486, + "long training time": 31047, + "large scale pretraining": 28996, + "datasets indicate model": 13302, + "indicate model significantly": 25528, + "end end task": 17664, + "model paper presents": 34175, + "different information sources": 14955, + "mutual information mi": 36348, + "10 fold cross": 43, + "low data regimes": 31140, + "conduct systematic study": 10066, + "low resource ones": 31186, + "task aims extract": 54896, + "models achieved remarkable": 34680, + "token level sentence": 57299, + "pre trained multilingual": 41547, + "models recent years": 35409, + "results demonstrate framework": 47577, + "new objective function": 37272, + "natural language models": 36435, + "performance various tasks": 40625, + "controlled text generation": 10987, + "input natural language": 26303, + "human written summaries": 24262, + "named entity types": 36375, + "autoregressive language model": 5217, + "used ground truth": 60201, + "achieve performance comparable": 1179, + "success downstream tasks": 53700, + "self attention layer": 49178, + "language models achieve": 28226, + "hierarchical multi task": 23681, + "large pretrained language": 28942, + "manually annotated datasets": 31760, + "pre trained general": 41534, + "trained general domain": 57736, + "pre defined set": 41501, + "traditional supervised learning": 57549, + "substantially improve performance": 53637, + "improve performance compared": 24886, + "creates new state": 11737, + "present detailed analysis": 41890, + "task end end": 55048, + "test time model": 56389, + "representation input sentence": 46530, + "methods nlp tasks": 32960, + "performance shot learning": 40559, + "new data augmentation": 37161, + "models capable generating": 34803, + "framework state art": 21605, + "model capable generating": 33645, + "closely related tasks": 8707, + "train single model": 57635, + "model inference time": 34001, + "evaluation results reveal": 18705, + "data time consuming": 12735, + "learning based text": 29543, + "based models like": 5873, + "bert widely used": 6735, + "based methods like": 5849, + "performance real world": 40520, + "state art generation": 52618, + "multilingual bert model": 36067, + "bert model trained": 6686, + "crucial task natural": 11914, + "widely used approaches": 62009, + "augment training data": 4945, + "models extensive experiments": 35005, + "machine translated data": 31344, + "target language data": 54824, + "art deep neural": 4248, + "neural network text": 37028, + "models bert xlnet": 34777, + "capable zero shot": 7633, + "extractive question answering": 20138, + "language models recently": 28314, + "wide variety natural": 61984, + "models need large": 35249, + "word embeddings large": 62176, + "downstream fine tuning": 16339, + "tackle issues propose": 54707, + "semantic parsing model": 49312, + "proposed framework enables": 43784, + "language models produce": 28305, + "results paper propose": 47759, + "learning model based": 29744, + "transfer learning models": 58387, + "pre trained masked": 41542, + "trained masked language": 57784, + "models semi supervised": 35478, + "art pre trained": 4358, + "task specific language": 55399, + "roberta based models": 48218, + "reduce model size": 45672, + "impact model performance": 24600, + "networks pre trained": 36894, + "pre trained fine": 41533, + "trained fine tuned": 57732, + "fine tuned large": 20962, + "rich resource languages": 48118, + "model wide range": 34535, + "make dataset publicly": 31561, + "mechanism experimental results": 32117, + "based transformer models": 6110, + "techniques significantly improve": 56137, + "new test set": 37342, + "learning dl models": 29601, + "achieving human level": 1413, + "language models led": 28273, + "language models introduce": 28265, + "art performance natural": 4338, + "results english german": 47611, + "compared prior work": 9442, + "models knowledge graph": 35155, + "used evaluate quality": 60171, + "siamese neural network": 50821, + "contextual embeddings bert": 10766, + "task analysis shows": 54901, + "pre trained encoders": 41531, + "pre trained encoder": 41530, + "improvements competitive baselines": 25063, + "large labeled datasets": 28895, + "resource languages work": 47248, + "transfer learning tl": 58399, + "datasets demonstrate approach": 13211, + "propose novel sequence": 43561, + "level classification task": 30076, + "application pre trained": 3176, + "task state art": 55411, + "models large scale": 35167, + "achieve high quality": 1153, + "higher f1 score": 23824, + "novel training framework": 37940, + "work develop new": 62632, + "fine tuning procedure": 21013, + "method conceptually simple": 32432, + "multilingual language model": 36089, + "models paper describes": 35299, + "bias language models": 7030, + "make predictions based": 31588, + "text based models": 56456, + "models outperform strong": 35288, + "statistically significant differences": 52771, + "determinantal point processes": 14551, + "pre training knowledge": 41579, + "present comprehensive survey": 41874, + "compared competitive baseline": 9396, + "problem low resource": 42599, + "provide comprehensive evaluation": 44038, + "models multilingual bert": 35238, + "train transformer based": 57652, + "based masked language": 5834, + "experimental results compared": 19277, + "art transformer based": 4433, + "transformer based model": 58464, + "results available https": 47516, + "extraction natural language": 20088, + "model fine tune": 33893, + "transfer learning pre": 58393, + "learning pre training": 29812, + "absolute f1 points": 741, + "exhibits state art": 19011, + "official test sets": 38313, + "neural networks text": 37074, + "approach text classification": 3721, + "domain adaptation framework": 15998, + "lottery ticket hypothesis": 31128, + "bert based model": 6623, + "long form text": 31013, + "conduct series experiments": 10061, + "building recent advances": 7466, + "model reconstruct original": 34286, + "transformer based neural": 58468, + "superior results compared": 53943, + "single end end": 51301, + "best end end": 6760, + "uses deep neural": 60505, + "art results paper": 4384, + "settings zero shot": 50406, + "multilingual pre trained": 36110, + "models data augmentation": 34881, + "better capture long": 6858, + "building natural language": 7459, + "art competitive results": 4239, + "models based transformer": 34760, + "based transformer architecture": 6107, + "mining machine learning": 33317, + "recently neural models": 45442, + "tasks transfer learning": 55941, + "tasks fine tuning": 55645, + "models new dataset": 35257, + "pretrained masked language": 42167, + "language models mlms": 28283, + "autoregressive language models": 5219, + "low resource domains": 31178, + "various benchmark datasets": 61310, + "improves f1 score": 25130, + "outperforms previous sota": 38924, + "pre training text": 41600, + "generative pre trained": 22603, + "systems pre trained": 54593, + "multi task setting": 36027, + "pre training techniques": 41599, + "thorough error analysis": 57058, + "learning methods require": 29735, + "baselines future research": 6265, + "reach high performance": 45048, + "semi supervised unsupervised": 49469, + "supervised unsupervised learning": 54066, + "models heavily rely": 35081, + "data text classification": 12730, + "extraction sentiment analysis": 20109, + "training single model": 58262, + "graph convolutional neural": 23126, + "transformer based pre": 58471, + "language models proven": 28309, + "fine tuning downstream": 20986, + "tuning downstream tasks": 58909, + "power pre trained": 41430, + "models shown effective": 35497, + "simple effective strategy": 51158, + "source code https": 51748, + "bias training data": 7046, + "generate large scale": 22216, + "benefit downstream tasks": 6561, + "downstream tasks sentiment": 16367, + "analysis propose novel": 2729, + "propose context aware": 43334, + "overcome limitation propose": 39067, + "limitation propose novel": 30540, + "word representations obtained": 62290, + "capture fine grained": 7672, + "cross entropy ce": 11823, + "examples paper propose": 18922, + "cross entropy objective": 11825, + "contextualized embeddings bert": 10801, + "apply proposed method": 3348, + "experimental results verify": 19322, + "recently large scale": 45436, + "using zero shot": 61031, + "language models plms": 28297, + "progress pre trained": 43111, + "method improve performance": 32532, + "virtual adversarial training": 61624, + "paper present hierarchical": 39452, + "present qualitative quantitative": 41994, + "pretrained bert model": 42148, + "original bert model": 38705, + "based generative adversarial": 5750, + "paper propose adaptive": 39490, + "information different modalities": 25813, + "human evaluation scores": 24155, + "models tend rely": 35590, + "proposed framework achieves": 43780, + "learning self supervised": 29865, + "training multi task": 58182, + "models trained english": 35608, + "trained english data": 57723, + "data multiple languages": 12501, + "language models languages": 28269, + "language models speech": 28323, + "approach substantially outperforms": 3709, + "substantially outperforms state": 53648, + "sequence sequence task": 49997, + "apply pre trained": 3346, + "paper conduct systematic": 39300, + "fine tuning phase": 21009, + "model performance propose": 34193, + "address problem introduce": 1785, + "performance end end": 40318, + "propose novel joint": 43542, + "low dimensional embeddings": 31144, + "proposed method leads": 43822, + "trained transformer models": 57902, + "fine tuning large": 20996, + "model target domain": 34443, + "establishes state art": 18363, + "datasets fine tuning": 13277, + "fine tuning finally": 20991, + "systems machine learning": 54556, + "previous studies proposed": 42290, + "weakly supervised training": 61868, + "lead sub optimal": 29275, + "meta learning algorithm": 32338, + "en en fr": 17415, + "use adversarial training": 59817, + "based models use": 5882, + "learning language model": 29695, + "error analysis results": 18214, + "unlike previous studies": 59602, + "self supervised manner": 49211, + "paper propose contextual": 39499, + "sentence level document": 49584, + "nlp tasks lack": 37537, + "languages pre trained": 28754, + "benchmark state art": 6494, + "github com thu": 22712, + "com thu keg": 9026, + "model trained labeled": 34472, + "approach achieves comparable": 3397, + "large scale benchmark": 28963, + "learned pre training": 29475, + "models best knowledge": 34779, + "language models utilize": 28344, + "datasets work present": 13487, + "issues propose new": 27100, + "language model representations": 28191, + "study demonstrate effectiveness": 53356, + "superior performance state": 53939, + "approximate nearest neighbor": 3978, + "according experimental results": 860, + "bidirectional encoder representation": 7068, + "time space complexity": 57217, + "carry extensive experiments": 7778, + "language models gpt": 28259, + "multi task models": 36023, + "applied real world": 3290, + "unlike previous methods": 59600, + "using external knowledge": 60688, + "proposed method shows": 43828, + "shows better performance": 50766, + "achieved promising performance": 1259, + "resource languages remains": 47246, + "compared current state": 9400, + "text classification experimental": 56472, + "classification experimental results": 8468, + "classification models using": 8499, + "bring significant improvement": 7335, + "performance outperforms previous": 40471, + "deep learning technique": 13721, + "bert based classifier": 6620, + "scale human evaluation": 48580, + "question answering document": 44695, + "large scale public": 28997, + "language models able": 28225, + "long term dependency": 31040, + "neural networks using": 37079, + "existing publicly available": 19133, + "based data augmentation": 5664, + "large scale general": 28977, + "fine tuning strategy": 21024, + "deep transformer based": 13752, + "future research paper": 21891, + "data driven analysis": 12299, + "weakly supervised approaches": 61861, + "language pre training": 28384, + "use attention mechanism": 59827, + "propose unified framework": 43689, + "english chinese datasets": 17784, + "large pretrained models": 28943, + "shared task cross": 50496, + "teams participated shared": 56009, + "participated shared task": 39820, + "pretrained transformer language": 42188, + "models bert roberta": 34775, + "significantly better baselines": 50940, + "bert fine tuned": 6661, + "high quality diverse": 23776, + "github com jzbjyb": 22704, + "wide variety domains": 61981, + "models bert achieved": 34771, + "knowledge pre trained": 27573, + "tune language model": 58856, + "language model predict": 28186, + "fully supervised training": 21743, + "trained weak supervision": 57912, + "focused english language": 21221, + "text classification based": 56469, + "task language modeling": 55160, + "ground truth word": 23257, + "method effectively improves": 32473, + "problem propose use": 42634, + "introduce novel multi": 26849, + "context dependent word": 10612, + "provide experimental results": 44067, + "paper investigate extent": 39411, + "experiments pre training": 19491, + "dynamic programming algorithm": 16490, + "knowledge paper propose": 27565, + "present novel task": 41978, + "dataset model significantly": 12999, + "training data expensive": 57992, + "data augmentation approaches": 12151, + "word embeddings obtained": 62181, + "publicly available corpora": 44337, + "collect high quality": 8943, + "graph convolution network": 23121, + "rivals state art": 48171, + "state art recent": 52652, + "paper end end": 39348, + "bidirectional lstm bilstm": 7078, + "inference nli datasets": 25675, + "trained transformer based": 57898, + "text mining natural": 56663, + "paper propose approaches": 39496, + "achieves comparable performance": 1314, + "bag words cbow": 5505, + "open domain text": 38427, + "fine tuning multilingual": 21003, + "training data leads": 58008, + "paper provide detailed": 39556, + "critical sequence training": 11793, + "attentive neural network": 4864, + "joint learning approach": 27175, + "performance neural network": 40454, + "high inter annotator": 23743, + "transformer based bert": 58457, + "based bert model": 5601, + "advancing state art": 1936, + "model trained scratch": 34476, + "supervised state art": 54052, + "dataset propose new": 13038, + "art performance outperforms": 4340, + "case study demonstrate": 7799, + "better existing methods": 6888, + "task proposed model": 55306, + "detailed ablation studies": 14409, + "multitask learning framework": 36324, + "analysis pre trained": 2721, + "task propose simple": 55303, + "extensive experiments benchmarks": 19882, + "downstream tasks propose": 16363, + "data work propose": 12779, + "methods end end": 32839, + "learning improve performance": 29678, + "processing nlp recent": 42908, + "overcome data scarcity": 39061, + "data scarcity low": 12626, + "scarcity low resource": 48673, + "state ofthe art": 52706, + "markov chain monte": 31842, + "chain monte carlo": 7960, + "variety real world": 61289, + "black box models": 7191, + "evaluation results demonstrate": 18701, + "nearest neighbor knn": 36519, + "lack publicly available": 27909, + "fine tuning based": 20980, + "create high quality": 11699, + "make code available": 31549, + "previous research focused": 42271, + "paper propose knowledge": 39519, + "range dependencies paper": 44913, + "success pre trained": 53718, + "pre trained representations": 41552, + "fine tuning task": 21026, + "zero shot evaluation": 63160, + "knowledge knowledge graphs": 27537, + "achieves better results": 1309, + "based attention mechanisms": 5582, + "features attention mechanism": 20527, + "dataset method achieves": 12993, + "play important roles": 40973, + "lack comprehensive survey": 27879, + "transformer based architectures": 58456, + "native non native": 36405, + "binary classification model": 7145, + "models bert shown": 34776, + "different downstream tasks": 14910, + "significantly affect performance": 50936, + "benchmark datasets results": 6460, + "despite significant progress": 14389, + "classification low resource": 8489, + "language model specifically": 28195, + "unstructured text data": 59672, + "simple efficient method": 51163, + "training data compared": 57983, + "tasks recent studies": 55837, + "dutch language model": 16478, + "tasks pre trained": 55803, + "leads improved performance": 29316, + "cross modal representations": 11865, + "applied state art": 3296, + "denoising auto encoder": 14064, + "encoder pre trained": 17533, + "previous approaches focused": 42239, + "pre training corpus": 41572, + "effective pre training": 16686, + "pre training using": 41602, + "tasks low resource": 55733, + "large scale transformer": 29005, + "conditional masked language": 9999, + "achieved tremendous success": 1280, + "fine tuning multi": 21002, + "reach new state": 45051, + "outperforms strong baseline": 38949, + "strong baseline methods": 53004, + "main contribution paper": 31431, + "level self attention": 30204, + "pre trained parameters": 41551, + "enables model learn": 17444, + "comparable better results": 9292, + "work propose unified": 62788, + "benchmark datasets approach": 6447, + "tasks work investigate": 55970, + "unlabeled target domain": 59580, + "large language model": 28898, + "extensive automatic human": 19858, + "pre trained lms": 41541, + "address issue present": 1766, + "detailed ablation study": 14410, + "data machine translation": 12473, + "propose novel dynamic": 43532, + "surpasses previous state": 54175, + "task conduct experiments": 54967, + "language processing based": 28399, + "trained models like": 57803, + "model works better": 34547, + "model publicly available": 34262, + "scores state art": 48923, + "question answer qa": 44688, + "encoder representation transformers": 17537, + "representation transformers bert": 46599, + "experimental results standard": 19315, + "scale language models": 48588, + "corpus manually annotated": 11378, + "method outperforms strong": 32606, + "speech recognition errors": 52285, + "approach pre train": 3644, + "achieve strong performance": 1206, + "joint pre training": 27184, + "outperform strong baseline": 38825, + "contextual word embedding": 10788, + "observe fine tuning": 38134, + "rule based baseline": 48381, + "propose bert based": 43313, + "based models applied": 5868, + "general domain corpora": 22053, + "multi label multi": 35977, + "label multi class": 27716, + "weighted average f1": 61925, + "average f1 scores": 5408, + "widely spoken language": 62003, + "spoken language world": 52362, + "domain paper propose": 16131, + "commonly used datasets": 9223, + "resource languages low": 47241, + "improving pre trained": 25191, + "present novel corpus": 41969, + "attention academia industry": 4708, + "deep learning recently": 13718, + "recently pre training": 45450, + "pre training models": 41585, + "models significantly improved": 35509, + "nlp tasks question": 37545, + "using masked language": 60790, + "methods significant margin": 33038, + "aim bridge gap": 2140, + "high resource low": 23794, + "resource low resource": 47251, + "paper propose self": 39531, + "order make use": 38638, + "systems real world": 54611, + "recently graph neural": 45431, + "multi hop questions": 35971, + "simple fine tuning": 51171, + "patterns paper propose": 39974, + "languages experimental results": 28665, + "promising results compared": 43180, + "nlp tasks limited": 37539, + "self supervised pretraining": 49216, + "large transformer models": 29035, + "different model architectures": 14994, + "model architectures training": 33578, + "data real world": 12586, + "propose pre train": 43588, + "given input text": 22750, + "language models pre": 28300, + "machine translation order": 31374, + "experiments diverse set": 19420, + "arabic natural language": 4003, + "tasks like sentiment": 55727, + "transformers based models": 58522, + "art results nlp": 4381, + "allows model learn": 2473, + "datasets various sizes": 13480, + "evaluation metrics human": 18650, + "public large scale": 44323, + "et al 2020": 18409, + "facilitate future research": 20270, + "understanding human language": 59350, + "models self supervised": 35475, + "corpora fine tuned": 11204, + "micro averaged f1": 33221, + "fine tuning small": 21018, + "publicly available benchmark": 44336, + "various deep learning": 61323, + "available github repository": 5300, + "generative pre training": 22604, + "data available languages": 12178, + "using meta learning": 60799, + "supervised zero shot": 54073, + "sequence level knowledge": 49950, + "level knowledge distillation": 30140, + "best performance using": 6792, + "results zero shot": 47915, + "specific bert models": 52050, + "different domains languages": 14907, + "models different sizes": 34917, + "paper describes work": 39334, + "task remains challenging": 55335, + "present baseline results": 41854, + "real world environment": 45128, + "cross lingual representation": 11847, + "unsupervised weakly supervised": 59747, + "different use cases": 15117, + "topic classification task": 57395, + "performance proposed approach": 40505, + "improve model accuracy": 24871, + "pre training strategy": 41595, + "experiments conducted real": 19386, + "model achieves superior": 33527, + "learning methods deep": 29732, + "methods deep learning": 32815, + "enhance state art": 17924, + "data different domains": 12282, + "require labeled data": 46866, + "existing widely used": 19169, + "contextual embedding models": 10764, + "cross lingual alignment": 11829, + "general natural language": 22073, + "sufficient labeled data": 53805, + "datasets validate effectiveness": 13477, + "alleviate data scarcity": 2403, + "datasets code publicly": 13175, + "speech natural language": 52274, + "widely spoken languages": 62004, + "future research efforts": 21889, + "cross lingual tasks": 11853, + "cross lingual generalization": 11837, + "embedding models bert": 17046, + "experiments approach outperforms": 19357, + "extraction aims extract": 20046, + "features word level": 20697, + "core natural language": 11152, + "pre training objective": 41586, + "achieves similar better": 1374, + "studies natural language": 53286, + "active learning strategies": 1477, + "tasks benchmark datasets": 55520, + "experimental results bert": 19274, + "capture document level": 7665, + "word embeddings bert": 62159, + "cross lingual cross": 11830, + "domain test sets": 16207, + "non english languages": 37650, + "fasttext word embeddings": 20449, + "bert language models": 6669, + "tools natural language": 57383, + "feature extraction classification": 20486, + "detailed analysis experiments": 14413, + "plays fundamental role": 40998, + "lingual representation learning": 30721, + "dataset used train": 13127, + "train large scale": 57601, + "large scale cross": 28968, + "lingual pre trained": 30718, + "pre training objectives": 41587, + "training data work": 58050, + "data difficult obtain": 12285, + "use external knowledge": 59887, + "generate adversarial examples": 22177, + "strong baselines large": 53015, + "based pre training": 5943, + "pre training based": 41569, + "low data scenarios": 31141, + "traditional feature based": 57519, + "feature based methods": 20477, + "attention past years": 4807, + "order address issue": 38591, + "propose multi level": 43476, + "models proposed approach": 35374, + "self supervised framework": 49208, + "existing data augmentation": 19052, + "additional pre training": 1693, + "bert based language": 6622, + "achieved significant progress": 1271, + "different languages domains": 14969, + "language models ptlms": 28312, + "covid 19 pandemic": 11669, + "used pre train": 60263, + "art transformer models": 4434, + "relation extraction methods": 45976, + "models plms achieved": 35326, + "proposed approach compared": 43724, + "pre trained nlp": 41550, + "trained nlp models": 57832, + "fine tuned model": 20963, + "tasks transformer based": 55943, + "model multiple languages": 34114, + "approaches outperform strong": 3889, + "real world task": 45140, + "methods code available": 32783, + "present series experiments": 42009, + "automatic data augmentation": 5077, + "empirical results state": 17346, + "evaluation metric based": 18644, + "novel pre training": 37895, + "contextualized language models": 10805, + "using new dataset": 60838, + "bert xlm roberta": 6738, + "unlabeled data using": 59567, + "additional unlabeled data": 1709, + "pre trained contextualized": 41524, + "information extraction models": 25863, + "accuracy experimental results": 972, + "results significantly outperform": 47847, + "outperform previously reported": 38814, + "large scale pre": 28994, + "scale pre trained": 48612, + "push state art": 44427, + "use knowledge distillation": 59919, + "convolutional networks gcn": 11110, + "datasets approach outperforms": 13156, + "diverse set tasks": 15718, + "sentence sentence pair": 49642, + "based transformer based": 6108, + "benchmarks experimental results": 6524, + "robustness adversarial attacks": 48273, + "languages english low": 28656, + "constructing high quality": 10420, + "annotations experimental results": 2991, + "paper propose query": 39530, + "fine tuned language": 20961, + "tuned language model": 58876, + "outbreak covid 19": 38763, + "data scarcity problem": 12627, + "pre training bert": 41570, + "technique natural language": 56040, + "existing pre training": 19129, + "introduce new model": 26838, + "significant computational resources": 50857, + "models achieved promising": 34679, + "data annotation process": 12137, + "method automatically construct": 32394, + "tasks especially low": 55618, + "training neural models": 58189, + "transformer based text": 58475, + "finite state machine": 21058, + "amortized variational inference": 2542, + "improve cross lingual": 24838, + "language modeling mlm": 28211, + "large margin achieves": 28904, + "tasks compared previous": 55548, + "use word embedding": 60074, + "models shown impressive": 35499, + "language understanding benchmarks": 28545, + "different pre trained": 15029, + "trained models task": 57809, + "outperforms fine tuning": 38903, + "strong baselines automatic": 53010, + "baselines automatic human": 6236, + "auto regressive language": 5022, + "regressive language models": 45827, + "task domain specific": 55033, + "model able outperform": 33493, + "introduced bert model": 26881, + "acoustic linguistic features": 1437, + "natural language statements": 36450, + "pre trained lm": 41540, + "data available url": 12181, + "propose model called": 43465, + "graph based model": 23107, + "pretraining fine tuning": 42203, + "paper present automatic": 39445, + "effectiveness proposed techniques": 16809, + "improve robustness models": 24921, + "current pre trained": 12000, + "fine tuning performance": 21008, + "fine tuned roberta": 20967, + "semeval 2020 shared": 49436, + "2020 shared task": 292, + "language modeling datasets": 28207, + "improves zero shot": 25169, + "fine tune pretrained": 20954, + "language model task": 28197, + "obtain large scale": 38180, + "based models natural": 5874, + "state art encoder": 52609, + "art encoder decoder": 4254, + "f1 score 88": 20212, + "nlp tasks open": 37542, + "processing nlp existing": 42903, + "proposed multi task": 43866, + "information work propose": 26165, + "datasets method achieves": 13329, + "able generate high": 697, + "extensive ablation studies": 19853, + "recent advances nlp": 45287, + "art performance code": 4330, + "performance code publicly": 40241, + "systems paper introduce": 54580, + "significant performance gap": 50909, + "recent years increasing": 45387, + "covering wide range": 11661, + "neural models perform": 36981, + "paper bridge gap": 39281, + "extensive experiments widely": 19905, + "based models using": 5884, + "alleviates data scarcity": 2423, + "source code dataset": 51746, + "studies mainly focus": 53280, + "samples experimental results": 48473, + "experiments named entity": 19476, + "large human annotated": 28887, + "address problem introducing": 1786, + "art models trained": 4300, + "form natural language": 21330, + "recent advances artificial": 45281, + "advances artificial intelligence": 1907, + "train bert based": 57567, + "propose novel iterative": 43541, + "non english language": 37649, + "multi class text": 35948, + "class text classification": 8413, + "results compared standard": 47549, + "using model trained": 60807, + "zero shot model": 63169, + "utilizing external knowledge": 61123, + "knowledge graph extracted": 27502, + "encoded pre trained": 17483, + "impressive performance various": 24813, + "performance various benchmarks": 40622, + "art sota models": 4407, + "release data code": 46150, + "method fine tuning": 32511, + "approaches fine tuning": 3828, + "method achieves superior": 32368, + "sub optimal performance": 53527, + "ability pre trained": 632, + "trained model fine": 57792, + "data propose new": 12570, + "art methods automatic": 4283, + "present comprehensive study": 41873, + "guided pre training": 23349, + "fundamental nlp task": 21784, + "resource languages english": 47239, + "models achieved high": 34677, + "corpora paper propose": 11230, + "propose novel multilingual": 43551, + "achieving high performance": 1409, + "compared existing methods": 9408, + "various methods proposed": 61362, + "training dataset paper": 58053, + "standard fine tuning": 52494, + "instead fine tuning": 26450, + "using integer linear": 60739, + "using bi directional": 60593, + "using multilingual bert": 60820, + "cross lingual zero": 11859, + "reinforcement learning optimize": 45876, + "challenging natural language": 8117, + "superior performance proposed": 53938, + "performance proposed framework": 40506, + "generated natural language": 22302, + "automated human evaluation": 5047, + "language models usually": 28343, + "adversarial learning framework": 1973, + "human automatic evaluation": 24111, + "supervised manner using": 54016, + "method significantly better": 32654, + "human reading comprehension": 24230, + "key value pairs": 27342, + "demonstrate effectiveness model": 13901, + "domain domain adaptation": 16052, + "effective fine tuning": 16652, + "given pre trained": 22772, + "source natural language": 51787, + "pretrained nlp models": 42178, + "success pre training": 53719, + "large annotated datasets": 28847, + "model data augmentation": 33735, + "task sentence level": 55358, + "sentence level using": 49596, + "range downstream tasks": 44918, + "models pre training": 35340, + "performance zero shot": 40635, + "multilingual bert fine": 36065, + "accuracy zero shot": 1073, + "different fine tuning": 14936, + "earth mover distance": 16521, + "recent work proposed": 45372, + "using data augmentation": 60643, + "languages language families": 28705, + "scale knowledge graph": 48583, + "art results terms": 4388, + "using human evaluation": 60732, + "higher correlation human": 23818, + "exact match score": 18853, + "standard multi task": 52509, + "single task multi": 51346, + "tasks improve performance": 55672, + "german french italian": 22669, + "recent success large": 45356, + "multilingual pretrained language": 36113, + "language models provides": 28311, + "effective zero shot": 16718, + "existing methods adopt": 19093, + "demonstrate pre training": 13961, + "code datasets publicly": 8810, + "models bert gpt": 34773, + "address challenge present": 1744, + "pre train model": 41519, + "dataset fine tune": 12931, + "great progress recent": 23213, + "current end end": 11974, + "learning models trained": 29762, + "contextual language models": 10774, + "capabilities language models": 7598, + "entity recognition question": 18130, + "recognition question answering": 45529, + "multilingual transformer based": 36130, + "recent embedding based": 45308, + "embedding based approaches": 17016, + "overcome issue propose": 39064, + "works pre trained": 62902, + "open source code": 38448, + "answering qa tasks": 3091, + "advances language modeling": 1913, + "simply fine tuning": 51252, + "sentence level annotations": 49579, + "existing work focuses": 19174, + "improve language model": 24867, + "language model performance": 28182, + "introduce new framework": 26836, + "improve performance state": 24899, + "english language models": 17833, + "transfer learning multi": 58388, + "improve zero shot": 24941, + "reference free evaluation": 45741, + "state art cross": 52602, + "art cross lingual": 4243, + "use large scale": 59927, + "diversity training data": 15742, + "useful downstream applications": 60362, + "promising future research": 43167, + "task specific dataset": 55391, + "achieves new sota": 1349, + "make use existing": 31608, + "existing methods fail": 19094, + "based contrastive learning": 5648, + "contrastive learning based": 10900, + "require hand crafted": 46859, + "media platforms like": 32177, + "available online https": 5336, + "online https github": 38370, + "propose machine learning": 43446, + "available https aka": 5308, + "https aka ms": 24053, + "recent years previous": 45393, + "reported state art": 46456, + "features downstream tasks": 20566, + "indicate proposed method": 25533, + "proposed method based": 43809, + "used different tasks": 60151, + "significant performance drop": 50906, + "especially training data": 18307, + "train model end": 57607, + "open sourced code": 38463, + "attention mechanism capture": 4775, + "propose non autoregressive": 43519, + "agent reinforcement learning": 2059, + "attention based architectures": 4715, + "feature engineering based": 20484, + "capture semantic syntactic": 7709, + "task specific architectures": 55388, + "generation extensive experiments": 22461, + "sentiment analysis approaches": 49816, + "level pre trained": 30179, + "based shot learning": 6033, + "classification models based": 8497, + "issue present novel": 27074, + "tasks work explore": 55969, + "generation sequence sequence": 22546, + "task best knowledge": 54936, + "neural networks applied": 37035, + "strong baselines tasks": 53017, + "led state art": 29995, + "art performance achieved": 4325, + "attention network gat": 4796, + "results benchmark dataset": 47521, + "semeval 2020 task": 49437, + "uses graph neural": 60513, + "word embeddings represent": 62186, + "relation extraction question": 45980, + "recent research efforts": 45341, + "yielded state art": 63107, + "propose transformer based": 43682, + "transformer based network": 58467, + "task multi class": 55225, + "task learning techniques": 55183, + "demonstrate superiority model": 13987, + "including low resource": 25271, + "models perform poorly": 35314, + "using recently developed": 60899, + "training bert model": 57946, + "embeddings map words": 17172, + "evaluate model performance": 18473, + "incorporating commonsense knowledge": 25380, + "labels natural language": 27841, + "text challenging task": 56464, + "strong pre trained": 53044, + "approach experimental results": 3526, + "research low resource": 47069, + "models recent work": 35408, + "language models models": 28285, + "model trained predict": 34474, + "pre train fine": 41516, + "end end transformer": 17669, + "baseline future research": 6172, + "open domain conversational": 38420, + "data augmentation generate": 12156, + "guide future work": 23333, + "pre trained different": 41527, + "lack annotated datasets": 27874, + "f1 score 72": 20203, + "proposes new approach": 43938, + "data shared task": 12656, + "transformer based approach": 58453, + "make code models": 31551, + "code models publicly": 8837, + "research community paper": 47003, + "entity recognition using": 18136, + "variety language understanding": 61276, + "models substantially outperform": 35549, + "open sourced https": 38464, + "sourced https github": 51825, + "contextualized language representations": 10806, + "open source library": 38452, + "powerful language models": 41436, + "models transformer based": 35630, + "based models bert": 5869, + "art performance compared": 4331, + "models data available": 34882, + "nlp tasks pre": 37544, + "publicly https github": 44358, + "large amounts labeled": 28837, + "reaching state art": 45061, + "present simple efficient": 42017, + "address challenge introduce": 1743, + "speech tags dependency": 52305, + "shows significant improvement": 50802, + "end end pipeline": 17658, + "task semantic parsing": 55353, + "challenging nlp task": 8121, + "generation challenging task": 22433, + "languages non trivial": 28740, + "non commercial use": 37642, + "nlp systems paper": 37529, + "propose approach automatically": 43297, + "significant improvement accuracy": 50874, + "sentiment analysis classification": 49818, + "social networks twitter": 51598, + "topic modeling approach": 57416, + "semantic information text": 49287, + "art models large": 4296, + "time consuming work": 57138, + "self attention weights": 49188, + "labeled data achieve": 27739, + "data achieve state": 12111, + "conversations social media": 11064, + "growing body work": 23291, + "recent studies report": 45351, + "representations large scale": 46701, + "tuning specific tasks": 58958, + "fine tuning improves": 20994, + "representation pre trained": 46568, + "extended new languages": 19838, + "documents experimental results": 15878, + "advances pre trained": 1921, + "paper aim improve": 39261, + "present manually annotated": 41941, + "bert based classifiers": 6621, + "baselines future work": 6266, + "language models significantly": 28320, + "used fine tuning": 60193, + "downstream tasks compared": 16354, + "pretrained models publicly": 42171, + "art performance popular": 4342, + "paper conduct comprehensive": 39298, + "using pretrained language": 60872, + "information word level": 26162, + "task fine tuned": 55089, + "language models widely": 28347, + "language models study": 28325, + "corpus pre training": 11405, + "implementation publicly available": 24643, + "publicly available github": 44342, + "training data present": 58026, + "fully supervised models": 21742, + "paper propose improve": 39517, + "introduce transformer based": 26874, + "information word embedding": 26160, + "encoder experimental results": 17514, + "transformer encoder decoder": 58483, + "low resource situations": 31192, + "labels experimental results": 27822, + "tasks large scale": 55714, + "scale pre training": 48613, + "replaced token detection": 46406, + "based contextual embeddings": 5644, + "data augmentation framework": 12155, + "data fine tune": 12365, + "pre trained knowledge": 41537, + "future research direction": 21887, + "github https github": 22716, + "languages work propose": 28824, + "demonstrate competitive performance": 13883, + "sentiment analysis dataset": 49819, + "post processing technique": 41353, + "bert like models": 6676, + "word boundary information": 62122, + "proposed method outperformed": 43823, + "models shown success": 35503, + "shot learning setting": 50629, + "novel objective function": 37889, + "data high quality": 12402, + "comprehensive experiments demonstrate": 9792, + "wide range languages": 61970, + "recent studies demonstrated": 45350, + "self supervised training": 49219, + "language processing especially": 28406, + "trained transformer model": 57901, + "machine translation code": 31351, + "supervised end end": 53983, + "methods automatic human": 32762, + "based models experiments": 5871, + "f1 score test": 20225, + "context aware representations": 10591, + "experiments analysis demonstrate": 19350, + "machine translation document": 31355, + "different granularity levels": 14947, + "world applications paper": 62929, + "domain test data": 16205, + "pseudo labeled data": 44276, + "high quality pseudo": 23783, + "suffers data scarcity": 53790, + "data scarcity issue": 12625, + "issue paper propose": 27071, + "code mixing phenomenon": 8832, + "processing existing methods": 42871, + "methods mainly focus": 32937, + "optimal transport ot": 38534, + "pre trained sequence": 41557, + "strong zero shot": 53059, + "github com salesforce": 22710, + "amounts labeled data": 2552, + "scale unlabeled data": 48636, + "work propose approach": 62772, + "neural network applied": 36994, + "confirm effectiveness proposed": 10130, + "neural networks achieve": 37033, + "art performance addition": 4326, + "propose novel generative": 43537, + "language models achieving": 28228, + "experimental results real": 19308, + "second stage fine": 49022, + "challenging problem paper": 8128, + "task sequence labeling": 55363, + "achieves promising results": 1356, + "methods neural network": 32957, + "language models text": 28332, + "xlm roberta model": 63031, + "trained multilingual language": 57818, + "related covid 19": 45894, + "propose novel bert": 43526, + "train machine translation": 57605, + "dataset pre trained": 13032, + "trained transformer language": 57900, + "language experimental results": 28060, + "experimental results state": 19316, + "language models transfer": 28336, + "nearest neighbors knn": 36523, + "networks experimental results": 36852, + "state art t5": 52677, + "community recent years": 9275, + "languages english arabic": 28652, + "approach bring significant": 3435, + "visual textual information": 61672, + "shown pre trained": 50738, + "trained models perform": 57805, + "speech named entity": 52272, + "contrastive learning specifically": 10910, + "experimental results benchmarks": 19273, + "information external knowledge": 25856, + "large number labeled": 28923, + "time work propose": 57239, + "translation speech translation": 58681, + "translation quality paper": 58665, + "paper describes proposed": 39328, + "results transformer based": 47892, + "computer vision cv": 9895, + "transformer based approaches": 58454, + "paper proposes model": 39547, + "fine tuned transformer": 20972, + "methods proposed method": 32997, + "strong baselines task": 53016, + "context dependent context": 10611, + "dataset human evaluation": 12955, + "datasets used training": 13471, + "model best model": 33622, + "f1 score 92": 20215, + "language models provide": 28310, + "answer natural language": 3040, + "utilizing pre trained": 61128, + "language models downstream": 28247, + "address issue paper": 1765, + "exact match f1": 18852, + "unlabeled target language": 59581, + "covid 19 related": 11671, + "learning propose novel": 29824, + "language model pretrained": 28187, + "model pretrained large": 34230, + "sentiment analysis based": 49817, + "end end multi": 17655, + "text generative models": 56607, + "domain knowledge base": 16094, + "demonstrate proposed models": 13970, + "based approaches proposed": 5573, + "information used improve": 26143, + "advancements deep learning": 1898, + "public datasets model": 44317, + "choice pre trained": 8334, + "trained models used": 57810, + "inspired recent progress": 26414, + "level character level": 30073, + "achieves higher performance": 1336, + "proposed framework significantly": 43786, + "commonsense knowledge graph": 9236, + "best performance compared": 6791, + "learning models task": 29761, + "data models code": 12495, + "large transformer based": 29034, + "pretrained large language": 42162, + "training data hand": 58000, + "models multilingual models": 35239, + "models different tasks": 34918, + "vision language models": 61638, + "domains labeled data": 16266, + "adaptive fine tuning": 1575, + "f1 scores 70": 20228, + "propose novel training": 43570, + "test set consisting": 56373, + "strong baselines especially": 53012, + "classification task paper": 8566, + "evidence lower bound": 18813, + "different deep learning": 14894, + "scale training data": 48632, + "task learning strategy": 55182, + "consistently outperforms strong": 10307, + "task field natural": 55082, + "propose novel contrastive": 43528, + "new method automatically": 37251, + "code mixed language": 8827, + "propose novel pre": 43555, + "novel pre trained": 37894, + "experiments conducted benchmark": 19382, + "conducted benchmark datasets": 10075, + "art performance downstream": 4334, + "monolingual bert based": 35791, + "fine tuned downstream": 20960, + "help pre trained": 23585, + "usually requires large": 61067, + "benchmark dataset demonstrate": 6445, + "dataset demonstrate superiority": 12887, + "modern deep learning": 35705, + "trained multilingual bert": 57817, + "pre trained gpt": 41536, + "respectively paper describes": 47376, + "paper describes developed": 39322, + "obtained f1 score": 38210, + "paper addresses gap": 39256, + "fine tuning work": 21033, + "shared task participants": 50504, + "pointer network model": 41060, + "datasets used experiments": 13469, + "graph based semantic": 23112, + "using small set": 60949, + "languages propose novel": 28759, + "team semeval 2020": 56005, + "specifically introduce novel": 52210, + "task learning architecture": 55170, + "bert achieved great": 6604, + "fine tuning techniques": 21029, + "better performance achieved": 6928, + "models achieved excellent": 34675, + "language models help": 28260, + "propose model based": 43464, + "context covid 19": 10602, + "datasets method outperforms": 13330, + "using language models": 60754, + "easily incorporated existing": 16544, + "model achieve better": 33503, + "text classification text": 56489, + "fine tuning explore": 20990, + "performance compared baselines": 40251, + "standard natural language": 52512, + "remains poorly understood": 46346, + "presents large scale": 42089, + "layer pre trained": 29203, + "language models prlms": 28304, + "doi org 10": 15989, + "models masked language": 35218, + "open ended text": 38432, + "ended text generation": 17740, + "practical applications paper": 41459, + "code dataset released": 8807, + "training large models": 58149, + "open ended language": 38430, + "relation extraction event": 45974, + "data augmentation training": 12165, + "class classification task": 8397, + "unsupervised graph based": 59701, + "graph based ranking": 23110, + "based ranking model": 5973, + "multiple benchmark datasets": 36173, + "processing nlp community": 42901, + "ambiguity natural language": 2527, + "monolingual multilingual settings": 35808, + "used training data": 60338, + "pre training stage": 41592, + "supervised contrastive learning": 53972, + "significantly higher performance": 50961, + "attention research community": 4822, + "new manually annotated": 37246, + "text paper propose": 56691, + "bert experimental results": 6656, + "large parallel data": 28935, + "word segmentation cws": 62293, + "art sota performance": 4408, + "traditional state art": 57546, + "neural network gnn": 37009, + "applications paper introduce": 3228, + "datasets model outperforms": 13336, + "machine translation experiment": 31358, + "resources publicly available": 47330, + "github com csebuetnlp": 22697, + "models recent advances": 35407, + "transformer based methods": 58463, + "learning mtl framework": 29768, + "models lack robustness": 35160, + "domain paper present": 16130, + "methods improve robustness": 32896, + "develop end end": 14586, + "model domain specific": 33789, + "position aware attention": 41261, + "social media comments": 51570, + "document paper propose": 15818, + "performance public datasets": 40513, + "models achieved significant": 34681, + "ablation study demonstrates": 660, + "multi task transfer": 36031, + "shot language transfer": 50623, + "based models fine": 5872, + "models large number": 35166, + "work systematically study": 62835, + "analysis provides insights": 2734, + "dataset code publicly": 12842, + "understanding slu tasks": 59401, + "structural semantic information": 53085, + "aware text generation": 5474, + "task fine tune": 55088, + "fine tune bert": 20948, + "art results english": 4377, + "experimental results illustrate": 19288, + "10 training data": 54, + "learning multi task": 29770, + "efficient end end": 16871, + "provide useful information": 44149, + "texts social media": 56928, + "outperforms existing approaches": 38897, + "demonstrate approach effectively": 13869, + "capture local global": 7692, + "art performance public": 4344, + "performance transformer based": 40610, + "low resource high": 31179, + "generated language models": 22296, + "focus low resource": 21178, + "better comparable performance": 6865, + "simple effective data": 51152, + "effective data augmentation": 16642, + "data augmentation strategies": 12160, + "training data fine": 57997, + "benchmarks demonstrate effectiveness": 6515, + "code datasets available": 8809, + "improves performance strong": 25144, + "cc nc sa": 7898, + "nc sa license": 36499, + "based distant supervision": 5685, + "state art metrics": 52631, + "release code data": 46145, + "model predictions paper": 34225, + "overview current state": 39110, + "widely used language": 62017, + "training large language": 58148, + "self supervised models": 49212, + "self supervised contrastive": 49206, + "languages zero shot": 28828, + "modeling natural language": 34604, + "experiments datasets different": 19400, + "adversarial training improve": 1990, + "incorporate external knowledge": 25355, + "graph structural information": 23169, + "information knowledge base": 25937, + "obtain promising results": 38184, + "propose novel knowledge": 43543, + "years existing approaches": 63060, + "existing approaches focus": 19027, + "modern language models": 35708, + "models require large": 35442, + "data costly time": 12256, + "github repository https": 22720, + "repository https github": 46465, + "specific downstream tasks": 52075, + "downstream tasks natural": 16360, + "shown impressive performance": 50719, + "trained models fine": 57799, + "model word embedding": 34539, + "small amounts data": 51463, + "code mixed dataset": 8825, + "proposed pre training": 43880, + "scale empirical study": 48568, + "bilstm crf model": 7131, + "bilstm crf models": 7132, + "parametric non parametric": 39733, + "augmentation method improve": 4962, + "models better performance": 34783, + "scale labeled data": 48585, + "resulting poor performance": 47473, + "poor performance domain": 41141, + "general domain specific": 22055, + "language model results": 28192, + "different parts speech": 15021, + "results https github": 47662, + "datasets pre trained": 13373, + "model training evaluation": 34481, + "word embeddings introduce": 62173, + "end end setting": 17660, + "trained models bert": 57796, + "bert roberta albert": 6712, + "novel bert based": 37778, + "performance variety tasks": 40620, + "integral natural language": 26503, + "answer qa pairs": 3047, + "improvement zero shot": 25041, + "future work code": 21900, + "github com swarnahub": 22711, + "use graph convolutional": 59903, + "data sets evaluation": 12650, + "outperforms baseline approaches": 38868, + "tackle issue propose": 54705, + "tasks recent works": 55839, + "transformer models like": 58500, + "existing approaches usually": 19033, + "consistently significantly outperform": 10311, + "based zero shot": 6144, + "neural based models": 36939, + "training data neural": 58021, + "employ pre trained": 17389, + "future data collection": 21866, + "play essential role": 40970, + "case based reasoning": 7789, + "datasets approach significantly": 13157, + "art embedding based": 4252, + "method code available": 32416, + "examples paper present": 18921, + "tasks zero shot": 55975, + "classification tasks paper": 8570, + "token level predictions": 57297, + "significantly improves generalization": 50975, + "models including bert": 35119, + "make data code": 31559, + "code freely available": 8815, + "fine tuning generative": 20993, + "loss fine tuning": 31092, + "knowledge work propose": 27650, + "training data long": 58012, + "popular benchmark dataset": 41158, + "mixture experts moe": 33420, + "wide variety downstream": 61982, + "issue propose simple": 27078, + "especially natural language": 18290, + "models require significant": 35443, + "address challenges introduce": 1748, + "generative language model": 22592, + "text generation aims": 56594, + "standard language modeling": 52497, + "state art bert": 52590, + "f1 score using": 20226, + "fully supervised methods": 21741, + "methods transfer learning": 33083, + "feature attribution methods": 20475, + "remains major challenge": 46341, + "art performance unsupervised": 4350, + "generation task based": 22559, + "language model perform": 28181, + "commonsense question answering": 9238, + "unsupervised pre trained": 59721, + "generalization ability different": 22115, + "zero shot approaches": 63153, + "training data train": 58043, + "generated language model": 22295, + "level paper propose": 30173, + "problem fine tuning": 42569, + "directions english german": 15290, + "release large dataset": 46155, + "information pre training": 26015, + "effective way learn": 16714, + "using knowledge distillation": 60746, + "effectiveness approach achieving": 16767, + "strong state art": 53052, + "submission shared task": 53576, + "framework cross lingual": 21484, + "multiple languages using": 36238, + "neural ranking model": 37089, + "zero shot methods": 63168, + "datasets widely used": 13485, + "experiments ablation studies": 19345, + "language model mlm": 28177, + "models prior work": 35357, + "pre training pre": 41589, + "training pre training": 58212, + "task large scale": 55164, + "great success various": 23219, + "paper propose address": 39491, + "catastrophic forgetting problem": 7834, + "metrics correlate human": 33155, + "correlate human judgements": 11504, + "code released https": 8853, + "trained self supervised": 57858, + "contrastive pre training": 10917, + "prior work focuses": 42422, + "motivate future research": 35860, + "tasks remains unclear": 55852, + "remains unclear extent": 46352, + "end introduce novel": 17677, + "text based methods": 56455, + "work propose multi": 62782, + "trained relatively small": 57853, + "token level representations": 57298, + "achieved comparable performance": 1225, + "comparable performance compared": 9302, + "impressive performance gains": 24812, + "capture high level": 7677, + "integrate pre trained": 26509, + "context open domain": 10683, + "models introduce new": 35143, + "consistent significant improvement": 10286, + "language processing various": 28440, + "word phrase level": 62264, + "f1 scores previous": 20230, + "previous unsupervised methods": 42299, + "trained contextual embeddings": 57693, + "propose novel hybrid": 43540, + "need task specific": 36593, + "new method learning": 37254, + "new dataset named": 37167, + "good starting point": 22946, + "room future work": 48339, + "specific pre training": 52128, + "downstream applications including": 16334, + "large amounts domain": 28835, + "black box nature": 7192, + "increasing model size": 25455, + "present empirical results": 41898, + "sequence sequence transformer": 49999, + "human evaluation present": 24152, + "hierarchical attention model": 23658, + "resource settings extensive": 47276, + "settings extensive experiments": 50373, + "propose novel evaluation": 43535, + "widely used benchmark": 62011, + "human annotated corpus": 24096, + "small training data": 51507, + "work propose knowledge": 62779, + "baselines automatic evaluation": 6235, + "issue paper proposes": 27072, + "paper address challenge": 39251, + "using pre training": 60867, + "model xlm roberta": 34549, + "cross attention mechanism": 11808, + "small number examples": 51488, + "model agnostic framework": 33553, + "available data set": 5277, + "semantic parsing paper": 49314, + "multi task approach": 36016, + "outperforms recent state": 38938, + "self supervised objectives": 49214, + "text question answering": 56725, + "overcome problem propose": 39071, + "model based neural": 33606, + "models demonstrated strong": 34896, + "previous studies focus": 42288, + "mitigate issue propose": 33386, + "nlp recent years": 37518, + "datasets low resource": 13321, + "automated evaluation metrics": 5042, + "tasks entity linking": 55616, + "language models unsupervised": 28339, + "pretrained sequence sequence": 42184, + "text text transfer": 56811, + "text transfer transformer": 56822, + "transfer transformer t5": 58428, + "unified text text": 59481, + "text text format": 56808, + "available paper introduce": 5340, + "roberta language model": 48224, + "language models automatically": 28233, + "zero shot domain": 63159, + "train test sets": 57648, + "significantly boosts performance": 50946, + "new pre trained": 37286, + "works mainly focus": 62897, + "languages lack annotated": 28703, + "domain adaptive pre": 16011, + "adaptive pre training": 1579, + "art bert based": 4229, + "code data models": 8801, + "available cross lingual": 5275, + "pre training strategies": 41594, + "pre training propose": 41591, + "training propose novel": 58219, + "level contrastive learning": 30087, + "downstream language understanding": 16341, + "training data performance": 58024, + "yields substantial gains": 63135, + "available training time": 5381, + "novel self supervised": 37913, + "self supervised approach": 49205, + "datasets prior work": 13378, + "existing language models": 19082, + "knowledge intensive tasks": 27531, + "shows significant improvements": 50803, + "data active learning": 12117, + "outperforms prior state": 38933, + "provide insights future": 44094, + "improve performance task": 24901, + "best published result": 6810, + "present simple method": 42018, + "tuned language models": 58877, + "tasks like question": 55725, + "sequence prediction task": 49968, + "large scale domain": 28972, + "specifically propose novel": 52224, + "problem domain adaptation": 42545, + "present new large": 41963, + "code dataset publicly": 8806, + "tuned downstream tasks": 58874, + "achieves similar performance": 1375, + "lower resource languages": 31223, + "highly domain specific": 23896, + "weakly supervised dataset": 61863, + "language previous work": 28388, + "large volumes text": 29049, + "learning ml algorithms": 29740, + "dataset introduce new": 12969, + "language models data": 28241, + "neural models based": 36975, + "training experimental results": 58100, + "general language understanding": 22066, + "understanding evaluation benchmark": 59343, + "time consuming error": 57129, + "consuming error prone": 10444, + "sentence level representation": 49589, + "work perform extensive": 62748, + "nlp tasks information": 37536, + "specific knowledge graph": 52094, + "learning weak supervision": 29940, + "continued pre training": 10832, + "performance automatic evaluation": 40203, + "word embeddings encode": 62166, + "downstream tasks results": 16366, + "based cnn lstm": 5624, + "interactions social media": 26623, + "demonstrate proposed architecture": 13965, + "fine tuning strategies": 21023, + "suffer catastrophic forgetting": 53760, + "conduct extensive empirical": 10051, + "scarcity labeled data": 48669, + "quality generated data": 44525, + "experiments different languages": 19416, + "training examples available": 58095, + "work available https": 62584, + "art results outperforming": 4383, + "widely used improve": 62016, + "word embeddings experimental": 62169, + "recent pre trained": 45334, + "methods fine tuning": 32868, + "years pre trained": 63070, + "bert based pre": 6625, + "performs better existing": 40699, + "datasets used train": 13470, + "new multilingual dataset": 37265, + "language understanding key": 28551, + "introduce novel graph": 26846, + "paper describes contribution": 39321, + "describes contribution semeval": 14220, + "contribution semeval 2020": 10948, + "zero shot scenarios": 63175, + "trained human annotated": 57748, + "commonly used metrics": 9226, + "cross domain scenarios": 11818, + "quality human evaluation": 44531, + "learning methods learn": 29733, + "using annotated data": 60559, + "text classification framework": 56473, + "information extraction natural": 25864, + "sequential transfer learning": 50053, + "baseline models experimental": 6189, + "corpora state art": 11246, + "large pretrained transformer": 28944, + "pretrained transformer based": 42187, + "document level tasks": 15809, + "general purpose pretrained": 22088, + "previous works shown": 42322, + "increase computational cost": 25409, + "achieves sota results": 1377, + "effect data augmentation": 16612, + "compare performance proposed": 9356, + "answering knowledge graph": 3078, + "present comprehensive review": 41872, + "achieves comparable better": 1313, + "semi supervised fashion": 49458, + "training data zero": 58051, + "shot learning tasks": 50630, + "based model language": 5859, + "encoder representations transformer": 17539, + "human evaluation demonstrates": 24146, + "low high resource": 31154, + "work proposes novel": 62794, + "positive negative pairs": 41287, + "popular pre trained": 41179, + "tasks using different": 55955, + "trained encoder decoder": 57718, + "contrastive learning scl": 10909, + "end present novel": 17698, + "aspects natural language": 4548, + "recently transformer based": 45471, + "paper presents study": 39484, + "using random forest": 60892, + "experiments fine tuning": 19437, + "tuning language models": 58921, + "achieve good balance": 1146, + "consuming task paper": 10454, + "model performs par": 34205, + "performance compared baseline": 40250, + "study aims develop": 53323, + "using single model": 60946, + "dependency parsing dp": 14132, + "promising results natural": 43182, + "easy use interface": 16569, + "using automated metrics": 60572, + "demonstrate superiority approach": 13985, + "models tackle problem": 35578, + "approach outperforms competitive": 3625, + "information recent years": 26045, + "models code publicly": 34822, + "generation aims generating": 22415, + "various downstream nlp": 61333, + "shot zero shot": 50658, + "explore different approaches": 19699, + "language model improves": 28172, + "meta embedding learning": 32332, + "language models applied": 28229, + "effectiveness superiority proposed": 16814, + "implementation available https": 24639, + "training data points": 58025, + "applying state art": 3378, + "texts paper propose": 56910, + "language models knowledge": 28267, + "contrastive learning cl": 10901, + "propose contrastive learning": 43338, + "contrastive learning framework": 10903, + "efficacy proposed method": 16836, + "social media important": 51575, + "using crowd sourced": 60639, + "data work present": 12778, + "tuning large pre": 58924, + "language models demonstrated": 28243, + "sequence sequence architecture": 49981, + "language model evaluation": 28163, + "recent studies revealed": 45352, + "building cross lingual": 7441, + "learning models achieved": 29751, + "pseudo labels unlabeled": 44279, + "manner extensive experiments": 31719, + "suggest future work": 53819, + "fine tuning limited": 20997, + "multi lingual pre": 35987, + "shot learning problem": 50628, + "yield better performance": 63091, + "popular recent years": 41182, + "propose pre training": 43589, + "showing promising results": 50686, + "model takes advantage": 34440, + "model evaluate model": 33842, + "self supervised fashion": 49207, + "performance human evaluation": 40376, + "private test set": 42444, + "use language model": 59923, + "large corpus text": 28864, + "static word embedding": 52727, + "pretrained transformer model": 42189, + "shed light future": 50525, + "light future research": 30450, + "language processing requires": 28428, + "recently deep neural": 45416, + "paper propose joint": 39518, + "presents new dataset": 42095, + "rules natural language": 48393, + "experiments conducted widely": 19388, + "conducted widely used": 10099, + "ensemble model combines": 17979, + "fine tuned dataset": 20959, + "enhance model performance": 17916, + "consists sub tasks": 10332, + "training data proposed": 58029, + "bert based architectures": 6617, + "accuracy downstream tasks": 963, + "attention network model": 4797, + "present data driven": 41882, + "driven end end": 16423, + "model knowledge distillation": 34031, + "achieves promising performance": 1355, + "increasing attention recently": 25446, + "model performance various": 34197, + "stage fine tuning": 52430, + "fine tuning stages": 21021, + "performance gains compared": 40358, + "train evaluate model": 57588, + "methods machine learning": 32934, + "novel training method": 37941, + "scale unlabeled corpora": 48635, + "verify effectiveness method": 61537, + "generation pre trained": 22520, + "novel contrastive learning": 37791, + "masked language modelling": 31865, + "language understanding machine": 28552, + "tasks verify effectiveness": 55962, + "work develop novel": 62633, + "information improves performance": 25916, + "layers pre trained": 29233, + "pre trained roberta": 41553, + "task specific pre": 55403, + "domain specific pre": 16182, + "performance multi task": 40446, + "systems challenging task": 54450, + "art models struggle": 4298, + "github com ukplab": 22714, + "pretrained multilingual language": 42173, + "performance multilingual model": 40448, + "publicly available text": 44355, + "codes available https": 8875, + "pre training process": 41590, + "model compression techniques": 33687, + "novel data driven": 37798, + "distant supervision approach": 15558, + "better results existing": 6958, + "obtains comparable performance": 38244, + "multiple languages english": 36237, + "personally identifiable information": 40768, + "best f1 scores": 6765, + "highest f1 score": 23853, + "pretrained neural language": 42176, + "tasks conduct extensive": 55553, + "datasets sentiment analysis": 13417, + "general nlp tasks": 22077, + "original training set": 38735, + "trained text text": 57894, + "text text transformer": 56812, + "experiments demonstrate state": 19407, + "learning rl based": 29852, + "capture different aspects": 7663, + "fine tunes pre": 20976, + "tunes pre trained": 58896, + "2021 shared task": 299, + "various training strategies": 61410, + "propose model agnostic": 43463, + "model agnostic method": 33555, + "fine tuning transformer": 21031, + "tuning transformer based": 58972, + "model sequence sequence": 34361, + "efficacy proposed approach": 16835, + "language models zero": 28350, + "models zero shot": 35694, + "improve performance data": 24887, + "improving zero shot": 25202, + "limited amounts labeled": 30565, + "conduct ablation study": 10026, + "processing nlp approaches": 42900, + "data augmentation using": 12166, + "state art contextual": 52601, + "contextual language model": 10773, + "field machine learning": 20759, + "aware graph neural": 5451, + "demonstrated strong performance": 14022, + "publicly released code": 44363, + "released code https": 46172, + "github com gt": 22702, + "com gt salt": 9015, + "table text generation": 54690, + "given large number": 22758, + "dataset verify effectiveness": 13134, + "quality natural language": 44556, + "classification sequence tagging": 8547, + "dataset natural language": 13007, + "analysis human evaluation": 2674, + "wide variety applications": 61980, + "word embeddings derived": 62163, + "model text classification": 34457, + "dataset achieve state": 12798, + "training test time": 58294, + "proposed recent years": 43885, + "components pre trained": 9724, + "language models transformer": 28337, + "multi lingual models": 35986, + "achieve goal propose": 1143, + "model performance experiments": 34190, + "results human evaluation": 47664, + "improve performance natural": 24895, + "development language technology": 14681, + "attention computational linguistics": 4729, + "computational linguistics community": 9846, + "using graph neural": 60716, + "model simple effective": 34386, + "popular language models": 41167, + "significantly improve classification": 50964, + "transition based approach": 58538, + "transformer based pretrained": 58472, + "manually labeled data": 31782, + "language processing use": 28438, + "address problem paper": 1787, + "finally propose method": 20877, + "different languages evaluate": 14971, + "previous sota methods": 42280, + "future research present": 21892, + "methods real world": 33006, + "language models better": 28236, + "evaluate language models": 18466, + "social science research": 51603, + "high quality labeled": 23780, + "baselines terms automatic": 6310, + "models learn generate": 35176, + "simple model agnostic": 51194, + "constructing large scale": 10423, + "based self supervised": 6013, + "art models benchmark": 4294, + "models benchmark datasets": 34766, + "real life settings": 45106, + "potential future directions": 41391, + "art relation extraction": 4366, + "remains relatively unexplored": 46348, + "shot learning model": 50627, + "shot learning methods": 50626, + "shared encoder decoder": 50469, + "study zero shot": 53478, + "tasks language models": 55710, + "proposed method effectively": 43818, + "language models recent": 28313, + "domain adaptation propose": 16003, + "neural attention model": 36936, + "relevant external knowledge": 46216, + "outperforms baselines terms": 38876, + "text text generation": 56809, + "using term frequency": 60986, + "using majority voting": 60786, + "comparisons state art": 9514, + "models achieve new": 34672, + "task extensive experiments": 55070, + "types natural language": 59104, + "computer vision speech": 9897, + "learning models work": 29765, + "tasks like semantic": 55726, + "semeval 2021 task": 49439, + "language models increasingly": 28264, + "language models experimental": 28252, + "fine tuned multilingual": 20965, + "contrastive learning objectives": 10908, + "open challenges future": 38414, + "challenges future directions": 8051, + "sequence language models": 49943, + "training test datasets": 58292, + "text pre trained": 56702, + "based nlp models": 5913, + "brings significant improvement": 7344, + "demonstrate efficacy approach": 13906, + "models analysis shows": 34711, + "unlike existing approaches": 59595, + "performance different models": 40294, + "different models compared": 14997, + "language models varying": 28346, + "widely used text": 62022, + "https doi org": 24057, + "org 10 5281": 38678, + "10 5281 zenodo": 34, + "model different types": 33767, + "performance compared strong": 40256, + "terms accuracy efficiency": 56264, + "promising results cross": 43181, + "propose novel metric": 43548, + "standard data sets": 52482, + "task demonstrate approach": 55001, + "various nlp problems": 61374, + "word level character": 62227, + "demo available https": 13847, + "model performance low": 34191, + "art results achieved": 4370, + "classification models trained": 8498, + "extracting structured information": 20041, + "art results despite": 4375, + "data collected using": 12217, + "fine tuning nlp": 21005, + "input sequence length": 26332, + "low resource multilingual": 31184, + "techniques fine tuning": 56090, + "auxiliary task improve": 5241, + "order achieve better": 38589, + "automatically generated questions": 5177, + "work propose generate": 62778, + "word level embedding": 62228, + "overcome limitations propose": 39069, + "train test splits": 57649, + "slow inference speed": 51451, + "trained models paper": 57804, + "novel machine learning": 37860, + "propose new way": 43517, + "transfer learning ability": 58375, + "language model perplexity": 28183, + "models demonstrated impressive": 34895, + "word level phrase": 62235, + "level phrase level": 30177, + "method experimental results": 32496, + "code dataset available": 8805, + "traditional fine tuning": 57521, + "achieve comparable better": 1122, + "models lms trained": 35200, + "code mixed english": 8826, + "significantly outperforms multilingual": 51003, + "address limitation propose": 1777, + "token sentence level": 57306, + "phase pre training": 40806, + "effectiveness pre training": 16802, + "continuing pre training": 10838, + "fine grained annotation": 20927, + "fine grained annotations": 20928, + "experiments demonstrate superiority": 19408, + "non autoregressive transformer": 37638, + "effective domain adaptation": 16647, + "averaged f1 scores": 5423, + "models trained human": 35612, + "achieves significantly higher": 1372, + "low quality data": 31168, + "step paper propose": 52820, + "models based bert": 34754, + "trained multilingual models": 57819, + "propose plug play": 43584, + "methods benchmark datasets": 32768, + "self supervised speech": 49217, + "baseline model using": 6187, + "paper present submission": 39463, + "representation pre training": 46569, + "experimental results achieve": 19269, + "achieve promising performance": 1181, + "end trainable neural": 17718, + "test time experiments": 56388, + "improve performance model": 24892, + "level contextual information": 30083, + "results machine learning": 47707, + "transfer low resource": 58402, + "important nlp task": 24749, + "language models low": 28281, + "multilingual low resource": 36093, + "stage pre training": 52439, + "proposed address problem": 43713, + "recent advances text": 45289, + "task semeval 2021": 55355, + "models bert albert": 34772, + "various pre trained": 61379, + "task specific metrics": 55400, + "proposed approach improves": 43728, + "models domain specific": 34937, + "foster future research": 21411, + "benchmark dataset containing": 6444, + "based models trained": 5881, + "strong baselines achieves": 53009, + "increase model complexity": 25418, + "layer perceptron mlp": 29200, + "lower level tasks": 31215, + "information fine tuning": 25878, + "publicly release dataset": 44361, + "dataset https github": 12953, + "text generation capabilities": 56596, + "performance bert based": 40217, + "evaluating quality generated": 18569, + "use contextual information": 59853, + "analyses demonstrate effectiveness": 2594, + "state art accuracies": 52575, + "code open sourced": 8840, + "extract meaningful information": 19987, + "pretrained transformer models": 42190, + "use language specific": 59925, + "tuned bert models": 58869, + "benefits pre trained": 6587, + "model compared state": 33674, + "contrastive learning method": 10904, + "models specifically propose": 35527, + "benchmark datasets extensive": 6453, + "fine tuning paradigm": 21006, + "like recurrent neural": 30497, + "recently proposed method": 45456, + "model achieve state": 33506, + "continuous embedding space": 10845, + "state art hybrid": 52621, + "domain specific rules": 16184, + "data self training": 12636, + "data extensive experiments": 12351, + "token level sequence": 57300, + "different transformer based": 15106, + "thanks availability large": 57000, + "domain domain settings": 16054, + "distant supervision method": 15559, + "words given text": 62428, + "languages using english": 28817, + "data models available": 12494, + "contextualised language models": 10794, + "language models plm": 28296, + "propose self training": 43616, + "tasks open source": 55776, + "human evaluations demonstrate": 24159, + "modern transformer based": 35724, + "used benchmark dataset": 60104, + "learning domain specific": 29606, + "release dataset code": 46152, + "task oriented semantic": 55257, + "oriented semantic parsing": 38700, + "beam search generate": 6369, + "pre trained seq2seq": 41556, + "trained seq2seq models": 57867, + "language models tend": 28331, + "negative log likelihood": 36624, + "based evaluation metrics": 5709, + "evaluation shows models": 18722, + "classification tasks recent": 8571, + "access high quality": 823, + "language models encode": 28249, + "f1 score model": 20224, + "language model xlm": 28204, + "datasets available https": 13163, + "retrieval ir systems": 47949, + "zero shot settings": 63178, + "pre training synthetic": 41596, + "address gap propose": 1759, + "dataset news articles": 13012, + "effectiveness method using": 16792, + "extensive experimental studies": 19875, + "studies real world": 53293, + "related downstream tasks": 45903, + "multilingual transformer model": 36131, + "performance pretrained language": 40495, + "task specific datasets": 55392, + "evaluate models using": 18475, + "pre training effective": 41576, + "transfer natural language": 58409, + "like bert gpt": 30463, + "existing approaches suffer": 19031, + "improves performance tasks": 25145, + "propose automatic evaluation": 43307, + "generation question answering": 22536, + "standard text classification": 52535, + "shot fine tuning": 50617, + "achieve excellent results": 1136, + "training data annotation": 57973, + "require complex reasoning": 46846, + "lack parallel corpora": 27905, + "methods document level": 32828, + "experiments wide range": 19561, + "labels extensive experiments": 27825, + "paper present empirical": 39449, + "simple strong baseline": 51214, + "limited task specific": 30624, + "summarization experimental results": 53884, + "fine tuning results": 21016, + "existing work usually": 19175, + "extensive experiments english": 19887, + "datasets demonstrate superior": 13217, + "propose neuro symbolic": 43493, + "input output pairs": 26309, + "results indicate models": 47678, + "model propose new": 34246, + "domain adaptation uda": 16006, + "propose fine tune": 43390, + "processing nlp algorithms": 42898, + "indicate proposed approach": 25532, + "proposed approach effective": 43726, + "methods recent years": 33008, + "rise deep learning": 48153, + "text fine tuning": 56581, + "fine tuning experiments": 20989, + "relative position encoding": 46106, + "pretrained large scale": 42163, + "achieved competitive results": 1229, + "competitive results strong": 9564, + "results strong baselines": 47859, + "monte carlo sampling": 35828, + "experiments various datasets": 19557, + "learning fine tune": 29647, + "statistically significant performance": 52774, + "models trained solely": 35620, + "dependence labeled data": 14103, + "low resource tasks": 31195, + "paper present systematic": 39464, + "shot context learning": 50607, + "results demonstrate superior": 47584, + "improvements compared strong": 25061, + "learning algorithms using": 29513, + "code data publicly": 8802, + "learning based systems": 29542, + "learning experiments demonstrate": 29637, + "massive amounts data": 31880, + "task learning problem": 55179, + "shed light important": 50526, + "performance nlp models": 40458, + "relevant natural language": 46225, + "paper propose pre": 39529, + "language processing aims": 28394, + "achieved high performance": 1241, + "weak supervision based": 61851, + "outside training distribution": 39028, + "data augmentation da": 12153, + "proposed model shows": 43857, + "model shows significant": 34377, + "models multi task": 35236, + "experimental results multi": 19296, + "fine tuning neural": 21004, + "extensive empirical evaluation": 19864, + "natural language despite": 36420, + "text classification approach": 56467, + "data model training": 12491, + "experiments commonly used": 19377, + "attention nlp community": 4802, + "using cosine similarity": 60634, + "faster inference speed": 20438, + "downstream tasks information": 16358, + "play central role": 40962, + "shelf language models": 50538, + "paper propose automatic": 39498, + "human evaluation method": 24149, + "key challenge task": 27298, + "annotated data model": 2884, + "learning text classification": 29911, + "fine tuned target": 20969, + "based model using": 5863, + "introduce contrastive learning": 26791, + "maximize mutual information": 31959, + "models achieve high": 34671, + "leaving room improvement": 29985, + "text classification benchmarks": 56470, + "demonstrates effectiveness approach": 14030, + "zero shot manner": 63167, + "manually labeled training": 31784, + "ablation study shows": 661, + "aware machine translation": 5458, + "bert roberta xlnet": 6713, + "trained manually annotated": 57782, + "focus sentence level": 21198, + "aim improve performance": 2151, + "fine tunes model": 20975, + "learning models natural": 29756, + "models using sequence": 35659, + "shown remarkable success": 50748, + "paper propose dual": 39505, + "existing open source": 19125, + "benchmark datasets model": 6457, + "training data previous": 58027, + "propose novel regularization": 43558, + "response generation tasks": 47393, + "challenging tasks natural": 8157, + "neural generation models": 36958, + "years witnessed increasing": 63082, + "novel unsupervised method": 37948, + "based token level": 6098, + "paid little attention": 39144, + "graph convolution networks": 23122, + "using graph convolutional": 60715, + "fine tune plms": 20952, + "classification task results": 8567, + "based support vector": 6073, + "representation model trained": 46554, + "state art evaluated": 52613, + "used various downstream": 60348, + "learning based solutions": 29541, + "deep learning era": 13708, + "trained transformer encoder": 57899, + "evaluations proposed model": 18767, + "multi armed bandit": 35939, + "domain language model": 16099, + "model work propose": 34544, + "proposed method used": 43831, + "collecting high quality": 8975, + "provide baseline performance": 44013, + "crucial downstream tasks": 11900, + "model augmented data": 33589, + "unlabeled data improve": 59565, + "way improve performance": 61809, + "improve performance strong": 24900, + "art text generation": 4430, + "knowledge language models": 27542, + "propose multi label": 43475, + "finally conduct extensive": 20846, + "achieved best results": 1221, + "significant performance boosts": 50904, + "representation learning language": 46540, + "despite recent advancements": 14382, + "generation models trained": 22499, + "shown remarkable performance": 50747, + "classification task fine": 8565, + "data data augmentation": 12268, + "extensive experiments evaluate": 19888, + "suffer data scarcity": 53762, + "encourage future research": 17594, + "question answering sentiment": 44709, + "answering sentiment analysis": 3096, + "test sets using": 56379, + "present comprehensive analysis": 41871, + "attracted increasing attention": 4882, + "address limitations propose": 1779, + "correlations human judgements": 11536, + "dataset large scale": 12978, + "contrastive learning multi": 10906, + "art models datasets": 4295, + "performance fine tuned": 40350, + "datasets models trained": 13338, + "review state art": 48041, + "models available url": 34749, + "machine learning data": 31318, + "annotated test sets": 2922, + "issue low resource": 27068, + "various language models": 61351, + "github com declare": 22698, + "com declare lab": 9009, + "language processing bert": 28400, + "learning models automatically": 29752, + "improve performance results": 24898, + "multiple low resource": 36243, + "text extensive experiments": 56572, + "methods fully supervised": 32872, + "github com rucaibox": 22709, + "strategies improve performance": 52905, + "address challenge study": 1746, + "different previous methods": 15034, + "generation models produce": 22498, + "use parallel data": 59972, + "pretrained multilingual models": 42174, + "language models method": 28282, + "adopted neural machine": 1871, + "employ end end": 17381, + "using sequence labeling": 60931, + "applying pre trained": 3375, + "language model downstream": 28160, + "adapting pre trained": 1569, + "reasoning question answering": 45220, + "settings experimental results": 50370, + "language modeling question": 28218, + "words natural language": 62464, + "samples extensive experiments": 48475, + "conventional sequence sequence": 11013, + "representations fine tuning": 46671, + "substantially outperforms baselines": 53645, + "conduct ablation studies": 10025, + "tackle problems propose": 54712, + "results method improves": 47716, + "methods large margins": 32918, + "f1 score macro": 20223, + "using self supervised": 60918, + "limited annotated data": 30567, + "sequence model bart": 49953, + "model achieve competitive": 33504, + "extract structured information": 19997, + "datasets transformer based": 13463, + "field artificial intelligence": 20751, + "plays essential role": 40996, + "used transformer based": 60342, + "state art character": 52592, + "model source code": 34400, + "model able transfer": 33495, + "introduce novel dataset": 26844, + "existing automatic metrics": 19037, + "graph based encoder": 23104, + "reflect real world": 45775, + "ability models generalize": 627, + "art performance nlp": 4339, + "including support vector": 25306, + "language models performing": 28295, + "weakly supervised methods": 61866, + "use transformer based": 60060, + "tasks commonly used": 55545, + "task demonstrate effectiveness": 55002, + "point processes dpps": 41049, + "model performance address": 34186, + "performance address issue": 40187, + "influence model performance": 25728, + "use task specific": 60041, + "language work propose": 28584, + "term memory language": 56248, + "performance automatic human": 40204, + "generated social media": 22319, + "available github https": 5299, + "pre trained massive": 41543, + "conduct exhaustive experiments": 10041, + "increases model size": 25436, + "languages domains paper": 28648, + "datasets method significantly": 13331, + "high quality automatic": 23771, + "knowledge knowledge graph": 27536, + "high computational costs": 23716, + "transfer learning language": 58382, + "model new data": 34126, + "annotated data scarce": 2885, + "general pre trained": 22081, + "data recent years": 12589, + "generation empirical results": 22452, + "task learning paradigm": 55178, + "work bridge gap": 62591, + "release benchmark dataset": 46143, + "improved performance various": 24960, + "roberta based model": 48217, + "language processing approaches": 28398, + "model performance compared": 34187, + "performance compared models": 40253, + "rely supervised learning": 46303, + "supervised learning technique": 54008, + "manually labeled dataset": 31783, + "train state art": 57639, + "performances wide range": 40654, + "experiments pre trained": 19490, + "despite success large": 14396, + "need labeled data": 36575, + "compared baseline model": 9385, + "easily integrated existing": 16546, + "workshop asian translation": 62921, + "loss experimental results": 31087, + "supervised unsupervised machine": 54067, + "language models llms": 28278, + "research open domain": 47084, + "datasets open domain": 13355, + "years end end": 63058, + "trained large datasets": 57768, + "number training samples": 38051, + "tasks proposed framework": 55824, + "based logistic regression": 5820, + "model knowledge graph": 34032, + "medium low resource": 32219, + "models trained datasets": 35605, + "text language model": 56643, + "zero shot prompting": 63172, + "research real world": 47110, + "trained models achieve": 57794, + "make model learn": 31581, + "trained models large": 57802, + "improve data efficiency": 24841, + "language models train": 28333, + "based large language": 5806, + "language model t5": 28196, + "submissions shared task": 53579, + "shared task leaderboard": 50500, + "source code freely": 51747, + "utilizes pre trained": 61117, + "consistent performance gains": 10282, + "code switched text": 8860, + "scores previous state": 48914, + "applications deep learning": 3197, + "based bi encoder": 5605, + "multilingual xlm roberta": 36136, + "higher f1 scores": 23825, + "affect downstream performance": 2013, + "various transformer based": 61412, + "performance language understanding": 40409, + "propose new multi": 43509, + "different pre training": 15030, + "amounts text data": 2558, + "available benchmark datasets": 5266, + "fine tuning propose": 21015, + "require fine tuning": 46857, + "tuning large language": 58923, + "compared standard transformer": 9457, + "questions require complex": 44806, + "employing pre trained": 17401, + "able achieve better": 669, + "shot shot settings": 50647, + "achieved macro f1": 1249, + "https huggingface datasets": 24061, + "research direction propose": 47020, + "dataset code released": 12843, + "released url https": 46185, + "performance improvement compared": 40384, + "labeled training set": 27768, + "results pre trained": 47771, + "recent pretrained language": 45336, + "bayes support vector": 6355, + "using fine tuned": 60697, + "demonstrate models trained": 13946, + "aims automatically generate": 2176, + "tf idf word2vec": 56994, + "tasks extensive experiments": 55636, + "existing works focus": 19177, + "leveraging large scale": 30330, + "fine grained manner": 20940, + "jointly learns representations": 27203, + "zero shot baselines": 63155, + "social media challenging": 51569, + "shared task 2021": 50492, + "based bert architecture": 5600, + "fine tuning existing": 20988, + "applied various tasks": 3310, + "uses language model": 60517, + "sequence sequence pre": 49993, + "pre train large": 41518, + "bert based baselines": 6619, + "baselines benchmark datasets": 6240, + "benchmark datasets code": 6448, + "baseline future work": 6173, + "models fine tune": 35030, + "yields best results": 63116, + "joint entity relation": 27169, + "language models obtained": 28291, + "machine learning paper": 31329, + "nature social media": 36487, + "novel large scale": 37852, + "gap training inference": 21983, + "approaches significantly outperform": 3921, + "method make use": 32569, + "performance code available": 40240, + "furthermore propose novel": 21835, + "pattern exploiting training": 39961, + "task document level": 55030, + "external knowledge knowledge": 19942, + "produce promising results": 43000, + "data new domain": 12511, + "dense passage retrieval": 14078, + "language models present": 28302, + "complex multi step": 9637, + "work propose self": 62785, + "outperforms baselines significantly": 38875, + "performance existing state": 40333, + "performed benchmark datasets": 40658, + "reinforcement learning agent": 45865, + "cross domain generalization": 11816, + "new fine tuning": 37207, + "validate effectiveness model": 61176, + "contrastive learning objective": 10907, + "language model plm": 28184, + "approach publicly available": 3661, + "research field natural": 47037, + "language processing lack": 28413, + "method improve accuracy": 32530, + "macro average f1": 31403, + "models new domains": 35258, + "alleviate catastrophic forgetting": 2400, + "need fine tuning": 36567, + "fine tuning framework": 20992, + "text text models": 56810, + "objective pre training": 38100, + "relatively small number": 46131, + "language processing typically": 28437, + "nlp tasks datasets": 37532, + "based graph neural": 5763, + "model performance using": 34196, + "intent classification task": 26565, + "study pre trained": 53435, + "11 indic languages": 88, + "question answering framework": 44698, + "improvements automatic evaluation": 25048, + "consistently outperforms baselines": 10303, + "contrastive representation learning": 10920, + "work presents novel": 62764, + "tasks propose use": 55821, + "pre training corpora": 41571, + "pre trained natural": 41548, + "work propose efficient": 62775, + "high quality generated": 23778, + "german italian spanish": 22673, + "models text generation": 35597, + "non autoregressive generation": 37634, + "learning framework based": 29653, + "compared baseline models": 9386, + "resource target language": 47280, + "best models achieve": 6784, + "existing zero shot": 19179, + "paper tackles problem": 39593, + "models perform reasonably": 35315, + "data augmentation based": 12152, + "efficient fine tuning": 16874, + "tuning pretrained models": 58948, + "incorporated pre trained": 25371, + "achieve human level": 1159, + "train neural models": 57617, + "research topic field": 47131, + "domain labeled data": 16096, + "models fine grained": 35029, + "consistently outperforms vanilla": 10308, + "trained models ptms": 57806, + "multi task pre": 36026, + "task pre training": 55281, + "fine tuning plms": 21010, + "machine readable format": 31339, + "manual automatic evaluation": 31734, + "model downstream task": 33791, + "baselines cross lingual": 6247, + "generate pseudo labels": 22231, + "imitation reinforcement learning": 24580, + "based domain specific": 5691, + "domain specific model": 16180, + "state art comparable": 52598, + "evaluation human evaluation": 18625, + "tasks end end": 55612, + "paper propose controllable": 39500, + "fine tuning tasks": 21027, + "language fine tuning": 28073, + "improve model generalization": 24872, + "lm fine tuned": 30907, + "logistic regression random": 30997, + "regression random forest": 45818, + "transformer model performance": 58496, + "language models limited": 28276, + "methods deep neural": 32816, + "supervised contrastive loss": 53973, + "publicly available resources": 44354, + "language models new": 28288, + "human annotation paper": 24102, + "quality human annotated": 44530, + "language models predict": 28301, + "results language models": 47691, + "outperforms baseline systems": 38872, + "methods achieves state": 32735, + "better performance paper": 6933, + "tuning pretrained language": 58947, + "work shed light": 62819, + "sequence labeling problems": 49938, + "models trained small": 35618, + "round trip translation": 48360, + "larger existing datasets": 29078, + "analyze impact different": 2819, + "inference nli model": 25676, + "contrastive learning approach": 10899, + "different domains results": 14908, + "models plms bert": 35327, + "monolingual multilingual cross": 35806, + "compared commonly used": 9394, + "nearest neighbor retrieval": 36520, + "models trained general": 35611, + "set real world": 50233, + "pre trained generative": 41535, + "trained generative language": 57740, + "different word order": 15134, + "natural language reasoning": 36446, + "propose novel non": 43553, + "propose curriculum learning": 43345, + "task learning method": 55174, + "meaning original text": 32008, + "cross lingual experiments": 11836, + "results fine tuned": 47637, + "learning fine tuned": 29648, + "outperform existing models": 38796, + "data available task": 12179, + "conduct comprehensive analysis": 10032, + "competitive performance using": 9556, + "end training data": 17721, + "training data directly": 57987, + "corpus propose novel": 11409, + "accuracy current state": 954, + "trained using different": 57909, + "prompt based methods": 43201, + "training data explore": 57995, + "training data multiple": 58020, + "best performing submission": 6799, + "latest state art": 29159, + "recent developments natural": 45304, + "settings code available": 50360, + "enhanced pre trained": 17937, + "survey provide comprehensive": 54214, + "empirically demonstrate effectiveness": 17359, + "evaluation downstream tasks": 18612, + "retrieval reading comprehension": 47967, + "domain language models": 16100, + "continual learning cl": 10823, + "construct new benchmark": 10395, + "training data challenging": 57981, + "transformer based nlp": 58469, + "domain adaptation da": 15997, + "outperforms best performing": 38879, + "local global context": 30939, + "recent approaches use": 45293, + "does improve performance": 15953, + "mean absolute error": 31988, + "fine grained scores": 20941, + "models known suffer": 35157, + "based finding propose": 5731, + "present transformer based": 42046, + "guide generation process": 23335, + "corpus proposed model": 11411, + "model generalize new": 33925, + "machine translation multilingual": 31370, + "small number domain": 51487, + "fully fine tuned": 21733, + "wide range language": 61969, + "tasks including question": 55681, + "method conduct extensive": 32434, + "hard negative mining": 23447, + "recurrent neural language": 45623, + "natural language prompts": 36441, + "multiple datasets demonstrate": 36194, + "approach paper present": 3634, + "models perform par": 35313, + "low computational cost": 31134, + "previous work usually": 42317, + "processing nlp domain": 42902, + "using bert model": 60590, + "bert model pre": 6685, + "training data collected": 57982, + "approach substantially improves": 3708, + "baselines human evaluation": 6269, + "language models human": 28261, + "demonstrate large language": 13929, + "used train model": 60334, + "learning methods using": 29737, + "model achieves strong": 33526, + "suffer spurious correlations": 53784, + "classification multi label": 8501, + "human annotated training": 24100, + "paper provide comprehensive": 39555, + "perform significantly worse": 40142, + "applied zero shot": 3315, + "helps improve model": 23609, + "obtaining high quality": 38235, + "provide weak supervision": 44156, + "different evaluation settings": 14922, + "review recent progress": 48038, + "interpretable machine learning": 26722, + "dataset real world": 13054, + "autoregressive non autoregressive": 5224, + "study problem generating": 53440, + "outperforms various strong": 38960, + "various strong baselines": 61400, + "new sota performance": 37320, + "analysis shared task": 2754, + "paper propose textbf": 39537, + "fine tune transformer": 20956, + "popular text classification": 41194, + "architecture pre trained": 4078, + "provide comprehensive review": 44040, + "explored paper propose": 19761, + "based bidirectional long": 5608, + "bert natural language": 6693, + "propose multi stage": 43478, + "important research topic": 24764, + "model better performance": 33626, + "develop natural language": 14603, + "false positive rate": 20382, + "model transfer knowledge": 34486, + "domain specific sentiment": 16185, + "new multi task": 37263, + "language models robust": 28317, + "different bert based": 14854, + "approaches tackle problem": 3934, + "conduct zero shot": 10071, + "word embeddings evaluate": 62167, + "work introduces novel": 62696, + "modal contrastive learning": 33455, + "contrastive learning model": 10905, + "learning model learns": 29747, + "systems end end": 54486, + "ground truth datasets": 23254, + "human evaluation protocol": 24153, + "mask language modeling": 31858, + "trained existing datasets": 57726, + "global structural information": 22844, + "meta learning based": 32341, + "available experimental results": 5291, + "verify effectiveness approach": 61536, + "especially social media": 18302, + "results compared baseline": 47546, + "f1 score 46": 20196, + "novel knowledge graph": 37848, + "questions work propose": 44817, + "metrics bleu rouge": 33146, + "results cross domain": 47564, + "labeled data source": 27746, + "indo aryan language": 25594, + "f1 score best": 20220, + "demonstrate fine tuning": 13913, + "based metrics bleu": 5855, + "data pre processing": 12552, + "semantic information model": 49286, + "task improve performance": 55127, + "semeval 2022 task": 49441, + "despite promising results": 14379, + "benchmark natural language": 6482, + "make best use": 31543, + "new target domain": 37332, + "real world events": 45129, + "art sota approaches": 4405, + "specific pre trained": 52127, + "models diverse set": 34933, + "propose prompt based": 43595, + "datasets extensive experimental": 13268, + "source code work": 51753, + "code work available": 8869, + "baseline models trained": 6192, + "zero shot baseline": 63154, + "contrast previous studies": 10883, + "domain specific training": 16190, + "style experimental results": 53485, + "recent advances pretrained": 45288, + "dataset code models": 12841, + "large neural networks": 28919, + "computational resources training": 9860, + "proposed model text": 43859, + "generation models generate": 22497, + "propose transition based": 43684, + "different hyper parameters": 14952, + "parameters experimental results": 39696, + "experimental results depth": 19283, + "results depth analysis": 47588, + "experiments analyses demonstrate": 19348, + "learning approach allows": 29520, + "improvements benchmark datasets": 25053, + "gap pre training": 21973, + "sequence models achieved": 49959, + "model reinforcement learning": 34296, + "findings provide insights": 20913, + "given knowledge graph": 22752, + "seven benchmark datasets": 50416, + "performance previous state": 40497, + "perform manual evaluation": 40121, + "neural network task": 37027, + "model new task": 34129, + "work present systematic": 62760, + "modeling long range": 34593, + "experiments datasets demonstrate": 19399, + "memory lstm model": 32263, + "based findings propose": 5733, + "wide range existing": 61968, + "f1 score 62": 20198, + "training data addition": 57971, + "number labeled data": 38012, + "key design choices": 27307, + "unsupervised supervised models": 59737, + "latest deep learning": 29156, + "multiple information sources": 36227, + "research paper present": 47087, + "guide future research": 23332, + "classification fundamental task": 8475, + "based large pre": 5807, + "current neural models": 11992, + "recent progress language": 45338, + "test state art": 56383, + "understanding nlu research": 59376, + "series controlled experiments": 50061, + "language understanding capabilities": 28546, + "counterfactual data augmentation": 11618, + "lead state art": 29273, + "models struggle generalize": 35541, + "unsupervised contrastive learning": 59688, + "language models successfully": 28327, + "training validation test": 58313, + "models terms accuracy": 35592, + "widely studied recent": 62006, + "studied recent years": 53234, + "data set results": 12644, + "different classification models": 14864, + "based models used": 5883, + "models fail capture": 35016, + "wide range text": 61975, + "data driven systems": 12306, + "novel latent variable": 37854, + "used training corpus": 60337, + "benchmark text classification": 6501, + "better performance model": 6932, + "set large scale": 50182, + "corpus high quality": 11354, + "representation learning approach": 46537, + "paper propose automated": 39497, + "multilingual code mixed": 36069, + "ability large language": 618, + "superior zero shot": 53947, + "challenging task aims": 8144, + "optimized end end": 38567, + "does rely external": 15967, + "question answering existing": 44696, + "paper proposes multi": 39548, + "input neural network": 26305, + "does require training": 15977, + "models using data": 35656, + "advent deep learning": 1958, + "information extraction documents": 25861, + "tuning pretrained bert": 58946, + "novel model based": 37873, + "macro f1 scores": 31408, + "dataset f1 score": 12925, + "make correct predictions": 31557, + "models automatically generate": 34746, + "sequence classification task": 49916, + "data code models": 12213, + "training data significantly": 58037, + "model improves accuracy": 33983, + "strategies experimental results": 52902, + "demonstrate practical utility": 13959, + "open source models": 38453, + "zero shot task": 63181, + "text generation evaluation": 56597, + "outperforming previous work": 38858, + "advancements natural language": 1902, + "model better capture": 33625, + "especially zero shot": 18313, + "furthermore propose new": 21834, + "scale human annotated": 48579, + "trained general purpose": 57737, + "language models potential": 28298, + "qualitative quantitative analyses": 44479, + "demonstrate model able": 13940, + "performance drops dramatically": 40311, + "prompt based learning": 43199, + "better low resource": 6913, + "english dataset demonstrate": 17794, + "generation aims produce": 22416, + "data expensive time": 12341, + "fine tuning shot": 21017, + "train model large": 57608, + "appropriate training data": 3968, + "training data based": 57979, + "reason paper propose": 45170, + "datasets significant improvements": 13429, + "combined pre trained": 9083, + "transformer based large": 58462, + "probability language model": 42479, + "systems previous works": 54597, + "inference extensive experiments": 25658, + "model performance results": 34194, + "using active learning": 60550, + "text previous works": 56708, + "leverage large scale": 30275, + "fine tuning different": 20984, + "achieved significant performance": 1270, + "recent works demonstrated": 45377, + "trained seq2seq model": 57866, + "novel prompt based": 37901, + "cross lingual knowledge": 11839, + "language models apply": 28230, + "existing approaches perform": 19028, + "essential task natural": 18336, + "performance compared supervised": 40257, + "shown large language": 50725, + "baseline f1 score": 6167, + "serve baseline future": 50074, + "based cosine similarity": 5655, + "learning models paper": 29757, + "observe fine tuned": 38133, + "approaches experimental results": 3817, + "based automatic human": 5589, + "seq seq models": 49891, + "fine tune task": 20955, + "data augmentation work": 12167, + "conduct comprehensive study": 10034, + "non neural models": 37671, + "experiments method outperforms": 19464, + "heterogeneous graph based": 23621, + "available github com": 5298, + "based classification models": 5619, + "trained models usually": 57812, + "trained models language": 57801, + "labeled data using": 27753, + "hyper parameter tuning": 24326, + "higher correlations human": 23820, + "method based deep": 32399, + "art baselines terms": 4225, + "propose novel prompt": 43557, + "shown language models": 50723, + "consisting multiple sentences": 10318, + "conducting human evaluation": 10105, + "trained large language": 57769, + "methods require significant": 33019, + "challenging low resource": 8112, + "limited data available": 30578, + "method outperforms competitive": 32600, + "based learning based": 5812, + "does require additional": 15970, + "prior work explored": 42420, + "level entity level": 30112, + "work proposes new": 62793, + "multi objective optimization": 35994, + "proposed method better": 43811, + "literature paper propose": 30859, + "token level classification": 57296, + "training domain data": 58072, + "general domain text": 22056, + "2022 shared task": 304, + "mean standard deviation": 31998, + "improve final performance": 24856, + "experiments conducted publicly": 19385, + "conducted publicly available": 10090, + "positive negative sentiment": 41289, + "automated speech recognition": 5059, + "non zero shot": 37692, + "improve generation quality": 24862, + "tasks self supervised": 55869, + "language models despite": 28244, + "knowledge pre training": 27574, + "success large language": 53705, + "compared existing approaches": 9406, + "machine translation test": 31389, + "provide detailed description": 44051, + "design simple effective": 14300, + "ability transformer based": 647, + "lead improved performance": 29261, + "training self supervised": 58243, + "bert achieved state": 6605, + "knowledge downstream tasks": 27451, + "prompt tuning methods": 43211, + "meta learning approaches": 32340, + "models including state": 35120, + "alternative fine tuning": 2502, + "use pretrained language": 59980, + "language understanding current": 28548, + "models trained evaluated": 35609, + "using large language": 60757, + "bert base models": 6615, + "enrich training data": 17962, + "aware pre training": 5466, + "propose token level": 43675, + "commonly used evaluation": 9224, + "seen surge research": 49064, + "training work propose": 58317, + "work propose framework": 62777, + "detection experimental results": 14483, + "methods achieved promising": 32733, + "domain social media": 16163, + "corpus fine tune": 11344, + "evaluation demonstrates effectiveness": 18608, + "important real world": 24759, + "robust domain shift": 48246, + "gap propose novel": 21978, + "easy data augmentation": 16560, + "data augmentation eda": 12154, + "work compare different": 62603, + "inter annotator agreements": 26577, + "effectiveness method compared": 16790, + "data models trained": 12496, + "support research task": 54124, + "supervised unsupervised approaches": 54065, + "domains work propose": 16302, + "previous work typically": 42314, + "paper study performance": 39579, + "results shot learning": 47836, + "smaller language models": 51520, + "nlp data augmentation": 37478, + "word embeddings learnt": 62178, + "remains key challenge": 46336, + "code mixed languages": 8828, + "language models tasks": 28330, + "zero shot retrieval": 63173, + "especially low data": 18285, + "paper propose evaluate": 39509, + "annotated test set": 2921, + "use fine tuned": 59891, + "github com alibaba": 22694, + "extensive experiments based": 19880, + "strong performance zero": 53041, + "current transformer based": 12022, + "leading sub optimal": 29302, + "github com amazon": 22696, + "language models leverage": 28274, + "facilitate research direction": 20276, + "semi structured data": 49454, + "model self attention": 34349, + "demonstrated impressive results": 14012, + "prompting large language": 43217, + "language model llm": 28173, + "model effectively learn": 33799, + "alleviate issue propose": 2409, + "semantic role labelling": 49339, + "conduct experiments benchmark": 10043, + "using diverse set": 60665, + "provide comparative analysis": 44030, + "data augmentation improve": 12157, + "surpass current state": 54163, + "state art existing": 52614, + "achieve sota performance": 1201, + "performance improvement state": 40385, + "perform human evaluation": 40113, + "objective subjective evaluation": 38105, + "human decision making": 24133, + "shown fine tuning": 50710, + "propose retrieval augmented": 43609, + "develop data driven": 14580, + "artificial intelligence applications": 4492, + "data cross lingual": 12263, + "propose new technique": 43514, + "general purpose models": 22087, + "lot training data": 31123, + "semantic representation input": 49333, + "method outperforms prior": 32603, + "comprehensive empirical study": 9787, + "capable generating diverse": 7621, + "propose multi view": 43480, + "empirical results analysis": 17338, + "propose template based": 43665, + "outperform fine tuning": 38799, + "improve performance domain": 24888, + "higher level semantic": 23832, + "significantly outperforming previous": 50995, + "given language model": 22756, + "using prompt based": 60876, + "code data released": 8803, + "data released https": 12598, + "entropy ce loss": 18160, + "additional fine tuning": 1671, + "automatically construct large": 5151, + "20 f1 score": 226, + "zero shot models": 63170, + "fully supervised baselines": 21740, + "consistently better performance": 10292, + "extraction sequence labeling": 20111, + "model achieves promising": 33522, + "available large scale": 5320, + "language model ii": 28170, + "rich information contained": 48103, + "applications text generation": 3253, + "propose method called": 43454, + "downstream tasks code": 16353, + "previous zero shot": 42326, + "important research direction": 24762, + "input language model": 26289, + "proposed approach state": 43734, + "learn latent representation": 29389, + "model uses word": 34513, + "approach domain adaptation": 3497, + "modal pre training": 33463, + "human evaluation study": 24157, + "long input sequences": 31016, + "models able perform": 34657, + "impressive results various": 24817, + "bert models different": 6688, + "gap propose new": 21977, + "knowledge state art": 27619, + "evaluation low resource": 18634, + "dataset cross lingual": 12875, + "employ multi task": 17386, + "task text mining": 55436, + "real use cases": 45117, + "propose novel shot": 43562, + "single task counterparts": 51343, + "novel approach generating": 37762, + "high quality entity": 23777, + "automated text processing": 5063, + "resource constrained devices": 47213, + "recent advances large": 45284, + "provides easy use": 44194, + "learning based automatic": 29532, + "baseline models performance": 6190, + "prior work demonstrated": 42419, + "adversarial training approach": 1989, + "applied text classification": 3303, + "learning models recently": 29760, + "performance standard fine": 40574, + "methods typically rely": 33087, + "compared pre trained": 9432, + "approach shows competitive": 3690, + "adversarial training language": 1991, + "art performance using": 4351, + "improve performance multi": 24894, + "text sentiment analysis": 56764, + "play critical role": 40964, + "used supervised learning": 60318, + "models models trained": 35230, + "understanding language models": 59358, + "domain zero shot": 16230, + "challenging task existing": 8147, + "limitations propose novel": 30556, + "processing nlp information": 42905, + "techniques pre trained": 56123, + "best performing method": 6796, + "models learn better": 35175, + "development artificial intelligence": 14669, + "proposed approach reduces": 43731, + "learn fine grained": 29372, + "existing methods use": 19103, + "real world facts": 45130, + "neural text classification": 37104, + "book question answering": 7247, + "methods proven effective": 32999, + "substantial performance gains": 53627, + "improve downstream performance": 24846, + "dataset social media": 13093, + "scale benchmark dataset": 48556, + "performance paper proposes": 40479, + "previous neural models": 42266, + "open question paper": 38442, + "embedding state art": 17065, + "result proposed model": 47449, + "model outperforms best": 34159, + "design pre training": 14296, + "sentence level evaluation": 49586, + "lingual cross domain": 30694, + "representations low resource": 46714, + "improve robustness model": 24920, + "pre training cross": 41573, + "training cross domain": 57967, + "approaches paper present": 3891, + "text remains challenging": 56736, + "language model improve": 28171, + "approach achieves accuracy": 3394, + "word embeddings demonstrate": 62162, + "method significantly reduces": 32658, + "various downstream applications": 61332, + "present novel training": 41979, + "end propose method": 17700, + "paper explore various": 39367, + "sequence model generate": 49954, + "including pre trained": 25289, + "test data using": 56341, + "outperform baseline model": 38781, + "processing tasks recently": 42953, + "datasets results proposed": 13411, + "extensive experiments existing": 19889, + "conditional variational auto": 10009, + "art large language": 4273, + "end end requires": 17659, + "data centric approach": 12202, + "model learns identify": 34055, + "low resources languages": 31201, + "word embeddings propose": 62183, + "traditional sequence sequence": 57544, + "approaches pre trained": 3897, + "development deep neural": 14676, + "adapt state art": 1511, + "transformer encoder model": 58484, + "neural networks generate": 37048, + "paper provides overview": 39560, + "sentence level accuracy": 49578, + "recall f1 scores": 45242, + "human loop approach": 24204, + "dataset empirical results": 12906, + "use end end": 59877, + "explicit semantic information": 19625, + "task propose multi": 55300, + "previous approaches use": 42241, + "present detailed error": 41891, + "multi domain multi": 35956, + "simple effective unsupervised": 51159, + "empirical results using": 17348, + "languages make code": 28724, + "methods typically require": 33088, + "language models llm": 28277, + "based data driven": 5665, + "discuss future research": 15468, + "language models given": 28258, + "representation amr graph": 46491, + "work demonstrates feasibility": 62628, + "video available https": 61580, + "fully supervised baseline": 21739, + "experiments public benchmarks": 19502, + "paper proposed novel": 39543, + "evaluation experimental results": 18619, + "learning large language": 29699, + "semeval 2023 task": 49443, + "based multiple choice": 5893, + "model new domains": 34128, + "outperforms commonly used": 38884, + "93 f1 score": 563, + "demonstrated impressive performance": 14011, + "text generation approaches": 56595, + "systems conversational agents": 54461, + "propose encoder decoder": 43368, + "art results unsupervised": 4390, + "learning data augmentation": 29579, + "based hypothesis propose": 5776, + "leverage external knowledge": 30269, + "standard pre trained": 52518, + "use fine tuning": 59892, + "limited english language": 30585, + "level sequence labeling": 30211, + "better performance baselines": 6929, + "performance compared using": 40258, + "evaluation dataset consisting": 18604, + "experiment results benchmark": 19248, + "terms exact match": 56287, + "framework end end": 21505, + "bleu rouge meteor": 7212, + "does require task": 15976, + "annotator agreement iaa": 3011, + "used benchmark datasets": 60105, + "solve problem introduce": 51685, + "transformer based plms": 58470, + "opening new avenues": 38476, + "discriminative generative models": 15445, + "like bert achieved": 30462, + "language models attention": 28232, + "methods based pre": 32765, + "positive unlabeled pu": 41301, + "unlabeled pu learning": 59576, + "performance paper conduct": 40474, + "github com deeplearnxmu": 22699, + "addition propose novel": 1638, + "different previous works": 15035, + "use real world": 59987, + "diverse downstream tasks": 15702, + "news articles using": 37387, + "poor generalization ability": 41135, + "data source language": 12677, + "source language data": 51777, + "non autoregressive manner": 37635, + "learning shown great": 29875, + "cases zero shot": 7819, + "fine tuning entire": 20987, + "improved performance compared": 24957, + "languages large amounts": 28708, + "text paper investigate": 56689, + "tasks sentence classification": 55872, + "largest human annotated": 29097, + "prompt tuning method": 43210, + "future research area": 21884, + "text image generation": 56622, + "widely used methods": 62018, + "gold standard data": 22917, + "baselines terms f1": 6311, + "extensive experiments performed": 19895, + "based fine tuned": 5735, + "brown et al": 7373, + "learned language models": 29464, + "benchmark datasets compared": 6449, + "recently attracted attention": 45409, + "attracted attention researchers": 4875, + "problems propose new": 42723, + "respectively proposed method": 47380, + "despite widespread use": 14406, + "embeddings domain specific": 17117, + "transformer based classification": 58458, + "model annotated data": 33566, + "effectiveness data augmentation": 16775, + "work zero shot": 62862, + "paper propose data": 39502, + "recent research shows": 45343, + "use bidirectional encoder": 59837, + "sentiment social media": 49860, + "sentiment analysis results": 49827, + "attention mechanism pre": 4779, + "zero shot evaluations": 63161, + "inference time paper": 25701, + "task language models": 55161, + "model bert bidirectional": 33619, + "representation learning based": 46538, + "task best model": 54937, + "open research question": 38445, + "compared competitive baselines": 9397, + "paper present hybrid": 39453, + "end end evaluation": 17645, + "data used improve": 12760, + "leading better performance": 29289, + "paper fills gap": 39374, + "requires expert knowledge": 46927, + "new unseen domains": 37355, + "develop neural model": 14605, + "performing end end": 40677, + "use unsupervised methods": 60067, + "including end end": 25254, + "empirical results indicate": 17342, + "propose natural language": 43487, + "english foreign language": 17806, + "large scale annotation": 28961, + "foundation natural language": 21419, + "future research code": 21885, + "auto regressive decoder": 5021, + "recently demonstrated impressive": 45418, + "general domain language": 22054, + "generation text summarization": 22567, + "relatively small datasets": 46130, + "data challenging task": 12206, + "evaluation metrics model": 18651, + "performs considerably better": 40706, + "results various nlp": 47904, + "positive negative sentiments": 41290, + "respectively extensive experiments": 47370, + "simple easy implement": 51149, + "significant improvement existing": 50877, + "models work analyze": 35688, + "weakly supervised data": 61862, + "label classification tasks": 27698, + "training extensive experiments": 58103, + "performance monolingual multilingual": 40444, + "performing bert based": 40672, + "hand crafted templates": 23390, + "proposed framework effectively": 43783, + "based hand crafted": 5765, + "survey recent advances": 54217, + "information unstructured text": 26140, + "64 f1 score": 470, + "language understanding natural": 28554, + "learning framework using": 29656, + "learning dl based": 29600, + "specific task paper": 52154, + "tasks empirical results": 55607, + "multi modal multi": 35991, + "perform comparably better": 40076, + "construct high quality": 10388, + "high quality multi": 23782, + "use self supervised": 60005, + "relatively little work": 46123, + "provide case study": 44024, + "achieves absolute improvement": 1301, + "machine translation generate": 31359, + "testing state art": 56414, + "low resource problem": 31187, + "framework zero shot": 21630, + "results compared previous": 47548, + "text davinci 003": 56527, + "evaluation metrics paper": 18652, + "benefit transfer learning": 6572, + "computational resources paper": 9859, + "furthermore conduct detailed": 21811, + "deep learning technology": 13724, + "complex state art": 9663, + "datasets significant improvement": 13428, + "model text davinci": 34458, + "method wide range": 32705, + "domain low resource": 16107, + "model previous work": 34233, + "processing nlp used": 42917, + "present experiments using": 41912, + "language models t5": 28328, + "integrating state art": 26525, + "address gap introduce": 1758, + "paper propose stage": 39535, + "transformer based multi": 58466, + "end paper propose": 17693, + "machine learning text": 31336, + "knowledge extensive experiments": 27477, + "time series forecasting": 57212, + "proposes new method": 43939, + "art neural based": 4310, + "recent years thanks": 45398, + "adapt target domain": 1513, + "model different datasets": 33766, + "models lms pretrained": 35199, + "various types knowledge": 61414, + "speakers different languages": 52005, + "https bit ly": 24055, + "information theoretic measures": 26122, + "shelf language model": 50537, + "dataset fine tuned": 12932, + "trainable neural network": 57664, + "models cross domain": 34875, + "challenging task complexity": 8145, + "parameters propose novel": 39719, + "language models contain": 28240, + "aware contrastive learning": 5446, + "contrastive learning strategy": 10911, + "scale large language": 48590, + "language models shot": 28318, + "information state art": 26105, + "suffer significant performance": 53782, + "datasets previous studies": 13376, + "minimal human intervention": 33289, + "finally discuss limitations": 20853, + "overview state art": 39119, + "social commonsense knowledge": 51557, + "model shows better": 34376, + "training data large": 58007, + "knowledge experimental results": 27472, + "tasks limited training": 55730, + "models llms gpt": 35196, + "task model needs": 55217, + "topic recent years": 57423, + "various domains including": 61330, + "does require access": 15969, + "rule based models": 48385, + "problem recent works": 42644, + "novel approach using": 37766, + "strategy data augmentation": 52931, + "adapts pre trained": 1586, + "platforms like twitter": 40953, + "structured information unstructured": 53157, + "motivated recent advances": 35874, + "model training scheme": 34483, + "fine tuned training": 20971, + "pretrained sentence encoders": 42182, + "results test set": 47881, + "language models performance": 28294, + "algorithm natural language": 2288, + "recognition ner named": 45519, + "ner named entity": 36681, + "recently end end": 45424, + "english vice versa": 17900, + "language model best": 28158, + "transformer based framework": 58460, + "demonstrate significant improvement": 13973, + "task aims automatically": 54895, + "art baselines significantly": 4224, + "approaches low resource": 3866, + "safety critical applications": 48430, + "state art computational": 52600, + "domain annotated data": 16019, + "generation method based": 22491, + "method automatically extract": 32395, + "based tf idf": 6094, + "speech recognition error": 52284, + "sequenceto": 50032, + "scatter": 48678, + "implicated": 24652, + "successor": 53754, + "interchangeably": 26639, + "vt": 61750, + "agendas": 2052, + "enlightened": 17956, + "scm": 48777, + "512": 433, + "filed": 20794, + "gaussians": 22019, + "preset": 42128, + "monologues": 35819, + "demos": 14059, + "experimenter": 19339, + "235": 325, + "proceeded": 42749, + "impairing": 24617, + "exaggerated": 18856, + "wei": 61911, + "maximized": 31961, + "440": 408, + "155": 159, + "extractable": 20004, + "interaction different": 26597, + "classification annotation": 8433, + "corpora arabic": 11178, + "strength model": 52971, + "training criterion": 57965, + "presents hybrid": 42086, + "roles paper": 48327, + "conditions using": 10021, + "utterances dialogue": 61146, + "influence future": 25726, + "function evaluate": 21754, + "model effectiveness": 33800, + "dataset showed": 13083, + "architecture widely": 4099, + "jointly performs": 27215, + "technique paper": 56042, + "emission probabilities": 17275, + "strategy produces": 52947, + "par current": 39613, + "directly evaluate": 15313, + "domain like": 16103, + "based handcrafted": 5766, + "context memory": 10673, + "variables experimental": 61227, + "constructed model": 10414, + "opened new": 38472, + "networks promising": 36896, + "propose modifications": 43470, + "problem single": 42657, + "stronger correlation": 53062, + "release publicly": 46166, + "incorporates different": 25374, + "type location": 59061, + "associated confidence": 4621, + "possible candidates": 41318, + "specific individual": 52089, + "generation widely": 22579, + "methods topic": 33076, + "transfer different": 58358, + "interaction users": 26614, + "irrelevant noise": 27041, + "architecture transfer": 4093, + "investigated performance": 27000, + "propose novelty": 43575, + "work inspired": 62689, + "seq2seq methods": 49900, + "time making": 57176, + "effective self": 16692, + "importance training": 24692, + "existing online": 19123, + "reported using": 46457, + "flexible approach": 21108, + "detection given": 14489, + "studies confirm": 53253, + "lstms achieved": 31290, + "text tweets": 56826, + "step achieve": 52796, + "complex long": 9633, + "introduce fine": 26805, + "84 accuracy": 531, + "control quality": 10972, + "strategy generating": 52935, + "nature makes": 36482, + "implement framework": 24634, + "named emph": 36369, + "good fit": 22932, + "benefit wide": 6576, + "develop text": 14618, + "available address": 5260, + "method expand": 32494, + "main difficulties": 31434, + "task harder": 55113, + "problem artificial": 42507, + "user social": 60447, + "generation difficult": 22448, + "attention information": 4759, + "lack supervised": 27918, + "methods unable": 33089, + "importance identifying": 24683, + "suffer limitations": 53771, + "traditional training": 57553, + "apply adversarial": 3320, + "prediction specifically": 41739, + "distribution labels": 15642, + "generates multiple": 22348, + "tasks validate": 55958, + "unsupervised does": 59693, + "attention focused": 4750, + "74 f1": 499, + "entire conversation": 18020, + "utterance conversation": 61134, + "models majority": 35212, + "sequenceto sequence": 50033, + "ranking process": 44976, + "exist multiple": 19015, + "utterances structured": 61152, + "natural structure": 36467, + "characteristics natural": 8240, + "generalized different": 22151, + "opinion piece": 38503, + "generative latent": 22594, + "task user": 55461, + "architecture achieve": 4020, + "sub topics": 53536, + "years methods": 63064, + "possible combinations": 41320, + "model relevant": 34303, + "domains minimal": 16274, + "attains better": 4674, + "relations limited": 46040, + "resource paper": 47259, + "supervision human": 54082, + "information extracting": 25859, + "extracting sentence": 20038, + "supervised signal": 54048, + "improvement overall": 25011, + "approach complex": 3461, + "obtain strong": 38195, + "levels hierarchy": 30241, + "evaluation achieve": 18573, + "rich useful": 48127, + "input experiments": 26277, + "choosing suitable": 8348, + "supports multi": 54143, + "known produce": 27665, + "trained hundreds": 57749, + "list relevant": 30840, + "classifiers perform": 8622, + "based news": 5911, + "corpus building": 11290, + "challenges compared": 8035, + "yale lily": 63042, + "sufficiently capture": 53809, + "work treats": 62847, + "step perform": 52821, + "occurrence relations": 38277, + "extracted directly": 20009, + "architecture self": 4084, + "central research": 7920, + "representation textual": 46594, + "stream nlp": 52959, + "framework context": 21482, + "local topic": 30952, + "contexts different": 10750, + "parts model": 39906, + "supervision information": 54083, + "specific label": 52095, + "years despite": 63056, + "perspectives experimental": 40779, + "similar domain": 51039, + "problems standard": 42731, + "utilize external": 61092, + "reducing accuracy": 45702, + "approach adapted": 3406, + "optimal choice": 38526, + "samples single": 48488, + "additional supervised": 1700, + "layer based": 29179, + "provided task": 44172, + "popular ones": 41177, + "various useful": 61416, + "filtering mechanism": 20811, + "scores 79": 48888, + "recently emergence": 45422, + "systems hand": 54516, + "recently pretrained": 45451, + "ranked 3rd": 44954, + "rank best": 44949, + "words makes": 62453, + "trained external": 57729, + "experiments unsupervised": 19550, + "rapid advancement": 44987, + "compared approach": 9379, + "applications end": 3202, + "reference set": 45746, + "correlation words": 11530, + "established metrics": 18357, + "accelerate research": 802, + "knowledge encoding": 27459, + "framework fully": 21525, + "metrics large": 33176, + "12 improvement": 107, + "models tackling": 35579, + "study improve": 53389, + "generative transformer": 22613, + "train embedding": 57581, + "ii joint": 24505, + "problem set": 42650, + "simple unified": 51222, + "12 respectively": 111, + "approach semi": 3684, + "representations include": 46687, + "results pipeline": 47765, + "art strong": 4414, + "generating corresponding": 22370, + "leverage user": 30298, + "supervision self": 54092, + "results detailed": 47590, + "gives improvement": 22807, + "study create": 53350, + "efficiently exploit": 16914, + "respectively achieves": 47358, + "transfer furthermore": 58365, + "provides little": 44211, + "problem annotating": 42503, + "sub fields": 53518, + "attracted growing": 4879, + "baselines evaluated": 6256, + "component generate": 9703, + "cleaned dataset": 8647, + "gpt radford": 22986, + "architecture long": 4062, + "framework guide": 21531, + "module framework": 35758, + "approach quite": 3663, + "powerful pretrained": 41443, + "considered existing": 10247, + "encoder jointly": 17519, + "grammatical inference": 23073, + "making progress": 31666, + "adopt contrastive": 1863, + "advent neural": 1960, + "models importance": 35106, + "improves inference": 25133, + "capture essential": 7668, + "context detect": 10613, + "approach strong": 3705, + "shows comparable": 50767, + "evaluations approach": 18751, + "style word": 53505, + "learning sample": 29856, + "serves effective": 50090, + "tasks separately": 55876, + "level contextualized": 30085, + "various learning": 61354, + "effectively capturing": 16727, + "tokens attention": 57322, + "explore current": 19694, + "minimal annotation": 33284, + "closely follow": 8703, + "examine model": 18866, + "input making": 26296, + "conventional training": 11016, + "relies word": 46269, + "exploit interactions": 19656, + "queries extract": 44652, + "results individual": 47681, + "extend study": 19833, + "2016 proposed": 258, + "model enhances": 33826, + "tasks utilizing": 55957, + "generate reasonable": 22234, + "encoder extract": 17516, + "sensitive quality": 49503, + "functions designed": 21770, + "ranks second": 44983, + "preserving content": 42121, + "aspects related": 4551, + "robustness issues": 48282, + "transfer propose": 58416, + "help select": 23588, + "propose document": 43356, + "length 512": 30024, + "showed approach": 50664, + "method rule": 32642, + "use metrics": 59949, + "tend learn": 56201, + "method effectiveness": 32474, + "related papers": 45922, + "architecture optimization": 4071, + "predicted results": 41670, + "unknown training": 59559, + "chosen based": 8351, + "performance speed": 40572, + "similar vector": 51075, + "illustrate utility": 24519, + "classification score": 8540, + "50 improvement": 421, + "combining textual": 9125, + "quantify impact": 44609, + "metrics rely": 33197, + "provides interpretable": 44208, + "systems analyze": 54430, + "scenario experimental": 48685, + "larger original": 29084, + "novel retrieval": 37910, + "greatly simplifies": 23239, + "original semantics": 38726, + "reference sets": 45747, + "bias paper": 7036, + "variables paper": 61229, + "level different": 30101, + "decoder paradigm": 13609, + "benefits jointly": 6582, + "task relations": 55330, + "learning select": 29863, + "make inference": 31576, + "special form": 52020, + "considered state": 10252, + "model assign": 33581, + "reconstruction task": 45584, + "learning received": 29833, + "generator trained": 22623, + "increase inference": 25416, + "task importantly": 55125, + "corpus composed": 11299, + "queries challenging": 44649, + "based bag": 5592, + "confidence estimates": 10112, + "importance developing": 24680, + "effectively alleviate": 16722, + "leverages text": 30317, + "corpus highly": 11355, + "corpus general": 11348, + "sampling negative": 48505, + "attention design": 4735, + "simply concatenating": 51250, + "build predictive": 7420, + "sets annotated": 50281, + "employ teacher": 17393, + "problem inconsistency": 42581, + "manner model": 31720, + "corpora improves": 11208, + "solely relies": 51643, + "tend overfit": 56204, + "generation introducing": 22479, + "methods extensive": 32858, + "learning create": 29575, + "distilbert model": 15564, + "regarding word": 45794, + "deep text": 13750, + "objective maximize": 38093, + "text open": 56680, + "framework implement": 21538, + "plms pre": 41020, + "nli based": 37449, + "existing words": 19172, + "fusion models": 21859, + "recent decade": 45299, + "level segment": 30200, + "models followed": 35041, + "data codes": 12215, + "raw textual": 45039, + "automatically aligning": 5139, + "phonetic semantic": 40828, + "domain important": 16082, + "work tried": 62848, + "domain experimental": 16063, + "audio samples": 4929, + "attention trained": 4838, + "framework demonstrates": 21489, + "related previous": 45927, + "score task": 48877, + "conditioning input": 10018, + "detecting implicit": 14449, + "form multi": 21328, + "underlying causes": 59263, + "propose refine": 43599, + "available proposed": 5350, + "range current": 44909, + "shows excellent": 50776, + "problems apply": 42694, + "adjacency matrix": 1839, + "strategy leverages": 52940, + "model checkpoint": 33653, + "loss computed": 31085, + "development text": 14708, + "domains datasets": 16244, + "results sub": 47861, + "baselines standard": 6301, + "simpler approach": 51229, + "module predict": 35767, + "metrics focus": 33167, + "new technologies": 37340, + "nodes based": 37589, + "systems contain": 54458, + "features graph": 20593, + "type different": 59052, + "generation furthermore": 22467, + "works adopt": 62875, + "components called": 9716, + "process output": 42813, + "increasing interests": 25453, + "sequential generation": 50042, + "finally validate": 20887, + "techniques particularly": 56118, + "experiment real": 19246, + "overall effectiveness": 39038, + "approach linguistic": 3591, + "model textbf": 34459, + "inter document": 26581, + "helps learn": 23610, + "set learning": 50184, + "community attention": 9259, + "methods release": 33012, + "obtained promising": 38219, + "model textit": 34460, + "proposes solution": 43943, + "handle wide": 23420, + "metrics trained": 33204, + "word utterance": 62333, + "approaches combined": 3784, + "experiments reported": 19511, + "studies achieved": 53242, + "iteratively improves": 27131, + "chinese benchmark": 8298, + "make errors": 31570, + "faster baseline": 20433, + "benefit pre": 6568, + "development technology": 14705, + "generate faithful": 22200, + "margin especially": 31820, + "context better": 10595, + "labeling demonstrate": 27783, + "capturing dependencies": 7732, + "self improvement": 49198, + "outputs human": 39016, + "perform exhaustive": 40101, + "methods difficult": 32825, + "end zero": 17728, + "level identification": 30131, + "detection word": 14540, + "length complexity": 30026, + "design efficient": 14276, + "tasks entailment": 55614, + "interpretable features": 26720, + "level train": 30224, + "improving entity": 25179, + "simple generic": 51175, + "indicating possible": 25543, + "dataset considering": 12859, + "baselines improved": 6270, + "leveraging graph": 30325, + "share insights": 50458, + "labeling accuracy": 27776, + "accuracy 65": 902, + "examples human": 18909, + "used resources": 60292, + "text summarization text": 56799, + "paper presents hybrid": 39475, + "introduces novel method": 26895, + "architecture widely used": 4100, + "novel approach learning": 37763, + "learning methods applied": 29731, + "paper explore novel": 39363, + "neural networks promising": 37063, + "learning widely used": 29942, + "new domains paper": 37177, + "performs competitively state": 40704, + "model obtain state": 34138, + "multi task neural": 36024, + "research paper presents": 47088, + "hierarchical structure model": 23693, + "showed proposed approach": 50671, + "different annotation schemes": 14837, + "prior works focus": 42428, + "problem artificial intelligence": 42508, + "74 f1 score": 500, + "systems usually rely": 54665, + "challenging research problem": 8138, + "model able extract": 33490, + "make use knowledge": 31609, + "characteristics natural language": 8241, + "level representation learning": 30193, + "novel approach task": 37765, + "does require human": 15973, + "recent years methods": 45388, + "neural networks provide": 37067, + "background knowledge paper": 5493, + "obtain strong results": 38196, + "problem proposing novel": 42637, + "compared baselines including": 9388, + "cost human annotation": 11585, + "explore deep learning": 19697, + "information different sources": 25814, + "neural generation model": 36957, + "architecture self attention": 4085, + "knowledge graphs paper": 27507, + "perspectives experimental results": 40780, + "multi choice questions": 35944, + "easily extended new": 16541, + "proposed framework compared": 43781, + "recently pretrained language": 45452, + "model learns generate": 34054, + "novel encoder decoder": 37813, + "convolution neural networks": 11098, + "language models multi": 28286, + "propose multi agent": 43474, + "explore transfer learning": 19745, + "widely used evaluation": 62015, + "achieves significant gains": 1366, + "human evaluation compared": 24144, + "tuning task specific": 58966, + "pre existing knowledge": 41503, + "generation models based": 22496, + "state art strong": 52672, + "art attention based": 4217, + "attracted growing attention": 4880, + "extensive experiments demonstrated": 19886, + "various neural network": 61371, + "gpt radford et": 22987, + "data semi supervised": 12639, + "models jointly learn": 35151, + "training data achieves": 57970, + "shows comparable performance": 50768, + "does require fine": 15972, + "models successfully used": 35555, + "existing datasets limited": 19055, + "using naive bayes": 60825, + "self supervised neural": 49213, + "information recent studies": 26044, + "experimental results presented": 19304, + "paper propose graph": 39514, + "comparable performance previous": 9303, + "training data given": 57999, + "task adaptive pretraining": 54883, + "knowledge distillation technique": 27444, + "method rule based": 32643, + "automatic evaluation methods": 5085, + "correlation human evaluation": 11521, + "scenario experimental results": 48686, + "demonstrating effectiveness proposed": 14052, + "demonstrate effectiveness using": 13904, + "construct new datasets": 10397, + "encoder decoder paradigm": 17506, + "test sets respectively": 56378, + "does require domain": 15971, + "datasets perform poorly": 13369, + "considered state art": 10253, + "paper proposes effective": 39545, + "corpus training data": 11449, + "self training approach": 49223, + "based bag words": 5593, + "small set labeled": 51500, + "insights future research": 26389, + "model proposed approach": 34250, + "employ teacher student": 17394, + "plms pre trained": 41021, + "previous sota models": 42281, + "mutual information based": 36345, + "relatively small scale": 46132, + "results framework significantly": 47640, + "level segment level": 30201, + "contrastive learning experimental": 10902, + "models new tasks": 35259, + "art supervised approaches": 4417, + "domain experimental results": 16064, + "collect large number": 8946, + "intelligence ai systems": 26536, + "different experimental settings": 14926, + "utilize external knowledge": 61093, + "publicly available propose": 44352, + "achieves superior results": 1386, + "obtained promising results": 38220, + "better results fine": 6959, + "relational graph convolutional": 46008, + "large margin especially": 28905, + "end zero shot": 17729, + "method outperforms standard": 32604, + "method fine tunes": 32510, + "accuracy compared existing": 949, + "based knowledge distillation": 5797, + "semisupervised": 49475, + "nt": 37972, + "mic": 33216, + "revolve": 48063, + "reshaping": 47181, + "angular": 2861, + "mixes": 33413, + "representation feature": 46516, + "research persian": 47093, + "words included": 62434, + "performance algorithms": 40190, + "multimedia content": 36140, + "processing makes": 42888, + "drastically reduced": 16394, + "multilayer networks": 36058, + "corpus previous": 11407, + "exploiting semantic": 19675, + "improves scores": 25158, + "consider multi": 10214, + "proposed lstm": 43801, + "models hmm": 35091, + "explore parameter": 19723, + "task computationally": 54965, + "change data": 8168, + "component results": 9713, + "set patterns": 50215, + "scale state": 48626, + "filling gap": 20802, + "techniques despite": 56076, + "input distribution": 26267, + "require supervision": 46891, + "representations auxiliary": 46621, + "systems great": 54515, + "augmenting neural": 4988, + "typical approaches": 59132, + "architectures present": 4120, + "baseline achieve": 6151, + "units grus": 59532, + "progress achieved": 43091, + "solution improve": 51654, + "recognition key": 45509, + "model negative": 34122, + "shown natural": 50729, + "trained study": 57885, + "work comparing": 62604, + "task fashion": 55079, + "capability paper": 7612, + "lstm using": 31287, + "obtain significantly": 38191, + "segments text": 49096, + "framework unsupervised": 21618, + "evaluated test": 18551, + "embeddings state": 17220, + "cnn trained": 8776, + "set order": 50208, + "evaluated recent": 18546, + "knowledge best": 27417, + "task architectures": 54911, + "semantic nature": 49302, + "retrieved based": 47982, + "sentences analysis": 49679, + "large publicly": 28947, + "samples produced": 48486, + "lack supervision": 27919, + "discuss methods": 15475, + "area recent": 4147, + "outperform lstm": 38801, + "complex contextual": 9619, + "resources evaluation": 47301, + "investigated approaches": 26998, + "efforts shown": 16943, + "support online": 54122, + "progress generating": 43098, + "production use": 43053, + "automatically finding": 5173, + "better single": 6966, + "simplify model": 51242, + "motivated development": 35864, + "vector quantized": 61460, + "word network": 62251, + "apply domain": 3327, + "representation quality": 46573, + "step ranking": 52826, + "errors systems": 18250, + "embeddings additional": 17080, + "usually evaluated": 61048, + "attempt mitigate": 4690, + "standard baseline": 52468, + "specifically incorporate": 52208, + "levels accuracy": 30237, + "public english": 44319, + "robustness learned": 48283, + "end achieve": 17611, + "expensive limited": 19211, + "method utilize": 32699, + "independent specific": 25504, + "embedding paper": 17051, + "distillation techniques": 15580, + "different design": 14897, + "data difficulty": 12286, + "big gap": 7090, + "head attentions": 23497, + "networks bi": 36834, + "generic architecture": 22626, + "problem utilizing": 42685, + "long span": 31029, + "degrade model": 13805, + "powerful representations": 41445, + "catalyze research": 7831, + "specific adaptation": 52041, + "novel modeling": 37875, + "input bert": 26258, + "settings outperforming": 50386, + "demonstrate outperforms": 13953, + "effectiveness real": 16810, + "task primary": 55294, + "text followed": 56582, + "earlier studies": 16507, + "improves increasing": 25132, + "using approximate": 60563, + "utilize text": 61103, + "structured machine": 53163, + "trained pseudo": 57845, + "languages corpus": 28624, + "better terms": 6977, + "words short": 62512, + "predicted language": 41668, + "datasets special": 13440, + "segment length": 49074, + "developing text": 14664, + "dataset 12": 12791, + "especially difficult": 18272, + "new record": 37298, + "refine output": 45765, + "provides intuitive": 44209, + "argue performance": 4166, + "including character": 25242, + "identify models": 24431, + "language far": 28068, + "12 14": 104, + "evaluate context": 18447, + "experiments adding": 19346, + "mixup method": 33426, + "larger domain": 29076, + "time word": 57237, + "avoid information": 5432, + "needed improve": 36601, + "dependent text": 14152, + "works focusing": 62892, + "problem words": 42688, + "possible word": 41342, + "applications medical": 3221, + "discuss importance": 15470, + "advances unsupervised": 1931, + "used arabic": 60093, + "guide network": 23340, + "essential tool": 18338, + "relies large": 46267, + "mechanism reduce": 32140, + "based relevance": 5991, + "baselines bert": 6241, + "learning underlying": 29923, + "terms proposed": 56311, + "data bias": 12190, + "algorithmic bias": 2315, + "accelerate development": 800, + "decoding constraints": 13628, + "corpora despite": 11191, + "pretraining domain": 42200, + "study intrinsic": 53394, + "training recently": 58224, + "directions improving": 15296, + "case task": 7802, + "size representations": 51397, + "results just": 47685, + "predictions compared": 41756, + "popularity research": 41204, + "language constraints": 28004, + "supervised transformer": 54063, + "15 respectively": 151, + "trained experimental": 57727, + "modify input": 35737, + "performed manually": 40664, + "unavailable low": 59214, + "datasets set": 13418, + "method attains": 32388, + "evaluation despite": 18609, + "solution data": 51653, + "words strong": 62522, + "cnn layer": 8768, + "relying human": 46308, + "applying various": 3382, + "progress present": 43112, + "successful text": 53739, + "similarity loss": 51100, + "languages standard": 28795, + "automatically mining": 5191, + "domain lack": 16097, + "aspect multi": 4531, + "reduces memory": 45692, + "gaining momentum": 21928, + "usually contains": 61043, + "evaluations benchmark": 18752, + "directly encode": 15312, + "scheme model": 48729, + "overall training": 39052, + "incorporating large": 25389, + "works data": 62881, + "including arabic": 25239, + "works suffer": 62910, + "characters common": 8253, + "models relying": 35432, + "performance applied": 40194, + "effectiveness technique": 16817, + "propagation errors": 43244, + "linear non linear": 30663, + "language models domain": 28246, + "models achieve comparable": 34668, + "prediction task predicting": 41743, + "good performance task": 22937, + "combination data augmentation": 9038, + "neural networks improve": 37052, + "different writing systems": 15137, + "relatively large corpus": 46119, + "learn long term": 29393, + "recurrent units grus": 45631, + "language models code": 28239, + "task model achieves": 55216, + "multi task fashion": 36019, + "use semantic information": 60007, + "open source platform": 38454, + "embeddings state art": 17221, + "supervised neural network": 54026, + "paper propose semi": 39533, + "large publicly available": 28948, + "low resource task": 31194, + "models findings suggest": 35027, + "propose knowledge distillation": 43429, + "layers deep neural": 29221, + "evaluation proposed approach": 18685, + "consistently significantly outperforms": 10312, + "input text output": 26346, + "train encoder decoder": 57584, + "task learning improve": 55173, + "different design choices": 14898, + "text based features": 56454, + "make model robust": 31582, + "address data scarcity": 1753, + "multi head attentions": 35965, + "neural networks bi": 37038, + "practical use cases": 41477, + "web search engines": 61895, + "model does need": 33785, + "supervised neural models": 54025, + "manually designed features": 31776, + "model performance domain": 34189, + "data sets english": 12649, + "contain rich information": 10470, + "encoder decoder networks": 17504, + "recent years end": 45384, + "cross domain evaluation": 11815, + "multi task architectures": 36017, + "challenging task recent": 8153, + "recent advances unsupervised": 45290, + "performance different datasets": 40293, + "available publicly https": 5354, + "language processing works": 28442, + "text based approach": 56453, + "based representation learning": 5994, + "methods heavily rely": 32885, + "challenging task previous": 8152, + "model experiments proposed": 33853, + "representation learning specifically": 46545, + "particularly challenging task": 39878, + "unavailable low resource": 59215, + "opens new avenues": 38483, + "14 f1 score": 140, + "pre trained unsupervised": 41562, + "accuracy f1 scores": 977, + "better human judgment": 6899, + "space extensive experiments": 51864, + "propose compare methods": 43325, + "inhibits": 26212, + "discreteness": 15431, + "sas": 48520, + "modeling existing": 34572, + "memory store": 32283, + "features long": 20619, + "learning useful": 29929, + "method similar": 32659, + "different candidate": 14856, + "gain improvements": 21908, + "fixed context": 21074, + "recent baselines": 45296, + "analyze current": 2808, + "derived corpus": 14200, + "strong bert": 53019, + "tree neural": 58753, + "matching given": 31912, + "random samples": 44886, + "large networks": 28915, + "rise new": 48154, + "information inter": 25929, + "method self": 32646, + "robust overfitting": 48261, + "tasks systematic": 55922, + "empirically effective": 17360, + "classifiers performance": 8623, + "base task": 5552, + "processing important": 42874, + "various augmentation": 61305, + "achieved far": 1233, + "terms size": 56315, + "brings consistent": 7340, + "space leverage": 51873, + "model lightweight": 34062, + "effectively combines": 16729, + "utilizing bert": 61119, + "learning solutions": 29885, + "network components": 36724, + "text achieved": 56423, + "various advanced": 61298, + "comprehensive studies": 9799, + "50 training": 422, + "networks research": 36906, + "method general": 32514, + "mechanism ensure": 32114, + "evaluations multiple": 18763, + "encoder effectively": 17510, + "models affect": 34700, + "solution called": 51652, + "parameter inference": 39670, + "crucial success": 11912, + "different graph": 14948, + "bert domain": 6645, + "techniques knowledge": 56102, + "available end": 5287, + "used conduct": 60124, + "lexical structural": 30387, + "covers diverse": 11665, + "attempt automatically": 4681, + "graph extensive": 23135, + "question remains": 44749, + "mechanism does": 32108, + "methods categories": 32779, + "pipeline called": 40895, + "scale synthetic": 48629, + "efficient interpretable": 16879, + "19 datasets": 184, + "assigning labels": 4604, + "sequences time": 50027, + "unfortunately models": 59453, + "streaming data": 52963, + "approaches real": 3906, + "contrastive framework": 10897, + "expert models": 19584, + "successfully perform": 53748, + "framework knowledge": 21552, + "attempt apply": 4680, + "weak supervised": 61849, + "perturbed data": 40795, + "scenarios study": 48709, + "case propose": 7795, + "popular english": 41163, + "examples extensive": 18901, + "extract sentence": 19992, + "english finally": 17803, + "task findings": 55085, + "superior baselines": 53931, + "decomposition method": 13659, + "large transformers": 29036, + "learning contrastive": 29572, + "does come": 15937, + "augmentation introduce": 4958, + "especially suited": 18304, + "dataset encourage": 12908, + "challenges evaluating": 8043, + "content original": 10543, + "discussion future": 15493, + "version paper": 61555, + "networks widely used": 36924, + "deep multi task": 13730, + "language processing important": 28409, + "recently achieved great": 45402, + "method achieve comparable": 32357, + "approach compares favorably": 3458, + "results compared existing": 47547, + "nature language processing": 36481, + "domain specific bert": 16167, + "address problem develop": 1784, + "previous attention based": 42244, + "furthermore existing methods": 21820, + "approach able outperform": 3389, + "downstream tasks rely": 16365, + "high performance nlp": 23759, + "range text classification": 44940, + "prompt based method": 43200, + "graph extensive experiments": 23136, + "train task specific": 57645, + "demonstrates strong performance": 14045, + "training transformer language": 58307, + "approaches real world": 3907, + "methods code publicly": 32784, + "significant room improvement": 50924, + "model recent work": 34281, + "domain specific downstream": 16173, + "recent work natural": 45370, + "paper https github": 39390, + "models perform competitively": 35312, + "model best knowledge": 33621, + "inspired observation propose": 26409, + "examples extensive experiments": 18902, + "work presents simple": 62765, + "learning contrastive learning": 29573, + "empirical results suggest": 17347, + "significantly improves zero": 50979, + "pursues": 44420, + "conveyed text": 11085, + "approach case": 3444, + "architecture modeling": 4064, + "recently word": 45475, + "set document": 50137, + "identifies important": 24409, + "programming based": 43084, + "focus main": 21179, + "document discourse": 15785, + "significant problems": 50914, + "huge volumes": 24080, + "embeddings graph": 17144, + "lexical choices": 30357, + "learning encourage": 29621, + "architecture encoder": 4047, + "systems involving": 54536, + "use formal": 59894, + "develop open": 14610, + "portions text": 41223, + "generation focuses": 22464, + "iterative algorithm": 27122, + "document experimental": 15792, + "meaningful results": 32026, + "seek better": 49049, + "evaluations verify": 18772, + "faithful original": 20366, + "applies attention": 3317, + "various document": 61327, + "text guide": 56612, + "datasets superiority": 13448, + "outperformed simple": 38841, + "target pairs": 54835, + "contains semantic": 10504, + "domains current": 16242, + "metrics capture": 33147, + "generation lack": 22481, + "created novel": 11731, + "produce domain": 42980, + "novel global": 37832, + "importance modeling": 24685, + "topics provide": 57460, + "memory using": 32287, + "verify robustness": 61544, + "sentence labels": 49574, + "based abstract": 5554, + "novel weakly": 37954, + "seeks generate": 49057, + "20 human": 227, + "scores multiple": 48910, + "single short": 51337, + "challenge identifying": 7985, + "genetic algorithms": 22638, + "keywords topics": 27355, + "study complex": 53343, + "constraints control": 10373, + "based denoising": 5675, + "edges nodes": 16588, + "using case": 60599, + "512 tokens": 434, + "insights propose": 26393, + "evaluation future": 18622, + "metric text": 33126, + "languages czech": 28631, + "level latent": 30148, + "documents evaluate": 15875, + "establishing strong": 18367, + "models extracted": 35008, + "results evaluating": 47619, + "approach delivers": 3479, + "dataset related": 13058, + "closer natural": 8712, + "propose hierarchy": 43410, + "use structural": 60030, + "specifically framework": 52205, + "general news": 22075, + "baseline scores": 6208, + "documents sentences": 15912, + "review existing work": 48033, + "paper present unsupervised": 39465, + "proposed framework effective": 43782, + "propose latent variable": 43435, + "work present neural": 62755, + "apply deep learning": 3325, + "learning methods task": 29736, + "novel method automatically": 37864, + "significantly improve quality": 50967, + "lack suitable datasets": 27917, + "propose novel paradigm": 43554, + "world applications work": 62930, + "trained deep learning": 57707, + "source target pairs": 51804, + "correlates better human": 11513, + "set experiments demonstrate": 50154, + "reinforcement learning models": 45875, + "generating training data": 22404, + "specifically design new": 52192, + "previous studies usually": 42292, + "results model better": 47723, + "domain introduce new": 16088, + "novel weakly supervised": 37955, + "approach outperforms standard": 3629, + "propose graph based": 43406, + "covid 19 paper": 11670, + "language model allowing": 28154, + "effectively improves performance": 16742, + "using case study": 60600, + "resources machine translation": 47316, + "datasets large scale": 13313, + "sequence neural networks": 49964, + "machine generated human": 31300, + "sentence level training": 49595, + "limitation propose new": 30539, + "recent works focus": 45378, + "generation task using": 22561, + "experiments public benchmark": 19501, + "perform extensive empirical": 40105, + "annotated data work": 2888, + "prosperity": 43962, + "981": 573, + "executor": 18988, + "containment": 10487, + "dataset presents": 13034, + "features contrast": 20547, + "contribution task": 10949, + "technique creating": 56031, + "stop word": 52865, + "information encode": 25829, + "text identifying": 56619, + "41 f1": 402, + "automated techniques": 5061, + "events entities": 18792, + "determining given": 14563, + "content representations": 10553, + "generally focus": 22166, + "provide sentence": 44127, + "present significant": 42013, + "focus finding": 21164, + "networks gnn": 36862, + "worse human": 62972, + "popularity deep": 41201, + "design specific": 14301, + "proposed select": 43889, + "model ranks": 34271, + "expensive task": 19219, + "score indicating": 48856, + "task 2019": 54869, + "learn sparse": 29426, + "aim detect": 2143, + "respect training": 47352, + "work adversarial": 62561, + "implement baseline": 24632, + "used bag": 60099, + "representation making": 46548, + "complex set": 9661, + "offers flexible": 38302, + "single type": 51353, + "large majority": 28901, + "using distantly": 60661, + "art level": 4276, + "wikipedia provide": 62054, + "set ranking": 50231, + "2019 dataset": 282, + "problem steps": 42668, + "apply fine": 3328, + "provides flexibility": 44199, + "relations use": 46060, + "solutions propose": 51671, + "seven existing": 50419, + "learn subtle": 29431, + "datasets short": 13420, + "based classical": 5617, + "related performance": 45923, + "account present": 879, + "task little": 55191, + "practice existing": 41484, + "understanding cognitive": 59330, + "pairs derived": 39179, + "tasks relevant": 55848, + "unordered set": 59623, + "information space": 26098, + "dataset hindi": 12950, + "variants outperform": 61237, + "task obtained": 55247, + "f1 score 74": 20205, + "using general purpose": 60703, + "obtains competitive performance": 38246, + "paper focus extracting": 39378, + "aware word embeddings": 5478, + "neural networks gnn": 37049, + "popularity deep learning": 41202, + "attracted increasing research": 4883, + "increasing research attention": 25462, + "corpora used train": 11255, + "advantage large scale": 1941, + "extensive experiments publicly": 19899, + "shared task 2019": 50491, + "new graph based": 37216, + "layer multi head": 29192, + "learning model trained": 29748, + "state art level": 52625, + "propose model named": 43466, + "large number studies": 28926, + "experimental results pre": 19303, + "studies mainly focused": 53281, + "f1 score 63": 20199, + "supervised unsupervised settings": 54069, + "improvement compared baselines": 24999, + "fun": 21750, + "mining important": 33314, + "collected web": 8971, + "results easily": 47601, + "labeled tweets": 27769, + "terms order": 56303, + "words tweet": 62537, + "discuss benefits": 15461, + "additionally paper": 1727, + "classification sub": 8559, + "spanish respectively": 51947, + "analyses dataset": 2592, + "idf model": 24478, + "information events": 25843, + "regression support": 45819, + "robustness terms": 48296, + "sets tasks": 50308, + "capture explicit": 7669, + "language arabic": 27966, + "solve text": 51693, + "mechanism text": 32144, + "detecting words": 14452, + "features considering": 20545, + "employ attention": 17374, + "label assignment": 27690, + "score identifying": 48853, + "research identify": 47050, + "information written": 26166, + "leverages contextual": 30302, + "shallow neural": 50442, + "set label": 50176, + "involves extracting": 27018, + "words comparison": 62383, + "sentence methods": 49601, + "sets previous": 50302, + "helpful low": 23597, + "language studies": 28509, + "extraction fundamental": 20069, + "mechanism task": 32143, + "day day life": 13502, + "terms classification accuracy": 56276, + "networks bi lstm": 36835, + "tf idf model": 56993, + "bi lstm layer": 7014, + "regression support vector": 45820, + "analysis demonstrates model": 2647, + "recent years online": 45391, + "art methods cross": 4285, + "helpful low resource": 23598, + "task order improve": 55250, + "761": 508, + "undetected": 59435, + "tagger trained": 54731, + "manually extracted": 31779, + "images difficult": 24553, + "methods representing": 33016, + "identification use": 24401, + "data automated": 12170, + "study date": 53354, + "completion methods": 9612, + "performance predictive": 40492, + "description language": 14244, + "achieve decent": 1131, + "learned textual": 29485, + "graphs use": 23192, + "lack unified": 27926, + "decisions model": 13574, + "trained german": 57742, + "use encoder decoder": 59875, + "results paper proposes": 47760, + "results reported literature": 47807, + "art methods benchmark": 4284, + "model generalization capability": 33923, + "word based context": 62118, + "illustrate effectiveness proposed": 24516, + "art methods use": 4290, + "raises important question": 44860, + "suffers problem": 53793, + "meanings different": 32033, + "supervised human": 53989, + "embedding represent": 17056, + "approach largely": 3583, + "diversity generation": 15736, + "includes text": 25236, + "task alignment": 54898, + "differ languages": 14814, + "decade witnessed": 13539, + "workshop shared task": 62923, + "reducing training time": 45715, + "used previous works": 60273, + "propose novel structure": 43565, + "language parallel data": 28373, + "term loss": 56242, + "help existing": 23560, + "text respect": 56744, + "based manner": 5830, + "present shared": 42011, + "use study": 60032, + "making dataset": 31649, + "proposed approach generate": 43727, + "present shared task": 42012, + "tackle problem generating": 54709, + "time novel": 57185, + "twitter using": 59043, + "corpus twitter": 11451, + "posts tweets": 41374, + "attracted researchers": 4887, + "years social": 63076, + "facebook youtube": 20248, + "public safety": 44327, + "report best": 46429, + "typically represent": 59153, + "research effective": 47024, + "language framework": 28079, + "thorough study": 57065, + "text proposed model": 56718, + "years social media": 63077, + "accurate natural language": 1083, + "art results languages": 4378, + "task best performing": 54938, + "flags": 21091, + "550": 445, + "develop approaches": 14573, + "compare baseline": 9331, + "unsupervised nature": 59716, + "extracted twitter": 20024, + "complexity modeling": 9684, + "level develop": 30100, + "demonstrate knowledge": 13926, + "limited specific": 30618, + "ensemble combining": 17973, + "studies zero": 53314, + "particular dataset": 39839, + "dataset key": 12972, + "illustrate model": 24518, + "detection important task": 14493, + "use dataset train": 59861, + "obtain good performance": 38174, + "features usually": 20693, + "significant increases": 50892, + "features crucial": 20549, + "versions english": 61560, + "content makes": 10536, + "study exploring": 53377, + "work addition": 62554, + "obtains competitive results": 38247, + "suffer high computational": 53766, + "based benchmark datasets": 5598, + "level level": 30149, + "scores introduce": 48905, + "recasting": 45251, + "important social": 24772, + "grained aspects": 23024, + "fine grained aspects": 20929, + "textual datasets": 56958, + "used baselines": 60101, + "everyday lives": 18805, + "task strong": 55412, + "showing results": 50688, + "task tasks": 55430, + "applied study": 3297, + "level given": 30125, + "words change": 62377, + "approaches topic": 3942, + "sentences differ": 49704, + "individual text": 25583, + "components proposed": 9726, + "providing feedback": 44243, + "art survey": 4419, + "state art survey": 52675, + "create hybrid": 11702, + "network models including": 36768, + "bi encoder model": 7006, + "paraphrase data": 39738, + "combining visual": 9126 + } + } +} \ No newline at end of file