{ "ctfidf_model": { "bm25_weighting": false, "reduce_frequent_words": false }, "vectorizer_model": { "params": { "analyzer": "word", "binary": false, "decode_error": "strict", "encoding": "utf-8", "input": "content", "lowercase": true, "max_df": 1.0, "max_features": null, "min_df": 5, "ngram_range": [ 1, 3 ], "stop_words": "english", "strip_accents": null, "token_pattern": "(?u)\\b\\w\\w+\\b", "vocabulary": null }, "vocab": { "intelligent": 26541, "input": 26252, "methods": 32722, "im": 24527, "essential": 18320, "making": 31643, "text": 56419, "entries": 18156, "east": 16554, "asian": 4513, "scripts": 48951, "application": 3159, "languages": 28588, "fully": 21711, "explored": 19755, "paper": 39247, "discusses": 15488, "tools": 57374, "contribute": 10926, "development": 14665, "computer": 9885, "processing": 42847, "propose": 43280, "design": 14260, "philosophy": 40817, "regards": 45797, "service": 50092, "platform": 40948, "treats": 58738, "study": 53316, "cross": 11806, "disciplinary": 15373, "subject": 53550, "perspectives": 40778, "software": 51634, "engineering": 17765, "human": 24086, "interaction": 26595, "hci": 23493, "natural": 36408, "language": 27947, "nlp": 37459, "discuss": 15457, "indicate": 25523, "number": 37980, "possible": 41313, "future": 21863, "research": 46977, "directions": 15288, "includes": 25229, "reflection": 45779, "role": 48301, "networks": 36826, "english": 17772, "acquisition": 1447, "collection": 8978, "practical": 41455, "criteria": 11772, "annotate": 2869, "free": 21637, "speech": 52250, "corpora": 11170, "children": 8293, "utterances": 61143, "theoretical": 57017, "level": 30054, "main": 31421, "claim": 8380, "syntactic": 54291, "interpreted": 26738, "outcome": 38765, "use": 59813, "machinery": 31395, "intrinsic": 26767, "features": 20514, "accessible": 835, "directly": 15303, "known": 27653, "network": 36691, "properties": 43257, "global": 22820, "patterns": 39964, "view": 61595, "power": 41423, "organization": 38684, "underlying": 59261, "grammar": 23061, "taking": 54786, "look": 31063, "issues": 27083, "examines": 18873, "build": 7384, "net": 36688, "projection": 43138, "relations": 46014, "recall": 45238, "opposed": 38515, "adult": 1877, "grammars": 23066, "early": 16510, "child": 8288, "defined": 13781, "concept": 9920, "structure": 53087, "overcome": 39056, "difficulty": 15197, "develop": 14569, "set": 50098, "systematic": 54389, "assuming": 4637, "constituency": 10346, "hierarchy": 23702, "based": 5553, "lexico": 30402, "thematic": 57004, "end": 17610, "obtain": 38158, "annotation": 2933, "enables": 17435, "perform": 40063, "statistics": 52776, "size": 51376, "structures": 53180, "ii": 24503, "standard": 52460, "measures": 32074, "complexity": 9674, "provide": 44001, "detailed": 14407, "example": 18875, "general": 22042, "model": 33480, "lexical": 30351, "information": 25747, "conforms": 10147, "abstract": 755, "reflects": 45782, "typical": 59130, "dictionary": 14803, "entry": 18167, "mapped": 31799, "formed": 21370, "xml": 63034, "document": 15765, "transformation": 58442, "used": 60078, "implement": 24631, "semantics": 49398, "enable": 17419, "extraction": 20042, "manipulation": 31711, "format": 21365, "hypothesized": 24353, "approach": 3385, "form": 21312, "solution": 51650, "framework": 21445, "understanding": 59320, "reasoning": 45181, "intelligence": 26534, "combines": 9090, "discourse": 15384, "powerful": 41432, "representation": 46487, "formalism": 21355, "capable": 7614, "exploiting": 19672, "ontological": 38396, "advanced": 1885, "solve": 51675, "following": 21261, "problems": 42692, "compromising": 9823, "practicality": 41481, "factors": 20304, "restriction": 47425, "nature": 36476, "question": 44684, "response": 47391, "limitation": 30534, "scale": 48550, "domains": 16231, "real": 45097, "life": 30436, "describes": 14217, "experiments": 19341, "learning": 29497, "dutch": 16476, "rules": 48389, "using": 60544, "inductive": 25609, "logic": 30978, "programming": 43082, "machine": 31297, "discipline": 15374, "logical": 30982, "operators": 38497, "different": 14832, "ways": 61839, "approaching": 3958, "problem": 42495, "experimented": 19336, "compared": 9378, "related": 45886, "work": 62551, "task": 54865, "results": 47479, "direct": 15250, "correspondence": 11542, "quality": 44486, "background": 5490, "knowledge": 27386, "constructed": 10405, "theory": 57035, "demonstrating": 14050, "ability": 591, "ilp": 24526, "good": 22924, "advantage": 1937, "prior": 42392, "domain": 15992, "available": 5258, "outlined": 38775, "range": 44906, "deep": 13681, "make": 31538, "morphological": 35838, "resources": 47291, "word": 62108, "similarity": 51081, "bootstrap": 7265, "seed": 49042, "lexicon": 30407, "deployed": 14170, "items": 27115, "precision": 41612, "shown": 50692, "strengths": 52975, "weaknesses": 61870, "classes": 8415, "particular": 39830, "focus": 21142, "relative": 46088, "accessibility": 834, "resource": 47209, "types": 59076, "predicted": 41664, "associated": 4620, "applications": 3183, "examine": 18860, "evolutionary": 18834, "naming": 36378, "game": 21949, "communicating": 9244, "agents": 2061, "equipped": 18196, "selected": 49115, "coupling": 11634, "biological": 7168, "linguistic": 30744, "ingredients": 26198, "transition": 58536, "small": 51461, "change": 8167, "control": 10960, "parameter": 39664, "poorly": 41148, "group": 23270, "linguistically": 30813, "transforms": 58535, "perfectly": 40062, "large": 28829, "abilities": 589, "kept": 27286, "fixed": 21073, "appears": 3146, "continuous": 10840, "genetic": 22636, "proceeds": 42752, "effect": 16609, "initially": 26230, "learn": 29342, "creates": 11735, "niche": 37439, "pressure": 42136, "increase": 25402, "suggests": 53845, "cultural": 11935, "processes": 42844, "intensive": 26558, "took": 57353, "place": 40926, "performance": 40164, "species": 52039, "experienced": 19227, "triggered": 58794, "rapid": 44985, "expansion": 19188, "civilization": 8377, "zipf": 63192, "law": 29172, "states": 52719, "words": 62358, "ranked": 44951, "order": 38584, "decreasing": 13672, "frequency": 21668, "texts": 56855, "inversely": 26931, "proportional": 43276, "rank": 44947, "robust": 48236, "experimental": 19257, "observation": 38121, "date": 13492, "satisfactory": 48523, "explanation": 19603, "suggest": 53812, "arise": 4183, "evolution": 18833, "dominated": 16310, "meanings": 32032, "competition": 9533, "synonyms": 54287, "retrieval": 47937, "term": 56231, "relevance": 46189, "taken": 54772, "mean": 31986, "formal": 21343, "conformity": 10146, "given": 22721, "user": 60403, "query": 44660, "rule": 48377, "documents": 15852, "submitted": 53582, "certain": 7933, "perceived": 40047, "selection": 49131, "formally": 21363, "solving": 51700, "supplemented": 54108, "procedure": 42741, "relevant": 46198, "introduce": 26775, "quantitative": 44614, "measure": 32044, "single": 51282, "exists": 19180, "determination": 14552, "shall": 50437, "consider": 10206, "simplest": 51232, "opinion": 38498, "proposed": 43709, "does": 15932, "restrictions": 47426, "applied": 3262, "data": 12100, "mining": 33309, "allows": 2450, "exploration": 19678, "sequences": 50020, "phenomena": 40809, "usually": 61033, "tends": 56213, "isolated": 27050, "relation": 45963, "offers": 38298, "invaluable": 26915, "analyses": 2588, "sentences": 49675, "dialogues": 14797, "report": 46423, "attempt": 4677, "inspecting": 26397, "verbs": 61520, "french": 21657, "accounts": 885, "road": 48210, "analysis": 2609, "comes": 9130, "original": 38703, "unsupervised": 59678, "training": 57920, "allowing": 2443, "discovery": 15414, "sequential": 50035, "analyzer": 2837, "appearing": 3145, "provided": 44157, "classification": 8426, "links": 30837, "successive": 53752, "distinct": 15588, "clusters": 8750, "segmentation": 49078, "interpretation": 26733, "applying": 3357, "statistical": 52733, "independent": 25493, "semantic": 49230, "annotations": 2982, "despite": 14354, "importance": 24676, "summarizing": 53911, "evolving": 18841, "events": 18790, "received": 45253, "attention": 4706, "researchers": 47148, "field": 20749, "multi": 35936, "previous": 42236, "et": 18392, "al": 2228, "2007": 244, "presented": 42057, "methodology": 32715, "automatic": 5066, "summarization": 53875, "emitted": 17278, "multiple": 36164, "sources": 51826, "event": 18778, "heart": 23525, "lies": 30434, "identification": 24383, "similarities": 51078, "differences": 14820, "various": 61297, "axes": 5481, "synchronic": 54276, "diachronic": 14733, "achieved": 1213, "introduction": 26903, "notion": 37733, "connect": 10172, "messages": 32323, "resulting": 47460, "graph": 23093, "grid": 23247, "creation": 11747, "completes": 9609, "planning": 40944, "phase": 40804, "nlg": 37444, "architecture": 4019, "case": 7787, "contained": 10478, "exceeding": 18947, "required": 46897, "compression": 9812, "rate": 45012, "initial": 26213, "thoughts": 57075, "probabilistic": 42453, "content": 10511, "stage": 52426, "tries": 58792, "alleviate": 2398, "present": 41839, "automated": 5034, "method": 32352, "origin": 38702, "non": 37632, "native": 36399, "speakers": 52003, "identified": 24403, "listener": 30844, "detection": 14453, "nationality": 36397, "existence": 19019, "phoneme": 40820, "allow": 2433, "new": 37123, "discriminative": 15442, "phonemes": 40822, "database": 12782, "construct": 10381, "classifier": 8589, "phone": 40819, "significant": 50845, "result": 47431, "developed": 14623, "correct": 11464, "96": 569, "error": 18210, "reduction": 45716, "tested": 56393, "techniques": 56052, "2001": 237, "great": 23198, "linguist": 30743, "pioneer": 40887, "article": 4446, "written": 62994, "memory": 32239, "characteristics": 8233, "behaviour": 6401, "parallel": 39641, "romance": 48331, "portuguese": 41224, "examples": 18885, "port": 41214, "fr": 21426, "meaning": 31999, "definition": 13792, "class": 8390, "complement": 9583, "object": 38080, "nominal": 37631, "distribution": 15632, "limited": 30560, "nouns": 37744, "head": 23494, "restricted": 47421, "noun": 37739, "verbal": 61512, "idioms": 24482, "la": 27686, "excluded": 18973, "constructions": 10431, "reductions": 45725, "complex": 9615, "com": 9003, "tune": 58853, "instrument": 26487, "industry": 25620, "comprises": 9818, "components": 9715, "implementing": 24650, "fundamental": 21774, "operations": 38494, "lexicons": 30415, "exploitation": 19670, "management": 31690, "structured": 53151, "formats": 21367, "compact": 9279, "readable": 45070, "binary": 7142, "necessary": 36528, "included": 25227, "combining": 9107, "approaches": 3750, "manually": 31754, "originating": 38750, "substantial": 53615, "coverage": 11650, "distributed": 15621, "lr": 31232, "license": 30428, "speaking": 52009, "achieving": 1388, "proficiency": 43066, "highly": 23877, "process": 42753, "requires": 46910, "kinds": 27369, "skills": 51415, "like": 30460, "connection": 10180, "communicative": 9255, "goals": 22907, "intentions": 26572, "usual": 61032, "starting": 52569, "point": 41044, "help": 23547, "learner": 29494, "acquire": 1440, "enhanced": 17929, "electronic": 16966, "version": 61550, "age": 2045, "old": 38326, "pattern": 39958, "henceforth": 23613, "regarded": 45791, "partially": 39807, "lack": 27869, "grounding": 23264, "context": 10578, "shortcomings": 50578, "believe": 6409, "regard": 45790, "basic": 6326, "course": 11635, "needs": 36607, "improvement": 24975, "unlike": 59591, "books": 7248, "computers": 9900, "open": 38409, "media": 32157, "dynamic": 16481, "changes": 8176, "users": 60457, "performances": 40636, "preferences": 41791, "account": 876, "building": 7435, "amounts": 2543, "changing": 8184, "procedures": 42747, "extracting": 20028, "verb": 61508, "raw": 45033, "polish": 41102, "novel": 37747, "technique": 56022, "em": 16999, "algorithm": 2258, "performs": 40696, "disambiguation": 15358, "valence": 61169, "frame": 21437, "forests": 21304, "obtained": 38201, "parser": 39757, "post": 41344, "second": 48994, "idea": 24366, "concerns": 9961, "filtering": 20810, "incorrect": 25397, "frames": 21444, "detected": 14443, "parsed": 39756, "motivated": 35863, "similar": 51027, "arguments": 4178, "tend": 56197, "phenomenon": 40814, "described": 14211, "terms": 56262, "newly": 37368, "introduced": 26879, "occurrence": 38272, "matrices": 31938, "split": 52346, "steps": 52840, "list": 30838, "valid": 61171, "determined": 14559, "according": 852, "combined": 9076, "computed": 9882, "best": 6743, "extracted": 20005, "reaches": 45056, "score": 48782, "45": 410, "39": 383, "principles": 42390, "realization": 45158, "compilation": 9576, "concordance": 9980, "paths": 39951, "forms": 21374, "presentation": 42055, "lexicographic": 30406, "online": 38352, "surveys": 54223, "public": 44302, "mood": 35831, "costly": 11598, "impractical": 24803, "web": 61875, "material": 31925, "indicative": 25546, "blogs": 7228, "emails": 17002, "queries": 44648, "inexpensive": 25630, "extensive": 19851, "assess": 4575, "fluctuations": 21123, "concerned": 9959, "extension": 19847, "profile": 43069, "questionnaire": 44764, "indicators": 25549, "10": 27, "2006": 243, "org": 38676, "send": 49477, "delivered": 13834, "later": 29149, "long": 31002, "medium": 32216, "confusion": 10160, "current": 11958, "prediction": 41688, "systems": 54417, "gram": 23049, "models": 34647, "lm": 30903, "estimate": 18370, "probability": 42472, "phrase": 40834, "past": 39928, "years": 63049, "attempts": 4696, "enrich": 17960, "want": 61767, "explore": 19684, "predictive": 41773, "powers": 41450, "latent": 29117, "lsa": 31235, "reliable": 46249, "distance": 15540, "dependencies": 14104, "evaluate": 18433, "integrate": 26504, "cache": 7520, "partial": 39806, "reranking": 46971, "interpolation": 26707, "improvements": 25042, "baseline": 6145, "simple": 51132, "investigate": 26937, "grapheme": 23180, "ukrainian": 59188, "alphabet": 2486, "recognition": 45489, "involvement": 27016, "description": 14240, "actors": 1486, "influences": 25731, "story": 52879, "line": 30645, "higher": 23810, "se": 48956, "represents": 46816, "flow": 21118, "expressions": 19806, "read": 45066, "lost": 31112, "respect": 47343, "actor": 1484, "exactly": 18855, "behaves": 6389, "major": 31499, "concern": 9958, "beings": 6404, "try": 58844, "store": 52872, "short": 50549, "associating": 4628, "diverse": 15691, "aspects": 4536, "incidents": 25220, "virtual": 61622, "collocations": 8996, "associative": 4631, "completion": 9611, "acting": 1454, "discovered": 15410, "managed": 31689, "separated": 49880, "blocks": 7223, "broken": 7367, "refer": 45732, "mind": 33274, "maps": 31810, "priority": 42436, "functions": 21768, "represent": 46466, "actual": 1488, "temporal": 56178, "situation": 51368, "inside": 26382, "map": 31793, "reconstruct": 45576, "recent": 45276, "computational": 9831, "construction": 10424, "exposure": 19788, "built": 7481, "simulate": 51255, "effects": 16823, "values": 61212, "paragraph": 39635, "viewed": 61604, "association": 4629, "turns": 58994, "strongly": 53067, "increases": 25433, "decreases": 13671, "slightly": 51434, "high": 23705, "occurrences": 38279, "probably": 42484, "introduces": 26890, "bias": 7019, "cases": 7804, "scientific": 48753, "newspapers": 37431, "marked": 31832, "attitudes": 4870, "opinions": 38505, "author": 4999, "persons": 40771, "objective": 38084, "subjective": 53561, "statements": 52716, "attitude": 4869, "accomplished": 848, "modality": 33474, "german": 22661, "expressed": 19796, "special": 52012, "modalities": 33467, "induces": 25603, "proven": 43991, "sentence": 49513, "adverbs": 1962, "modal": 33451, "punctuation": 44387, "marks": 31851, "combination": 9031, "instruments": 26489, "express": 19791, "finding": 20896, "pre": 41497, "step": 52795, "arbitrary": 4012, "output": 38964, "consists": 10319, "representing": 46810, "conception": 9929, "toolkit": 57370, "designed": 14306, "maintenance": 31498, "collaborative": 8932, "generative": 22583, "ease": 16522, "portability": 41215, "spreading": 52380, "tool": 57355, "source": 51740, "products": 43057, "eventually": 18801, "showed": 50663, "filters": 20814, "adequate": 1832, "anaphoric": 2849, "reference": 45734, "modifier": 35733, "compounds": 9757, "modular": 35741, "generating": 22362, "definitions": 13794, "specific": 52040, "uses": 60489, "java": 27150, "prolog": 43149, "custom": 12052, "functional": 21763, "notation": 37719, "implemented": 24646, "generator": 22614, "technological": 56155, "parts": 39903, "brought": 7368, "consistent": 10272, "goal": 22875, "corpus": 11261, "reproducing": 46831, "exposed": 19785, "presenting": 42064, "literature": 30856, "preliminary": 41799, "million": 33249, "space": 51847, "tests": 56416, "norms": 37713, "vocabulary": 61700, "test": 56329, "judgments": 27243, "tasks": 55483, "composed": 9732, "corresponding": 11544, "ages": 2066, "stratified": 52957, "intended": 26549, "basis": 6337, "developmental": 14710, "studies": 53241, "finally": 20834, "aims": 2170, "tracing": 57491, "implementation": 24637, "comprehension": 9762, "derived": 14199, "integration": 26527, "1998": 206, "way": 61788, "categories": 7841, "learned": 29450, "experience": 19225, "shaped": 50449, "symbol": 54266, "larger": 29067, "smaller": 51514, "grounded": 23259, "algorithms": 2318, "compute": 9878, "sets": 50280, "measuring": 32083, "contexts": 10749, "provides": 44178, "unifying": 59491, "categorized": 7857, "methodologies": 32714, "appear": 3137, "quite": 44825, "surface": 54149, "fact": 20286, "closely": 8702, "include": 25222, "headed": 23501, "versus": 61561, "measured": 32066, "micro": 33217, "macro": 31401, "views": 61609, "thread": 57082, "decisions": 13572, "share": 50454, "common": 9163, "key": 27292, "means": 32038, "translation": 58571, "russian": 48413, "dictionaries": 14802, "calculate": 7522, "weights": 61937, "statistic": 52732, "consideration": 10243, "realized": 45161, "program": 43076, "integrated": 26512, "monitoring": 35786, "bilingual": 7103, "publications": 44331, "containing": 10481, "30": 354, "thousand": 57076, "created": 11721, "ancient": 2854, "investigated": 26997, "avenues": 5395, "investigation": 27005, "profound": 43073, "humanitarian": 24265, "going": 22909, "war": 61771, "dog": 15986, "hearing": 23524, "era": 18206, "voices": 61725, "telling": 56168, "thinking": 57049, "argue": 4161, "conscious": 10194, "able": 665, "feel": 20724, "lower": 31205, "concepts": 9931, "keeps": 27281, "inspiring": 26420, "inquiry": 26373, "insights": 26386, "body": 7238, "cognition": 8888, "decipherment": 13557, "modern": 35700, "surrounding": 54195, "areas": 4151, "successful": 53732, "borrowed": 7270, "paired": 39161, "currently": 12030, "extract": 19967, "unknown": 59557, "multilingual": 36062, "abbreviated": 583, "versions": 61559, "translations": 58706, "spanish": 51936, "creating": 11738, "authoring": 5003, "publishing": 44378, "chain": 7956, "deals": 13522, "descriptive": 14255, "mark": 31831, "emphasize": 17310, "mistakes": 33373, "necessity": 36540, "describing": 14239, "considered": 10245, "important": 24697, "assessment": 4589, "concludes": 9973, "setting": 50314, "guidelines": 23351, "activity": 1483, "elements": 16977, "modelling": 34640, "creole": 11759, "dialect": 14745, "inspiration": 26399, "1999": 207, "manual": 31729, "earlier": 16505, "works": 62872, "generation": 22406, "1997": 205, "unification": 59464, "tree": 58740, "adjoining": 1847, "tag": 54724, "prototype": 43972, "supporting": 54136, "pr": 41454, "communication": 9245, "mod": 33450, "cr": 11675, "en": 17412, "descriptions": 14250, "du": 16456, "es": 18255, "cat": 7827, "base": 5536, "er": 18205, "ce": 7903, "core": 11143, "dialects": 14749, "rely": 46275, "sub": 53514, "fs": 21701, "attributes": 4905, "attribute": 4899, "instantiated": 26441, "subset": 53606, "equivalent": 18201, "hybrid": 24308, "principle": 42384, "west": 61946, "area": 4136, "inferred": 25709, "gathered": 22002, "inferences": 25704, "syntactical": 54336, "syntagmatic": 54344, "bigram": 7098, "paradigmatic": 39631, "pair": 39146, "clustered": 8734, "sublanguage": 53570, "vocabularies": 61699, "possibilities": 41310, "markov": 31840, "spectral": 52244, "matrix": 31939, "distributions": 15674, "yields": 63113, "soft": 51619, "clustering": 8735, "heterogeneous": 23616, "visually": 61686, "segmented": 49091, "homogeneous": 23995, "segments": 49095, "specifically": 52178, "useful": 60355, "happened": 23434, "mixed": 33400, "distinguishing": 15609, "hard": 23439, "especially": 18262, "toolbox": 57369, "resolve": 47198, "accurately": 1091, "distinction": 15597, "replace": 46401, "membership": 32230, "assignment": 4605, "presents": 42068, "bypass": 7506, "bottleneck": 7276, "furthermore": 21801, "relies": 46264, "convenient": 10995, "readily": 45076, "clearly": 8656, "display": 15519, "potential": 41379, "brings": 7338, "appealing": 3136, "define": 13774, "altogether": 2515, "trying": 58847, "establish": 18340, "objects": 38116, "efforts": 16933, "lead": 29254, "emergence": 17264, "efficient": 16857, "exhibits": 19008, "degree": 13813, "synonymy": 54288, "reduces": 45687, "efficiency": 16837, "dynamical": 16494, "indefinite": 25490, "time": 57112, "hand": 23382, "reduce": 45647, "feature": 20472, "limit": 30532, "negligible": 36652, "observed": 38142, "noise": 37594, "communicated": 9242, "examined": 18872, "evenly": 18776, "streams": 52966, "leads": 29303, "generally": 22161, "redundancy": 45727, "ambiguity": 2525, "resolution": 47194, "pronouns": 43233, "putting": 44433, "simulation": 51263, "artificial": 4487, "challenge": 7965, "gate": 21991, "wide": 61955, "assembling": 4570, "incoming": 25328, "strategy": 52923, "having": 23484, "adjacent": 1840, "enriched": 17963, "pronoun": 43230, "engine": 17761, "bases": 6322, "klein": 27379, "understood": 59422, "textual": 56951, "inspired": 26404, "argumentative": 4177, "prototypical": 43974, "considering": 10255, "gain": 21903, "realize": 45160, "understand": 59288, "summaries": 53868, "intellectual": 26532, "ownership": 39129, "detectable": 14442, "far": 20394, "collocation": 8995, "classical": 8420, "fuzzy": 21902, "grade": 22998, "easy": 16556, "existing": 19021, "mutual": 36341, "utilized": 61107, "inference": 25638, "function": 21751, "easily": 16531, "seen": 49059, "demonstrated": 14001, "utility": 61079, "pairs": 39166, "working": 62868, "novels": 37959, "constituting": 10361, "project": 43131, "gutenberg": 23363, "www": 63024, "advantages": 1947, "overcoming": 39076, "drawbacks": 16403, "better": 6842, "databases": 12785, "relationships": 46076, "psycholinguistics": 44290, "studied": 53219, "disentangle": 15499, "rich": 48091, "topology": 57471, "produce": 42973, "production": 43046, "ai": 2111, "cognitive": 8889, "scientists": 48774, "argued": 4167, "analogy": 2583, "influential": 25733, "modeling": 34557, "mapping": 31800, "smt": 51543, "requirement": 46908, "coded": 8871, "representations": 46611, "ideas": 24378, "relational": 46002, "remove": 46374, "builds": 7478, "analogical": 2579, "mappings": 31808, "lists": 30847, "automatically": 5137, "discover": 15405, "analogies": 2581, "metaphors": 32349, "achieves": 1283, "compare": 9326, "variety": 61260, "alternative": 2496, "reach": 45043, "obey": 38076, "established": 18351, "parallels": 39663, "physical": 40860, "laying": 29239, "groundwork": 23269, "perspective": 40773, "scaling": 48647, "regularities": 45833, "dynamics": 16500, "suggesting": 53835, "possibility": 41311, "findings": 20904, "principal": 42379, "frequent": 21682, "discussion": 15490, "groups": 23278, "disparate": 15513, "levels": 30235, "formality": 21357, "distances": 15551, "deviations": 14721, "poisson": 41085, "characterized": 8249, "exponential": 19781, "extent": 19921, "deviation": 14720, "depends": 14160, "type": 59047, "behavior": 6390, "determines": 14561, "usage": 59799, "conclusions": 9977, "significance": 50842, "recurrence": 45605, "times": 57246, "empirical": 17317, "anticipated": 3120, "uniquely": 59519, "precise": 41609, "lens": 30043, "thought": 57073, "implications": 24654, "overt": 39106, "manifestations": 31702, "collective": 8991, "lingual": 30690, "browse": 7374, "semantically": 49379, "mathematical": 31932, "called": 7539, "atlas": 4647, "senses": 49492, "create": 11690, "graphs": 23186, "projected": 43136, "projections": 43143, "constitute": 10359, "denote": 14071, "sense": 49481, "trends": 58781, "fed": 20703, "produced": 43016, "link": 30826, "trend": 58777, "stored": 52874, "index": 25511, "instances": 26432, "linked": 30830, "correspond": 11540, "makes": 31613, "thanks": 56998, "world": 62925, "achieve": 1105, "webpages": 61904, "google": 22951, "search": 48961, "associate": 4619, "page": 39138, "count": 11610, "webpage": 61903, "conditional": 9991, "probabilities": 42470, "correlate": 11500, "inequality": 25627, "confidence": 10110, "reliability": 46248, "hypothesis": 24344, "testing": 56401, "decide": 13552, "probable": 42483, "sequence": 49905, "individually": 25589, "low": 31131, "25": 332, "track": 57492, "occur": 38268, "independently": 25508, "sufficient": 53798, "generality": 22106, "supervised": 53959, "tackle": 54696, "unrelated": 59632, "tailoring": 54770, "series": 50058, "induction": 25605, "predicate": 41627, "argument": 4170, "inducing": 25604, "conceptual": 9948, "framenet": 21443, "previously": 42327, "proposing": 43945, "essentially": 18339, "stages": 52448, "representative": 46794, "apply": 3319, "identify": 24410, "predicates": 41632, "roles": 48326, "jargon": 27149, "evaluates": 18555, "variations": 61250, "support": 54114, "multiword": 36332, "mwes": 36355, "clear": 8650, "cut": 12066, "difference": 14818, "situations": 51371, "involved": 27015, "finite": 21056, "state": 52572, "frequently": 21684, "consequences": 10200, "recognising": 45488, "linguistics": 30818, "survey": 54201, "trees": 58766, "structuring": 53198, "active": 1470, "voice": 61721, "passive": 39927, "transitive": 58547, "grammatical": 23067, "serves": 50089, "patient": 39955, "receiving": 45271, "action": 1455, "purpose": 44400, "generated": 22265, "boolean": 7249, "constituents": 10358, "subgroups": 53549, "phrases": 40848, "permutation": 40732, "isomorphism": 27055, "property": 43274, "resulted": 47458, "developers": 14645, "evolve": 18836, "transformations": 58446, "technical": 56017, "storage": 52871, "access": 818, "environment": 18170, "traditionally": 57558, "vital": 61690, "style": 53480, "specifications": 52235, "fit": 21067, "efficiently": 16911, "retrieved": 47981, "naturally": 36472, "oriented": 38695, "redundant": 45728, "indexing": 25514, "demonstrate": 13858, "fast": 20419, "iso": 27048, "deal": 13512, "psychology": 44293, "science": 48745, "frequencies": 21667, "chapter": 8190, "tap": 54800, "internet": 26696, "typed": 59075, "local": 30928, "aiming": 2167, "personal": 40754, "objectives": 38110, "globally": 22850, "effectively": 16719, "largest": 29092, "return": 47997, "aggregate": 2071, "estimates": 18378, "introducing": 26897, "normalized": 37708, "doesn": 15985, "searches": 48990, "anymore": 3125, "opt": 38520, "neutral": 37121, "determine": 14553, "settings": 50357, "publicly": 44332, "offer": 38290, "mechanisms": 32150, "canonical": 7589, "amazon": 2520, "reviews": 48047, "come": 9127, "26": 341, "32": 367, "people": 40026, "review": 48026, "helpful": 23595, "evaluation": 18571, "including": 25237, "market": 31836, "political": 41107, "campaigns": 7565, "fundamentally": 21795, "asking": 4523, "did": 14809, "think": 57048, "analyzing": 2840, "book": 7245, "dataset": 12786, "helpfulness": 23599, "just": 27248, "subtle": 53675, "relates": 45961, "evaluations": 18750, "product": 43041, "plagiarism": 40932, "distinguish": 15602, "predictions": 41754, "competing": 9528, "theories": 57034, "social": 51553, "unexpected": 59438, "populations": 41213, "countries": 11627, "typology": 59171, "universal": 59537, "implication": 24653, "facts": 20316, "extant": 19817, "adjectives": 1845, "typically": 59135, "sample": 48446, "assisting": 4616, "deserve": 14258, "careful": 7754, "hierarchical": 23653, "cope": 11127, "sampling": 48496, "noisy": 37612, "channel": 8187, "derives": 14207, "overall": 39032, "drop": 16440, "generate": 22173, "coherent": 8913, "length": 30023, "outperforms": 38864, "operates": 38487, "simplifying": 51244, "sequentially": 50054, "plays": 40988, "entity": 18093, "tracking": 57496, "identifying": 24453, "mentions": 32302, "entities": 18036, "extending": 19840, "named": 36368, "coreference": 11158, "names": 36377, "definite": 13791, "ne": 36502, "tagging": 54734, "solutions": 51666, "separate": 49873, "mention": 32295, "aspect": 4526, "doing": 15990, "contrast": 10873, "simultaneously": 51269, "joint": 27164, "effectiveness": 16765, "japanese": 27144, "originally": 38742, "variants": 61234, "candidates": 7583, "log": 30971, "likelihood": 30516, "suitable": 53854, "compound": 9754, "phrasal": 40833, "units": 59528, "compositionality": 9751, "polysemy": 41121, "validate": 61172, "candidate": 7568, "collect": 8937, "pages": 39140, "adapted": 1550, "compositional": 9741, "polysemous": 41119, "shows": 50761, "hurdles": 24302, "dravidian": 16397, "indian": 25516, "explores": 19771, "chinese": 8296, "join": 27160, "coin": 8924, "literally": 30853, "sounds": 51739, "combinatory": 9058, "sound": 51734, "pronunciation": 43234, "letter": 30050, "remains": 46325, "interesting": 26647, "entirely": 18031, "dimension": 15223, "add": 1587, "traditional": 57509, "hindi": 23934, "teaching": 56000, "reported": 46450, "accuracy": 892, "60": 456, "80": 520, "depending": 14154, "concentrate": 9915, "difficulties": 15195, "assumption": 4638, "referring": 45758, "expression": 19804, "restructuring": 47429, "linkage": 30829, "explicitly": 19629, "mentioned": 32298, "discursive": 15456, "perceptual": 40059, "referents": 45756, "note": 37721, "mechanism": 32095, "handle": 23405, "treated": 58733, "fresh": 21688, "widely": 61988, "recognized": 45553, "proliferation": 43148, "schemes": 48733, "runs": 48408, "counter": 11613, "need": 36541, "standards": 52548, "increasingly": 25468, "mandatory": 31699, "answer": 3029, "comprised": 9817, "morpho": 35835, "annotator": 3009, "overview": 39108, "applicability": 3152, "comparative": 9317, "rdf": 45042, "outline": 38774, "committee": 9160, "international": 26694, "tc": 55983, "37": 379, "sc": 48543, "primary": 42368, "solicit": 51648, "participation": 39826, "members": 32229, "community": 9258, "entering": 18014, "composing": 9736, "message": 32320, "services": 50094, "instance": 26423, "abbreviations": 585, "writing": 62988, "saving": 48537, "fashion": 20411, "collected": 8953, "processed": 42842, "sms": 51542, "strategies": 52890, "challenging": 8081, "consonant": 10340, "skeleton": 51408, "phonetic": 40825, "rough": 48356, "handwritten": 23431, "care": 7752, "proposes": 43929, "dependency": 14116, "parsing": 39769, "expresses": 19802, "interactions": 26615, "polarity": 41088, "composition": 9739, "led": 29987, "saturation": 48532, "polarities": 41087, "lexicalized": 30397, "translated": 58553, "consequence": 10199, "refinement": 45767, "sheds": 50531, "light": 30448, "conflicting": 10141, "recursion": 45634, "recursive": 45635, "exist": 19014, "byproduct": 7510, "insight": 26383, "universals": 59552, "constructs": 10433, "multimodal": 36141, "interfaces": 26663, "graphics": 23185, "gestures": 22683, "facial": 20259, "promise": 43154, "effective": 16625, "possibly": 41343, "imprecise": 24804, "ambiguous": 2529, "coordinated": 11124, "cohesive": 8922, "presentations": 42056, "adapting": 1563, "interface": 26659, "prevention": 42233, "interactive": 26626, "audio": 4924, "video": 61577, "constraints": 10371, "generic": 22625, "approached": 3748, "takes": 54776, "workshop": 62919, "informal": 25739, "http": 24047, "proofs": 43237, "interact": 26591, "follow": 21253, "intersection": 26745, "proof": 43235, "theoretic": 57010, "infinite": 25715, "wsd": 63019, "selects": 49167, "proximity": 44263, "variant": 61232, "heuristic": 23626, "analyzes": 2839, "glosses": 22854, "matched": 31902, "obtains": 38240, "58": 448, "35": 373, "respectively": 47357, "recommends": 45574, "deeper": 13756, "experiment": 19230, "comparable": 9287, "individual": 25562, "aligned": 2354, "respective": 47355, "counterparts": 11623, "carried": 7768, "alignment": 2362, "employed": 17396, "74": 497, "alignments": 2390, "expected": 19197, "contrary": 10871, "intuition": 26906, "rise": 48151, "consistently": 10288, "close": 8683, "discriminate": 15437, "rendering": 46383, "unsuitable": 59677, "prove": 43979, "productive": 43055, "central": 7916, "drive": 16415, "facilitated": 20279, "referencing": 45752, "sanskrit": 48515, "involves": 27017, "handling": 23423, "integral": 26501, "firstly": 21064, "necessitates": 36538, "conjunctions": 10166, "points": 41064, "letters": 30051, "transform": 58439, "accepted": 815, "authority": 5006, "approximately": 3981, "precisely": 41611, "comprehensively": 9804, "pertaining": 40784, "schema": 48722, "beginnings": 6383, "comprehensive": 9779, "computationally": 9870, "lean": 29337, "neural": 36927, "ann": 2867, "scanned": 48653, "character": 8194, "segment": 49071, "ongoing": 38347, "aid": 2126, "characters": 8252, "assamese": 4566, "north": 37714, "eastern": 16555, "india": 25515, "static": 52722, "recorded": 45588, "individuals": 25591, "segmenting": 49093, "entire": 18019, "trained": 57666, "recognizes": 45557, "boundary": 7282, "assumed": 4635, "performed": 40656, "match": 31894, "confirmed": 10135, "recently": 45400, "spaces": 51908, "availability": 5244, "programs": 43089, "analyze": 2804, "contain": 10459, "enhance": 17908, "readability": 45069, "journal": 27226, "titles": 57271, "china": 8295, "papers": 39608, "citations": 8366, "visualized": 61684, "factor": 20298, "analyzed": 2834, "ellipsis": 16993, "formula": 21380, "validity": 61199, "newspaper": 37429, "fragments": 21436, "converted": 11075, "mono": 35787, "sentential": 49810, "discourses": 15404, "require": 46839, "primitive": 42376, "ones": 38334, "discovering": 15412, "reproduction": 46832, "mutation": 36339, "happens": 23436, "living": 30896, "family": 20388, "relatedness": 45957, "1950s": 194, "percentage": 40051, "historical": 23955, "weak": 61847, "judgment": 27242, "avoids": 5438, "subjectivity": 53568, "replicated": 46415, "quick": 44818, "comparison": 9488, "indo": 25592, "european": 18427, "families": 20387, "position": 41259, "carry": 7774, "roots": 48344, "explorer": 19770, "geographical": 22648, "division": 15750, "shared": 50464, "levenshtein": 30252, "averages": 5425, "stability": 52410, "replacement": 46407, "speed": 52319, "tackled": 54715, "technology": 56158, "society": 51610, "codes": 8873, "differ": 14813, "varying": 61425, "sides": 50824, "reflexive": 45783, "facilitate": 20261, "stem": 52792, "cell": 7908, "relate": 45885, "sociological": 51618, "quest": 44683, "activities": 1482, "reconstructed": 45579, "contains": 10488, "debated": 13528, "beginning": 6381, "apparently": 3134, "accurate": 1074, "optimal": 38525, "studying": 53479, "reconstruction": 45581, "completed": 9604, "ordinary": 38675, "spoken": 52350, "signed": 50841, "humans": 24270, "distinguished": 15607, "enabling": 17453, "check": 8271, "aim": 2132, "finds": 20918, "errors": 18232, "parse": 39753, "encoding": 17562, "paraphrasing": 39745, "recognize": 45549, "longer": 31049, "convey": 11082, "entailment": 17998, "reads": 45095, "element": 16972, "likely": 30524, "infer": 25635, "true": 58819, "bidirectional": 7064, "answering": 3063, "summarize": 53904, "turn": 58986, "pointing": 41063, "prominent": 43151, "articles": 4462, "resurgence": 47918, "spatio": 51989, "motivates": 35876, "looking": 31069, "parameters": 39686, "convergence": 11024, "invariance": 26916, "vowels": 61745, "samples": 48463, "american": 2535, "98": 572, "06": 23, "percent": 40050, "90": 551, "13": 121, "vowel": 61744, "protein": 43968, "biomedical": 7171, "accelerate": 799, "informatics": 25746, "advance": 1879, "art": 4207, "neglected": 36648, "characteristic": 8231, "impact": 24587, "simplification": 51236, "showing": 50675, "poses": 41243, "parsers": 39763, "seeks": 49055, "abstracts": 778, "improve": 24820, "pipeline": 40890, "evaluated": 18519, "annotated": 2875, "23": 322, "simplified": 51238, "record": 45587, "17th": 177, "century": 7931, "analyse": 2585, "morphologically": 35846, "syntactically": 54338, "contemporary": 10509, "transcribed": 58333, "transcription": 58339, "transducers": 58344, "si": 50816, "equivalents": 18204, "ac": 786, "comment": 9139, "transcriptions": 58340, "omission": 38330, "let": 30048, "writers": 62986, "wrongly": 63017, "plural": 41030, "symbols": 54271, "visual": 61647, "isolation": 27053, "morpheme": 35833, "16th": 171, "19th": 209, "standardised": 52543, "problematic": 42691, "complaint": 9581, "websites": 61906, "mis": 33339, "travel": 58725, "published": 44366, "noticed": 37731, "accompany": 842, "highlight": 23859, "lexemes": 30349, "trace": 57487, "gov": 22962, "arabic": 3992, "formation": 21366, "person": 40749, "reason": 45167, "ameliorate": 2532, "integrates": 26518, "transliteration": 58715, "typological": 59164, "conferences": 10109, "module": 35751, "syntax": 54345, "glimpse": 22818, "appropriate": 3962, "critique": 11804, "topic": 57388, "nps": 37967, "heuristics": 23633, "classify": 8630, "coding": 8884, "orthogonal": 38752, "diversity": 15730, "scheme": 48726, "selective": 49162, "suppress": 54146, "signals": 50833, "transmitted": 58718, "transmit": 58717, "receiver": 45268, "adaptive": 1572, "squared": 52400, "null": 37979, "combat": 9029, "interference": 26665, "illustrate": 24514, "steering": 52791, "arrival": 4203, "decoders": 13622, "preserved": 42118, "decoding": 13624, "incorporating": 25377, "hidden": 23637, "hmms": 23977, "successfully": 53740, "spite": 52345, "hmm": 23975, "independence": 25491, "obviously": 38263, "violated": 61615, "mismatch": 33350, "modify": 35736, "dependence": 14101, "fair": 20355, "say": 48540, "2010": 248, "consensus": 10198, "choice": 8331, "commercially": 9157, "aimed": 2163, "departure": 14096, "diagnostic": 14739, "resampling": 46973, "rarely": 45001, "manipulations": 31712, "demonstrates": 14026, "responsible": 47408, "numbers": 38057, "rates": 45021, "crucial": 11893, "improving": 25170, "little": 30868, "profoundly": 43075, "limits": 30637, "instructions": 26483, "explain": 19593, "actions": 1461, "vector": 61449, "address": 1740, "organize": 38687, "vsm": 61749, "broad": 7348, "yielding": 63108, "category": 7860, "breadth": 7309, "familiar": 20385, "pointers": 41062, "poetry": 41041, "know": 27384, "govern": 22963, "prosody": 43958, "numerous": 38064, "converts": 11080, "prose": 43955, "accordance": 851, "ensure": 17988, "conjunction": 10165, "verse": 61549, "handled": 23421, "considerably": 10237, "reducing": 45701, "target": 54801, "gives": 22802, "suggestions": 53842, "impossible": 24802, "allowed": 2442, "component": 9700, "interacts": 26637, "ambiguities": 2524, "addition": 1598, "unique": 59507, "addressed": 1804, "impaired": 24616, "dictate": 14800, "ner": 36675, "immediate": 24581, "getting": 22685, "right": 48136, "extended": 19835, "assume": 4632, "systematically": 54407, "spatial": 51984, "markers": 31835, "reveal": 48007, "characterizing": 8251, "accounting": 884, "highlighted": 23870, "commonsense": 9232, "geometry": 22659, "pragmatic": 41494, "formalized": 21361, "suited": 53864, "prepositions": 41821, "illustrating": 24522, "inferential": 25706, "adequacy": 1830, "psycholinguistic": 44287, "mainly": 31466, "distinguishes": 15608, "underlies": 59259, "adults": 1878, "fulfilling": 21709, "ontologies": 38397, "coherence": 8903, "talk": 54790, "variability": 61220, "bringing": 7337, "capacities": 7634, "detecting": 14444, "relied": 46259, "elementary": 16974, "edus": 16607, "linear": 30648, "nested": 36686, "unfortunately": 59451, "strong": 52998, "enforces": 17754, "round": 48358, "effort": 16923, "bank": 5525, "validated": 61185, "47": 412, "445": 409, "encouraging": 17604, "73": 494, "calculus": 7528, "foundation": 21415, "categorial": 7838, "concatenation": 9911, "discontinuous": 15380, "displacement": 15518, "generalization": 22112, "preserves": 42119, "elimination": 16992, "contribution": 10940, "scene": 48713, "guess": 23324, "shape": 50448, "correctly": 11488, "modeled": 34554, "avoid": 5430, "72": 493, "manifested": 31703, "compiled": 9578, "peculiarities": 40008, "half": 23365, "20th": 310, "explanatory": 19608, "writer": 62985, "discussed": 15485, "tables": 54691, "implicit": 24658, "table": 54686, "undefined": 59248, "script": 48949, "conducted": 10072, "digital": 15207, "manipulated": 31708, "processor": 42970, "seven": 50414, "persian": 40743, "affects": 2024, "additional": 1650, "suggested": 53834, "companion": 9284, "complements": 9596, "arxiv": 4505, "ca": 7519, "complete": 9598, "highlighting": 23871, "worse": 62971, "worst": 62976, "subsequent": 53604, "focuses": 21234, "pipelines": 40910, "decades": 13540, "publication": 44330, "location": 30966, "france": 21634, "vs": 61747, "sixth": 51373, "iteration": 27119, "release": 46140, "exploit": 19651, "massive": 31878, "parallelism": 39658, "computation": 9825, "physically": 40862, "architectures": 4101, "variable": 61221, "grained": 23020, "strictly": 52986, "deterministic": 14564, "allocation": 2430, "modules": 35771, "variables": 61226, "transparent": 58721, "abstraction": 766, "exhibit": 19000, "aiding": 2130, "verification": 61525, "scheduling": 48721, "computing": 9903, "resolving": 47205, "incremental": 25482, "manner": 31713, "whilst": 61952, "considerations": 10244, "forward": 21401, "incorporates": 25372, "simulator": 51267, "compiler": 9579, "code": 8790, "consisting": 10314, "bit": 7185, "marking": 31839, "instruction": 26478, "routes": 48363, "crisis": 11771, "cue": 11931, "associations": 4630, "statistically": 52769, "prevalent": 42226, "practice": 41483, "giving": 22812, "weightage": 61922, "unigram": 59493, "detect": 14434, "biases": 7053, "span": 51918, "agnostic": 2086, "fidelity": 20748, "cues": 11933, "benchmark": 6422, "relatively": 46113, "obscure": 38118, "capture": 7650, "followed": 21256, "dice": 14799, "popular": 41154, "pmi": 41035, "incomplete": 25331, "lacks": 27930, "consistency": 10265, "usable": 59798, "reflect": 45772, "influence": 25723, "myriad": 36356, "reproduced": 46824, "decline": 13581, "interplay": 26702, "environments": 18175, "communities": 9257, "relationship": 46066, "quantify": 44607, "topics": 57443, "controlling": 10989, "indicated": 25537, "success": 53695, "scales": 48644, "reveals": 48017, "sizes": 51407, "shorter": 50588, "adapts": 1583, "groupings": 23277, "endogenous": 17743, "contributors": 10959, "force": 21285, "driven": 16417, "distinctive": 15599, "identity": 24472, "deriving": 14208, "taxonomy": 55981, "contributions": 10951, "challenges": 8026, "targeting": 54861, "briefly": 7329, "faced": 20250, "limiting": 30635, "scope": 48778, "details": 14432, "contributing": 10938, "practically": 41482, "ontology": 38398, "applies": 3316, "italian": 27108, "pedagogical": 40009, "specialized": 52031, "rapidly": 44992, "concerning": 9960, "progressively": 43125, "replaced": 46404, "par": 39610, "ais": 2223, "le": 29253, "sp": 51846, "plus": 41033, "cycle": 12073, "ee": 16608, "systemic": 54416, "operationalized": 38493, "measurement": 32072, "correlations": 11532, "summarizes": 53910, "choices": 8340, "analyst": 2797, "visualization": 61677, "unambiguous": 59203, "located": 30963, "altered": 2490, "reading": 45079, "dramatically": 16386, "reader": 45073, "exceptions": 18960, "consolidate": 10336, "weakened": 61853, "enormous": 17957, "manifold": 31705, "affected": 2019, "simply": 51246, "ignored": 24494, "recognised": 45487, "african": 2039, "indonesian": 25599, "population": 41212, "belongs": 6420, "greater": 23222, "lives": 30895, "possess": 41307, "skill": 51413, "navigation": 36495, "ethnic": 18420, "unclear": 59234, "reached": 45055, "path": 39944, "disputed": 15525, "configuration": 10124, "draw": 16399, "authors": 5007, "cite": 8367, "2008": 245, "2009": 246, "consist": 10262, "200": 233, "covering": 11654, "island": 27046, "randomized": 44896, "random": 44868, "surprising": 54181, "adequately": 1833, "hypothesize": 24352, "inferring": 25710, "speaker": 51995, "certainty": 7952, "prosodic": 43956, "signal": 50828, "dialogue": 14763, "centered": 7913, "causing": 7892, "uncertainty": 59226, "utterance": 61132, "improves": 25111, "predict": 41634, "uncertain": 59224, "eliciting": 16982, "contextually": 10818, "elicit": 16979, "ratings": 45023, "listeners": 30845, "internal": 26684, "fragment": 21434, "bulgarian": 7494, "placed": 40928, "matches": 31903, "classified": 8585, "double": 16320, "keywords": 27353, "debate": 13527, "participants": 39812, "conversations": 11056, "converge": 11021, "coordinate": 11123, "dimensions": 15243, "pitch": 40912, "empirically": 17355, "supported": 54135, "exclusively": 18978, "controlled": 10982, "laboratory": 27864, "twitter": 59030, "novelty": 37961, "140": 141, "geared": 22030, "conversation": 11028, "priori": 42429, "conversational": 11037, "verified": 61528, "investigating": 27004, "stylistic": 53509, "symmetry": 54273, "commonly": 9215, "status": 52779, "overviews": 39121, "lithuanian": 30867, "today": 57277, "tackles": 54717, "issue": 27058, "mt": 35918, "requirements": 46909, "regular": 45830, "designing": 14338, "deploying": 14174, "run": 48400, "windows": 62066, "managing": 31695, "accessing": 836, "pause": 39981, "perceives": 40049, "stream": 52958, "boundaries": 7281, "capability": 7607, "remember": 46369, "recurring": 45633, "locating": 30965, "removed": 46377, "complexities": 9673, "capabilities": 7595, "locations": 30969, "viterbi": 61694, "hypothetical": 24356, "segmentations": 49090, "block": 7220, "incurs": 25489, "latency": 29116, "sending": 49479, "appendix": 3150, "immediately": 24582, "adapt": 1499, "styles": 53507, "adjust": 1850, "partner": 39901, "preceding": 41606, "striking": 52989, "coordination": 11126, "arisen": 4184, "gaining": 21925, "emphasizing": 17315, "adaptation": 1518, "deeply": 13762, "embedded": 17006, "fictional": 20746, "dialogs": 14762, "don": 16313, "receive": 45252, "benefits": 6579, "movie": 35894, "suggestive": 53844, "gender": 22032, "surprisingly": 54185, "average": 5398, "females": 20731, "males": 31682, "distributional": 15661, "linguists": 30825, "categorical": 7840, "cs": 11919, "cl": 8378, "vectors": 61479, "competitors": 9575, "showcases": 50661, "pronominal": 43229, "anaphora": 2848, "proposals": 43279, "translate": 58549, "chains": 7963, "zero": 63150, "hardly": 23457, "unrestricted": 59638, "84": 530, "entropy": 18157, "telugu": 56170, "syllabic": 54263, "somewhat": 51710, "complicated": 9696, "notes": 37724, "continuation": 10826, "covered": 11653, "answers": 3104, "consumers": 10439, "consequently": 10203, "consumer": 10438, "targets": 54864, "spam": 51916, "focused": 21215, "primarily": 42360, "identifiable": 24381, "deceptive": 13551, "deliberately": 13827, "authentic": 4997, "integrating": 26519, "ultimately": 59192, "nearly": 36525, "gold": 22910, "additionally": 1711, "revealing": 48016, "wikipedia": 62041, "explicit": 19610, "wordnet": 62355, "highest": 23848, "ws": 63018, "spearman": 52010, "rho": 48090, "coefficient": 8885, "79": 514, "75": 501, "value": 61207, "87": 539, "78": 511, "polynomial": 41118, "svm": 54231, "esa": 18256, "2011": 250, "unsuccessful": 59676, "section": 49035, "normalizing": 37709, "layered": 29216, "matching": 31908, "normalization": 37704, "metrics": 33132, "connected": 10173, "missing": 33360, "geopolitical": 22660, "potentially": 41412, "organizations": 38686, "adding": 1592, "augmenting": 4986, "nlm": 37457, "correcting": 11480, "ocr": 38284, "pubmed": 44379, "country": 11628, "graphical": 23182, "download": 16325, "facilitates": 20280, "tuned": 58866, "shot": 50595, "gun": 23362, "produces": 43024, "simpler": 51228, "constituent": 10354, "optimized": 38565, "indexed": 25512, "improved": 24942, "20": 218, "downloaded": 16327, "https": 24051, "renders": 46384, "offered": 38296, "semi": 49445, "frameworks": 21631, "34": 371, "49": 414, "html": 24046, "moments": 35780, "represented": 46805, "stochastic": 52854, "derivations": 14195, "string": 52991, "belonging": 6418, "scalar": 48549, "widespread": 62030, "practices": 41489, "binding": 7159, "node": 37583, "treebank": 58762, "showcasing": 50662, "topological": 57466, "fields": 20775, "tandem": 54796, "usefulness": 60399, "greatly": 23225, "increased": 25427, "layers": 29217, "kind": 27365, "threads": 57083, "intuitive": 26909, "concise": 9963, "expressive": 19813, "specification": 52234, "formulas": 21383, "inverse": 26926, "lambda": 27938, "derive": 14197, "operator": 38496, "combinatorial": 9056, "directed": 15263, "assign": 4597, "robot": 48231, "command": 9135, "questions": 44766, "querying": 44682, "puzzles": 44435, "translating": 58564, "solvers": 51697, "applicable": 3155, "investigates": 27002, "ewc": 18843, "ad": 1494, "hoc": 23978, "utilised": 61075, "retrieve": 47976, "ntcir": 37973, "promising": 43157, "iterated": 27117, "explains": 19602, "embedding": 17008, "serve": 50072, "purposes": 44416, "expense": 19200, "learnability": 29447, "dis": 15347, "encoded": 17475, "rhetorical": 48086, "play": 40960, "tion": 57263, "motivation": 35883, "emotionally": 17301, "comments": 9144, "datasets": 13138, "examining": 18874, "consecutive": 10195, "posts": 41369, "discussions": 15494, "comparisons": 9512, "correlation": 11517, "emotional": 17295, "variation": 61239, "inter": 26574, "rater": 45018, "assessing": 4586, "likert": 30526, "agreement": 2103, "rating": 45022, "bottlenecks": 7279, "scoring": 48932, "tunisian": 58975, "specificity": 52237, "arises": 4185, "referred": 45757, "assigning": 4603, "methodological": 32713, "combinations": 9055, "concentrates": 9917, "combine": 9060, "undertaken": 59430, "spanning": 51951, "essay": 18315, "proposal": 43278, "literary": 30854, "enabled": 17434, "validation": 61191, "dynamically": 16495, "transcribing": 58336, "implied": 24672, "alternating": 2494, "conditions": 10019, "discussing": 15489, "projects": 43145, "machines": 31396, "things": 57047, "animals": 2863, "cause": 7883, "causes": 7890, "converting": 11077, "proper": 43250, "operate": 38486, "comparing": 9478, "developing": 14646, "complementary": 9586, "providing": 44234, "industrial": 25615, "stable": 52412, "documented": 15850, "interoperability": 26699, "lexica": 30350, "affective": 2022, "verify": 61533, "unbiased": 59221, "positive": 41277, "lends": 30022, "negative": 36615, "informativeness": 26179, "uniformly": 59488, "decrease": 13666, "emotions": 17302, "sentiment": 49814, "orientation": 38693, "precomputed": 41620, "85": 534, "02": 19, "outperforming": 38844, "biggest": 7096, "deployment": 14175, "trainable": 57662, "planner": 40942, "restaurant": 47412, "template": 56173, "supports": 54141, "planners": 40943, "demonstration": 14057, "affecting": 2020, "responses": 47403, "reproduce": 46821, "agglutinative": 2069, "listed": 30842, "schedule": 48717, "mwe": 36354, "crf": 11760, "disadvantage": 15349, "choosing": 8346, "running": 48405, "tried": 58791, "generations": 22582, "fold": 21249, "fitness": 21068, "64": 468, "08": 25, "86": 536, "oral": 38583, "socio": 51611, "tags": 54756, "file": 20793, "metadata": 32346, "added": 1589, "transducer": 58343, "cascades": 7785, "modified": 35731, "cascade": 7781, "locate": 30962, "merging": 32317, "campaign": 7564, "passage": 39918, "greek": 23245, "conversion": 11069, "plain": 40936, "constant": 10342, "adverbial": 1961, "ending": 17741, "ly": 31293, "2000": 236, "1986": 197, "1990": 198, "fine": 20921, "haven": 23483, "exploited": 19671, "deleting": 13821, "freely": 21650, "yes": 63086, "interrogative": 26744, "permits": 40730, "places": 40930, "operation": 38489, "business": 7503, "medicine": 32215, "hashtags": 23476, "29": 347, "extensions": 19850, "tagger": 54730, "contextual": 10758, "critical": 11775, "expressing": 19803, "unable": 59198, "preprocessing": 41825, "clarify": 8387, "tokens": 57321, "augment": 4938, "largely": 29050, "sent": 49511, "mobile": 33447, "phones": 40824, "artifact": 4485, "private": 42442, "battery": 6346, "paying": 39991, "privacy": 42438, "live": 30894, "collects": 8993, "submissions": 53577, "checks": 8279, "adds": 1827, "releasing": 46187, "resultant": 47457, "sql": 52393, "month": 35829, "sender": 49478, "000": 1, "focusing": 21245, "mandarin": 31696, "paragraphs": 39639, "iii": 24508, "framing": 21633, "agenda": 2051, "positions": 41276, "communicate": 9241, "recursively": 45639, "directional": 15275, "normal": 37701, "image": 24528, "commutative": 9278, "historically": 23963, "autonomous": 5211, "brain": 7297, "room": 48337, "intermediate": 26673, "starts": 52571, "peoples": 40045, "bridge": 7318, "gap": 21957, "bridges": 7323, "linking": 30833, "worth": 62979, "pursuing": 44421, "achievable": 1104, "reasonable": 45171, "cost": 11576, "2012": 251, "weight": 61915, "minimal": 33282, "liu": 30891, "tfidf": 56995, "extra": 19960, "wikinews": 62040, "18": 178, "hints": 23951, "hypotheses": 24343, "circumstances": 8362, "deduction": 13676, "plausible": 40958, "laws": 29174, "predicts": 41781, "presence": 41836, "dimensional": 15225, "richer": 48128, "goes": 22908, "psychological": 44291, "observations": 38126, "obtaining": 38229, "visualize": 61683, "notions": 37734, "asr": 4554, "remarkable": 46356, "surroundings": 54199, "discard": 15366, "performing": 40669, "sensitive": 49494, "lexically": 30398, "pinpoint": 40885, "rational": 45025, "treatment": 58736, "expectation": 19193, "resembles": 47176, "risk": 48160, "decision": 13558, "favor": 20452, "generalized": 22150, "left": 29997, "corner": 11165, "informed": 26180, "conflicts": 10142, "misleading": 33349, "costs": 11606, "minimized": 33299, "minimizing": 33301, "duration": 16474, "computations": 9877, "filler": 20799, "decompositions": 13660, "tensor": 56222, "paradigm": 39621, "maximizing": 31963, "race": 44843, "remote": 46372, "preferred": 41792, "configurations": 10125, "subgroup": 53548, "dominant": 16305, "interlocutor": 26670, "agent": 2053, "interconnected": 26640, "advantageous": 1946, "yield": 63088, "male": 31680, "equally": 18190, "disjoint": 15508, "emerged": 17258, "confirm": 10128, "thesis": 57043, "simulations": 51266, "analytically": 2802, "measurements": 32073, "thorough": 57054, "emergent": 17268, "dispersion": 15517, "conventions": 11020, "cfg": 7954, "pos": 41228, "chunking": 8357, "naive": 36360, "bayesian": 6356, "disambiguate": 15354, "tagged": 54728, "mimic": 33267, "proxies": 44259, "lay": 29175, "attractive": 4896, "poor": 41131, "paid": 39141, "limitations": 30542, "conclude": 9967, "brief": 7327, "ranking": 44962, "indirect": 25554, "merits": 32319, "exhaustive": 18996, "outputs": 39011, "bring": 7330, "22": 317, "15": 145, "31": 364, "77": 510, "14": 135, "qa": 44444, "ir": 27033, "engines": 17771, "bound": 7280, "bearing": 6372, "ranks": 44980, "40": 394, "held": 23543, "trec": 58739, "analysed": 2587, "enhancing": 17947, "70": 486, "difficult": 15153, "blind": 7216, "feedback": 20715, "rf": 48083, "unlikely": 59614, "ranging": 44944, "calculations": 7527, "intricate": 26765, "algorithmic": 2313, "optimization": 38542, "demanding": 13843, "weather": 61874, "forecasting": 21294, "convert": 11071, "versatile": 61547, "pervasive": 40797, "erroneous": 18208, "solved": 51694, "prone": 43225, "spelling": 52333, "operating": 38488, "bad": 5498, "editing": 16595, "correction": 11482, "corrections": 11486, "selecting": 49121, "virtue": 61627, "microsoft": 33232, "inclusive": 25324, "speeches": 52318, "parallelized": 39662, "shed": 50523, "spread": 52376, "facets": 20258, "persuasive": 40781, "overlap": 39087, "ground": 23249, "claims": 8384, "automata": 5031, "varieties": 61259, "situate": 51366, "formalisms": 21356, "cover": 11643, "structural": 53074, "divergences": 15687, "transduction": 58345, "experimenting": 19340, "awareness": 5479, "phrased": 40847, "affect": 2011, "quotes": 44841, "situational": 51369, "distinctiveness": 15600, "portable": 41216, "proved": 43984, "extensively": 19916, "released": 46170, "edition": 16598, "thesaurus": 57042, "150": 155, "ago": 2099, "countless": 11626, "students": 53216, "50": 419, "accepting": 816, "tractable": 57499, "dissertation": 15536, "transforming": 58534, "contrasted": 10893, "computerized": 9899, "benchmarks": 6509, "turned": 58992, "excellent": 18953, "abundant": 780, "optical": 38521, "conceived": 9914, "inaccurate": 25209, "misspellings": 33369, "cornerstone": 11166, "spell": 52331, "spellings": 52338, "images": 24551, "developments": 14711, "advent": 1956, "lot": 31113, "textbooks": 56851, "transformed": 58447, "imperfect": 24628, "occasionally": 38264, "falsely": 20384, "identifies": 24408, "leading": 29287, "suggestion": 53841, "harnesses": 23467, "huge": 24068, "replacements": 46408, "revealed": 48015, "executed": 18982, "platforms": 40951, "judges": 27240, "assigned": 4602, "traced": 57489, "65": 471, "gets": 22684, "replicate": 46414, "esl": 18260, "300": 359, "digest": 15206, "synonym": 54284, "82": 527, "00": 0, "33": 369, "labelled": 27799, "qualitative": 44469, "supposed": 54144, "editors": 16601, "requested": 46836, "guideline": 23350, "richness": 48132, "grams": 23083, "drastically": 16391, "conclusion": 9976, "topical": 57438, "biographical": 7165, "conflict": 10140, "controversial": 10991, "concluding": 9975, "controversy": 10993, "metric": 33109, "quantifies": 44606, "proportion": 43275, "edit": 16590, "penalty": 40020, "penalties": 40019, "coefficients": 8886, "configurable": 10123, "suit": 53852, "interested": 26646, "bridging": 7325, "unit": 59521, "decomposition": 13658, "meta": 32328, "layer": 29177, "standalone": 52459, "decomposed": 13654, "growing": 23287, "crowdsourcing": 11888, "accommodate": 838, "cheap": 8268, "commercial": 9153, "advertising": 2003, "ecological": 16575, "tips": 57266, "pilot": 40883, "checking": 8275, "incorrectly": 25401, "basically": 6335, "checker": 8273, "bigger": 7095, "checkers": 8274, "suffer": 53758, "sparseness": 51974, "acronyms": 1450, "terminologies": 56260, "fail": 20328, "catch": 7836, "hinges": 23947, "big": 7087, "volume": 61727, "detector": 14542, "detects": 14544, "generates": 22336, "corrector": 11498, "outstanding": 39029, "drastic": 16390, "instructional": 26482, "drawn": 16407, "considers": 10261, "enriches": 17965, "dealing": 13520, "straight": 52883, "formulate": 21384, "consisted": 10264, "55": 444, "42": 405, "representativeness": 46803, "conveying": 11086, "uniform": 59485, "associates": 4627, "transitions": 58546, "modification": 35728, "induced": 25602, "syllable": 54264, "spectrum": 52246, "syllables": 54265, "400": 397, "logarithmic": 30977, "beta": 6841, "piecewise": 40882, "fitting": 21071, "piece": 40875, "fits": 21069, "smallest": 51527, "sum": 53867, "lowest": 31229, "criterion": 11773, "chosen": 8350, "16": 161, "tie": 57102, "bayes": 6347, "unaligned": 59202, "pairing": 39164, "needing": 36606, "carefully": 7757, "curated": 11947, "wider": 62026, "biased": 7050, "favors": 20461, "abstain": 754, "deemed": 13680, "ensemble": 17969, "parents": 39748, "keeping": 27277, "unlabeled": 59561, "97": 570, "predicting": 41673, "relying": 46306, "ensembles": 17984, "counts": 11629, "tip": 57265, "tongue": 57352, "valuable": 61201, "clues": 8731, "organisation": 38680, "mental": 32289, "psycho": 44285, "ve": 61448, "knows": 27671, "guided": 23343, "portion": 41220, "morphology": 35850, "bigrams": 7099, "threshold": 57089, "regarding": 45792, "puzzle": 44434, "unstructured": 59666, "unannotated": 59205, "dirichlet": 15343, "choose": 8342, "replacing": 46410, "beginners": 6380, "learners": 29495, "networking": 36824, "declarative": 13578, "bangla": 5523, "preserving": 42120, "synchronously": 54280, "facilitating": 20281, "opportunity": 38513, "adopt": 1862, "involving": 27023, "interests": 26658, "pro": 42449, "anti": 3118, "tone": 57350, "rhetoric": 48085, "professional": 43060, "debates": 13529, "contradicts": 10870, "assertions": 4574, "hope": 24005, "encourage": 17589, "pursue": 44418, "abbreviation": 584, "unpaired": 59624, "runtime": 48409, "github": 22692, "plugin": 41029, "licensed": 30430, "supplement": 54104, "lie": 30433, "pca": 39996, "cca": 7900, "computes": 9901, "theoretically": 57027, "efficacy": 16830, "superior": 53929, "remain": 46311, "notoriously": 37736, "weeks": 61910, "moderately": 35697, "sized": 51406, "expensive": 19201, "gradients": 23012, "contrastive": 10895, "estimation": 18381, "estimating": 18379, "unnormalized": 59620, "penn": 40021, "magnitude": 31414, "fewer": 20735, "scalability": 48545, "labeling": 27775, "transfer": 58349, "label": 27688, "gained": 21913, "conducting": 10100, "subspace": 53612, "regularized": 45848, "jointly": 27192, "minimizes": 33300, "penalizing": 40018, "evaluating": 18556, "adversarial": 1963, "performers": 40668, "measurable": 32043, "adopting": 1873, "characterize": 8248, "successes": 53731, "failures": 20354, "simulating": 51262, "extrinsic": 20169, "belief": 6406, "greedily": 23240, "train": 57562, "pretraining": 42196, "tuning": 58897, "maximum": 31966, "nb": 36496, "kernel": 27288, "outperformed": 38835, "separating": 49885, "excluding": 18975, "meaningful": 32023, "scheduled": 48718, "usages": 59811, "bengali": 6591, "91": 555, "21": 313, "analysing": 2608, "specifies": 52240, "adjective": 1843, "enter": 18012, "appropriateness": 3972, "reflected": 45777, "thousands": 57078, "permitted": 40731, "ibm": 24359, "asked": 4520, "sections": 49036, "history": 23965, "alpha": 2485, "explained": 19600, "realizing": 45163, "dream": 16412, "passing": 39926, "turing": 58978, "recipe": 45480, "dedicated": 13673, "corrects": 11499, "inserting": 26377, "finer": 21037, "ngram": 37436, "dated": 13494, "centuries": 7930, "started": 52568, "period": 40723, "contributed": 10934, "separately": 49881, "tendencies": 56210, "contributes": 10935, "significantly": 50931, "desired": 14346, "barrier": 5530, "strings": 52995, "easier": 16525, "answered": 3061, "defines": 13789, "exploits": 19677, "stemming": 52793, "inflected": 25717, "root": 48341, "suffix": 53811, "inflectional": 25720, "audience": 4922, "balanced": 5514, "terminology": 56261, "algorithmically": 2317, "instead": 26445, "summary": 53912, "summarizer": 53909, "inputs": 26361, "evaluator": 18774, "asks": 4525, "specify": 52241, "wishes": 62088, "ask": 4517, "highlights": 23874, "red": 45640, "implementations": 24645, "overload": 39094, "receives": 45269, "day": 13500, "obstacles": 38156, "progress": 43090, "justify": 27260, "bio": 7161, "clarity": 8389, "workers": 62865, "days": 13504, "week": 61908, "news": 37382, "center": 7912, "majority": 31525, "numeric": 38059, "impression": 24805, "presumably": 42138, "narrator": 36387, "knowing": 27385, "recommend": 45562, "leave": 29981, "rated": 45017, "uncover": 59244, "optional": 38577, "recover": 45592, "female": 20730, "nuanced": 37975, "orientations": 38694, "multifaceted": 36050, "gendered": 22040, "connections": 10184, "homophily": 23997, "correlated": 11509, "emerges": 17269, "audiences": 4923, "mainstream": 31479, "mediated": 32195, "driving": 16438, "comprising": 9819, "107": 75, "authored": 5002, "autoregressive": 5212, "diffusion": 15204, "united": 59524, "unpredictable": 59629, "characterization": 8246, "demographic": 13854, "geographic": 22647, "predictors": 41780, "cities": 8372, "racial": 44845, "demographics": 13857, "moving": 35901, "unified": 59465, "reproduces": 46825, "lines": 30686, "companies": 9283, "presidents": 42132, "chronological": 8353, "select": 49098, "400k": 399, "conceptualization": 9950, "institute": 26471, "late": 29112, "sophisticated": 51715, "government": 22967, "officials": 38315, "needed": 36600, "strengthen": 52972, "collaboration": 8931, "historians": 23953, "instantly": 26444, "axis": 5482, "radicals": 44851, "radical": 44849, "horizontal": 24023, "middle": 33235, "chart": 8257, "vocal": 61720, "slight": 51432, "figure": 20791, "particularly": 39876, "switching": 54260, "lattice": 29163, "microblogs": 33231, "conforming": 10145, "package": 39135, "classifiers": 8610, "scenarios": 48690, "regardless": 45795, "seemingly": 49058, "supplementary": 54105, "owl": 39127, "notations": 37720, "engineers": 17770, "latvian": 29168, "compliant": 9694, "usability": 59797, "write": 62984, "ahead": 2110, "continuations": 10827, "lookahead": 31067, "satisfying": 48529, "references": 45751, "ace": 1101, "considerable": 10227, "ordered": 38664, "permutations": 40734, "27": 342, "28": 345, "seconds": 49033, "universe": 59553, "insightful": 26385, "pp": 41451, "62": 464, "63": 466, "transitivity": 58548, "vast": 61436, "mere": 32312, "exponentially": 19783, "shifts": 50546, "volatility": 61726, "exemplify": 18993, "manifest": 31701, "token": 57279, "overlook": 39096, "inform": 25737, "intuitions": 26908, "specialist": 52027, "posing": 41257, "creative": 11751, "dubbed": 16466, "readers": 45075, "invention": 26923, "approximate": 3975, "retrieves": 47991, "sufficiently": 53808, "organized": 38688, "start": 52565, "exact": 18848, "substring": 53660, "stepwise": 52846, "substrings": 53661, "aligning": 2361, "bounds": 7287, "collections": 8988, "refers": 45762, "romanian": 48333, "endings": 17742, "enjoy": 17950, "interpret": 26708, "autoencoder": 5024, "reduced": 45684, "loss": 31081, "employs": 17403, "hampered": 23378, "sr": 52403, "faster": 20431, "stanford": 52555, "tokenizer": 57319, "recognizer": 45555, "extend": 19818, "benefit": 6557, "accumulation": 890, "multitude": 36329, "election": 16961, "winner": 62068, "presidential": 42130, "president": 42129, "100": 56, "predictor": 41779, "incompleteness": 25332, "generalizations": 22135, "initializing": 26229, "queried": 44647, "generalizes": 22153, "unseen": 59642, "desirable": 14343, "functionalities": 21764, "observe": 38129, "billion": 7117, "requiring": 46958, "corrected": 11479, "evidence": 18806, "raising": 44864, "grammaticality": 23080, "judgements": 27239, "simplicity": 51234, "scaled": 48641, "amenable": 2533, "tensors": 56227, "generalising": 22105, "outperform": 38779, "face": 20239, "leverages": 30300, "array": 4201, "workload": 62871, "server": 50087, "cluster": 8732, "servers": 50088, "python": 44438, "incorporate": 25343, "analytical": 2800, "planned": 40941, "releases": 46186, "citep": 8370, "urgent": 59787, "wizard": 62095, "oz": 39131, "react": 45062, "failure": 20351, "experts": 19589, "follows": 21273, "sees": 49069, "submission": 53571, "reordering": 46389, "decided": 13553, "accomplish": 845, "moses": 35853, "decoder": 13584, "reorder": 46387, "farsi": 20408, "urdu": 59783, "bleu": 7202, "kendall": 27284, "tau": 55977, "hamming": 23376, "scarce": 48656, "feasible": 20471, "option": 38576, "comparability": 9286, "cosine": 11572, "calculated": 7523, "categorization": 7854, "addresses": 1807, "monolingual": 35789, "differently": 15147, "constrain": 10362, "visibility": 61631, "nodes": 37588, "degrees": 13815, "informational": 26167, "assisted": 4615, "founded": 21422, "raised": 44856, "teachers": 55998, "specialists": 52028, "chomsky": 8341, "properly": 43255, "rewriting": 48079, "justified": 27258, "edge": 16585, "modifications": 35729, "termination": 56258, "derivation": 14193, "increasing": 25441, "mail": 31420, "arrange": 4198, "maintaining": 31485, "removing": 46379, "stop": 52864, "secondly": 49031, "thirdly": 57050, "tf": 56989, "idf": 24475, "minimum": 33302, "preprocessed": 41824, "counted": 11612, "reuters": 48006, "subsets": 53611, "trade": 57501, "money": 35783, "grain": 23019, "classic": 8419, "hardware": 23459, "scarcity": 48661, "pivot": 40916, "trilingual": 58799, "tuples": 58977, "disambiguating": 15357, "mixing": 33414, "favorable": 20453, "reports": 46460, "merge": 32314, "morphosyntactic": 35852, "classifying": 8634, "ict": 24364, "eu": 18422, "automates": 5065, "updating": 59769, "innovative": 26249, "adopts": 1875, "crawl": 11685, "predefined": 41623, "sites": 51365, "extrinsically": 20174, "crawled": 11686, "acquiring": 1446, "descriptors": 14257, "dealt": 13523, "validating": 61190, "iterating": 27118, "distinctions": 15598, "bootstrapping": 7267, "minimize": 33297, "sparse": 51963, "holds": 23985, "perception": 40055, "robots": 48235, "salient": 48439, "behaviors": 6400, "attribution": 4914, "nonverbal": 37699, "wiki": 62036, "restricts": 47428, "friendly": 21690, "collaboratively": 8933, "edited": 16594, "customize": 12063, "technologies": 56156, "recognizers": 45556, "unless": 59590, "self": 49172, "confusions": 10162, "telephone": 56165, "exploring": 19776, "populated": 41211, "stopwords": 52870, "logically": 30991, "scored": 48883, "devised": 14727, "geolocation": 22653, "devices": 14723, "configure": 10126, "lan": 27941, "guage": 23318, "faces": 20252, "elicitation": 16980, "specialised": 52025, "removal": 46373, "exchanged": 18966, "regions": 45806, "digitized": 15219, "disparity": 15515, "consequent": 10201, "violation": 61617, "align": 2351, "passages": 39922, "chapters": 8191, "distilled": 15581, "targeted": 54858, "gains": 21930, "extractive": 20133, "offering": 38297, "worthy": 62981, "onset": 38395, "stopping": 52867, "auditory": 4937, "stops": 52869, "msa": 35913, "carrier": 7772, "cv": 12069, "outcomes": 38766, "formalize": 21359, "issued": 27082, "intent": 26563, "cooperation": 11121, "procedural": 42739, "paraphrases": 39744, "inventory": 26925, "labels": 27807, "collapsing": 8936, "submit": 53581, "triplets": 58810, "plans": 40946, "timely": 57244, "capturing": 7729, "negation": 36612, "formulation": 21392, "subsequently": 53605, "lemmas": 30017, "accompanying": 843, "commons": 9231, "nc": 36497, "sa": 48417, "permit": 40729, "lemma": 30016, "initiative": 26234, "recommendations": 45567, "illustration": 24523, "synergies": 54281, "arrangement": 4199, "animal": 2862, "brains": 7298, "discriminating": 15439, "semantical": 49378, "importantly": 24795, "walk": 61758, "compares": 9474, "discrimination": 15440, "fluently": 21135, "preparation": 41813, "rigid": 48147, "commands": 9137, "interpreting": 26741, "respond": 47388, "punjabi": 44390, "fruitful": 21697, "governance": 22964, "records": 45591, "legacy": 30002, "matters": 31945, "mother": 35854, "110": 92, "10th": 80, "total": 57474, "900": 554, "nations": 36398, "covers": 11662, "disadvantages": 15350, "ends": 17746, "enhancements": 17943, "personalized": 40765, "freebase": 21648, "removes": 46378, "judged": 27237, "unifies": 59484, "labeled": 27735, "mechanical": 32091, "turk": 58980, "approximation": 3984, "indicates": 25538, "shallow": 50438, "ndcg": 36501, "68": 478, "93": 561, "inclusion": 25323, "beneficial": 6554, "scores": 48886, "politeness": 41106, "requests": 46838, "guide": 23328, "polite": 41105, "elections": 16962, "stack": 52415, "exchange": 18964, "reputation": 46834, "utilizes": 61112, "student": 53208, "tracked": 57494, "curriculum": 12042, "cooccurrence": 11118, "pointwise": 41082, "heavy": 23538, "embeddings": 17074, "competitive": 9536, "quantitatively": 44629, "sole": 51640, "near": 36503, "danish": 12095, "swedish": 54248, "captured": 7723, "enhancement": 17942, "emerging": 17270, "gujarati": 23358, "transliterated": 58714, "sheer": 50533, "forums": 21399, "surveyed": 54221, "picture": 40871, "marathi": 31813, "trigram": 58797, "calculating": 7525, "preserve": 42113, "phonological": 40831, "television": 56166, "closed": 8694, "captions": 7649, "channels": 8189, "months": 35830, "annotating": 2930, "providers": 44177, "uncovered": 59245, "worldwide": 62970, "appropriately": 3971, "ex": 18844, "preprocess": 41823, "cats": 7865, "witnessed": 62090, "translators": 58713, "manager": 31692, "felt": 20729, "multinomial": 36160, "triples": 58804, "sparsity": 51978, "learns": 29951, "noting": 37732, "concentration": 9919, "fi": 20744, "filter": 20807, "24": 327, "categorize": 7856, "hold": 23979, "unwanted": 59761, "weighted": 61923, "sensitivity": 49506, "medical": 32198, "clinical": 8667, "synthesis": 54357, "adjustment": 1854, "experimentally": 19330, "strict": 52983, "clauses": 8640, "concrete": 9981, "saved": 48535, "forming": 21373, "encyclopedic": 17609, "microblogging": 33230, "region": 45803, "visualising": 61676, "aggregating": 2075, "uk": 59185, "birth": 7184, "2013": 252, "validates": 61188, "feasibility": 20467, "visualisation": 61673, "volumes": 61731, "tweets": 59008, "rest": 47409, "surrounded": 54194, "overlapping": 39091, "enforcing": 17755, "influx": 25734, "quantities": 44634, "reuse": 48002, "curation": 11953, "propagated": 43240, "originated": 38748, "provenance": 43996, "propagation": 43243, "heavily": 23527, "8000": 524, "occurred": 38271, "inconsistent": 25339, "visualise": 61675, "website": 61905, "inflection": 25719, "disciplines": 15375, "latest": 29153, "assumptions": 4639, "multilevel": 36061, "ratio": 45024, "emotion": 17285, "strength": 52970, "wisdom": 62077, "quickly": 44820, "scenario": 48682, "notably": 37718, "discourage": 15381, "malicious": 31683, "reject": 45879, "markedly": 31833, "hot": 24028, "cold": 8927, "warm": 61772, "freezing": 21656, "contrasting": 10894, "begin": 6378, "intervention": 26750, "korean": 27674, "prepared": 41815, "350": 374, "retrieving": 47993, "accomplishing": 850, "delivers": 13836, "parametrized": 39734, "interpretable": 26718, "files": 20795, "nl": 37443, "raises": 44857, "circumvent": 8363, "libraries": 30423, "scholars": 48738, "divide": 15743, "genre": 22641, "assist": 4609, "pose": 41237, "genres": 22642, "gradually": 23015, "internally": 26693, "trains": 58323, "library": 30424, "fiction": 20745, "proportions": 43277, "narrative": 36381, "themes": 57007, "stone": 52862, "analogous": 2582, "likewise": 30529, "house": 24040, "synonymous": 54286, "recognizing": 45558, "compositions": 9753, "dual": 16457, "materials": 31926, "develops": 14716, "automate": 5032, "refine": 45764, "trivial": 58811, "occurring": 38280, "mixture": 33417, "recurrent": 45606, "spans": 51956, "substantially": 53631, "700": 488, "hierarchically": 23699, "organised": 38682, "institutions": 26474, "authorities": 5005, "member": 32228, "union": 59506, "official": 38306, "viewing": 61606, "categorisation": 7851, "ec": 16572, "centre": 7924, "tm": 57275, "professionally": 43062, "professionals": 43063, "departments": 14095, "monitor": 35784, "eye": 20175, "europe": 18426, "19": 183, "press": 42133, "functionality": 21765, "multilinguality": 36138, "motor": 35887, "perceive": 40046, "foreign": 21296, "accent": 807, "proceed": 42748, "mirror": 33336, "neurons": 37119, "grasping": 23195, "neuro": 37115, "augmented": 4975, "update": 59765, "revisits": 48058, "extracts": 20142, "reframing": 45788, "dissemination": 15535, "danger": 12093, "complemented": 9594, "marketing": 31837, "captures": 7727, "inspire": 26401, "granularity": 23091, "smoothly": 51540, "overcomes": 39075, "insertion": 26378, "upper": 59772, "unprecedented": 59627, "visualizations": 61682, "density": 14091, "updates": 59768, "daily": 12083, "gaussian": 22010, "multivariate": 36330, "mel": 32226, "gmm": 22870, "dnn": 15756, "acoustic": 1433, "email": 17001, "love": 31129, "hate": 23479, "suicide": 53851, "genders": 22041, "women": 62105, "joy": 27234, "sadness": 48425, "men": 32288, "prefer": 41787, "fear": 20466, "trust": 58830, "backbone": 5484, "rising": 48159, "participating": 39823, "constantly": 10343, "enterprise": 18015, "university": 59555, "quantification": 44602, "behavioral": 6399, "movements": 35890, "unfold": 59448, "interlocutors": 26671, "exchanging": 18968, "lag": 27933, "maximally": 31952, "perfect": 40060, "improper": 24819, "calculation": 7526, "translator": 58712, "taggers": 54733, "viz": 61696, "38": 380, "46": 411, "worked": 62863, "emphasis": 17308, "meteor": 32350, "diseases": 15498, "genes": 22635, "proxy": 44264, "generalize": 22136, "sciences": 48752, "economic": 16577, "morphemes": 35834, "weakly": 61856, "transferred": 58433, "bitext": 7187, "expectations": 19196, "facilities": 20283, "encode": 17459, "f1": 20179, "attaining": 4672, "crfs": 11766, "12k": 119, "5k": 451, "ontonotes": 38400, "conll": 10167, "03": 20, "branch": 7299, "narrowing": 36390, "constrained": 10363, "integer": 26498, "relaxation": 46137, "lastly": 29106, "stress": 52979, "encountering": 17587, "totally": 57478, "1st": 216, "optimality": 38535, "maximization": 31954, "fake": 20370, "regulation": 45857, "crawling": 11689, "curating": 11952, "benchmarking": 6507, "cis": 8364, "edu": 16603, "estimations": 18388, "researcher": 47147, "practitioners": 41491, "national": 36395, "hundreds": 24295, "millions": 33262, "downstream": 16330, "popularity": 41200, "policies": 41092, "classifies": 8629, "43": 406, "56": 446, "11": 82, "accuracies": 891, "subjectively": 53567, "parameterized": 39685, "indicating": 25541, "cornell": 11164, "inadequate": 25211, "solid": 51649, "grounds": 23267, "away": 5480, "aware": 5439, "obvious": 38262, "meaningless": 32031, "contradictions": 10868, "hash": 23473, "valued": 61210, "optimize": 38561, "europarl": 18425, "preexisting": 41786, "learnt": 29979, "timeline": 57241, "epochs": 18186, "evolves": 18840, "salience": 48435, "neglect": 36647, "dependent": 14145, "mined": 33276, "rouge": 48348, "consuming": 10441, "simplify": 51241, "really": 45164, "3rd": 389, "selectional": 49159, "preference": 41789, "covariance": 11640, "discrete": 15419, "setup": 50408, "dissimilar": 15537, "informative": 26168, "employing": 17399, "walks": 61759, "nuances": 37976, "dl": 15751, "factual": 20317, "truly": 58823, "adopted": 1869, "formalization": 21358, "equivalence": 18200, "helps": 23601, "logics": 30992, "wh": 61950, "desire": 14345, "flexible": 21107, "smoothed": 51536, "absolute": 736, "kneser": 27380, "ney": 37434, "smoothing": 51538, "baselines": 6225, "perplexity": 40737, "bag": 5500, "favorably": 20455, "geo": 22646, "intrinsically": 26774, "wrong": 63015, "plausibility": 40957, "mimics": 33271, "expert": 19568, "beliefs": 6408, "perceptions": 40056, "epistemic": 18184, "cognitively": 8899, "turkish": 58983, "grouping": 23276, "synsets": 54290, "hypernyms": 24335, "hyponyms": 24341, "determining": 14562, "gather": 22000, "tens": 56217, "extreme": 20151, "restrictive": 47427, "czech": 12077, "optimised": 38539, "differs": 15151, "categorised": 7853, "repository": 46463, "leveraging": 30318, "keyphrase": 27345, "dramatic": 16384, "growth": 23307, "pros": 43953, "cons": 10193, "keyphrases": 27346, "paraphrase": 39737, "robustness": 48270, "encourages": 17602, "enhances": 17944, "scattered": 48679, "resides": 47184, "quantity": 44638, "final": 20816, "extensible": 19846, "01": 18, "36": 377, "94": 564, "denotation": 14069, "cohesion": 8921, "fusion": 21850, "topicality": 57441, "cc": 7896, "similarly": 51131, "poem": 41038, "safety": 48428, "reporting": 46458, "shaping": 50451, "looks": 31070, "acquired": 1443, "transductive": 58346, "incident": 25219, "compressed": 9809, "counterpart": 11622, "spotting": 52375, "minor": 33329, "synchronous": 54279, "substitution": 53658, "distortion": 15612, "mismatches": 33354, "discriminatively": 15449, "margin": 31816, "scalable": 48547, "extractions": 20132, "referential": 45754, "resolves": 47204, "95": 566, "purely": 44394, "encyclopedia": 17608, "minority": 33331, "pairwise": 39236, "doubt": 16323, "devise": 14724, "extends": 19842, "templates": 56177, "subtask": 53667, "hopes": 24019, "integrity": 26531, "leverage": 30254, "pruning": 44268, "confirming": 10136, "idiosyncratic": 24484, "constraint": 10369, "judge": 27236, "suitability": 53853, "wise": 62078, "capacity": 7635, "refining": 45771, "modifying": 35738, "para": 39620, "parametric": 39730, "orthographic": 38754, "eliminates": 16988, "rare": 44997, "chunk": 8356, "forest": 21300, "keyword": 27348, "pagerank": 39139, "centrality": 7922, "neighborhood": 36660, "assumes": 4636, "completely": 9605, "computable": 9824, "gaze": 22021, "movement": 35889, "incorporated": 25367, "coupled": 11632, "detrimental": 14565, "situated": 51367, "bi": 6998, "opportunities": 38510, "options": 38579, "malay": 31676, "rivaling": 48168, "entails": 18009, "buy": 7505, "asymmetric": 4641, "treat": 58730, "representatives": 46804, "nowadays": 37964, "bible": 7062, "conduct": 10023, "alternatives": 2513, "pivoting": 40922, "cascading": 7786, "pseudo": 44272, "motivating": 35880, "suffers": 53788, "drawback": 16402, "subtree": 53681, "ted": 56159, "st": 52409, "pay": 39989, "cheaper": 8269, "snippets": 51548, "prerequisite": 41831, "blog": 7225, "asynchronous": 4644, "beat": 6373, "vectorial": 61475, "autoencoders": 5029, "regularizer": 45849, "cycles": 12075, "quasi": 44645, "cyclic": 12076, "sought": 51733, "huang": 24066, "eat": 16570, "reverse": 48021, "preparing": 41816, "act": 1451, "inconsistencies": 25336, "documentation": 15849, "tts": 58850, "intelligible": 26547, "sounding": 51737, "confusing": 10159, "degrade": 13804, "interview": 26752, "death": 13525, "occurs": 38283, "health": 23511, "worker": 62864, "interviews": 26753, "reviewed": 48043, "site": 51364, "normalised": 37703, "locally": 30961, "compelling": 9518, "succeeds": 53694, "virtually": 61626, "surpass": 54161, "possessing": 41309, "2004": 241, "year": 63046, "sign": 50827, "possesses": 41308, "boosting": 7260, "misses": 33359, "als": 2487, "signature": 50839, "neglecting": 36650, "emphasise": 17309, "chat": 8259, "talking": 54791, "modes": 35725, "evolved": 18838, "tweet": 59001, "speak": 51993, "differentiable": 15139, "journalists": 27231, "mr": 35904, "writings": 62993, "entertaining": 18017, "educational": 16605, "landscape": 27945, "spurred": 52391, "employ": 17372, "solely": 51641, "citation": 8365, "encouraged": 17601, "imposing": 24801, "unconstrained": 59241, "convex": 11081, "1995": 203, "idiomatic": 24481, "mistaken": 33371, "patent": 39943, "nascent": 36392, "aggressive": 2084, "stay": 52782, "representational": 46608, "symbolic": 54267, "viewpoints": 61608, "stakeholders": 52452, "customers": 12059, "designers": 14337, "corpuses": 11463, "reliably": 46255, "establishing": 18364, "inherent": 26199, "dag": 12082, "ic": 24360, "personalization": 40763, "imbalanced": 24565, "multiclass": 36046, "id": 24365, "prompt": 43197, "forecast": 21293, "meet": 32221, "tamil": 54793, "weibo": 61912, "billions": 7123, "sentimental": 49868, "emoticons": 17284, "shifting": 50545, "deliberate": 13826, "api": 3130, "500": 423, "convolutional": 11100, "max": 31948, "pooling": 41124, "handles": 23422, "distant": 15553, "supervision": 54075, "strongest": 53065, "narrow": 36388, "balance": 5513, "benchmarked": 6506, "pearson": 40004, "optimising": 38540, "standing": 52549, "defining": 13790, "tailored": 54768, "gradient": 23001, "descent": 14209, "blending": 7200, "ignore": 24488, "judgement": 27238, "enjoys": 17953, "2003": 239, "han": 23381, "unicode": 59461, "ids": 24485, "bloom": 7229, "party": 39913, "psychologists": 44292, "exemplars": 18990, "exemplar": 18989, "differential": 15141, "equations": 18194, "portions": 41222, "preservation": 42112, "anomalous": 3022, "discarded": 15367, "merely": 32313, "prejudice": 41798, "marginalizing": 31828, "lacking": 27928, "completeness": 9608, "protection": 43966, "instantiation": 26442, "counting": 11625, "plots": 41024, "contrasts": 10925, "differentiate": 15143, "activate": 1463, "adjacency": 1838, "arousal": 4196, "intensity": 26556, "harder": 23454, "roughly": 48357, "crowd": 11879, "sourced": 51820, "implicitly": 24665, "regression": 45811, "na": 36357, "harvesting": 23472, "phonetics": 40830, "corresponds": 11564, "optimum": 38575, "direction": 15269, "shortest": 50591, "sindhi": 51281, "glyphs": 22869, "blend": 7198, "glyph": 22868, "satisfy": 48528, "request": 46835, "critically": 11798, "monetary": 35782, "tangible": 54799, "drawing": 16405, "operationalize": 38492, "recipient": 45482, "indications": 25545, "helping": 23600, "abundantly": 784, "croatian": 11805, "shuffled": 50814, "shuffling": 50815, "topologies": 57470, "corroborate": 11565, "lose": 31078, "ordering": 38665, "paris": 39749, "pieces": 40878, "dense": 14074, "bilinear": 7102, "rendered": 46382, "suitably": 53862, "thematically": 57005, "reinforced": 45862, "maximal": 31951, "reinforce": 45861, "caused": 7889, "newsgroups": 37428, "rigorous": 48148, "thesauri": 57041, "centric": 7926, "older": 38328, "got": 22960, "newer": 37367, "joining": 27162, "iv": 27135, "48": 413, "randomly": 44897, "picked": 40868, "57": 447, "remarkably": 46363, "44": 407, "attested": 4868, "window": 62063, "shortening": 50587, "tutorial": 58995, "guarantees": 23323, "predominantly": 41784, "theme": 57006, "carrying": 7779, "unigrams": 59495, "89": 545, "expanding": 19186, "asset": 4595, "vietnam": 61590, "resourced": 47287, "initialized": 26227, "coming": 9133, "headwords": 23510, "noted": 37723, "lookup": 31071, "hausa": 23482, "doc": 15759, "hierarchies": 23701, "swiss": 54251, "twice": 59027, "predetermined": 41626, "offset": 38319, "operational": 38490, "origins": 38751, "fluent": 21128, "publish": 44364, "trials": 58786, "encapsulate": 17456, "interestingly": 26656, "implements": 24651, "simulated": 51258, "games": 21950, "establishes": 18359, "correspondences": 11543, "commentaries": 9141, "kernels": 27291, "smith": 51533, "decomposing": 13656, "glass": 22816, "passes": 39925, "slower": 51452, "refines": 45770, "rnn": 48179, "encoder": 17487, "encodes": 17560, "decodes": 13623, "maximize": 31956, "qualitatively": 44483, "realistic": 45147, "collecting": 8972, "south": 51844, "africa": 2038, "home": 23992, "imperative": 24626, "verbalization": 61514, "elaborate": 16951, "subsumption": 53665, "depend": 14097, "singular": 51359, "precondition": 41621, "biomedicine": 7179, "ke": 27276, "exploratory": 19681, "novices": 37963, "behaviours": 6403, "screen": 48947, "recordings": 45590, "guidance": 23326, "arts": 4504, "tourism": 57480, "tailor": 54767, "agency": 2050, "newswire": 37432, "snapshot": 51545, "ag": 2043, "guides": 23354, "emerge": 17257, "loose": 31076, "prescriptive": 41835, "fledged": 21103, "stories": 52876, "lab": 27687, "majorly": 31537, "expect": 19192, "flexibility": 21106, "induce": 25601, "memorized": 32236, "illustrated": 24520, "host": 24025, "2014": 253, "financial": 20889, "facing": 20285, "shift": 50542, "sociolinguistic": 51616, "centers": 7915, "fall": 20372, "contact": 10458, "norm": 37700, "sharp": 50521, "urban": 59782, "popularly": 41208, "hinglish": 23948, "rural": 48411, "interacting": 26594, "asymptotic": 4643, "maintain": 31481, "sizeable": 51405, "routing": 48367, "continue": 10828, "happiness": 23437, "energy": 17747, "acceleration": 806, "ideal": 24376, "interlinked": 26669, "specifying": 52242, "straightforward": 52885, "paradigms": 39632, "leaving": 29983, "pointed": 41054, "extremely": 20153, "analyzers": 2838, "regularity": 45834, "encountered": 17586, "hosted": 24026, "expressiveness": 19815, "tight": 57106, "sinhala": 51361, "automation": 5208, "inherently": 26204, "reasons": 45234, "kbp": 27273, "nist": 37442, "analytics": 2803, "conference": 10108, "tac": 54695, "begins": 6384, "exercise": 18994, "partitions": 39898, "young": 63142, "engineer": 17762, "sphere": 52341, "substitutes": 53656, "ternary": 56326, "curve": 12049, "lying": 31294, "editor": 16600, "monotonic": 35820, "communicates": 9243, "auxiliary": 5228, "completions": 9614, "movies": 35899, "tv": 58998, "retrain": 47932, "arrive": 4204, "retraining": 47935, "adaptable": 1517, "updated": 59766, "title": 57268, "76": 506, "leaves": 29982, "annotators": 3014, "periods": 40726, "amt": 2578, "disease": 15496, "protocol": 43970, "iterations": 27120, "arrived": 4205, "merged": 32315, "voting": 61739, "imitate": 24572, "innovation": 26247, "minorities": 33330, "broadcast": 7357, "originate": 38747, "innovations": 26248, "chance": 8165, "regularization": 45835, "producing": 43037, "disproportionately": 15523, "monte": 35825, "carlo": 7765, "regimes": 45802, "adoption": 1874, "moderate": 35695, "interpreter": 26739, "actually": 1489, "incentivizes": 25216, "arguably": 4160, "concreteness": 9985, "surpassed": 54169, "ceiling": 7905, "plenty": 41010, "guiding": 23356, "legal": 30003, "instant": 26438, "messaging": 32325, "increment": 25481, "threats": 57087, "contractions": 10863, "tokenization": 57316, "additive": 1739, "multiplicative": 36317, "superiority": 53948, "dates": 13495, "currency": 11957, "390": 384, "variance": 61230, "c4": 7518, "knn": 27382, "dimensionality": 15241, "lemmatization": 30018, "belong": 6416, "conjecture": 10163, "agree": 2100, "concatenated": 9908, "gated": 21993, "degrades": 13809, "assistant": 4613, "displaying": 15521, "food": 21275, "leaning": 29338, "referenced": 45750, "adaptations": 1549, "complications": 9698, "synthesize": 54360, "tremendously": 58776, "contents": 10575, "searching": 48991, "accompanied": 841, "owing": 39125, "purchase": 44391, "influenced": 25730, "accordingly": 872, "posting": 41366, "deceive": 13545, "minute": 33334, "unintentionally": 59503, "shorthand": 50593, "normalisation": 37702, "globalization": 22849, "te": 55986, "clause": 8637, "attachment": 4657, "degraded": 13807, "achievements": 1282, "court": 11639, "friends": 21691, "seek": 49048, "weigh": 61913, "vote": 61737, "counterfactual": 11616, "stand": 52458, "markup": 31852, "hebrew": 23540, "analytic": 2799, "workflows": 62867, "van": 61213, "nearest": 36517, "neighbour": 36667, "ive": 27136, "sorting": 51721, "involve": 27013, "brown": 7370, "throw": 57095, "searched": 48989, "lengths": 30040, "algebra": 2255, "weighting": 61932, "jhu": 27156, "summer": 53917, "trigger": 58793, "holder": 23982, "expanded": 19185, "ldc": 29252, "vertices": 61564, "edges": 16587, "normally": 37711, "minimization": 33296, "crossing": 11875, "paves": 39985, "diminishing": 15247, "returns": 48000, "gathering": 22003, "ea": 16502, "greedy": 23241, "agglomerative": 2067, "pe": 40000, "microblog": 33228, "dbpedia": 13507, "identifiers": 24407, "repair": 46390, "nmt": 37573, "weakness": 61869, "conventional": 10999, "inability": 25205, "oov": 38404, "translates": 58563, "wmt14": 62100, "contest": 10577, "dot": 16317, "partitioning": 39897, "fraction": 21427, "wsj": 63020, "inevitable": 25628, "automating": 5206, "rival": 48167, "scratch": 48943, "labor": 27862, "electricity": 16965, "ultimate": 59190, "prospective": 43960, "capitalization": 7640, "simplistic": 51245, "specificities": 52236, "svms": 54240, "explaining": 19601, "accessed": 831, "apis": 3131, "meeting": 32222, "word2vec": 62344, "mikolov": 33239, "attracted": 4873, "notice": 37726, "preventing": 42232, "explanations": 19605, "cbow": 7895, "skip": 51417, "sg": 50433, "softmax": 51627, "interpretations": 26737, "alongside": 2484, "basics": 6336, "neuron": 37118, "backpropagation": 5495, "demo": 13845, "devoted": 14732, "arising": 4186, "vehicle": 61503, "vehicles": 61504, "drives": 16437, "dialog": 14750, "upstream": 59778, "motifs": 35855, "portal": 41217, "slow": 51448, "ccg": 7901, "truth": 58834, "borrow": 7269, "parses": 39767, "jobs": 27159, "atis": 4646, "duplicate": 16469, "gazetteer": 22022, "logistic": 30993, "constructing": 10418, "confounding": 10149, "conditioning": 10017, "revisit": 48055, "signatures": 50840, "attributed": 4903, "spurious": 52384, "broader": 7362, "watson": 61785, "bing": 7160, "apache": 3127, "cnn": 8757, "2d": 348, "1d": 213, "convolution": 11091, "17": 172, "th": 56996, "seminal": 49471, "sex": 50430, "condition": 9990, "promote": 43189, "education": 16604, "woman": 62104, "weaker": 61855, "absence": 733, "resolved": 47203, "washington": 61779, "city": 8375, "poorer": 41147, "acceptable": 812, "12": 103, "eliminating": 16990, "extractor": 20140, "unbalanced": 59218, "surpasses": 54170, "learnable": 29448, "unveil": 59757, "contradicting": 10866, "universally": 59550, "vietnamese": 61591, "ought": 38759, "flexibly": 21112, "optimizes": 38573, "aka": 2224, "optimally": 38536, "succeeded": 53692, "lda": 29248, "interpretability": 26713, "illustrates": 24521, "contract": 10862, "repositories": 46462, "inflections": 25721, "cumbersome": 11941, "generators": 22624, "boosted": 7258, "bow": 7288, "remaining": 46322, "factorization": 20300, "fillers": 20800, "estimated": 18376, "degradation": 13800, "neighbours": 36670, "neighbourhoods": 36669, "pushing": 44430, "convolutions": 11117, "parameterization": 39682, "advocates": 2006, "vision": 61633, "compresses": 9810, "optimisation": 38537, "hypergraph": 24331, "compactly": 9281, "synset": 54289, "numerical": 38060, "positivity": 41305, "negativity": 36646, "attains": 4673, "connects": 10189, "caption": 7647, "coco": 8788, "transmission": 58716, "aided": 2129, "tell": 56167, "sharing": 50513, "intra": 26756, "reproducibility": 46826, "sophistication": 51718, "mix": 33399, "ecosystem": 16582, "indirectly": 25556, "broadly": 7365, "gesture": 22682, "propagate": 43239, "intriguing": 26766, "paving": 39987, "embodied": 17252, "neighbor": 36655, "neighbors": 36665, "shortcoming": 50576, "mode": 33479, "conflate": 10138, "neighborhoods": 36662, "horizon": 24022, "glove": 22855, "progresses": 43121, "pictures": 40872, "attracting": 4891, "modifiers": 35734, "denoting": 14073, "thing": 57046, "treating": 58735, "comparably": 9315, "turkic": 58982, "strives": 52997, "lms": 30916, "necessarily": 36527, "fusing": 21849, "uni": 59458, "fused": 21847, "41": 401, "clean": 8641, "studio": 53315, "83": 529, "tremendous": 58772, "journals": 27232, "rnns": 48208, "gpus": 22996, "perplexities": 40736, "hit": 23971, "succeed": 53691, "enlarge": 17954, "isolates": 27051, "conveys": 11087, "grafting": 23018, "falling": 20376, "metaphor": 32347, "organizing": 38692, "drifts": 16414, "05": 22, "drift": 16413, "decreased": 13670, "democracy": 13850, "ads": 1876, "1993": 202, "indices": 25551, "replicable": 46413, "lstm": 31237, "cells": 7909, "accumulates": 888, "click": 8661, "attenuate": 4867, "unimportant": 59500, "specially": 52037, "ngrams": 37437, "treebanks": 58765, "modelled": 34639, "feed": 20710, "lstms": 31289, "semeval": 49424, "constrains": 10368, "akin": 2226, "turning": 58993, "shannon": 50446, "deficiencies": 13769, "optimizing": 38574, "formulating": 21389, "black": 7189, "box": 7289, "formalise": 21353, "supplied": 54111, "cooking": 11119, "transcript": 58337, "recipes": 45481, "devlin": 14729, "augments": 4992, "heuristically": 23632, "gating": 22005, "fuse": 21845, "stronger": 53060, "framed": 21442, "engage": 17756, "breakthrough": 7315, "grammatically": 23081, "wang": 61764, "competitor": 9574, "margins": 31830, "register": 45807, "vanishing": 61218, "culture": 11939, "imposed": 24799, "principled": 42385, "thresholds": 57092, "decade": 13538, "break": 7310, "jensen": 27152, "divergence": 15685, "scholarly": 48736, "revision": 48053, "59": 450, "memories": 32233, "utilizing": 61118, "gen": 22031, "feedforward": 20718, "affords": 2034, "healthcare": 23521, "expertise": 19587, "ran": 44866, "650": 472, "patients": 39957, "chronic": 8352, "emergency": 17267, "department": 14094, "visits": 61646, "stores": 52875, "proficient": 43068, "grows": 23305, "vary": 61419, "quotient": 44842, "finance": 20888, "thoroughly": 57067, "everyday": 18802, "handful": 23403, "dnns": 15758, "aggregated": 2073, "detectors": 14543, "shedding": 50529, "cater": 7864, "british": 7345, "analysts": 2798, "infeasible": 25632, "robustly": 48269, "nell": 36672, "cutting": 12067, "gazetteers": 22023, "served": 50086, "song": 51711, "examination": 18858, "locality": 30954, "notable": 37716, "figures": 20792, "biographies": 7166, "academia": 787, "governments": 22968, "statement": 52715, "antonyms": 3123, "subtasks": 53669, "disregarded": 15528, "symmetric": 54272, "connectivity": 10188, "displayed": 15520, "ubiquitous": 59174, "betweenness": 6996, "authorship": 5009, "harvest": 23470, "prioritize": 42432, "maximizes": 31962, "impacting": 24612, "engineered": 17763, "anchor": 2850, "treatments": 58737, "lets": 30049, "affairs": 2010, "implies": 24673, "obstacle": 38155, "variational": 61240, "engaged": 17757, "mitigating": 33394, "persistent": 40748, "unlabelled": 59586, "labelling": 27805, "emph": 17307, "ent": 17994, "facet": 20255, "ignoring": 24498, "hours": 24039, "kb": 27270, "hop": 24000, "atomic": 4649, "assemble": 4567, "ae": 2007, "reconstructing": 45580, "correlational": 11531, "advances": 1904, "inversion": 26932, "formulated": 21387, "yelp": 63084, "mass": 31877, "fetching": 20733, "forum": 21397, "visualizing": 61685, "cloud": 8719, "hosting": 24027, "fly": 21137, "diversified": 15728, "sina": 51279, "2nd": 351, "5th": 453, "stacking": 52422, "exciting": 18969, "multiplication": 36315, "mildly": 33244, "subsumes": 53664, "88": 543, "1000": 67, "kg": 27357, "pdf": 39998, "pixel": 40924, "mu": 35935, "fueled": 21706, "businesses": 7504, "manage": 31687, "saying": 48541, "expands": 19187, "aggregation": 2078, "egyptian": 16945, "dialectal": 14747, "hotel": 24033, "13th": 134, "parsimonious": 39768, "partly": 39899, "bots": 7274, "organic": 38679, "wanted": 61769, "alert": 2252, "advertisements": 2002, "robotic": 48233, "believed": 6414, "adjusted": 1852, "las": 29102, "2x": 353, "frontiers": 21693, "advancing": 1933, "drug": 16451, "agreed": 2101, "varied": 61253, "forces": 21288, "exposing": 19787, "reproducible": 46828, "scenes": 48716, "3d": 386, "geometric": 22654, "robotics": 48234, "specified": 52239, "correlates": 11511, "conveyed": 11084, "algebraic": 2256, "justifications": 27257, "parliament": 39751, "sessions": 50097, "external": 19927, "stimuli": 52852, "parliamentary": 39752, "impacted": 24610, "4th": 417, "nlpcc": 37561, "2015": 254, "tracks": 57498, "cn": 8756, "standardized": 52546, "antonym": 3122, "solver": 51696, "exceed": 18944, "closer": 8709, "introductory": 26904, "composable": 9730, "essence": 18319, "legitimate": 30011, "subjects": 53569, "distinguishable": 15606, "conditioned": 10013, "captioning": 7648, "credit": 11758, "cnns": 8778, "compose": 9731, "multitask": 36322, "addressing": 1818, "abnormal": 732, "deliver": 13833, "impacts": 24613, "auto": 5011, "footnote": 21278, "plot": 41023, "gates": 21999, "nets": 36690, "pipelined": 40909, "varies": 61254, "geographically": 22651, "coordinates": 11125, "attached": 4655, "attributable": 4898, "geography": 22652, "compatibility": 9515, "subtly": 53678, "sentiments": 49869, "projecting": 43137, "victim": 61575, "connotation": 10190, "losses": 31109, "amr": 2572, "subgraphs": 53547, "_1": 579, "compatible": 9516, "elaboration": 16956, "supervising": 54074, "activation": 1466, "ended": 17736, "concurrently": 9987, "mimicking": 33270, "acquires": 1445, "politics": 41115, "sports": 52371, "entertainment": 18018, "smartphones": 51532, "obama": 38075, "iphone": 27031, "tied": 57103, "holistic": 23989, "personality": 40761, "posted": 41355, "yahoo": 63040, "observable": 38120, "aligner": 2360, "empowers": 17409, "abstractions": 767, "generalizable": 22110, "ablations": 663, "elucidate": 16997, "pitman": 40915, "yor": 63138, "particle": 39829, "beam": 6364, "opening": 38474, "door": 16316, "distilling": 15585, "lightweight": 30456, "distill": 15565, "retain": 47920, "compromise": 9820, "lags": 27936, "lyrics": 31295, "anew": 2857, "echo": 16574, "music": 36337, "promoting": 43195, "intense": 26554, "resort": 47206, "manuscripts": 31792, "paramount": 39735, "artificially": 4501, "explosion": 19778, "videos": 61589, "played": 40982, "repeated": 46393, "prevents": 42234, "concentrating": 9918, "adverse": 1997, "reactions": 45064, "spontaneous": 52369, "radically": 44850, "revise": 48050, "compete": 9523, "normalize": 37707, "warping": 61775, "layout": 29242, "seamless": 48958, "managers": 31693, "players": 40986, "reinforcement": 45863, "rewards": 48074, "worlds": 62969, "shifted": 50544, "bounding": 7286, "apparent": 3133, "sensible": 49493, "umbrella": 59195, "filling": 20801, "propositional": 43949, "societies": 51609, "demand": 13841, "lots": 31125, "commonplace": 9230, "deluge": 13839, "acknowledged": 1429, "traces": 57490, "manipulate": 31707, "twofold": 59044, "navigate": 36493, "interactively": 26636, "decompose": 13652, "provision": 44257, "conform": 10144, "intentionally": 26571, "pragmatics": 41496, "judicial": 27244, "philosophical": 40816, "grouped": 23275, "saliency": 48436, "reaction": 45063, "messenger": 32326, "reddit": 45641, "controls": 10990, "timing": 57260, "subreddits": 53600, "faq": 20393, "delays": 13818, "confused": 10158, "motivations": 35886, "fundamentals": 21797, "pace": 39133, "sector": 49037, "impose": 24798, "segmental": 49077, "rescoring": 46976, "pass": 39916, "hinge": 23945, "marginal": 31823, "lattices": 29167, "leveraged": 30299, "mapreduce": 31809, "inferior": 25707, "textit": 56853, "closeness": 8708, "depth": 14182, "trades": 57506, "fronts": 21694, "compounding": 9756, "typing": 59161, "mit": 33377, "temperature": 56171, "encodings": 17581, "refined": 45766, "listen": 30843, "attend": 4701, "transcribe": 58332, "accepts": 817, "spectra": 52243, "emits": 17277, "ctc": 11927, "wer": 61945, "assessed": 4583, "item": 27114, "render": 46381, "residual": 47185, "comply": 9699, "understandable": 59319, "sparql": 51962, "benefited": 6577, "advancement": 1895, "triple": 58802, "opposite": 38517, "opponent": 38509, "fluency": 21124, "negatively": 36641, "transcripts": 58341, "causal": 7867, "denoted": 14072, "ed": 16583, "compactness": 9282, "pronounced": 43232, "overlooked": 39097, "default": 13763, "hyperparameters": 24339, "coarse": 8781, "hyperparameter": 24337, "chunks": 8358, "wmt": 62097, "recognise": 45486, "laymen": 29241, "preferring": 41793, "overly": 39101, "ascertain": 4511, "anomaly": 3023, "competent": 9527, "elusive": 16998, "pure": 44392, "modulated": 35749, "nearby": 36516, "severe": 50424, "overfitting": 39081, "chose": 8349, "testbed": 56392, "dropout": 16444, "emphasized": 17313, "regularizations": 45845, "opaque": 38408, "svd": 54230, "incur": 25486, "blockwise": 7224, "accounted": 883, "intention": 26569, "attain": 4666, "contiguous": 10819, "semitic": 49476, "experimentation": 19334, "pick": 40867, "adjusting": 1853, "posterior": 41358, "calibration": 7534, "intervals": 26748, "persuasiveness": 40782, "bipartite": 7180, "versatility": 61548, "funding": 21798, "toolkits": 57373, "crowdsourced": 11885, "finish": 21054, "fix": 21072, "conveniently": 10997, "spur": 52382, "innovatively": 26251, "surge": 54157, "sociolinguistics": 51617, "featuring": 20701, "synergy": 54283, "influencing": 25732, "emission": 17274, "evident": 18828, "subword": 53684, "copying": 11137, "byte": 7512, "fraud": 21636, "posits": 41306, "employees": 17398, "generalizability": 22108, "severely": 50426, "justification": 27256, "rst": 48372, "reweighting": 48075, "calculates": 7524, "submodular": 53591, "600": 459, "verbnet": 61519, "ukp": 59186, "tu": 58851, "proceedings": 42751, "talks": 54792, "commentary": 9142, "instrumental": 26488, "translationese": 58705, "shelf": 50535, "splitting": 52349, "amharic": 2538, "interoperable": 26700, "actively": 1480, "99": 574, "exception": 18957, "rationales": 45029, "sliding": 51430, "psychometric": 44294, "recovery": 45597, "unify": 59490, "relating": 45962, "recovered": 45594, "nonlinear": 37695, "sexual": 50431, "wants": 61770, "seeing": 49047, "elaborated": 16952, "pave": 39983, "overlaps": 39093, "offline": 38316, "clef": 8659, "load": 30926, "gpu": 22994, "decoded": 13583, "dividing": 15749, "histories": 23964, "topically": 57442, "regularly": 45853, "assistance": 4612, "filtered": 20809, "adaption": 1571, "supplies": 54112, "opus": 38580, "ter": 56230, "factored": 20299, "casing": 7820, "understudy": 59427, "adjustable": 1851, "intend": 26548, "resemble": 47175, "orders": 38669, "ensured": 17991, "iwslt": 27138, "stems": 52794, "cleaning": 8649, "embed": 17004, "met": 32327, "profit": 43072, "unobserved": 59621, "logs": 31000, "taxonomies": 55980, "hyponymy": 24342, "specialization": 52029, "missions": 33368, "scl": 48776, "divided": 15745, "revisiting": 48057, "siamese": 50817, "separates": 49884, "impede": 24621, "solves": 51698, "replies": 46420, "customer": 12053, "exchanges": 18967, "silver": 51022, "reply": 46421, "ap": 3126, "recommended": 45570, "composes": 9735, "denotations": 14070, "playing": 40987, "positional": 41270, "referent": 45753, "conceptually": 9954, "linearly": 30682, "linearity": 30678, "pressures": 42137, "understands": 59421, "attempted": 4694, "618": 463, "showcase": 50660, "potentials": 41421, "literatures": 30866, "consume": 10436, "journey": 27233, "taiwan": 54771, "complementing": 9595, "1m": 215, "acts": 1487, "averaging": 5426, "subtitles": 53674, "ranged": 44942, "ranges": 44943, "67": 475, "temporally": 56195, "71": 492, "09": 26, "relaxed": 46138, "i2b2": 24357, "tense": 56219, "narratives": 36386, "underrepresented": 59284, "blstm": 7230, "corrupted": 11568, "prevalence": 42225, "indication": 25544, "differing": 15150, "regional": 45804, "neighboring": 36663, "shrinking": 50812, "rsa": 48371, "literal": 30851, "criticized": 11803, "unrealistic": 59630, "opens": 38480, "approximating": 3983, "enjoyed": 17951, "understudied": 59425, "prohibitively": 43127, "deletion": 13822, "dqn": 16378, "iteratively": 27129, "rephrasing": 46400, "decode": 13582, "competitively": 9570, "msr": 35917, "tang": 54797, "poems": 41039, "li": 30418, "colors": 9000, "white": 61953, "color": 8998, "colored": 8999, "fostering": 21413, "mismatched": 33353, "fairly": 20360, "pooled": 41123, "frozen": 21695, "marker": 31834, "happy": 23438, "sleep": 51427, "serial": 50056, "nonsensical": 37697, "annual": 3019, "sea": 48957, "reviewers": 48045, "synthetic": 54367, "spot": 52372, "retained": 47923, "paraphrased": 39742, "imitates": 24574, "attempting": 4695, "compensate": 9519, "recommender": 45571, "star": 52562, "replication": 46418, "gcn": 22026, "slang": 51425, "absent": 735, "secondary": 49030, "alleviates": 2421, "superficial": 53924, "hinder": 23925, "vqa": 61746, "tuple": 58976, "concisely": 9965, "priors": 42437, "spawned": 51992, "intractable": 26763, "exceeds": 18948, "fortunately": 21396, "tendency": 56211, "socially": 51606, "complicates": 9697, "arithmetic": 4187, "verbatim": 61518, "offs": 38318, "footprint": 21283, "budget": 7380, "lately": 29114, "impressive": 24807, "gone": 22923, "regularize": 45846, "surveillance": 54200, "injury": 26241, "compensation": 9522, "disability": 15348, "chemical": 8280, "harvested": 23471, "aids": 2131, "lecture": 29986, "ds": 16453, "york": 63139, "spend": 52339, "personally": 40766, "fascinating": 20410, "vastly": 61442, "recording": 45589, "pretrained": 42146, "sick": 50823, "guarantee": 23319, "meets": 32225, "markovian": 31850, "accumulating": 889, "slt": 51457, "enriching": 17966, "bytes": 7516, "affinity": 2027, "romantic": 48336, "objectively": 38109, "indicator": 25548, "109": 77, "parent": 39746, "regularizes": 45851, "130": 129, "songs": 51713, "rankings": 44979, "charts": 8258, "factoid": 20295, "proposition": 43948, "fails": 20349, "threefold": 57088, "summarized": 53908, "alternatively": 2512, "uncovers": 59247, "insignificant": 26394, "61": 462, "tackling": 54719, "multilabel": 36054, "formalised": 21354, "compositionally": 9752, "toy": 57483, "estimator": 18389, "complementarity": 9585, "expand": 19183, "pi": 40866, "interdependent": 26643, "160": 167, "derivative": 14196, "affixes": 2029, "inheritance": 26207, "acyclic": 1491, "correctness": 11496, "sure": 54148, "checked": 8272, "statically": 52729, "backward": 5497, "imdb": 24569, "simulates": 51261, "formatted": 21368, "reaching": 45059, "man": 31686, "contradiction": 10867, "reusing": 48005, "transferring": 58435, "ignores": 24497, "boost": 7250, "doubling": 16321, "substitute": 53654, "icon": 24362, "averaged": 5420, "amplifies": 2570, "functioning": 21767, "69": 481, "surpassing": 54178, "labeler": 27773, "slot": 51440, "66": 473, "overfit": 39079, "width": 62035, "ups": 59776, "injects": 26240, "belgian": 6405, "convincing": 11089, "peak": 40001, "prompted": 43212, "prompts": 43223, "transactions": 58331, "environmental": 18174, "setups": 50412, "supportive": 54140, "opposing": 38516, "undergraduate": 59257, "biology": 7170, "classrooms": 8636, "powered": 41431, "elastic": 16957, "instructors": 26486, "upload": 59770, "unlimited": 59615, "downloadable": 16326, "browser": 7375, "raters": 45020, "teach": 55987, "portals": 41218, "reside": 47182, "superposition": 53957, "mathematically": 31936, "arora": 4192, "2016": 255, "atoms": 4650, "succinct": 53755, "converter": 11076, "conceptualize": 9952, "cast": 7821, "extrapolate": 20148, "bandit": 5521, "storyline": 52881, "dd": 13510, "gibbs": 22687, "cited": 8368, "dearth": 13524, "plug": 41025, "distributionally": 15673, "invariant": 26917, "converging": 11027, "phonology": 40832, "tricky": 58789, "exceptional": 18958, "tension": 56221, "credible": 11757, "incredible": 25479, "viral": 61621, "quantifying": 44613, "nonparametric": 37696, "incrementally": 25485, "facebook": 20244, "manuscript": 31791, "hypothesizing": 24355, "homogeneity": 23994, "81": 526, "incorporation": 25396, "sparsemax": 51973, "smooth": 51535, "vectorized": 61477, "rows": 48369, "columns": 9002, "partition": 39895, "parallelize": 39661, "remained": 46320, "naturalistic": 36471, "exams": 18943, "posit": 41258, "wealth": 61872, "modifies": 35735, "customization": 12062, "faithfully": 20367, "reversing": 48025, "forgetting": 21306, "nnlm": 37581, "rescore": 46975, "ptb": 44296, "crafted": 11677, "resembling": 47177, "centroid": 7928, "centroids": 7929, "assignments": 4607, "discriminant": 15435, "violence": 61619, "draws": 16411, "connotations": 10191, "liwc": 30897, "dissimilarities": 15538, "fourth": 21425, "smarter": 51530, "prepositional": 41819, "coder": 8872, "acted": 1453, "happening": 23435, "keyboard": 27344, "deleted": 13820, "completing": 9610, "saves": 48536, "latin": 29161, "quotations": 44839, "identical": 24379, "infrequent": 26188, "ru": 48374, "pathways": 39954, "pathway": 39953, "pays": 39993, "comparatively": 9324, "activations": 1469, "highway": 23923, "judging": 27241, "joins": 27163, "differentially": 15142, "penalizes": 40017, "subcomponents": 53543, "inspected": 26396, "foundational": 21420, "cascaded": 7782, "apart": 3129, "downside": 16328, "vanilla": 61214, "feeding": 20722, "apps": 3986, "sampler": 48462, "academic": 789, "yoruba": 63141, "archives": 4132, "unsatisfactory": 59640, "quiz": 44835, "participant": 39811, "engagement": 17758, "agencies": 2049, "statuses": 52781, "anns": 3018, "maintained": 31484, "humanities": 24266, "matter": 31944, "criticism": 11800, "inaccessible": 25206, "printed": 42391, "largescale": 29091, "inspection": 26398, "batched": 6344, "speedup": 52328, "nli": 37448, "adaptively": 1582, "softly": 51626, "wat": 61783, "attentional": 4856, "hypernymy": 24336, "pluggable": 41027, "seq2seq": 49892, "selectively": 49164, "repeat": 46392, "copynet": 11140, "utilize": 61085, "stacked": 52420, "crucially": 11918, "trading": 57507, "card": 7750, "filtration": 20815, "film": 20806, "evaluators": 18775, "loses": 31079, "sts": 53206, "authenticity": 4998, "exhibited": 19006, "approximates": 3982, "seeking": 49054, "experiences": 19228, "consumption": 10456, "dissimilarity": 15539, "reward": 48065, "extractors": 20141, "propagating": 43242, "longitudinal": 31060, "trajectories": 58328, "arrangements": 4200, "multilayer": 36057, "perceptron": 40057, "mlp": 33442, "preceded": 41604, "waiting": 61756, "famous": 20390, "behave": 6386, "violent": 61620, "rewrite": 48076, "gist": 22690, "reusable": 48001, "harnessing": 23468, "guaranteeing": 23322, "antecedent": 3115, "rewritten": 48082, "mutually": 36352, "crosslingual": 11876, "nn": 37578, "conditionals": 10012, "trans": 58330, "interpolated": 26704, "depicted": 14165, "satisfied": 48526, "phrasing": 40859, "f_": 20233, "synthesized": 54362, "automotive": 5210, "approximations": 3985, "patch": 39941, "consult": 10434, "categorizing": 7859, "recruitment": 45601, "company": 9285, "job": 27158, "inappropriate": 25213, "postings": 41367, "board": 7235, "seekers": 49053, "resume": 47916, "decomposes": 13655, "indexes": 25513, "outside": 39024, "zone": 63195, "radio": 44852, "engaging": 17760, "fixing": 21087, "ing": 26195, "conceptnet": 9930, "ppdb": 41452, "casual": 7826, "hindered": 23926, "cloze": 8722, "50k": 428, "rocstories": 48299, "struggle": 53199, "youtube": 63146, "modestly": 35727, "initialize": 26226, "ensembling": 17986, "umls": 59196, "researches": 47172, "promotes": 43194, "requisite": 46967, "progression": 43123, "ubiquity": 59178, "identifier": 24406, "presumed": 42139, "proactively": 42452, "hits": 23973, "prefixes": 41797, "degenerated": 13797, "longest": 31057, "subsequence": 53602, "uninterpretable": 59504, "textbf": 56849, "imposes": 24800, "assigns": 4608, "unidirectional": 59462, "interdependence": 26641, "constitutes": 10360, "mrr": 35909, "reliance": 46257, "corruptions": 11571, "sat": 48521, "closest": 8715, "1500": 157, "dropped": 16446, "dp": 16375, "embeds": 17251, "column": 9001, "encoders": 17551, "row": 48368, "237": 326, "essays": 18317, "grades": 23000, "courses": 11638, "digitization": 15218, "aggression": 2083, "anonymous": 3028, "observing": 38153, "intensities": 26555, "conducts": 10106, "handcrafted": 23398, "meetings": 32224, "reflecting": 45778, "team": 56001, "smart": 51528, "confirms": 10137, "recovering": 45595, "attended": 4703, "programmers": 43081, "cpu": 11672, "cores": 11163, "finnish": 21061, "execution": 18985, "headlines": 23507, "divergent": 15688, "exercises": 18995, "predecessors": 41622, "meaningfulness": 32030, "repeating": 46395, "dropping": 16447, "batch": 6339, "picks": 40870, "polyglot": 41117, "inventories": 26924, "monolingually": 35816, "curricula": 12041, "characterizes": 8250, "stimulus": 52853, "rigorously": 48150, "generalizing": 22158, "premise": 41810, "discrepancy": 15416, "illness": 24512, "pressing": 42134, "actionable": 1459, "branches": 7302, "commonalities": 9212, "relevancy": 46197, "school": 48740, "adam": 1498, "faithfulness": 20368, "blogging": 7227, "venue": 61506, "peers": 40012, "intermediary": 26672, "92": 558, "granularities": 23090, "iterative": 27121, "contextualized": 10798, "sudden": 53757, "disaster": 15364, "crises": 11770, "52": 435, "qualities": 44485, "100k": 70, "cooperative": 11122, "strengthens": 52974, "equip": 18195, "reparameterization": 46391, "tri": 58782, "imagenet": 24549, "verifying": 61545, "equal": 18188, "imply": 24674, "crawler": 11688, "ablation": 654, "pushed": 44428, "trivially": 58813, "parallelizable": 39659, "snli": 51550, "infers": 25712, "textsc": 56949, "unbounded": 59222, "indispensable": 25559, "basque": 6338, "hungarian": 24299, "taxonomic": 55979, "idiosyncrasies": 24483, "splits": 52348, "ar": 3989, "inherits": 26210, "2005": 242, "policy": 41093, "critic": 11774, "rl": 48172, "bootstrapped": 7266, "sped": 52249, "immune": 24586, "investigations": 27007, "protect": 43963, "insurance": 26496, "accountability": 882, "protected": 43964, "ehr": 16947, "assembled": 4568, "injecting": 26238, "sampled": 48460, "recommendation": 45564, "vague": 61168, "fourier": 21423, "probing": 42490, "dull": 16468, "remedy": 46366, "passed": 39924, "desiderata": 14259, "finetuning": 21047, "copy": 11132, "capitalize": 7642, "greatest": 23224, "governed": 22965, "triggers": 58796, "observes": 38152, "parallelization": 39660, "albeit": 2249, "elegant": 16971, "throughput": 57094, "modest": 35726, "cambridge": 7561, "loosely": 31077, "couple": 11630, "ill": 24509, "denoising": 14062, "summarises": 53873, "favourably": 20463, "abstracting": 765, "timelines": 57242, "disagreement": 15352, "standardizing": 52547, "normative": 37712, "meant": 32041, "genuine": 22644, "promises": 43156, "genuinely": 22645, "indistinguishable": 25560, "negations": 36614, "2022": 302, "2017": 263, "plethora": 41011, "researched": 47146, "police": 41091, "hashtag": 23475, "amplified": 2569, "americans": 2537, "argues": 4168, "diverge": 15684, "opposition": 38519, "enforcement": 17753, "confront": 10152, "grow": 23286, "annealing": 2868, "epoch": 18185, "wall": 61760, "street": 52967, "picking": 40869, "expressivity": 19816, "answerable": 3060, "53": 438, "staged": 52447, "diagram": 14743, "retaining": 47924, "execute": 18981, "executions": 18987, "5000": 425, "brand": 7304, "advertisement": 2001, "delivering": 13835, "lift": 30446, "sponsored": 52368, "winning": 62070, "jump": 27246, "listening": 30846, "nesting": 36687, "isolate": 27049, "memorization": 32234, "irrelevant": 27039, "arcs": 4133, "caching": 7521, "chen": 8283, "distillation": 15566, "eliminate": 16985, "teacher": 55989, "probed": 42488, "disregarding": 15529, "unacceptable": 59200, "commodity": 9162, "traffic": 57561, "ms": 35911, "hypernym": 24334, "curse": 12047, "votes": 61738, "compensates": 9521, "ucca": 59179, "transportation": 58724, "undesirable": 59433, "innate": 26242, "agrees": 2109, "fuel": 21705, "reviewing": 48046, "separation": 49886, "lasting": 29105, "reformulating": 45785, "bird": 7183, "plurality": 41031, "capital": 7639, "contextualize": 10797, "contextualization": 10796, "naturalness": 36475, "unreliable": 59636, "debiasing": 13533, "plentiful": 41009, "kinyarwanda": 27375, "kbs": 27274, "io": 27028, "subtrees": 53682, "tiny": 57261, "biasing": 7061, "commerce": 9150, "posed": 41242, "bills": 7125, "laden": 27932, "accelerated": 803, "democratic": 13851, "prioritizing": 42435, "opened": 38471, "utilization": 61084, "51": 430, "va": 61162, "savings": 48538, "nmf": 37572, "substantively": 53653, "executable": 18979, "falls": 20377, "ary": 4506, "imitation": 24576, "tease": 56014, "hands": 23430, "ingredient": 26197, "eval": 18432, "super": 53920, "burden": 7496, "tier": 57104, "phonemic": 40823, "flat": 21095, "premises": 41812, "terminological": 56259, "encompassing": 17584, "modulation": 35750, "undergo": 59254, "subjected": 53560, "quadratic": 44463, "gaps": 21986, "nlu": 37562, "expansions": 19191, "yielded": 63103, "player": 40985, "506": 427, "borrowing": 7271, "connecting": 10179, "amazing": 2519, "sensors": 49509, "sensory": 49510, "alike": 2394, "advancements": 1896, "convenience": 10994, "acceptance": 814, "attentions": 4861, "beats": 6375, "exploitable": 19669, "avoided": 5436, "utilities": 61078, "examinations": 18859, "restrict": 47419, "preferable": 41788, "earliest": 16509, "seamlessly": 48959, "nonetheless": 37693, "uptake": 59780, "initialisation": 26221, "subspaces": 53613, "contextualised": 10791, "positioning": 41275, "generalised": 22104, "quantifiers": 44605, "quantifier": 44604, "branching": 7303, "quantum": 44643, "mover": 35891, "bm25": 7233, "typicality": 59134, "gradual": 23014, "graded": 22999, "heritage": 23614, "replicating": 46417, "reused": 48003, "angle": 2859, "orthography": 38757, "rights": 48146, "generalisability": 22100, "wikidata": 62037, "dozens": 16374, "unhealthy": 59457, "temporality": 56194, "severity": 50429, "mild": 33243, "nonexistent": 37694, "hu": 24065, "explosive": 19779, "storing": 52877, "incurring": 25488, "continues": 10834, "lesser": 30044, "plurals": 41032, "propositions": 43950, "eighth": 16949, "2019": 279, "krippendorff": 27676, "generalise": 22103, "skipping": 51423, "surprisal": 54179, "autoencoding": 5030, "tradeoff": 57504, "economy": 16581, "predominant": 41782, "tying": 59046, "regularizing": 45852, "harming": 23462, "multimedia": 36139, "endeavor": 17733, "routinely": 48365, "pool": 41122, "insensitive": 26374, "insertions": 26380, "deletions": 13824, "substitutions": 53659, "substantive": 53652, "appraisal": 3383, "arguing": 4169, "classifications": 8584, "neighbourhood": 36668, "soon": 51714, "drops": 16448, "prominently": 43153, "shapes": 50450, "hitherto": 23972, "embrace": 17255, "shi": 50541, "ci": 8359, "foundations": 21421, "imagery": 24550, "appeared": 3143, "distantly": 15560, "hashing": 23474, "formulae": 21381, "familiarity": 20386, "refrain": 45787, "signaling": 50831, "trait": 58326, "straightforwardly": 52887, "trial": 58784, "adjusts": 1856, "conneau": 10169, "assertion": 4573, "125": 117, "disorders": 15512, "gru": 23312, "skewed": 51412, "younger": 63143, "densely": 14087, "ties": 57105, "discarding": 15368, "9k": 576, "800": 523, "intuitively": 26912, "interdependencies": 26642, "participate": 39815, "fifth": 20786, "ordinal": 38674, "outputting": 39023, "concatenate": 9907, "placing": 40931, "viability": 61567, "productions": 43054, "deteriorates": 14546, "dominance": 16304, "creativity": 11754, "faceted": 20256, "wish": 62087, "fourteen": 21424, "collectively": 8992, "facebookresearch": 20249, "surrogate": 54193, "oracle": 38581, "reception": 45477, "confronted": 10153, "sharply": 50522, "optimistic": 38541, "ethical": 18415, "grown": 23304, "grading": 23013, "reliant": 46258, "instructor": 26485, "unordered": 59622, "motivate": 35857, "viable": 61569, "cleaner": 8648, "initialization": 26223, "extendable": 19834, "tunes": 58893, "drugs": 16452, "legislation": 30009, "officially": 38314, "seeds": 49046, "institution": 26472, "climate": 8665, "tradeoffs": 57505, "verbalizations": 61515, "developer": 14644, "hallucinated": 23372, "inverted": 26933, "sem": 49229, "tagset": 54760, "resnet": 47193, "ud": 59180, "v1": 61157, "equation": 18193, "1k": 214, "deciding": 13555, "traveling": 58726, "authoritative": 5004, "factorized": 20302, "documenting": 15851, "endangered": 17730, "alternate": 2492, "marginalization": 31826, "memorize": 32235, "attentive": 4862, "marginalized": 31827, "abstractive": 768, "emoji": 17282, "avoiding": 5437, "seventeen": 50422, "segmenter": 49092, "cmu": 8754, "participated": 39817, "bioasq": 7162, "batches": 6345, "gp": 22969, "dsl": 16454, "stance": 52454, "questioning": 44763, "commenting": 9143, "breaking": 7312, "tail": 54762, "synthetically": 54387, "degrading": 13812, "restricting": 47424, "cpus": 11673, "simultaneous": 51268, "delay": 13816, "5m": 452, "partitioned": 39896, "respects": 47387, "motion": 35856, "entailed": 17996, "outlines": 38776, "aforementioned": 2035, "subcategories": 53541, "abuse": 785, "crime": 11767, "disasters": 15365, "ratios": 45032, "workflow": 62866, "browsing": 7376, "expose": 19784, "continuously": 10858, "outliers": 38773, "outlier": 38771, "impressively": 24818, "unnecessary": 59619, "strengthening": 52973, "client": 8663, "device": 14722, "prevent": 42228, "sigmoid": 50826, "pertinent": 40786, "deploy": 14168, "spirit": 52344, "proto": 43969, "staff": 52425, "doctor": 15762, "sizable": 51374, "lays": 29246, "false": 20379, "violating": 61616, "pretty": 42223, "multilingualism": 36137, "postulate": 41377, "collaborate": 8930, "cooperate": 11120, "conjunct": 10164, "genia": 22639, "controller": 10988, "concretely": 9983, "bot": 7273, "enormously": 17959, "monotonically": 35822, "projective": 43144, "min": 33272, "kl": 27377, "deficiency": 13770, "divides": 15748, "moved": 35888, "ready": 45096, "foster": 21409, "delivery": 13837, "f_1": 20235, "vlsp": 61698, "controllable": 10976, "ami": 2539, "save": 48533, "fare": 20407, "loop": 31073, "bpe": 7296, "compress": 9808, "distil": 15562, "ample": 2565, "repetitive": 46398, "synthesizing": 54366, "conversely": 11068, "pruned": 44266, "abundance": 779, "disregard": 15527, "ported": 41219, "vulnerable": 61753, "restaurants": 47414, "celebrities": 7907, "100b": 69, "hi": 23636, "bn": 7234, "verbalize": 61516, "receptive": 45478, "excessive": 18962, "executing": 18984, "programmer": 43080, "fst": 21703, "mitigate": 33379, "primal": 42359, "subwords": 53690, "unusual": 59756, "bits": 7188, "predictable": 41663, "credibility": 11756, "biography": 7167, "evaluative": 18773, "speeds": 52327, "clue": 8729, "definitive": 13795, "satisfaction": 48522, "dr": 16379, "begun": 6385, "unexplored": 59441, "summarisation": 53870, "coherently": 8920, "scraped": 48941, "metaphorical": 32348, "figurative": 20789, "suboptimal": 53594, "committing": 9161, "gigaword": 22689, "confined": 10127, "md": 31983, "el": 16950, "boards": 7236, "755": 505, "mitigates": 33393, "oversampling": 39103, "regularizers": 45850, "sacrificing": 48421, "verifies": 61531, "undirected": 59436, "np": 37965, "fuses": 21848, "pivotal": 40919, "experiencing": 19229, "empower": 17406, "nontrivial": 37698, "pointer": 41055, "consortium": 10341, "aa": 581, "lasso": 29104, "l2": 27684, "rnng": 48206, "agreeing": 2102, "minimally": 33294, "nns": 37582, "emphasizes": 17314, "competency": 9526, "constructive": 10432, "boosts": 7262, "leaf": 29332, "lexicalization": 30396, "structurally": 53086, "pioneering": 40889, "mislead": 33348, "predication": 41633, "tedious": 56161, "flagging": 21090, "7000": 489, "kannada": 27266, "concluded": 9972, "foremost": 21298, "triplet": 58807, "phases": 40807, "till": 57111, "weakens": 61854, "copied": 11128, "connectionist": 10181, "lengthy": 30041, "misclassified": 33343, "born": 7268, "continuum": 10861, "exclude": 18972, "positives": 41304, "deteriorating": 14547, "allocate": 2428, "noises": 37609, "diagnosis": 14738, "inferencing": 25705, "observational": 38125, "physiological": 40865, "condensed": 9989, "diagnoses": 14736, "vectorization": 61476, "quadratically": 44466, "multitasking": 36328, "bilstm": 7126, "clips": 8678, "uncorrelated": 59243, "quantization": 44640, "artefacts": 4445, "fasttext": 20445, "continuity": 10839, "sourcing": 51843, "incompatible": 25329, "cer": 7932, "pu": 44300, "delicate": 13829, "feelings": 20726, "communications": 9254, "recommending": 45573, "emojis": 17283, "explorations": 19680, "faithful": 20364, "stands": 52554, "prominence": 43150, "nucleus": 37977, "localization": 30957, "median": 32193, "stackoverflow": 52423, "scorer": 48884, "constraining": 10367, "arc": 4015, "offensive": 38288, "disclosure": 15377, "excerpts": 18961, "suspected": 54226, "prospects": 43961, "reciprocal": 45484, "ensuring": 17993, "depict": 14164, "retains": 47928, "told": 57346, "distraction": 15615, "profiling": 43071, "flip": 21113, "whatsapp": 61951, "pathology": 39950, "frontier": 21692, "bulk": 7495, "lowering": 31227, "productivity": 43056, "hyper": 24324, "mtl": 35929, "upto": 59781, "db": 13505, "speedups": 52330, "overwhelming": 39123, "isn": 27047, "prune": 44265, "aside": 4516, "drama": 16383, "probe": 42485, "contingent": 10820, "malayalam": 31677, "london": 31001, "54": 441, "novice": 37962, "sarcasm": 48517, "categorizes": 7858, "hurt": 24303, "multichannel": 36045, "tracker": 57495, "utilise": 61074, "multi30k": 36044, "discriminator": 15451, "fooling": 21277, "adversary": 1996, "pitfalls": 40914, "adversarially": 1993, "mle": 33435, "offensiveness": 38289, "ssl": 52407, "slu": 51458, "altering": 2491, "sgd": 50434, "chi": 8286, "square": 52398, "presently": 42067, "prices": 42358, "allocated": 2429, "price": 42357, "plate": 40947, "hong": 23998, "kong": 27672, "collapse": 8934, "anchors": 2853, "scorers": 48885, "editions": 16599, "snapshots": 51546, "ml": 33427, "contradict": 10865, "clearer": 8655, "certainly": 7951, "culturally": 11938, "attending": 4704, "changed": 8175, "customised": 12060, "speeding": 52326, "losing": 31080, "excel": 18952, "traits": 58327, "2016a": 262, "incoherent": 25326, "revolutionized": 48061, "switches": 54259, "serving": 50095, "dilated": 15220, "20x": 312, "8x": 550, "maybe": 31975, "harness": 23466, "announcements": 3017, "cohort": 8923, "thirty": 57052, "300k": 361, "truths": 58843, "boxes": 7294, "misclassification": 33342, "upcoming": 59764, "predictability": 41662, "afterward": 2042, "cqa": 11674, "200k": 247, "edits": 16602, "fulfills": 21710, "discriminability": 15434, "insufficient": 26491, "underlie": 59258, "publishers": 44377, "wing": 62067, "unsurprisingly": 59750, "screening": 48948, "ot": 38758, "characterisation": 8228, "arab": 3990, "anticipate": 3119, "durations": 16475, "pretrain": 42142, "tabular": 54693, "outbreak": 38761, "outbreaks": 38764, "epidemiological": 18180, "symptoms": 54275, "moment": 35779, "disambiguated": 15355, "attaching": 4656, "spain": 51915, "encompasses": 17583, "displays": 15522, "aligns": 2393, "competence": 9525, "misaligned": 33340, "misalignment": 33341, "confident": 10120, "teams": 56006, "revisited": 48056, "subsystems": 53666, "repetition": 46396, "societal": 51607, "undertake": 59429, "citizens": 8374, "identities": 24471, "nation": 36394, "chooses": 8345, "embodies": 17253, "continually": 10824, "revisions": 48054, "propagates": 43241, "bond": 7243, "duplicated": 16470, "stated": 52714, "winners": 62069, "comprehend": 9758, "auc": 4920, "roc": 48297, "rankers": 44961, "interventions": 26751, "visible": 61632, "architectural": 4016, "deduce": 13674, "euclidean": 18423, "homonyms": 23996, "entirety": 18035, "claimed": 8382, "canadian": 7567, "encounters": 17588, "arbitrarily": 4010, "breakthroughs": 7316, "gotten": 22961, "atypical": 4919, "120": 112, "subsampling": 53601, "modulate": 35748, "approximated": 3980, "tasked": 55480, "clarifying": 8388, "misunderstandings": 33375, "250": 336, "sketch": 51409, "inspect": 26395, "alters": 2514, "advice": 2004, "discovers": 15413, "su": 53513, "episodes": 18182, "rewarding": 48073, "irrespective": 27045, "inner": 26243, "deficit": 13772, "lewis": 30346, "paced": 39134, "tightly": 57109, "deficient": 13771, "deficits": 13773, "puts": 44431, "gans": 21956, "golden": 22922, "mini": 33279, "win": 62062, "transformer": 58448, "cup": 11943, "effortless": 16931, "elicited": 16981, "chit": 8329, "120k": 114, "streaming": 52962, "suite": 53863, "probabilistically": 42469, "outer": 38769, "retailers": 47919, "quora": 44837, "voluminous": 61733, "establishment": 18368, "overhead": 39085, "thereof": 57040, "vice": 61572, "versa": 61546, "october": 38285, "545": 443, "additions": 1738, "synthesizer": 54364, "musical": 36338, "king": 27373, "reasonably": 45176, "tunable": 58852, "acl": 1430, "switchboard": 54254, "undergone": 59256, "immense": 24583, "untapped": 59751, "harmful": 23461, "interprets": 26742, "hardest": 23456, "numerals": 38058, "aged": 2048, "accelerating": 805, "intents": 26573, "renewed": 46385, "appeal": 3135, "imagination": 24559, "lacked": 27927, "failed": 20346, "moral": 35832, "sent2vec": 49512, "anthology": 3117, "ps": 44271, "journalism": 27229, "stereotypical": 52848, "terminal": 56256, "forget": 21305, "meticulously": 33108, "correlating": 11516, "brands": 7305, "procrustes": 42972, "estonian": 18391, "satisfies": 48527, "2018": 272, "v2": 61159, "2021": 296, "calls": 7558, "july": 27245, "semiotic": 49473, "typologically": 59165, "ugc": 59183, "brazilian": 7307, "risks": 48165, "princeton": 42378, "shrink": 50811, "explainable": 19597, "4m": 416, "shortage": 50575, "infrequently": 26190, "rerank": 46968, "overarching": 39055, "decomposable": 13651, "gcns": 22028, "positively": 41302, "stocks": 52861, "mentioning": 32301, "squad": 52394, "archive": 4131, "snippet": 51547, "url": 59791, "sparked": 51961, "recast": 45250, "entail": 17995, "exclusive": 18977, "crafting": 11684, "inception": 25217, "phenomenal": 40813, "proving": 44256, "alleviating": 2426, "duplication": 16473, "trending": 58778, "demands": 13844, "argumentation": 4176, "bilstms": 7138, "pas": 39915, "exhaustively": 18999, "prepare": 41814, "anchored": 2851, "shares": 50512, "ark": 4188, "elaborately": 16953, "rightarrow": 48145, "manipulating": 31710, "went": 61944, "10k": 78, "decentralized": 13550, "strive": 52996, "attained": 4671, "genome": 22640, "alleviated": 2420, "usc": 59812, "hat": 23478, "invisible": 27010, "rooted": 48343, "leap": 29340, "longstanding": 31061, "backgrounds": 5494, "bounded": 7285, "interval": 26747, "280": 346, "attributions": 4918, "disagree": 15351, "typologies": 59170, "catalyze": 7830, "overtly": 39107, "underperform": 59282, "enrichment": 17967, "gloss": 22853, "augmentation": 4946, "utilising": 61077, "came": 7562, "164": 169, "sorts": 51722, "encompass": 17582, "angles": 2860, "4x": 418, "timestep": 57259, "sec": 48993, "initializations": 26225, "task4": 55479, "supervisory": 54102, "japan": 27143, "overflow": 39084, "mitigation": 33395, "openly": 38477, "pertain": 40783, "eases": 16524, "wu": 63021, "lid": 30432, "smoother": 51537, "renowned": 46386, "externally": 19959, "keras": 27287, "realisation": 45144, "underfitting": 59253, "attends": 4705, "empathy": 17306, "dyadic": 16480, "sustain": 54227, "car": 7747, "trick": 58787, "bodies": 7237, "governing": 22966, "reformulate": 45784, "probes": 42489, "aggregates": 2074, "returned": 47998, "reformulation": 45786, "recruited": 45599, "discrepancies": 15415, "favoring": 20459, "calibrated": 7530, "headline": 23505, "6m": 483, "moderation": 35698, "16k": 170, "char": 8192, "exhibiting": 19007, "originates": 38749, "concatenating": 9910, "moves": 35893, "pulling": 44384, "derivational": 14194, "fitted": 21070, "decouples": 13664, "20k": 309, "sort": 51719, "colloquial": 8997, "unwritten": 59763, "switched": 54256, "injected": 26237, "warrants": 61778, "synergistic": 54282, "relax": 46136, "rte": 48373, "separable": 49872, "separability": 49871, "hurts": 24307, "bt": 7379, "universities": 59554, "cds": 7902, "organizational": 38685, "disregards": 15530, "entered": 18013, "formulates": 21388, "causality": 7880, "infrastructure": 26186, "infrastructures": 26187, "quo": 44836, "assets": 4596, "pan": 39244, "raise": 44854, "lessons": 30046, "schemas": 48724, "padding": 39136, "floating": 21114, "ensures": 17992, "workings": 62870, "instantaneous": 26439, "arrives": 4206, "initiated": 26233, "personalities": 40760, "slots": 51445, "grus": 23315, "posteriori": 41365, "gumbel": 23360, "indonesia": 25598, "urges": 59790, "overloaded": 39095, "ats": 4652, "envision": 18178, "apple": 3151, "protocols": 43971, "invented": 26922, "e2e": 16501, "launched": 29171, "uniqueness": 59520, "theart": 57002, "afforded": 2033, "mc": 31981, "levantine": 30053, "replay": 46411, "episodic": 18183, "catastrophic": 7832, "organizers": 38690, "kingdom": 27374, "s2s": 48416, "doc2vec": 15760, "distribute": 15619, "bottle": 7275, "noticeable": 37727, "schemata": 48725, "corruption": 11570, "forcing": 21290, "hour": 24038, "prescriptions": 41834, "domestic": 16303, "socioeconomic": 51615, "transitional": 58544, "graphic": 23181, "proves": 43998, "converges": 11026, "parties": 39894, "mp": 35903, "esim": 18259, "struggling": 53205, "heads": 23509, "transparency": 58720, "anonymity": 3025, "witness": 62089, "hide": 23652, "obfuscate": 38077, "soundness": 51738, "obfuscation": 38079, "sanity": 48513, "rg": 48084, "unstable": 59664, "fulfill": 21708, "assistants": 4614, "siri": 51362, "chatting": 8267, "hinders": 23930, "hindrance": 23944, "episode": 18181, "interpersonal": 26701, "coined": 8926, "progressed": 43120, "ear": 16504, "manages": 31694, "glance": 22815, "apt": 3988, "steady": 52786, "wikitext": 62058, "prize": 42447, "spherical": 52342, "invalid": 26914, "unsupported": 59749, "connectives": 10187, "kgs": 27362, "corrupt": 11567, "disconnected": 15379, "mature": 31946, "xgboost": 63027, "inherited": 26208, "gauge": 22008, "directionality": 15287, "hypothesise": 24351, "evidences": 18827, "delayed": 13817, "granular": 23089, "mnli": 33446, "openie": 38473, "gn": 22871, "500k": 426, "cap": 7594, "sim": 51026, "triggering": 58795, "disjunction": 15509, "ridge": 48134, "mscoco": 35915, "disfluencies": 15504, "perturbation": 40789, "json": 27235, "zenodo": 63149, "occupation": 38265, "mission": 33367, "watch": 61784, "enforce": 17751, "algerian": 2257, "rounds": 48361, "reviewer": 48044, "urls": 59795, "duplicates": 16471, "holes": 23988, "owner": 39128, "cove": 11642, "sst": 52408, "launch": 29170, "vertical": 61563, "novelties": 37960, "ablative": 664, "disagreements": 15353, "contradictory": 10869, "submitting": 53590, "enforced": 17752, "therapy": 57039, "warrant": 61776, "2k": 349, "suffice": 53795, "unaware": 59217, "shots": 50659, "cider": 8360, "shortcut": 50583, "emnlp": 17280, "romanized": 48335, "designs": 14342, "missed": 33358, "storytelling": 52882, "kanji": 27265, "ser": 50055, "wild": 62059, "generalisation": 22102, "office": 38305, "bar": 5528, "waste": 61782, "clinicians": 8676, "unsafe": 59639, "harm": 23460, "diminish": 15246, "librispeech": 30426, "ranker": 44960, "deduced": 13675, "titan": 57267, "enjoying": 17952, "senior": 49480, "encounter": 17585, "barriers": 5531, "combing": 9106, "dialogic": 14761, "decent": 13547, "procedurally": 42740, "licenses": 30431, "tolerance": 57347, "agile": 2085, "coping": 11130, "sorted": 51720, "saw": 48539, "happen": 23433, "sectors": 49038, "2017a": 270, "substructures": 53662, "appearance": 3141, "rests": 47430, "eating": 16571, "hops": 24021, "composite": 9738, "spent": 52340, "manuals": 31790, "distributing": 15631, "10m": 79, "binarized": 7141, "illustrations": 24524, "anonymized": 3027, "injection": 26239, "nuance": 37974, "guaranteed": 23321, "liked": 30515, "conditionally": 10011, "dev": 14566, "7x": 519, "swapping": 54244, "finely": 21036, "modularity": 35746, "pm": 41034, "trigrams": 58798, "routine": 48364, "multinli": 36159, "hyperbolic": 24328, "interestingness": 26657, "plan": 40940, "inject": 26236, "viewpoint": 61607, "zipfian": 63194, "curves": 12051, "deviate": 14717, "demonstrations": 14058, "revised": 48051, "opensubtitles": 38485, "profiles": 43070, "encapsulated": 17457, "restoration": 47415, "deconvolutional": 13661, "thirteen": 57051, "summarise": 53871, "retrofitting": 47995, "wikihow": 62039, "insert": 26375, "shorten": 50585, "imputation": 25203, "disfluency": 15505, "mid": 33234, "cleaned": 8646, "restarts": 47411, "preprint": 41822, "february": 20702, "blue": 7231, "fc": 20465, "pt": 44295, "decipher": 13556, "dblp": 13506, "incapable": 25215, "sums": 53918, "dark": 12096, "replicability": 46412, "estate": 18369, "translational": 58704, "reality": 45157, "shopping": 50548, "tutoring": 58997, "craft": 11676, "android": 2856, "optimizers": 38572, "optimizer": 38571, "chatbots": 8263, "brazil": 7306, "said": 48431, "unavailable": 59213, "factuality": 20324, "readings": 45094, "8m": 548, "252": 338, "accept": 808, "acceptability": 810, "january": 27142, "trump": 58827, "letting": 30052, "catching": 7837, "attract": 4871, "mse": 35916, "earth": 16519, "unfamiliar": 59446, "stochastically": 52858, "realism": 45146, "totaling": 57476, "skim": 51416, "struggles": 53204, "lemmatizer": 30020, "operationalization": 38491, "consolidating": 10338, "pyramid": 44437, "360": 378, "intelligently": 26545, "gave": 22020, "hospital": 24024, "propbank": 43247, "ubiquitously": 59177, "diversely": 15726, "customized": 12064, "neuroscience": 37120, "tells": 56169, "wording": 62354, "coffee": 8887, "clip": 8677, "assessors": 4594, "laborious": 27865, "assembly": 4571, "bidirectionally": 7086, "switzerland": 54262, "seldom": 49097, "60k": 461, "backtranslation": 5496, "susceptible": 54224, "attacks": 4664, "exemplified": 18992, "coattention": 8787, "triviaqa": 58814, "strike": 52988, "effortlessly": 16932, "scan": 48652, "systematicity": 54415, "notorious": 37735, "principally": 42383, "suspect": 54225, "srl": 52405, "compressing": 9811, "sacrifices": 48420, "skilled": 51414, "norwegian": 37715, "broadening": 7361, "reserved": 47179, "pytorch": 44443, "brittle": 7346, "trouble": 58816, "comprehending": 9759, "cumulative": 11942, "blanks": 7197, "switch": 54253, "ppl": 41453, "exploding": 19650, "repeatedly": 46394, "appending": 3149, "keys": 27347, "sgns": 50435, "timestamps": 57258, "route": 48362, "10x": 81, "facto": 20293, "serialized": 50057, "wikisql": 62057, "objectivity": 38115, "reactive": 45065, "mediation": 32196, "elaborating": 16955, "functionally": 21766, "acknowledge": 1428, "incapability": 25214, "perturbations": 40792, "mrc": 35905, "ma": 31296, "baidu": 5512, "resilience": 47189, "transformers": 58518, "unstated": 59665, "transferability": 58431, "spatially": 51988, "linearization": 30679, "gqa": 22997, "repetitions": 46397, "marco": 31815, "prohibitive": 43126, "chemistry": 8282, "inconsistency": 25337, "lancopku": 27943, "hr": 24043, "resilient": 47190, "realizations": 45159, "idiom": 24480, "remembering": 46370, "medias": 32194, "fantasy": 20392, "chatbot": 8262, "fluid": 21136, "migration": 33238, "infancy": 25631, "benign": 6598, "deterioration": 14548, "push": 44423, "azure": 5483, "illustrative": 24525, "promotion": 43196, "replicates": 46416, "maintains": 31497, "occupies": 38266, "pa": 39132, "periodically": 40725, "vis": 61629, "peer": 40010, "assessments": 4593, "instagram": 26422, "disorder": 15511, "newcomers": 37366, "likes": 30528, "casts": 7825, "oft": 38320, "avenue": 5394, "oil": 38324, "gas": 21989, "accident": 837, "symptom": 54274, "germanic": 22679, "slavic": 51426, "utmost": 61130, "sellers": 49228, "dan": 12092, "ancillary": 2855, "vaswani": 61444, "mask": 31855, "san": 48512, "roman": 48328, "tensorflow": 56226, "pinyin": 40886, "inputting": 26370, "evidently": 18830, "australian": 4996, "evidenced": 18826, "swaps": 54245, "attack": 4658, "inspirations": 26400, "session": 50096, "consulting": 10435, "forensic": 21299, "questionable": 44760, "blends": 7201, "concentrated": 9916, "characterised": 8230, "imperfections": 24629, "monitored": 35785, "numerically": 38063, "gan": 21952, "tricks": 58788, "challenged": 8025, "cyber": 12072, "security": 49041, "continual": 10821, "accumulate": 886, "intact": 26497, "disentangled": 15500, "comprise": 9816, "accommodates": 839, "scitail": 48775, "approx": 3974, "fool": 21276, "simplifies": 51240, "banks": 5527, "sota": 51723, "lifelong": 30442, "omitted": 38332, "literacy": 30850, "stably": 52414, "discerning": 15372, "jaccard": 27140, "recurrently": 45632, "ulmfit": 59189, "hindering": 23928, "319": 366, "coarser": 8786, "att": 4653, "relieves": 46271, "inefficient": 25626, "feeds": 20723, "converse": 11067, "subgraph": 53546, "coreferences": 11161, "mirrors": 33338, "yago": 63039, "discriminators": 15454, "copies": 11129, "characterise": 8229, "multidimensional": 36048, "assesses": 4585, "regulators": 45859, "grants": 23088, "infersent": 25713, "supply": 54113, "6k": 482, "noticeably": 37730, "compile": 9577, "delimiter": 13830, "overnight": 39102, "nvidia": 38072, "accelerates": 804, "embody": 17254, "vertex": 61562, "30k": 362, "covariates": 11641, "diagonal": 14742, "multiply": 36320, "viewers": 61605, "consecutively": 10197, "stating": 52730, "ambiguously": 2531, "massively": 31891, "exacerbated": 18846, "biocreative": 7164, "selections": 49161, "omit": 38331, "factorize": 20301, "reordered": 46388, "penalized": 40016, "transport": 58722, "neulab": 36926, "rc": 45040, "isolating": 27052, "worthwhile": 62980, "abstracted": 764, "permissive": 40728, "licence": 30427, "closes": 8713, "lingually": 30742, "overestimated": 39077, "neo": 36673, "banking": 5526, "regulations": 45858, "absolutely": 751, "mistake": 33370, "seriously": 50071, "won": 62106, "intensively": 26562, "epsilon": 18187, "trajectory": 58329, "noteworthy": 37725, "kappa": 27267, "reranker": 46969, "parity": 39750, "quantized": 44642, "leaderboard": 29283, "105": 74, "ideally": 24377, "allen": 2396, "secured": 49040, "memorizing": 32238, "catalan": 7828, "inconclusive": 25334, "subproblem": 53598, "proprietary": 43951, "128": 118, "256": 339, "paucity": 39980, "aryan": 4507, "magahi": 31413, "differentiating": 15145, "outlining": 38777, "irregular": 27036, "income": 25327, "charge": 8256, "depression": 14181, "minutes": 33335, "casting": 7824, "hub": 24067, "ecommerce": 16576, "dominating": 16312, "surveying": 54222, "diagrams": 14744, "understandability": 59318, "tonal": 57349, "backed": 5488, "curate": 11944, "deductive": 13677, "recalling": 45249, "bonus": 7244, "balancing": 5518, "redesign": 45646, "plagues": 40935, "researching": 47174, "interdisciplinary": 26644, "synchronized": 54278, "personalize": 40764, "capsule": 7643, "blended": 7199, "attackers": 4662, "uploaded": 59771, "sketches": 51410, "helped": 23594, "silence": 51020, "negatives": 36645, "prototypes": 43973, "precedence": 41605, "tions": 57264, "lemmatized": 30019, "coreferent": 11162, "mexican": 33212, "2018a": 278, "prefix": 41795, "agreements": 2108, "2002": 238, "suffering": 53787, "liang": 30421, "plugged": 41028, "alexa": 2253, "winograd": 62072, "quantifiable": 44601, "disentanglement": 15501, "disentangles": 15502, "clinically": 8675, "holding": 23983, "verifiability": 61523, "outdated": 38768, "underspecified": 59287, "rotation": 48346, "heatmap": 23526, "exposes": 19786, "pathological": 39949, "counterintuitive": 11621, "unimodal": 59498, "revolves": 48064, "narrating": 36380, "bidaf": 7063, "questioned": 44761, "comprehensible": 9761, "featured": 20513, "ade": 1828, "duplicating": 16472, "cloning": 8682, "blackbox": 7195, "debug": 13536, "7k": 517, "drafts": 16382, "venues": 61507, "originality": 38741, "mm": 33443, "mdp": 31985, "unexploited": 59440, "troublesome": 58817, "alternately": 2493, "peculiar": 40007, "standpoint": 52553, "hotels": 24035, "stylistically": 53511, "ate": 4645, "existent": 19020, "multiplications": 36316, "delete": 13819, "writes": 62987, "outlets": 38770, "persist": 40747, "stock": 52860, "filings": 20796, "regulatory": 45860, "markets": 31838, "makers": 31612, "decoupling": 13665, "adhere": 1836, "yearly": 63048, "appearances": 3142, "america": 2534, "programmatic": 43078, "suffices": 53796, "modularized": 35747, "adaptability": 1516, "alterations": 2489, "inadvertently": 25212, "nrc": 37969, "canada": 7566, "smm4h": 51534, "medication": 32213, "imbalance": 24561, "acc": 798, "lrls": 31233, "hrl": 24044, "da": 12080, "weighing": 61914, "textrank": 56854, "tutorials": 58996, "clarification": 8385, "kim": 27364, "estimators": 18390, "subtypes": 53683, "mixtures": 33424, "ta": 54685, "unite": 59523, "looked": 31068, "kurdish": 27682, "warning": 61774, "signs": 51019, "bernoulli": 6599, "multiparty": 36163, "perturb": 40788, "landmarks": 27944, "consolidation": 10339, "practitioner": 41490, "commitment": 9159, "alter": 2488, "latex": 29160, "mathematics": 31937, "dozen": 16373, "extrapolation": 20150, "maximise": 31953, "testable": 56391, "170": 174, "untested": 59752, "decides": 13554, "unnatural": 59617, "allenai": 2397, "obfuscated": 38078, "asymmetry": 4642, "vulnerabilities": 61751, "theorem": 57008, "delexicalized": 13825, "webnlg": 61902, "inadequacy": 25210, "cultures": 11940, "couples": 11633, "fastest": 20444, "afford": 2030, "september": 49887, "december": 13546, "filled": 20798, "snips": 51549, "iot": 27029, "1a": 211, "6th": 484, "12th": 120, "duality": 16464, "3x": 392, "ineffective": 25623, "armed": 4190, "willing": 62061, "succinctly": 53756, "discern": 15370, "cardinal": 7751, "humanoid": 24269, "coherency": 8912, "admit": 1860, "termed": 56255, "densenet": 14089, "prioritizes": 42434, "extensibility": 19845, "dating": 13496, "bert": 6600, "delta": 13838, "follower": 21260, "unintended": 59502, "sdp": 48954, "competitions": 9535, "codalab": 8789, "5x": 454, "lsh": 31236, "cuda": 11930, "hurting": 24305, "ticket": 57100, "master": 31893, "customizable": 12061, "pharmaceutical": 40803, "rat": 45011, "granted": 23087, "unfair": 59443, "richly": 48131, "enrolled": 17968, "indigenous": 25552, "humor": 24293, "funny": 21800, "misconceptions": 33345, "meme": 32231, "humorous": 24294, "memes": 32232, "sd": 48953, "icelandic": 24361, "continuing": 10836, "bags": 5510, "unresolved": 59637, "uncovering": 59246, "favour": 20462, "extralinguistic": 20145, "sememe": 49423, "tangent": 54798, "rectified": 45602, "forced": 21287, "uninformative": 59501, "revising": 48052, "validations": 61198, "advocate": 2005, "imaging": 24560, "intentional": 26570, "particularities": 39875, "penalization": 40014, "defending": 13767, "impediment": 24624, "112": 94, "differentiation": 15146, "src": 52404, "hamper": 23377, "notebook": 37722, "proliferate": 43146, "dilemma": 15221, "sake": 48432, "employment": 17402, "informs": 26185, "discriminates": 15438, "interpolating": 26706, "transferable": 58432, "monologue": 35818, "adaboost": 1497, "rmse": 48178, "financially": 20895, "industries": 25619, "labour": 27866, "subsequences": 53603, "136": 131, "assurance": 4640, "violations": 61618, "wechat": 61907, "app": 3132, "necessitating": 36539, "designated": 14305, "multiscale": 36321, "caveats": 7894, "censorship": 7910, "seeker": 49052, "unsegmented": 59660, "trip": 58800, "novelly": 37958, "prototyping": 43977, "deem": 13679, "criminal": 11769, "ultra": 59193, "pools": 41130, "worsened": 62975, "freedom": 21649, "civil": 8376, "leak": 29334, "exemplary": 18991, "stacks": 52424, "standardize": 52545, "standardization": 52544, "deaths": 13526, "diagnosed": 14735, "budgets": 7381, "affordable": 2031, "invested": 26936, "sequencing": 50034, "funds": 21799, "lighter": 30455, "mitigated": 33392, "diet": 14811, "strategically": 52889, "threat": 57084, "vulnerability": 61752, "defense": 13768, "loading": 30927, "l1": 27683, "failing": 20347, "enterprises": 18016, "focal": 21139, "seller": 49227, "arose": 4195, "ref": 45731, "scripted": 48950, "leakage": 29335, "replaces": 46409, "mechanistic": 32155, "1990s": 199, "info": 25735, "anxiety": 3124, "deteriorate": 14545, "curiosity": 11956, "transmitting": 58719, "ood": 38401, "textbook": 56850, "breaks": 7314, "neat": 36526, "overheads": 39086, "traditions": 57560, "senteval": 49813, "assists": 4618, "substantiate": 53651, "inquire": 26371, "wins": 62076, "folds": 21252, "usefully": 60398, "inherit": 26206, "vinyals": 61611, "alzheimer": 2516, "unavailability": 59212, "germeval": 22681, "kullback": 27678, "leibler": 30012, "chats": 8266, "inquiries": 26372, "suites": 53865, "perturbing": 40796, "cent": 7911, "conventionally": 11019, "broadcasts": 7359, "merges": 32316, "flows": 21122, "redundantly": 45730, "artifacts": 4486, "counterfactuals": 11619, "scarcely": 48660, "amateur": 2518, "hypernetwork": 24333, "unexpectedly": 59439, "identically": 24380, "tokenized": 57318, "diverging": 15690, "restore": 47416, "wasserstein": 61780, "supervise": 53958, "coqa": 11141, "8k": 547, "knowledgeable": 27652, "relu": 46274, "penalize": 40015, "meaningfully": 32029, "elmo": 16994, "proximal": 44260, "crowdsource": 11884, "underline": 59260, "infusion": 26194, "unchanged": 59233, "rd": 45041, "comprehensiveness": 9807, "distinctly": 15601, "hyperlink": 24332, "warranted": 61777, "qe": 44461, "broaden": 7360, "linearized": 30680, "inserted": 26376, "college": 8994, "economics": 16580, "coincide": 8925, "traversing": 58729, "contributor": 10958, "multimodality": 36158, "augmentations": 4974, "outlook": 38778, "realistically": 45156, "ungrounded": 59456, "parameterize": 39684, "permuted": 40735, "brevity": 7317, "attracts": 4897, "distress": 15618, "scaffolds": 48544, "anger": 2858, "surprise": 54180, "adjunct": 1848, "confounds": 10151, "discoveries": 15411, "instability": 26421, "optima": 38524, "thresholding": 57091, "consumed": 10437, "uncommon": 59239, "questionnaires": 44765, "hint": 23949, "initiate": 26232, "remarks": 46364, "resorted": 47207, "badly": 5499, "behavioural": 6402, "sacrifice": 48419, "differentiability": 15138, "bed": 6376, "recovers": 45596, "safe": 48426, "abstractness": 777, "tenth": 56229, "216": 314, "doubly": 16322, "rarer": 45009, "styled": 53506, "combating": 9030, "dub": 16465, "cas": 7780, "poverty": 41422, "distractors": 15617, "irregularities": 27037, "remedies": 46365, "percentages": 40054, "understandings": 59420, "thai": 56997, "jin": 27157, "udpipe": 59182, "youth": 63144, "loved": 31130, "replying": 46422, "lime": 30531, "rationality": 45030, "specializing": 52036, "pop": 41153, "sad": 48424, "audiovisual": 4934, "bimodal": 7139, "formalizing": 21362, "fight": 20787, "attraction": 4895, "clms": 8680, "clm": 8679, "conservative": 10205, "pursued": 44419, "misinformation": 33346, "trustworthiness": 58832, "purposed": 44415, "multihead": 36052, "steer": 52788, "violate": 61614, "formedness": 21371, "noisier": 37610, "rarity": 45010, "teaches": 55999, "math": 31929, "taught": 55978, "decouple": 13662, "recognizable": 45548, "offense": 38287, "instantiate": 26440, "layouts": 29244, "healthy": 23523, "eighteen": 16948, "unleash": 59589, "relieve": 46270, "driver": 16435, "nlms": 37458, "unanswerable": 59208, "reasoner": 45179, "spanned": 51950, "resolutions": 47197, "prefers": 41794, "undermine": 59280, "compounded": 9755, "pauses": 39982, "extremes": 20168, "bear": 6371, "military": 33248, "secret": 49034, "tradition": 57508, "professions": 43064, "stereotyped": 52847, "grasp": 23194, "glue": 22863, "justifying": 27261, "reconstructor": 45585, "fisher": 21066, "verifier": 61530, "conciseness": 9966, "vae": 61164, "federal": 20707, "fb": 20464, "doubts": 16324, "summarised": 53872, "finished": 21055, "seq": 49888, "wait": 61755, "zh": 63185, "privileged": 42446, "persona": 40753, "tropes": 58815, "squad2": 52397, "grand": 23085, "desktop": 14353, "bandwidth": 5522, "articulate": 4484, "cls": 8727, "explainability": 19596, "defect": 13764, "defects": 13765, "retriever": 47988, "192": 193, "gb": 22024, "universality": 59549, "inevitably": 25629, "ofthe": 38322, "journalistic": 27230, "uneven": 59437, "complaints": 9582, "bagging": 5509, "lp": 31231, "noising": 37611, "illuminate": 24513, "appended": 3148, "thunlp": 57099, "twin": 59028, "subtraction": 53680, "physicians": 40863, "assistive": 4617, "toxic": 57481, "bahdanau": 5511, "progressive": 43124, "underutilized": 59432, "quarter": 44644, "stuck": 53207, "oxford": 39130, "stimulate": 52849, "electronics": 16970, "hungry": 24300, "carries": 7773, "schedules": 48720, "nd": 36500, "notwithstanding": 37738, "supplementing": 54109, "peters": 40799, "radford": 44846, "openbookqa": 38470, "regimen": 45801, "stochasticity": 52859, "factually": 20325, "prevailing": 42224, "elaborates": 16954, "ok": 38325, "ims": 25204, "acoustics": 1439, "manifests": 31704, "gauging": 22009, "manners": 31728, "lample": 27940, "etal": 18413, "disclose": 15376, "overlapped": 39090, "miss": 33356, "slowly": 51454, "justifies": 27259, "formatting": 21369, "fulfil": 21707, "alibaba": 2349, "7th": 518, "stances": 52457, "traversal": 58727, "resorting": 47208, "differentiates": 15144, "uncertainties": 59225, "formidable": 21372, "perturbed": 40794, "academy": 797, "benefiting": 6578, "gnns": 22874, "revolution": 48059, "anchoring": 2852, "unconditional": 59240, "compliance": 9693, "134": 130, "fever": 20734, "refuted": 45789, "paralinguistic": 39640, "ablate": 652, "alice": 2350, "believes": 6415, "historic": 23954, "stays": 52784, "prerequisites": 41832, "steadily": 52785, "lend": 30021, "testset": 56418, "goods": 22950, "diagnose": 14734, "expedite": 19199, "6000": 460, "anticipation": 3121, "finetuned": 21042, "doctors": 15764, "emr": 17410, "fragile": 21432, "certified": 7953, "accumulated": 887, "zhang": 63187, "smoothness": 51541, "equivalently": 18203, "documentary": 15848, "backend": 5489, "priorities": 42431, "interleaved": 26666, "localized": 30959, "pg": 40802, "leaders": 29286, "tokenizers": 57320, "iid": 24507, "pushes": 44429, "nat": 36393, "infoboxes": 25736, "deepen": 13754, "interchange": 26638, "simplex": 51233, "career": 7753, "biaffine": 7018, "canonicalization": 7593, "bilateral": 7101, "visualisations": 61674, "3m": 388, "rectifying": 45604, "distills": 15587, "activated": 1464, "inaccuracy": 25208, "interrelated": 26743, "investing": 27008, "antecedents": 3116, "fairness": 20362, "seg": 49070, "manipulates": 31709, "symbolically": 54270, "adjustments": 1855, "iconic": 24363, "prospect": 43959, "reflections": 45780, "tech": 56016, "squeeze": 52402, "initializes": 26228, "masked": 31860, "ethnicity": 18421, "confronts": 10154, "automl": 5209, "acronym": 1449, "nq": 37968, "unambiguously": 59204, "cws": 12071, "amplify": 2571, "subordinate": 53596, "fan": 20391, "interfere": 26664, "emitting": 17279, "interwoven": 26754, "intends": 26553, "uttered": 61154, "fatal": 20450, "shallower": 50445, "firms": 21063, "flores": 21117, "nepali": 36674, "typos": 59172, "multiplicity": 36318, "peaks": 40003, "orderings": 38668, "decoupled": 13663, "spots": 52374, "dca": 13509, "tagalog": 54727, "responsibility": 47407, "fairseq": 20363, "characterizations": 8247, "economical": 16579, "western": 61947, "sf": 50432, "mnb": 33445, "attentively": 4866, "multidomain": 36049, "citet": 8371, "realm": 45165, "impeding": 24625, "chances": 8166, "impressions": 24806, "07": 24, "closing": 8716, "inefficiency": 25625, "bypasses": 7507, "aroused": 4197, "rationale": 45028, "intervene": 26749, "suffered": 53786, "exam": 18857, "affordances": 2032, "naively": 36367, "supplements": 54110, "loops": 31075, "convention": 10998, "retention": 47929, "heterogeneity": 23615, "lat": 29110, "propensity": 43249, "specifics": 52238, "processors": 42971, "openai": 38468, "deviates": 14718, "flavors": 21099, "tasking": 55482, "regeneration": 45798, "federated": 20708, "scibert": 48744, "inspires": 26419, "04": 21, "gpt": 22970, "obsolete": 38154, "bug": 7383, "scraping": 48942, "ada": 1496, "clients": 8664, "diagnosing": 14737, "anonymization": 3026, "interpreters": 26740, "tb": 55982, "activates": 1465, "disseminated": 15534, "comparator": 9325, "inclined": 25221, "pdp": 39999, "124": 116, "104": 73, "frustratingly": 21699, "brittleness": 7347, "atop": 4651, "psychiatric": 44284, "confidences": 10119, "enumeration": 18169, "eliminated": 16987, "exceptionally": 18959, "depicts": 14167, "natively": 36407, "registers": 45810, "debias": 13531, "religion": 46272, "experimentations": 19335, "professor": 43065, "intelligibility": 26546, "condense": 9988, "quicker": 44819, "lu": 31292, "2019b": 288, "wav2vec": 61786, "confirmation": 10134, "sparsely": 51972, "discernible": 15371, "occupy": 38267, "punctuated": 44386, "crowdworkers": 11892, "sqa": 52392, "103": 72, "30x": 363, "disputes": 15526, "substance": 53614, "debiased": 13532, "shortened": 50586, "8th": 549, "pb": 39995, "ernie": 18207, "masking": 31872, "masks": 31876, "autoregressively": 5227, "distracting": 15614, "youtu": 63145, "encapsulating": 17458, "iqa": 27032, "insufficiency": 26490, "existed": 19018, "concurrent": 9986, "misunderstanding": 33374, "unfortunate": 59450, "spectrograms": 52245, "bigru": 7100, "dominates": 16311, "mrs": 35910, "hotpotqa": 24036, "formulations": 21394, "marginally": 31829, "entailing": 17997, "relief": 46263, "darpa": 12097, "denoise": 14060, "denoised": 14061, "discards": 15369, "relabeling": 45884, "noised": 37608, "centralized": 7923, "graduate": 23017, "catalog": 7829, "240": 330, "gt": 23316, "runner": 48404, "slices": 51428, "sales": 48433, "heterogenous": 23625, "3000": 360, "flair": 21092, "underperforms": 59283, "35k": 375, "detailing": 14431, "assert": 4572, "minimalist": 33293, "mg": 33213, "partners": 39902, "unilm": 59497, "container": 10480, "arm": 4189, "transfers": 58437, "implausible": 24630, "academics": 796, "untrustworthy": 59755, "trustworthy": 58833, "monotonicity": 35823, "ctrl": 11929, "fosters": 21414, "underscore": 59286, "merit": 32318, "extents": 19926, "conversions": 11070, "clicks": 8662, "ge": 22029, "religious": 46273, "unfolds": 59449, "clever": 8660, "conceive": 9913, "recency": 45274, "hugely": 24081, "ramifications": 44865, "conflating": 10139, "linker": 30832, "distorted": 15611, "stabilize": 52411, "maml": 31685, "personas": 40769, "headroom": 23508, "swap": 54243, "infilling": 25714, "blank": 7196, "xnli": 63036, "leaps": 29341, "xl": 63028, "extrapolating": 20149, "vaes": 61167, "vacancy": 61163, "tan": 54795, "calling": 7557, "appreciable": 3384, "adversaries": 1995, "propaganda": 43238, "disinformation": 15507, "mechanics": 32094, "lf": 30416, "144": 143, "prescribed": 41833, "inductively": 25614, "lifts": 30447, "navigating": 36494, "generically": 22634, "crossmodal": 11878, "endeavors": 17734, "speculation": 52247, "kaggle": 27264, "latencies": 29115, "neurological": 37117, "masculine": 31854, "feminine": 20732, "continued": 10830, "ece": 16573, "flaws": 21102, "humanlike": 24268, "wolf": 62103, "stark": 52563, "optionally": 38578, "isomorphic": 27054, "ka": 27263, "schools": 48742, "21st": 316, "6x": 485, "organisations": 38681, "poincar": 41042, "regressive": 45824, "ro": 48209, "slovene": 51446, "736": 496, "factorizing": 20303, "defend": 13766, "organizes": 38691, "invertible": 26934, "mathcal": 31931, "april": 3987, "swahili": 54242, "synchronization": 54277, "restores": 47417, "diversify": 15729, "shuffle": 50813, "706": 491, "9th": 577, "magnitudes": 31419, "pivots": 40923, "tones": 57351, "ideologies": 24473, "ideology": 24474, "attacking": 4663, "polar": 41086, "lifestyle": 30444, "ungrammatical": 59454, "shareable": 50463, "suits": 53866, "orthogonality": 38753, "infection": 25634, "distractor": 15616, "multiview": 36331, "encouragingly": 17607, "dominate": 16309, "sibling": 50822, "ethics": 18419, "justice": 27255, "prioritized": 42433, "kd": 27275, "labelers": 27774, "strategic": 52888, "yang": 63043, "shortcuts": 50584, "consequential": 10202, "profession": 43059, "diversification": 15727, "decay": 13543, "roberta": 48213, "mlm": 33437, "electra": 16963, "corrupting": 11569, "neglects": 36651, "finetune": 21040, "xlnet": 63032, "ingests": 26196, "wmt19": 62101, "laser": 29103, "wordpiece": 62357, "locates": 30964, "plagued": 40934, "traction": 57500, "useless": 60402, "ht": 24045, "exacerbate": 18845, "exacerbates": 18847, "mi": 33215, "filipino": 20797, "tl": 57272, "privately": 42445, "lossless": 31110, "forefront": 21295, "immensely": 24584, "lee": 29996, "hoping": 24020, "shades": 50436, "das": 12098, "memorizes": 32237, "registered": 45808, "burst": 7502, "stretch": 52982, "thompson": 57053, "antagonistic": 3114, "politicians": 41114, "patches": 39942, "bypassing": 7508, "farther": 20409, "transe": 58348, "mrl": 35908, "frustrating": 21698, "198": 195, "imperceptible": 24627, "urgency": 59786, "adapter": 1556, "untrained": 59754, "disentangling": 15503, "pairings": 39165, "unparalleled": 59626, "deliberation": 13828, "polarization": 41089, "liberal": 30422, "tagsets": 54761, "undesired": 59434, "sustainable": 54228, "regime": 45800, "quad": 44462, "xlm": 63029, "spanbert": 51935, "mil": 33242, "misuse": 33376, "depicting": 14166, "fingerprints": 21053, "paddlepaddle": 39137, "checkpoints": 8278, "enlarging": 17955, "confer": 10107, "diagnostics": 14741, "reversal": 48020, "barely": 5529, "intending": 26552, "caregivers": 7764, "empowered": 17407, "disruptive": 15531, "dispersed": 15516, "draft": 16380, "rises": 48158, "asrs": 4565, "synthesizes": 54365, "fills": 20804, "classroom": 8635, "abductive": 587, "monolithic": 35817, "strides": 52987, "hallucination": 23373, "guessing": 23325, "muse": 36336, "reuses": 48004, "rand": 44867, "provably": 43978, "relaxes": 46139, "hallmark": 23370, "victims": 61576, "texttt": 56950, "leader": 29281, "stationary": 52731, "annotates": 2929, "entanglement": 18011, "tailed": 54765, "recruit": 45598, "abc": 586, "nyt": 38073, "skew": 51411, "gpt2": 22992, "retrained": 47934, "2020": 289, "vectorizer": 61478, "rogue": 48300, "ace05": 1103, "toxicity": 57482, "lagging": 27935, "wnut": 62102, "spider": 52343, "volunteers": 61735, "crimes": 11768, "dm": 15755, "142": 142, "impedes": 24623, "checkpoint": 8277, "180k": 181, "700k": 490, "utilises": 61076, "attributing": 4913, "feeling": 20725, "sans": 48514, "mbert": 31977, "isotropic": 27056, "contextualizing": 10817, "optimizations": 38560, "leaderboards": 29285, "alias": 2348, "placement": 40929, "innocuous": 26246, "tactics": 54723, "hypersphere": 24340, "visit": 61645, "overestimation": 39078, "subclass": 53542, "selector": 49165, "exceeded": 18946, "milestone": 33247, "unfaithful": 59444, "presumptions": 42140, "commonsenseqa": 9240, "270": 343, "favorite": 20460, "sway": 54247, "lexeme": 30348, "narrowed": 36389, "pertains": 40785, "bin": 7140, "unnecessarily": 59618, "nsp": 37971, "hans": 23432, "hotspot": 24037, "countering": 11620, "calm": 7559, "propensities": 43248, "rephrase": 46399, "incredibly": 25480, "resistant": 47192, "populate": 41210, "stylized": 53512, "supervisions": 54101, "qualified": 44468, "2019a": 287, "speculative": 52248, "conducive": 10022, "incidence": 25218, "jeopardize": 27155, "auditing": 4936, "excels": 18956, "medications": 32214, "multifarious": 36051, "followup": 21274, "depths": 14192, "inflated": 25716, "infused": 26192, "acute": 1490, "truncated": 58828, "june": 27247, "band": 5520, "117": 96, "hazards": 23492, "miulab": 33398, "carbon": 7748, "tough": 57479, "risen": 48157, "empowering": 17408, "strikingly": 52990, "sari": 48519, "truthfulness": 58842, "ng": 37435, "california": 7538, "admits": 1861, "technically": 56021, "tp": 57484, "prepending": 41818, "scholar": 48735, "initialised": 26222, "regulated": 45855, "joined": 27161, "swift": 54250, "flag": 21089, "initiatives": 26235, "degenerate": 13796, "dashboard": 12099, "berts": 6740, "latch": 29111, "linearizing": 30681, "alternation": 2495, "vein": 61505, "cg": 7955, "residents": 47183, "reinforcing": 45878, "totalling": 57477, "determinantal": 14549, "trusted": 58831, "gc": 22025, "holistically": 23991, "lottery": 31126, "subnetworks": 53593, "subnetwork": 53592, "signify": 51018, "unintuitive": 59505, "bart": 5532, "variances": 61231, "conceptualizations": 9951, "harms": 23465, "giant": 22686, "assembles": 4569, "mlms": 33441, "compiling": 9580, "grave": 23196, "hugging": 24082, "hire": 23952, "narrows": 36391, "parameterizations": 39683, "minima": 33281, "attainable": 4670, "plms": 41014, "negated": 36611, "priming": 42375, "verifiable": 61524, "vat": 61447, "pitfall": 40913, "dive": 15683, "forgotten": 21311, "camembert": 7563, "specialize": 52030, "ablating": 653, "temporary": 56196, "answerer": 3062, "surged": 54159, "odds": 38286, "began": 6377, "trie": 58790, "113": 95, "plm": 41012, "thu": 57096, "keg": 27283, "biologically": 7169, "autocomplete": 5023, "prime": 42374, "191": 192, "contracts": 10864, "chair": 7964, "inaccuracies": 25207, "programmatically": 43079, "180": 180, "dictated": 14801, "dangerous": 12094, "slovenian": 51447, "aaai": 582, "reserve": 47178, "scrutiny": 48952, "tighter": 57108, "jzbjyb": 27262, "accesses": 833, "spark": 51960, "endow": 17744, "eda": 16584, "rivals": 48169, "pidgin": 40873, "evoked": 18832, "omitting": 38333, "scholarship": 48739, "globe": 22852, "commonality": 9214, "excessively": 18963, "corrective": 11487, "confluence": 10143, "groundings": 23266, "sds": 48955, "physics": 40864, "mutations": 36340, "manhattan": 31700, "2m": 350, "facility": 20284, "chronologically": 8355, "underexplored": 59251, "dt": 16455, "1996": 204, "concatenates": 9909, "von": 61736, "geometrically": 22658, "24k": 331, "robbert": 48212, "counteract": 11615, "administrative": 1858, "purity": 44399, "mbart": 31976, "promptly": 43222, "prlms": 42448, "albert": 2250, "usa": 59796, "russia": 48412, "confuse": 10156, "uit": 59184, "mdd": 31984, "substituted": 53655, "restoring": 47418, "imitating": 24575, "pandemic": 39245, "causation": 7882, "samsung": 48511, "adapters": 1561, "indic": 25521, "groundtruth": 23268, "unary": 59211, "sidestep": 50825, "iwslt14": 27139, "ct": 11926, "buried": 7501, "momentum": 35781, "pretext": 42141, "signalling": 50832, "conquer": 10192, "discouraging": 15383, "undertaking": 59431, "lit": 30848, "diverges": 15689, "hampers": 23380, "spacy": 51914, "fleiss": 21104, "pursuit": 44422, "arabert": 3991, "hardness": 23458, "phobert": 40818, "heightened": 23542, "lenient": 30042, "gamma": 21951, "fallback": 20375, "fragmented": 21435, "asia": 4512, "aes": 2008, "empathetic": 17305, "pain": 39145, "discriminatory": 15455, "kit": 27376, "routines": 48366, "march": 31814, "118": 97, "esperanto": 18314, "eos": 18179, "investment": 27009, "stepping": 52838, "opinionated": 38504, "cohen": 8901, "stanza": 52561, "returning": 47999, "creators": 11755, "aesthetic": 2009, "calibrate": 7529, "t5": 54676, "winogrande": 62075, "gathers": 22004, "nigerian": 37440, "exclusion": 18976, "disparities": 15514, "learnings": 29950, "pretrains": 42222, "pypi": 44436, "lex": 30347, "10000": 68, "igbo": 24487, "insufficiently": 26495, "inserts": 26381, "adversely": 2000, "ft": 21704, "brute": 7377, "ig": 24486, "donald": 16314, "blocked": 7221, "nar": 36379, "ptlms": 44298, "emissions": 17276, "ptlm": 44297, "biobert": 7163, "covid": 11667, "gpt3": 22993, "tokenizations": 57317, "overfits": 39080, "dissect": 15532, "hinting": 23950, "msc": 35914, "lagged": 27934, "bleurt": 7215, "facilitation": 20282, "centering": 7914, "18k": 182, "c3": 7517, "causalities": 7879, "35x": 376, "superglue": 53928, "convnets": 11090, "stressed": 52981, "coronavirus": 11167, "11th": 102, "milliseconds": 33266, "palm": 39242, "exit": 19181, "oblivious": 38117, "saturated": 48531, "convince": 11088, "amortized": 2540, "cvae": 12070, "engages": 17759, "affirmative": 2028, "slide": 51429, "polarized": 41090, "anli": 2866, "disk": 15510, "git": 22691, "egregious": 16944, "impair": 24615, "prohibits": 43130, "ukplab": 59187, "august": 4994, "inuktitut": 26913, "codebase": 8870, "solvable": 51674, "spotlight": 52373, "localizing": 30960, "equipping": 18198, "wake": 61757, "unfiltered": 59447, "gum": 23359, "fixes": 21086, "paraphraser": 39743, "questioner": 44762, "rationalization": 45031, "truncation": 58829, "safely": 48427, "ai4bharat": 2125, "deploys": 14180, "21k": 315, "infected": 25633, "italy": 27113, "virus": 61628, "germany": 22680, "maltese": 31684, "unanswered": 59210, "prunes": 44267, "regex": 45799, "hallucinate": 23371, "rewrites": 48078, "atmosphere": 4648, "unmodified": 59616, "introspective": 26905, "datastore": 13491, "streamed": 52961, "favored": 20458, "blueprint": 7232, "corporate": 11259, "soap": 51552, "burdensome": 7499, "breakdown": 7311, "nel": 36671, "25k": 340, "institutional": 26473, "dispute": 15524, "subfields": 53545, "rethink": 47930, "hallucinations": 23375, "9x": 578, "svo": 54241, "mae": 31412, "gat": 21990, "interpolate": 26703, "decaying": 13544, "tor": 57473, "indiscriminately": 25558, "td": 55985, "informality": 25745, "conceptualizing": 9953, "bertweet": 6742, "unveils": 59759, "regulate": 45854, "fore": 21292, "finetunes": 21046, "tatoeba": 55976, "unaffected": 59201, "misclassify": 33344, "provider": 44176, "mines": 33278, "frank": 21635, "bbc": 6362, "streamline": 52964, "umt": 59197, "voluntary": 61734, "deberta": 13530, "distilbert": 15563, "cord": 11142, "afro": 2041, "secure": 49039, "causally": 7881, "unreliability": 59635, "collapsed": 8935, "burgeoning": 7500, "chaining": 7962, "comprehensibility": 9760, "sl": 51424, "slowing": 51453, "arduous": 4134, "fairer": 20358, "seventh": 50423, "flawed": 21101, "pioneered": 40888, "steep": 52787, "vicinity": 61574, "triage": 58783, "poly": 41116, "wd": 61846, "outperformance": 38834, "salesforce": 48434, "backdoor": 5487, "adept": 1829, "buffer": 7382, "tlm": 57273, "labse": 27868, "sci": 48743, "flowing": 21121, "tlms": 57274, "promoted": 43193, "shapley": 50452, "15th": 160, "covert": 11666, "mad": 31411, "subfield": 53544, "proactive": 42451, "672": 477, "complicate": 9695, "rethinking": 47931, "lite": 30849, "personnel": 40770, "disturbing": 15682, "goodness": 22949, "consumes": 10440, "unsolved": 59661, "pakistan": 39241, "bind": 7158, "confidently": 10122, "logit": 30998, "sap": 48516, "streamlined": 52965, "ls": 31234, "pn": 41036, "weekly": 61909, "neurips": 37114, "res": 46972, "stateof": 52717, "entangled": 18010, "750": 504, "labs": 27867, "repurpose": 46833, "harsh": 23469, "urge": 59785, "caution": 7893, "tentative": 56228, "instructed": 26476, "huggingface": 24084, "vgg": 61565, "412": 403, "closure": 8718, "controllability": 10975, "controllably": 10981, "timestamp": 57257, "delineate": 13832, "doi": 15987, "incurred": 25487, "attach": 4654, "modulo": 35777, "intricacies": 26764, "lc": 29247, "fewshot": 20743, "emulate": 17411, "concatenations": 9912, "garnered": 21988, "gnn": 22872, "tn": 57276, "csebuetnlp": 11923, "logits": 30999, "underestimate": 59249, "bf": 6997, "enumerating": 18168, "kazakh": 27269, "dailydialog": 12089, "um": 59194, "cheaply": 8270, "header": 23503, "ip": 27030, "sparsification": 51976, "primitives": 42377, "med": 32156, "protecting": 43965, "endowing": 17745, "likelihoods": 30523, "zhu": 63190, "impairments": 24619, "obtainable": 38200, "uzbek": 61155, "citing": 8373, "equips": 18199, "mismatching": 33355, "tending": 56212, "capsules": 7646, "comet": 9132, "denser": 14090, "terminate": 56257, "proceeding": 42750, "shortly": 50594, "realizes": 45162, "mixup": 33425, "participatory": 39828, "pulled": 44383, "localize": 30958, "interpolates": 26705, "decisive": 13577, "prover": 43997, "swarnahub": 54246, "unlikelihood": 59612, "layman": 29240, "forth": 21395, "coh": 8900, "moe": 35778, "superficially": 53926, "inefficiencies": 25624, "height": 23541, "instantiations": 26443, "1b": 212, "backbones": 5486, "informing": 26184, "3k": 387, "erroneously": 18209, "absorb": 752, "substituting": 53657, "air": 2222, "flops": 21116, "reflective": 45781, "138": 132, "4k": 415, "162": 168, "drafting": 16381, "bertscore": 6741, "traverse": 58728, "poetics": 41040, "layerwise": 29238, "upsampling": 59777, "smartphone": 51531, "sbert": 48542, "mt5": 35927, "101": 71, "balances": 5517, "cm": 8752, "storm": 52878, "eschewing": 18258, "deviating": 14719, "discretized": 15433, "medial": 32192, "876": 542, "instruct": 26475, "correlative": 11539, "interleaving": 26668, "performant": 40655, "reconstructs": 45586, "yale": 63041, "degeneration": 13798, "iu": 27134, "beating": 6374, "cam": 7560, "chemicals": 8281, "vi": 61566, "oftentimes": 38321, "sister": 51363, "impactful": 24611, "anisotropy": 2865, "drivers": 16436, "nguyen": 37438, "oracles": 38582, "childhood": 8292, "intertwined": 26746, "podcast": 41037, "uda": 59181, "circular": 8361, "108": 76, "moderator": 35699, "tqa": 57485, "001": 16, "175": 175, "trainings": 58322, "humanity": 24267, "realise": 45145, "fn": 21138, "quote": 44840, "weaken": 61852, "optimise": 38538, "mediaeval": 32191, "celebrated": 7906, "tesseract": 56328, "remainder": 46319, "implying": 24675, "refinements": 45769, "incoherence": 25325, "arena": 4159, "auroc": 4995, "869": 538, "verdict": 61522, "rid": 48133, "infusing": 26193, "proliferated": 43147, "clock": 8681, "manageable": 31688, "impart": 24620, "fid": 20747, "xsum": 63037, "iterate": 27116, "384": 381, "elicits": 16983, "sensor": 49508, "contend": 10510, "delve": 13840, "practiced": 41488, "780": 513, "inviting": 27012, "rejecting": 45881, "geometrical": 22657, "typographic": 59163, "quantified": 44603, "terrorist": 56327, "salt": 48444, "task2": 55478, "uncased": 59223, "corroborates": 11566, "subreddit": 53599, "glean": 22817, "popularization": 41207, "pandemics": 39246, "holdout": 23984, "docker": 15761, "119": 98, "approved": 3973, "lfs": 30417, "contrastively": 10924, "longformer": 31058, "14th": 144, "reminiscent": 46371, "vl": 61697, "pulls": 44385, "elderly": 16960, "dae": 12081, "aggregator": 2082, "retrievers": 47990, "eacl": 16503, "uniformity": 59487, "ensembled": 17983, "customizing": 12065, "extraordinary": 20147, "necessitate": 36536, "40k": 400, "subtract": 53679, "laughter": 29169, "council": 11609, "indicbert": 25550, "muril": 36335, "5281": 436, "footprints": 21284, "deployments": 14179, "unweighted": 59762, "disconnect": 15378, "accomplishes": 849, "admissible": 1859, "synthesise": 54359, "stood": 52863, "lily": 30530, "adjuncts": 1849, "summarising": 53874, "glm": 22819, "lowers": 31228, "ball": 5519, "paradox": 39634, "audit": 4935, "mislabeled": 33347, "mock": 33449, "sarcastic": 48518, "mha": 33214, "1980": 196, "galician": 21948, "isotropy": 27057, "dubious": 16467, "pl": 40925, "categorise": 7852, "unveiling": 59758, "esp": 18261, "curie": 11955, "mcl": 31982, "homes": 23993, "discretization": 15432, "leanings": 29339, "transitioning": 58545, "unrealistically": 59631, "pie": 40874, "competitiveness": 9573, "anomalies": 3021, "reconcile": 45575, "workshops": 62924, "immigration": 24585, "responds": 47390, "1991": 200, "decently": 13549, "amplification": 2568, "directives": 15302, "selectors": 49166, "criticisms": 11801, "bottlenecked": 7278, "topv2": 57472, "1200": 113, "lossy": 31111, "7b": 516, "prepend": 41817, "prompting": 43213, "overlooking": 39099, "endeavour": 17735, "devising": 14728, "infuse": 26191, "debugging": 13537, "adhering": 1837, "278": 344, "804": 525, "surging": 54160, "fighting": 20788, "v4": 61161, "2017b": 271, "stimulated": 52851, "cmcl": 8753, "traceability": 57488, "degradations": 13803, "pegasus": 40013, "superb": 53923, "meantime": 32042, "randomness": 44905, "deployable": 14169, "checklist": 8276, "organisers": 38683, "handcraft": 23397, "hypothesizes": 24354, "medically": 32212, "resp": 47342, "dietary": 14812, "correspondingly": 11563, "demanded": 13842, "multiplied": 36319, "evoke": 18831, "blindly": 7219, "widening": 62025, "invest": 26935, "quotation": 44838, "oldest": 38329, "randomization": 44895, "rnngs": 48207, "undergoing": 59255, "dire": 15249, "tolerant": 57348, "compromised": 9821, "incongruent": 25335, "anisotropic": 2864, "declare": 13579, "intimate": 26755, "threatening": 57086, "wow": 62982, "rucaibox": 48375, "320": 368, "claiming": 8383, "mentally": 32294, "lifetime": 30445, "sixteen": 51372, "inhibit": 26211, "necessitated": 36537, "risky": 48166, "sam": 48445, "dpps": 16376, "gray": 23197, "extraneous": 20146, "verbalized": 61517, "simplifications": 51237, "laypeople": 29245, "deadline": 13511, "llms": 30899, "telegram": 56164, "formulaic": 21382, "consolidated": 10337, "reversed": 48024, "generalisable": 22101, "recombining": 45561, "undermining": 59281, "redundancies": 45726, "acm": 1432, "impeded": 24622, "bp": 7295, "successively": 53753, "confusable": 10155, "forgo": 21310, "blocking": 7222, "eligible": 16984, "mtop": 35934, "3b": 385, "distrust": 15680, "resumes": 47917, "struggled": 53203, "bigbird": 7094, "spatiotemporal": 51991, "fined": 21034, "dpr": 16377, "11k": 101, "mediators": 32197, "inconvenience": 25342, "multiplex": 36314, "pet": 40798, "focussing": 21248, "evidential": 18829, "eventual": 18800, "tydiqa": 59045, "ns": 37970, "incompetent": 25330, "tt": 58849, "caught": 7866, "disturbance": 15681, "ptms": 44299, "haitian": 23364, "rewriter": 48077, "sustained": 54229, "slows": 51455, "stakes": 52453, "twist": 59029, "centred": 7925, "rectify": 45603, "competed": 9524, "bench": 6421, "copious": 11131, "inflexible": 25722, "distract": 15613, "neutralize": 37122, "converged": 11023, "invite": 27011, "daunting": 13497, "append": 3147, "llm": 30898, "768": 509, "122": 115, "quechua": 44646, "flatter": 21098, "precious": 41608, "wave": 61787, "faulty": 20451, "textcnn": 56852, "sufficiency": 53797, "347": 372, "scans": 48655, "metropolis": 33210, "hastings": 23477, "frustration": 21700, "byt5": 7511, "unavoidable": 59216, "sparsified": 51977, "4000": 398, "locked": 30970, "vii": 61610, "rejected": 45880, "damage": 12091, "distort": 15610, "interacted": 26593, "thumb": 57098, "impairment": 24618, "pull": 44382, "disambiguates": 15356, "succeeding": 53693, "burdens": 7498, "flaw": 21100, "uncontrollable": 59242, "zs": 63196, "scoping": 48781, "fl": 21088, "harmonize": 23463, "unverifiable": 59760, "poincare": 41043, "419": 404, "csl": 11924, "recipients": 45483, "democratizing": 13853, "jd": 27151, "resistance": 47191, "subpopulations": 53597, "rejection": 45882, "152": 158, "silhouette": 51021, "urgently": 59789, "hk": 23974, "headings": 23504, "jacobs": 27141, "suppression": 54147, "superfluous": 53927, "disseminate": 15533, "reasoners": 45180, "artistic": 4503, "zip": 63191, "corporations": 11260, "troubling": 58818, "emd": 17256, "540b": 442, "530b": 439, "davinci": 13498, "175b": 176, "pcl": 39997, "mistakenly": 33372, "ts": 58848, "accommodating": 840, "household": 24042, "rose": 48345, "vit": 61689, "administered": 1857, "digitisation": 15217, "requesting": 46837, "calibrator": 7537, "mirroring": 33337, "codex": 8883, "unspecified": 59663, "criticize": 11802, "exiting": 19182, "openness": 38479, "instructgpt": 26477, "regressions": 45823, "tr": 57486, "threaten": 57085, "outbound": 38760, "untouched": 59753, "underestimated": 59250, "internalize": 26692, "obscuring": 38119, "lowered": 31226, "regressor": 45828, "tcm": 55984, "responded": 47389, "003": 17, "escalating": 18257, "hf": 23635, "scopes": 48780, "borrows": 7272, "cure": 11954, "fsl": 21702, "downsides": 16329, "lieu": 30435, "catastrophically": 7835, "task1": 55477, "recruiting": 45600, "discourages": 15382, "compromises": 9822, "overlooks": 39100, "pivoted": 40921, "absorption": 753, "reshaped": 47180, "clusterings": 8749, "korea": 27673, "democratize": 13852, "ruling": 48399, "xxl": 63038, "mmlu": 33444, "t0": 54674, "flan": 21093, "avoidance": 5435, "reasoned": 45178, "rr": 48370, "11b": 99, "mauve": 31947, "rescaling": 46974, "19k": 208, "dilution": 15222, "slm": 51439, "rote": 48347, "fragility": 21433, "subsume": 53663, "commit": 9158, "alliance": 2427, "protective": 43967, "spreads": 52381, "ob": 38074, "averagely": 5424, "inheriting": 26209, "minds": 33275, "sun": 53919, "rag": 44853, "overwhelmed": 39122, "ss": 52406, "stricter": 52985, "wildly": 62060, "50x": 429, "polishing": 41104, "eyes": 20178, "embarrassingly": 17003, "monotonous": 35824, "220": 320, "asa": 4510, "537": 440, "regulating": 45856, "sm": 51460, "wrap": 62983, "staying": 52783, "indiscriminate": 25557, "behaved": 6388, "calibrating": 7533, "1992": 201, "semiparametric": 49474, "dining": 15248, "realtime": 45166, "allegedly": 2395, "2023": 307, "postprocessing": 41368, "iaa": 24358, "timeliness": 57243, "110m": 93, "quadruple": 44467, "chatgpt": 8264, "obstruct": 38157, "rude": 48376, "deeplearnxmu": 13761, "calibrates": 7532, "rap": 44984, "v100": 61158, "38k": 382, "ua": 59173, "scattering": 48681, "handcrafting": 23402, "13k": 133, "textually": 56988, "quantize": 44641, "thriving": 57093, "dig": 15205, "pubmedqa": 44381, "leaking": 29336, "executes": 18983, "thoughtful": 57074, "regressors": 45829, "loyalty": 31230, "unpredictability": 59628, "obviate": 38261, "css": 11925, "unfaithfulness": 59445, "warming": 61773, "confounders": 10148, "revolutionize": 48060, "excludes": 18974, "supposedly": 54145, "progressing": 43122, "rerankers": 46970, "settle": 50407, "provoke": 44258, "beneath": 6553, "legitimacy": 30010, "irrelevance": 27038, "oversight": 39105, "landscapes": 27946, "harmonized": 23464, "peaked": 40002, "fa": 20238, "841": 533, "relabeled": 45883, "dam": 12090, "commensurate": 9138, "potent": 41378, "worsen": 62974, "retrospectively": 47996, "theft": 57003, "225": 321, "intractability": 26762, "interleaves": 26667, "t1": 54675, "delimiters": 13831, "emergencies": 17266, "abstractly": 776, "making text": 31671, "fully explored": 21729, "explored paper": 19760, "paper discusses": 39344, "contribute development": 10929, "languages propose": 28757, "software engineering": 51638, "human computer": 24124, "computer interaction": 9890, "natural language": 36412, "language processing": 28393, "processing nlp": 42897, "number possible": 38027, "possible future": 41325, "future research": 21883, "research directions": 47021, "directions paper": 15298, "english language": 17831, "language acquisition": 27951, "speech corpora": 52254, "use syntactic": 60036, "network properties": 36789, "use global": 59901, "global view": 22847, "practical issues": 41466, "issues paper": 27096, "paper examines": 39354, "syntactic relations": 54316, "child language": 8291, "grammar based": 23062, "corpora annotation": 11176, "perform standard": 40145, "provide detailed": 44049, "general model": 22069, "model lexical": 34060, "lexical information": 30366, "hierarchy information": 23704, "language used": 28568, "model enable": 33815, "practical approach": 41460, "language understanding": 28544, "understanding reasoning": 59389, "powerful representation": 41444, "information reasoning": 26042, "reasoning approach": 45184, "real life": 45104, "language text": 28527, "text paper": 56687, "paper describes": 39319, "describes experiments": 14224, "experiments learning": 19456, "rules using": 48398, "machine learning": 31306, "different ways": 15128, "approaching problem": 3961, "related work": 45955, "work task": 62838, "task results": 55346, "background knowledge": 5492, "prior domain": 42400, "domain knowledge": 16093, "knowledge available": 27402, "available research": 5359, "methods make": 32939, "make use": 31606, "morphological syntactic": 35845, "language resources": 28475, "resources model": 47317, "model word": 34538, "word similarity": 62310, "different methods": 14989, "learning lexical": 29709, "lexical items": 30367, "strengths weaknesses": 52977, "weaknesses different": 61871, "different word": 15131, "particular focus": 39846, "focus paper": 21188, "different language": 14963, "language resource": 28474, "learning ability": 29499, "change model": 8171, "learning abilities": 29498, "agents learn": 2064, "learn language": 29387, "ability model": 625, "took place": 57354, "zipf law": 63193, "words language": 62443, "frequency word": 21680, "word semantics": 62296, "task information": 55137, "information retrieval": 26061, "document given": 15797, "user information": 60421, "user query": 60441, "proposed approach": 43720, "approach does": 3494, "data mining": 12486, "tends focus": 56214, "structure sentences": 53136, "report results": 46445, "approach unsupervised": 3729, "unsupervised training": 59744, "sequential data": 50036, "sentences provided": 49775, "text segmentation": 56756, "statistical analysis": 52735, "semantic annotations": 49236, "despite importance": 14368, "attention researchers": 4823, "researchers field": 47157, "multi document": 35952, "et al": 18393, "al 2007": 2231, "automatic summarization": 5126, "multiple sources": 36285, "similarities differences": 51079, "paper provide": 39554, "provide initial": 44091, "probabilistic model": 42465, "model applied": 33568, "alleviate problem": 2416, "problem paper": 42619, "paper present": 39443, "present automated": 41851, "automated method": 5051, "non native": 37664, "native speakers": 36406, "new method": 37250, "method based": 32398, "based extraction": 5724, "native english": 36400, "english speech": 17883, "used construct": 60126, "result work": 47456, "work developed": 62634, "achieved significant": 1268, "error reduction": 18230, "reduction compared": 45718, "romance languages": 48332, "languages french": 28677, "research development": 47018, "written text": 63012, "text processing": 56712, "data structured": 12699, "statistical approaches": 52737, "based approaches": 5572, "manually constructed": 31768, "french english": 21660, "highly complex": 23886, "complex process": 9649, "process requires": 42826, "various kinds": 61348, "like learning": 30481, "learning words": 29945, "communicative goals": 9256, "starting point": 52570, "propose enhanced": 43372, "basic linguistic": 6331, "new language": 37232, "improvement achieved": 24983, "needs paper": 36610, "raw texts": 45038, "polish language": 41103, "novel technique": 37936, "algorithm performs": 2292, "obtained applying": 38202, "post processing": 41351, "processing text": 42957, "text second": 56754, "motivated observation": 35870, "newly introduced": 37378, "frame based": 21438, "work available": 62583, "available online": 5335, "large scale": 28958, "content analysis": 10512, "corpora used": 11254, "used assess": 60094, "work presented": 62761, "analysis public": 2735, "web service": 61896, "allows users": 2483, "results indicate": 47675, "long term": 31038, "current word": 12027, "word prediction": 62267, "prediction systems": 41740, "systems make": 54558, "gram language": 23053, "language models": 28224, "models lm": 35197, "estimate probability": 18373, "word phrase": 62263, "past years": 39940, "models syntactic": 35572, "syntactic semantic": 54322, "semantic information": 49284, "latent semantic": 29133, "semantic analysis": 49233, "analysis lsa": 2692, "method shown": 32650, "shown provide": 50745, "provide reliable": 44121, "information long": 25958, "long distance": 31007, "semantic dependencies": 49267, "dependencies words": 14114, "words context": 62387, "context present": 10691, "present evaluate": 41904, "evaluate methods": 18470, "methods integrate": 32905, "based information": 5783, "standard language": 52496, "language model": 28151, "model semantic": 34350, "different forms": 14938, "methods significant": 33037, "significant improvements": 50881, "improvements compared": 25058, "model investigate": 34021, "words expressions": 62416, "understanding text": 59410, "major concern": 31506, "human beings": 24116, "given input": 22748, "short term": 50567, "term memory": 56244, "following approach": 21262, "computational model": 9849, "model construction": 33708, "word meaning": 62245, "word semantic": 62295, "semantic similarities": 49345, "semantic similarity": 49346, "similarity words": 51130, "high order": 23756, "frequency occurrence": 21676, "objective subjective": 38104, "languages like": 28712, "like english": 30469, "english french": 17807, "french german": 21661, "punctuation marks": 44388, "scientific texts": 48772, "text output": 56682, "modalities paper": 33470, "use generative": 59899, "open source": 38447, "generating sentences": 22395, "underlying linguistic": 59270, "linguistic structures": 30797, "using domain": 60667, "domain specific": 16165, "specific languages": 52102, "languages based": 28606, "implemented using": 24649, "processing domain": 42868, "languages used": 28815, "used tool": 60330, "goal paper": 22894, "present model": 41947, "memory based": 32242, "based corpus": 5652, "french corpus": 21658, "corpus million": 11381, "million words": 33261, "semantic space": 49352, "compared human": 9413, "human data": 24131, "tasks second": 55867, "models semantic": 35476, "implementation model": 24641, "model text": 34456, "text comprehension": 56503, "based models": 5865, "dictionary definitions": 14805, "certain words": 7950, "problem introduce": 42586, "introduce concept": 26788, "larger vocabulary": 29090, "provide simple": 44130, "measuring similarity": 32089, "fundamental problem": 21786, "problem natural": 42613, "article provides": 4460, "unifying framework": 59492, "short context": 50551, "proposed solution": 43897, "quite different": 44828, "different surface": 15089, "closely related": 8704, "second order": 49013, "information used": 26142, "used represent": 60290, "words common": 62381, "parallel corpora": 39642, "algorithm based": 2264, "based use": 6121, "key words": 27343, "words text": 62530, "text documents": 56541, "rules used": 48397, "bilingual corpora": 7104, "analysis allows": 2610, "assess quality": 4582, "avenues future": 5396, "using statistical": 60963, "statistical techniques": 52765, "various techniques": 61404, "using techniques": 60982, "information theory": 26123, "similar words": 51077, "unknown words": 59560, "multilingual parallel": 36108, "parallel texts": 39655, "english spanish": 17878, "texts paper": 56908, "paper deals": 39313, "xml based": 63035, "considered important": 10248, "translation quality": 58664, "quality assessment": 44492, "used corpus": 60131, "studies automatic": 53248, "automatic translation": 5133, "assessment based": 4590, "paper concludes": 39296, "text generation": 56593, "categories based": 7843, "based syntactic": 6076, "semantic properties": 49322, "applications use": 3255, "task learning": 55168, "second language": 49009, "en en": 17414, "article describes": 4448, "common syntactic": 9203, "does rely": 15966, "linguistic structure": 30796, "specific language": 52099, "model hybrid": 33967, "propose theoretical": 43671, "theoretical framework": 57021, "given corpus": 22731, "statistical information": 52743, "words vocabulary": 62546, "based statistical": 6058, "data possible": 12548, "possible build": 41317, "words share": 62511, "share common": 50456, "words tend": 62527, "semantic classes": 49247, "markov chain": 31841, "transition matrix": 58540, "probability distributions": 42474, "distributions words": 15679, "words clusters": 62380, "method yields": 32711, "method specifically": 32665, "related languages": 45915, "prior knowledge": 42403, "different languages": 14967, "hard task": 23451, "task especially": 55052, "unsupervised classification": 59685, "furthermore approach": 21804, "approach relies": 3673, "corpus extensive": 11340, "extensive experiments": 19877, "readily available": 45078, "corpus examine": 11334, "language allows": 27959, "allows efficient": 2462, "long time": 31045, "present natural": 41953, "natural languages": 36459, "evenly distributed": 18777, "linguistic phenomenon": 30782, "using artificial": 60565, "applications like": 3217, "automatic text": 5128, "text summarization": 56797, "work present": 62752, "present framework": 41918, "step automatic": 52800, "subject verb": 53558, "verb object": 61510, "pronoun resolution": 43231, "textual documents": 56961, "automatic processing": 5116, "automatic recognition": 5119, "pre defined": 41499, "necessary pre": 36532, "automatic generation": 5093, "important tasks": 24780, "tasks natural": 55759, "processing information": 42876, "retrieval machine": 47950, "machine translation": 31345, "statistical methods": 52753, "methods used": 33094, "extraction methods": 20081, "provides easy": 44193, "logic rules": 30981, "existing methods": 19092, "mutual information": 36344, "word pairs": 62261, "data based": 12185, "project gutenberg": 43134, "proposed method": 43803, "provides better": 44184, "better result": 6955, "methods known": 32913, "semantic relationships": 49329, "pairs words": 39234, "words used": 62540, "feature based": 20476, "complex networks": 9642, "networks propose": 36897, "propose algorithm": 43288, "algorithm uses": 2310, "semantic network": 49303, "network produce": 36788, "produce new": 42994, "new set": 37313, "relationships words": 46087, "words similar": 62514, "work computational": 62605, "computational modeling": 9850, "complex hand": 9626, "hand coded": 23384, "representations introduce": 46694, "words using": 62541, "using large": 60756, "large corpus": 28863, "raw text": 45037, "text automatically": 56450, "automatically discover": 5163, "discover semantic": 15408, "semantic relations": 49327, "relations words": 46064, "words evaluate": 62411, "achieves human": 1338, "human level": 24196, "level performance": 30174, "alternative approaches": 2499, "approaches able": 3751, "able reach": 717, "word frequency": 62209, "power law": 41426, "complex systems": 9667, "human communication": 24123, "recent research": 45340, "frequent words": 21683, "language different": 28028, "different levels": 14977, "semantic type": 49368, "generative model": 22597, "model behavior": 33611, "word usage": 62329, "patterns words": 39979, "use words": 60076, "article propose": 4456, "propose automatic": 43306, "build multi": 7414, "multi lingual": 35984, "lexico semantic": 30403, "semantic resources": 49336, "textual information": 56969, "information contained": 25790, "texts different": 56873, "languages method": 28728, "method uses": 32697, "mathematical model": 31934, "model called": 33641, "represent different": 46468, "given word": 22801, "word model": 62247, "model fed": 33885, "words extracted": 62417, "extracted corpus": 20007, "words meanings": 62456, "used build": 60110, "using corpora": 60631, "corpora different": 11192, "languages resources": 28773, "languages makes": 28725, "makes possible": 31630, "information languages": 25942, "meaning words": 32021, "world wide": 62967, "wide web": 61987, "probability word": 42482, "conditional probabilities": 10001, "word word": 62343, "words present": 62483, "confidence measure": 10114, "translation problem": 58660, "sequence words": 50019, "original word": 38739, "word level": 62225, "level confidence": 30080, "translation based": 58582, "lexical features": 30364, "features language": 20609, "model evaluate": 33840, "using combination": 60611, "measures based": 32075, "classification error": 8461, "error rate": 18226, "represent word": 46485, "keeps track": 27282, "contexts words": 10757, "standard supervised": 52531, "supervised machine": 54013, "learning algorithm": 29511, "semantic tasks": 49363, "tasks good": 55660, "good results": 22944, "results task": 47877, "task specific": 55387, "series experiments": 50063, "predicate argument": 41628, "argument structures": 4175, "structures used": 53197, "previously used": 42356, "used context": 60127, "context multi": 10677, "document summarization": 15837, "stages stage": 52451, "vocabulary words": 61719, "later used": 29152, "second stage": 49021, "approaches order": 3887, "order identify": 38625, "semantic roles": 49340, "semantics paper": 49410, "paper presents": 39468, "report experiments": 46436, "special type": 52024, "multiword expressions": 36333, "expressions mwes": 19809, "clear cut": 8652, "extracted large": 20015, "set examples": 50150, "results data": 47567, "data extraction": 12356, "finite state": 21057, "techniques results": 56133, "methods extracting": 32860, "models context": 34859, "context word": 10746, "phrase structure": 40846, "present paper": 41985, "sentences corpus": 49698, "corpus generated": 11349, "software developers": 51636, "data structures": 12700, "relational database": 46004, "lexicon based": 30408, "based nlp": 5912, "model efficiently": 33803, "compression techniques": 9815, "model demonstrate": 33744, "demonstrate possible": 13955, "great deal": 23202, "cognitive psychology": 8897, "computer science": 9891, "using word": 61027, "context text": 10732, "text corpora": 56515, "measures word": 32082, "similarity word": 51129, "word association": 62114, "general way": 22096, "search engine": 48970, "large range": 28951, "search queries": 48979, "queries paper": 44657, "words phrases": 62479, "express opinions": 19794, "settings including": 50377, "fundamentally different": 21796, "develop framework": 14589, "evaluation using": 18746, "scale collection": 48558, "reviews dataset": 48048, "subtle ways": 53677, "approach develop": 3486, "develop novel": 14607, "novel methods": 37869, "methods advantage": 32742, "evaluation provide": 18688, "social psychology": 51601, "different countries": 14881, "analysis linguistic": 2690, "linguistic typology": 30804, "verbs adjectives": 61521, "analysis small": 2760, "small sample": 51495, "propose computational": 43326, "process model": 42805, "model able": 33487, "able discover": 689, "able cope": 683, "noisy channel": 37613, "channel model": 8188, "syntactic structure": 54328, "structure sentence": 53135, "discourse structure": 15399, "structure text": 53141, "text given": 56610, "hierarchical model": 23678, "syntactic discourse": 54302, "generate coherent": 22183, "arbitrary length": 4013, "outperforms baseline": 38867, "sentence based": 49519, "based compression": 5631, "sentences text": 49794, "text results": 56747, "results support": 47871, "support claim": 54115, "discourse knowledge": 15391, "knowledge plays": 27569, "plays important": 40999, "important role": 24767, "entity detection": 18101, "detection tracking": 14537, "task identifying": 55122, "real world": 45121, "named entity": 36371, "coreference resolution": 11160, "resolution task": 47195, "task considering": 54971, "mention detection": 32297, "limited using": 30631, "using local": 60775, "local features": 30937, "task simultaneously": 55379, "able learn": 704, "complex non": 9645, "non local": 37661, "develop new": 14606, "model explore": 33860, "explore utility": 19751, "demonstrating effectiveness": 14051, "task paper": 55261, "paper propose": 39489, "pattern based": 39959, "based term": 6088, "term extraction": 56235, "extraction approach": 20048, "originally developed": 38744, "using morphological": 60815, "candidates based": 7584, "log likelihood": 30972, "present method": 41943, "method automatic": 32392, "lexical units": 30394, "bilingual lexicon": 7110, "based linguistic": 5816, "linguistic properties": 30785, "different aspects": 14844, "candidate translations": 7581, "collect new": 8948, "corpus web": 11457, "web pages": 61890, "non compositional": 37643, "technique based": 56028, "reach high": 45047, "high precision": 23763, "paper reviews": 39569, "indian languages": 25519, "languages paper": 28743, "paper explores": 39369, "solve problems": 51688, "chinese text": 8323, "new word": 37361, "form words": 21342, "entirely new": 18033, "add new": 1588, "new way": 37359, "traditional approach": 57510, "paper using": 39604, "using rule": 60913, "rule based": 48378, "based algorithm": 5561, "accuracy 60": 899, "model reference": 34293, "reference resolution": 45743, "overcome difficulties": 39062, "previous approaches": 42238, "approaches based": 3773, "referring expression": 45759, "entities model": 18066, "model accounts": 33498, "explicitly mentioned": 19641, "set potential": 50220, "important feature": 24726, "approach provides": 3659, "fresh perspective": 21689, "widely recognized": 61999, "annotation schemes": 2969, "need use": 36599, "use language": 59922, "linguistic annotation": 30748, "model variety": 34526, "variety different": 61267, "different annotation": 14836, "morpho syntactic": 35836, "provide overview": 44110, "framework demonstrate": 21488, "demonstrate applicability": 13863, "comparative evaluation": 9320, "data model": 12488, "model linguistic": 34067, "linguistic annotations": 30749, "use work": 60077, "research community": 47002, "alternative method": 2504, "language features": 28069, "significant number": 50900, "used various": 60347, "various strategies": 61398, "tackle challenging": 54702, "results produced": 47778, "obtained using": 38228, "using specific": 60957, "article proposes": 4458, "proposes method": 43933, "method extract": 32503, "dependency structures": 14139, "interactions words": 26624, "level words": 30233, "dependency relations": 14136, "relations extracted": 46032, "parsing process": 39793, "dependency tree": 14142, "new light": 37239, "dependency parsing": 14131, "make available": 31541, "human languages": 24194, "mathematical reasoning": 31935, "use speech": 60024, "facial expressions": 20260, "input output": 26308, "provide new": 44104, "new possibilities": 37284, "information effective": 25824, "effective efficient": 16648, "models user": 35653, "domain task": 16201, "media text": 32183, "text audio": 56444, "audio video": 4932, "representation framework": 46522, "takes account": 54777, "meaning representation": 32012, "http www": 24050, "processes paper": 42846, "english words": 17906, "words lexical": 62448, "tasks relies": 55849, "unsupervised approaches": 59682, "syntactic patterns": 54313, "precision recall": 41616, "words task": 62526, "task second": 55349, "task provided": 55308, "provided paper": 44170, "based bilingual": 5610, "comparable corpora": 9293, "noun phrases": 37742, "language evaluation": 28055, "evaluation experiment": 18617, "alignment algorithm": 2363, "method low": 32567, "important domain": 24719, "languages approach": 28602, "approach able": 3387, "words words": 62549, "work presents": 62762, "new approach": 37131, "terms computational": 56278, "new computational": 37152, "model based": 33598, "grammar model": 23064, "model simple": 34385, "artificial neural": 4497, "neural network": 36993, "widely used": 62007, "artificial intelligence": 4490, "text sentences": 56762, "sentences words": 49808, "major problems": 31520, "context work": 10748, "work explores": 62661, "important language": 24740, "performance difference": 40290, "based dynamic": 5693, "segmentation algorithm": 49079, "projection based": 43139, "based static": 6057, "involves training": 27021, "text using": 56836, "using static": 60962, "segmentation method": 49083, "compared best": 9390, "chinese texts": 8324, "studied using": 53239, "chinese japanese": 8309, "using semantic": 60919, "chinese characters": 8300, "contain information": 10463, "information meaning": 25969, "study analyze": 53324, "words occur": 62470, "word occurrence": 62252, "research based": 46992, "based approach": 5570, "world text": 62964, "source text": 51808, "investigate new": 26972, "given text": 22794, "text languages": 56644, "evolve time": 18837, "using similarity": 60942, "method used": 32694, "used modern": 60242, "recently proposed": 45454, "method avoids": 32397, "studies use": 53305, "require specific": 46889, "specific linguistic": 52105, "linguistic knowledge": 30775, "method allows": 32379, "large number": 28922, "number languages": 38015, "languages applied": 28600, "applied method": 3282, "indo european": 25596, "similar previous": 51059, "previous studies": 42286, "new information": 37222, "various languages": 61352, "method measure": 32573, "measure degree": 32048, "proposed new": 43869, "levenshtein distance": 30253, "distance words": 15550, "words meaning": 62455, "words corresponding": 62392, "corresponding different": 11548, "different meanings": 14984, "meanings words": 32037, "associated words": 4626, "problem tackled": 42672, "methodology based": 32718, "words considered": 62386, "meaning different": 32001, "different domains": 14905, "science technology": 48751, "meaning sentences": 32016, "sentences sentences": 49785, "words differ": 62397, "change time": 8173, "focus recent": 21193, "study explores": 53376, "new avenues": 37140, "study word": 53476, "analysis context": 2638, "pairs languages": 39200, "languages family": 28670, "distances words": 15552, "words associated": 62368, "large vocabulary": 29046, "useful information": 60370, "length input": 30028, "paper tackle": 39590, "tackle problem": 54708, "language language": 28130, "language spoken": 28501, "spoken written": 52367, "general purpose": 22085, "formal languages": 21349, "programming languages": 43087, "languages languages": 28706, "used study": 60314, "information processing": 26024, "processing using": 42965, "using natural": 60828, "called natural": 7550, "input sentence": 26329, "aim produce": 2157, "grammatical structures": 23079, "structures sentences": 53195, "grammar rules": 23065, "grammatical errors": 23070, "parse tree": 39754, "present main": 41939, "main issues": 31445, "machine readable": 31338, "text encoding": 56554, "phrases sentences": 40853, "language expressions": 28065, "convey information": 11083, "information textual": 26119, "textual entailment": 56962, "pairs natural": 39203, "wide range": 61964, "range natural": 44924, "processing applications": 42852, "applications including": 3210, "including question": 25290, "question answering": 44689, "answering summarization": 3097, "summarization text": 53903, "generation machine": 22487, "summarize key": 53906, "key ideas": 27316, "spatio temporal": 51990, "speech recognition": 52282, "present investigation": 41932, "paper approach": 39270, "approach developed": 3487, "developed based": 14625, "aims identify": 2197, "natural speech": 36466, "speech samples": 52292, "recognition accuracy": 45490, "accuracy 98": 930, "help accelerate": 23548, "advance state": 1883, "state art": 52574, "extraction problem": 20096, "current natural": 11989, "processing systems": 42944, "biomedical literature": 7175, "literature paper": 30858, "paper report": 39565, "performance state": 40575, "substantial improvement": 53621, "method applied": 32383, "significant impact": 50870, "poses challenge": 41244, "challenge natural": 7998, "parsers typically": 39765, "typically trained": 59158, "trained large": 57764, "scale corpora": 48559, "non technical": 37685, "text propose": 56715, "propose text": 43667, "text simplification": 56772, "reduce complexity": 45652, "order improve": 38626, "improve performance": 24885, "syntactic parsers": 54311, "syntactic parsing": 54312, "text mining": 56660, "improvement performance": 25013, "processing steps": 42943, "evaluated method": 18536, "method using": 32698, "using corpus": 60632, "sentences annotated": 49680, "empirical results": 17337, "results improvement": 47671, "original sentences": 38728, "linguistic differences": 30764, "modern english": 35706, "19th century": 210, "internet users": 26698, "various models": 61366, "linguistic variations": 30811, "named entities": 36370, "problem different": 42539, "domains natural": 16277, "context propose": 10695, "propose method": 43451, "method integrates": 32547, "based local": 5817, "paper focus": 39377, "model presented": 34228, "finally method": 20868, "method results": 32640, "results evaluation": 47620, "example sentences": 18881, "automatic classification": 5073, "received attention": 45254, "attention nlp": 4801, "based heuristics": 5768, "semantic categories": 49242, "able classify": 680, "annotated corpus": 2880, "space time": 51901, "high data": 23724, "paper developed": 39339, "mean squared": 31995, "squared error": 52401, "presented approach": 42058, "approach compared": 3455, "systems performance": 54588, "performance improvement": 40383, "simulation results": 51265, "hidden markov": 23641, "markov models": 31849, "successfully applied": 53741, "applied automatic": 3264, "automatic speech": 5124, "speech data": 52256, "data fact": 12358, "alternative models": 2507, "models better": 34781, "better able": 6843, "able account": 666, "systems paper": 54579, "present preliminary": 41987, "understanding speech": 59403, "hmm based": 23976, "based speech": 6052, "analysis uses": 2789, "rarely used": 45008, "used field": 60189, "result obtained": 47444, "real data": 45101, "data statistical": 12694, "recognition errors": 45503, "demonstrate using": 13996, "using simulation": 60944, "data resulting": 12614, "recognition error": 45502, "error rates": 18229, "taken results": 54775, "results suggest": 47865, "better understanding": 6987, "data crucial": 12264, "crucial step": 11911, "human language": 24191, "limits ability": 30638, "process text": 42833, "vector space": 61465, "space models": 51877, "paper surveys": 39588, "semantic processing": 49321, "broad classes": 7351, "document word": 15845, "word context": 62131, "broad range": 7353, "range applications": 44907, "source project": 51790, "new perspective": 37281, "novel method": 37863, "method reducing": 32637, "resolve ambiguities": 47199, "work provides": 62796, "solution problem": 51658, "problem addressed": 42499, "provides simple": 44227, "simple effective": 51150, "recognition classification": 45497, "classification named": 8504, "important component": 24711, "component natural": 9709, "nlp applications": 37463, "classification usually": 8581, "taking account": 54787, "classification paper": 8511, "paper use": 39601, "syntactic context": 54296, "context large": 10666, "ner task": 36683, "semantics language": 49406, "language provides": 28453, "provides means": 44212, "space propose": 51886, "linguistic analysis": 30747, "analysis use": 2787, "use tools": 60050, "semantic content": 49259, "spatial relations": 51986, "formal representations": 21352, "special attention": 52013, "representations previous": 46741, "reported paper": 46452, "focus language": 21173, "experimental studies": 19328, "studies propose": 53288, "static dynamic": 52723, "cross linguistic": 11861, "cognitive processing": 8896, "formal framework": 21346, "research shows": 47120, "shows language": 50787, "language specific": 28495, "specific properties": 52133, "space results": 51895, "linguistic variability": 30808, "question models": 44737, "models general": 35051, "automatically detecting": 5159, "preliminary step": 41807, "discourse parsing": 15394, "previous research": 42270, "elementary discourse": 16975, "discourse units": 15403, "linear sequence": 30669, "units paper": 59535, "present simple": 42014, "simple approach": 51135, "able produce": 715, "approach builds": 3439, "standard multi": 52508, "multi class": 35945, "class classification": 8395, "classification techniques": 8572, "techniques combined": 56069, "combined simple": 9085, "global coherence": 22822, "developed evaluated": 14630, "annotations provided": 3000, "ongoing effort": 38349, "effort create": 16924, "cross validated": 11872, "performance results": 40539, "score 73": 48808, "categorial grammar": 7839, "paper introduce": 39400, "contribution paper": 10943, "semantic model": 49300, "model using": 34514, "using soft": 60953, "soft constraints": 51621, "allow users": 2441, "build semantic": 7425, "semantic models": 49301, "descriptions using": 14254, "descriptions generated": 14252, "account context": 877, "using modern": 60809, "20th century": 311, "text corpus": 56516, "directly used": 15341, "used natural": 60247, "implicit information": 24660, "information paper": 26000, "based global": 5755, "experiment conducted": 19234, "generate new": 22223, "character recognition": 8221, "transform text": 58441, "text document": 56540, "languages arabic": 28603, "unique features": 59514, "accuracy word": 1070, "word recognition": 62276, "solution using": 51665, "network solve": 36805, "solve problem": 51684, "nlp challenge": 37470, "conducted experiments": 10083, "approach best": 3432, "best results": 6815, "results showing": 47839, "analysis particular": 2713, "work focuses": 62671, "classical nlp": 8425, "nlp pipelines": 37510, "identification tasks": 24399, "tasks various": 55959, "articles published": 4477, "programming language": 43086, "formal model": 21350, "high level": 23745, "level abstraction": 30055, "state transition": 52713, "model inspired": 34007, "word associations": 62115, "statistically significant": 52770, "documents containing": 15866, "distinguish different": 15603, "different classes": 14862, "data perform": 12541, "perform extensive": 40103, "experiments benchmark": 19362, "benchmark data": 6440, "data sets": 12646, "study performance": 53430, "performance various": 40621, "introduced measure": 26884, "performs poorly": 40711, "french language": 21664, "language linguistic": 28140, "lexicon features": 30410, "various natural": 61368, "patterns word": 39978, "word use": 62330, "human activities": 24089, "intrinsic properties": 26772, "relationship word": 46074, "characteristic features": 8232, "used develop": 60147, "develop method": 14596, "method quantify": 32631, "important aspects": 24702, "size word": 51403, "range topics": 44941, "aspects word": 4553, "word frequencies": 62208, "important word": 24792, "shorter time": 50590, "new concepts": 37154, "novel words": 37957, "words results": 62501, "word statistics": 62315, "provide novel": 44106, "novel information": 37841, "applied solve": 3294, "problems proposed": 42725, "method offers": 32594, "using context": 60620, "context driven": 10618, "sentences experimental": 49715, "experimental results": 19268, "results obtained": 47747, "algorithm presented": 2293, "research paper": 47086, "paper address": 39250, "data management": 12481, "currently available": 12031, "major challenges": 31504, "challenges present": 8069, "present approach": 41847, "research challenge": 46998, "semantic based": 49241, "based search": 6008, "language technology": 28523, "capable understanding": 7631, "understanding semantic": 59397, "language based": 27974, "use semantic": 60006, "semantic web": 49376, "paper briefly": 39282, "french italian": 21663, "multi word": 36042, "domain language": 16098, "rapidly evolving": 44994, "study properties": 53441, "information related": 26049, "set documents": 50138, "latent variables": 29146, "techniques use": 56147, "original english": 38713, "english text": 17890, "language annotations": 27962, "based set": 6030, "general rules": 22089, "existing text": 19158, "text annotated": 56432, "significant increase": 50891, "increase size": 25424, "annotation rules": 2966, "set rules": 50243, "knowledge annotated": 27395, "language closely": 27990, "try answer": 58845, "data collected": 12216, "paper studies": 39577, "linguistic constraints": 30758, "built using": 7493, "texts written": 56948, "written language": 63002, "language words": 28582, "network built": 36713, "text natural": 56672, "natural order": 36461, "small world": 51513, "scale free": 48573, "high degree": 23725, "surprising result": 54183, "underlying data": 59265, "network address": 36694, "address problem": 1783, "information speech": 26102, "speech signal": 52293, "speech based": 52253, "based dialogue": 5679, "dialogue systems": 14788, "systems using": 54663, "using phrase": 60858, "phrase level": 40841, "prosodic features": 43957, "utterance level": 61137, "features improves": 20601, "improves model": 25135, "model level": 34056, "addition models": 1627, "models used": 35651, "used predict": 60266, "varying levels": 61433, "allows compare": 2454, "based feature": 5727, "feature sets": 20505, "article presents": 4455, "main features": 31438, "features new": 20630, "web based": 61879, "classification scheme": 8539, "new generation": 37214, "dimensions including": 15244, "small scale": 51496, "studies address": 53243, "real time": 45114, "wide variety": 61979, "relation types": 45999, "probabilistic framework": 42458, "framework model": 21563, "conversational dataset": 11044, "dataset specifically": 13097, "task time": 55438, "linguistic style": 30799, "scale real": 48620, "world setting": 62958, "setting furthermore": 50325, "explore potential": 19727, "network features": 36743, "current state": 12011, "english machine": 17840, "actions taken": 1462, "translation process": 58661, "word sense": 62297, "sense disambiguation": 49483, "technique using": 56051, "using google": 60709, "presents design": 42080, "design development": 14273, "spoken words": 52366, "sequences paper": 50023, "algorithm designed": 2267, "word boundaries": 62120, "speech processing": 52281, "processing requires": 42932, "evaluate set": 18504, "adjacent words": 1842, "technique improves": 56034, "improves accuracy": 25113, "words output": 62474, "source code": 51743, "language styles": 28512, "function words": 21762, "way achieve": 61789, "language generation": 28083, "generation process": 22529, "study question": 53450, "words large": 62445, "corpus report": 11420, "compositional meaning": 9744, "sentences using": 49803, "using data": 60642, "based unsupervised": 6120, "unsupervised learning": 59705, "evaluation based": 18581, "based word": 6134, "disambiguation task": 15361, "task developed": 55017, "sentences similar": 49787, "sentences model": 49754, "model matches": 34094, "syntactic complexity": 54293, "model paper": 34173, "paper evaluates": 39352, "different tasks": 15094, "translation mt": 58637, "approach named": 3608, "use different": 59867, "different kinds": 14959, "translation english": 58605, "target language": 54823, "spanish english": 51941, "english languages": 17835, "approach easily": 3499, "easily extended": 16540, "extended languages": 19836, "topics covered": 57445, "multiple answers": 36166, "review research": 48039, "recent work": 45366, "work focused": 62670, "focused primarily": 21228, "work study": 62830, "computational linguistics": 9845, "compare approaches": 9330, "gold standard": 22915, "dataset based": 12822, "feature analysis": 20473, "learned models": 29468, "models additionally": 34694, "new semantic": 37310, "semantic relatedness": 49325, "wikipedia based": 62044, "explicit semantic": 19624, "svm classifier": 54233, "classifier trained": 8605, "wikipedia data": 62047, "data various": 12770, "word sentence": 62301, "level propose": 30183, "process involves": 42797, "multi layered": 35982, "clustering based": 8739, "sequence alignment": 49906, "learning based": 29529, "based finding": 5730, "canonical form": 7592, "analyzing large": 2843, "social networks": 51597, "improving performance": 25189, "correcting errors": 11481, "user interface": 60429, "available download": 5284, "source tool": 51812, "use novel": 59966, "novel model": 37872, "model sentence": 34353, "discourse analysis": 15385, "analysis information": 2683, "information extraction": 25860, "based shot": 6032, "approach produces": 3654, "original sentence": 38727, "scientific literature": 48764, "test corpus": 56339, "main contributions": 31432, "contributions work": 10957, "distributional semantics": 15670, "concept extraction": 9922, "extraction proposed": 20099, "proposed work": 43927, "research areas": 46984, "semi supervised": 49455, "learning systems": 29902, "proposed semi": 43892, "supervised approaches": 53962, "approaches used": 3949, "used different": 60150, "paper consider": 39302, "consider problem": 10217, "random variable": 44891, "context free": 10641, "free grammar": 21642, "space second": 51896, "widely studied": 62005, "order paper": 38645, "presents new": 42093, "previously developed": 42332, "special cases": 52015, "cases paper": 7811, "paper introduces": 39406, "annotation framework": 2951, "best practices": 6803, "additional features": 1669, "features support": 20678, "syntactic phenomena": 54314, "phenomena including": 40810, "case study": 7798, "annotated corpora": 2878, "greatly increased": 23234, "simple way": 51225, "query languages": 44672, "languages proposed": 28760, "multiple layers": 36240, "easy learn": 16565, "query language": 44671, "language particular": 28375, "framework based": 21461, "multiple levels": 36241, "document language": 15801, "language simple": 28488, "simple intuitive": 51182, "expressive power": 19814, "commonly used": 9222, "used tasks": 60324, "tasks require": 55854, "compare language": 9344, "translate natural": 58551, "language sentences": 28481, "knowledge representation": 27591, "representation language": 46534, "language uses": 28569, "lambda calculus": 27939, "using input": 60737, "semantic representation": 49332, "representation words": 46606, "languages including": 28694, "order logic": 38631, "answer set": 3055, "uses syntactic": 60539, "construct semantic": 10402, "semantic meaning": 49298, "parser used": 39762, "used addition": 60081, "learn semantic": 29418, "use existing": 59883, "statistical learning": 52747, "learning approach": 29519, "assign weights": 4601, "multiple meanings": 36245, "improved results": 24963, "results standard": 47853, "standard corpora": 52477, "corpora natural": 11225, "language interfaces": 28121, "database queries": 12784, "understand natural": 59306, "text answer": 56434, "answer questions": 3051, "questions given": 44790, "given natural": 22762, "language achieve": 27950, "able process": 714, "process natural": 42808, "able capture": 679, "knowledge text": 27629, "formal language": 21348, "words sentence": 62507, "approach uses": 3733, "developed methods": 14633, "methods learn": 32923, "training sentence": 58246, "sentence meaning": 49600, "pairs evaluate": 39185, "methods compare": 32792, "compare existing": 9341, "capable automatically": 7616, "translating english": 58565, "provide solutions": 44132, "approach using": 3734, "using probabilistic": 60874, "able distinguish": 690, "meanings word": 32036, "parameters learned": 39706, "using ontology": 60847, "large set": 29009, "paper investigates": 39414, "ad hoc": 1495, "retrieval task": 47971, "experiments open": 19484, "data proposed": 12572, "proposed technique": 43911, "data collection": 12218, "experiments demonstrated": 19409, "demonstrated promising": 14014, "promising results": 43179, "language like": 28138, "rhetorical relations": 48087, "play different": 40967, "different roles": 15055, "discourse relations": 15397, "paper gives": 39387, "data high": 12401, "level correlation": 30092, "emotional content": 17296, "writing style": 62990, "study conducted": 53345, "inter rater": 26583, "likert scale": 30527, "results different": 47592, "distributions different": 15675, "different sentence": 15063, "analysis identify": 2675, "main objective": 31448, "design automatic": 14263, "scoring mechanism": 48935, "sentence level": 49577, "study important": 53388, "purpose paper": 44408, "presents method": 42090, "based lexical": 5813, "lexical semantic": 30383, "method takes": 32678, "linguistic processing": 30784, "processing tools": 42960, "ontology based": 38399, "concepts semantic": 9943, "semantic annotation": 49235, "linguistic resources": 30791, "resources paper": 47323, "paper details": 39337, "process building": 42762, "used annotation": 60089, "speech understanding": 52314, "domain paper": 16128, "lexical ambiguity": 30352, "word different": 62138, "specific task": 52153, "task commonly": 54958, "commonly referred": 9221, "disambiguation wsd": 15363, "sense words": 49491, "source word": 51819, "methods based": 32764, "based main": 5829, "research area": 46983, "knowledge based": 27411, "based method": 5846, "corpus based": 11286, "hypothesis word": 24350, "requires knowledge": 46934, "knowledge sources": 27614, "order solve": 38652, "sources different": 51828, "approach combines": 3452, "combines various": 9104, "various sources": 61393, "sources knowledge": 51832, "sources information": 51831, "information order": 25996, "order achieve": 38588, "achieve good": 1145, "finally paper": 20873, "presents comprehensive": 42077, "comprehensive study": 9800, "evaluation methods": 18642, "literary texts": 30855, "english previous": 17860, "provide examples": 44065, "linguistic aspects": 30750, "previous work": 42302, "work using": 62855, "results previous": 47776, "approach problem": 3652, "proposed knowledge": 43796, "knowledge domain": 27448, "social behavior": 51554, "english sentences": 17873, "location time": 30968, "cause effect": 7884, "knowledge base": 27404, "method automatically": 32393, "language machine": 28144, "role natural": 48316, "language applications": 27964, "applications information": 3211, "proper nouns": 43254, "technical terms": 56020, "phoneme based": 40821, "model proposed": 34249, "little research": 30884, "framework multiple": 21568, "models operate": 35279, "comparison models": 9499, "models framework": 35042, "framework using": 21622, "modeling models": 34600, "way improve": 61808, "improve machine": 24869, "models effective": 34946, "effective models": 16676, "explore possibility": 19725, "tools used": 57387, "used produce": 60276, "word length": 62224, "information content": 25791, "english german": 17812, "german spanish": 22676, "usage words": 59810, "frequently used": 21687, "negative words": 36640, "frequency information": 21673, "communication social": 9253, "presents novel": 42096, "novel algorithm": 37751, "sentiment word": 49866, "chinese language": 8310, "language proposed": 28450, "proposed algorithm": 43715, "algorithm applied": 2262, "sentiment classification": 49833, "using proposed": 60879, "experiment shows": 19253, "shows proposed": 50798, "algorithm achieves": 2260, "outperforming existing": 38851, "biggest challenges": 7097, "challenges development": 8040, "spoken dialogue": 52354, "spoken language": 52359, "generation module": 22500, "dialogue context": 14768, "promising approach": 43160, "generation uses": 22576, "knowledge automatically": 27401, "application domain": 3164, "individual user": 25585, "complex information": 9629, "template based": 56174, "based generator": 5752, "tuned domain": 58872, "domain method": 16110, "method easily": 32470, "generally perform": 22169, "perform better": 40072, "better models": 6921, "models trained": 35602, "trained tested": 57892, "content selection": 10555, "knowledge results": 27600, "results provide": 47788, "sentence structure": 49651, "finally evaluate": 20854, "contribution different": 10941, "different feature": 14932, "gram features": 23052, "features features": 20584, "features based": 20530, "higher level": 23830, "level linguistic": 30151, "linguistic representations": 30789, "representations paper": 46732, "vector based": 61450, "based representations": 5995, "representations meaning": 46716, "approaches problem": 3901, "approach present": 3648, "present different": 41893, "framework use": 21619, "words represented": 62498, "meaning sentence": 32015, "sentence represented": 49634, "logical form": 30983, "form paper": 21332, "indian language": 25518, "applications natural": 3222, "like machine": 30482, "translation speech": 58680, "speech tagging": 52300, "retrieval question": 47964, "feature selection": 20502, "important factor": 24723, "using conditional": 60618, "conditional random": 10003, "random field": 44870, "field crf": 20755, "genetic algorithm": 22637, "fold cross": 21250, "cross validation": 11873, "function model": 21757, "model demonstrated": 33745, "crf based": 11762, "application paper": 3174, "presents preliminary": 42100, "socio linguistic": 51613, "allow easy": 2435, "personal information": 40757, "information speakers": 26099, "annotation tasks": 2974, "tasks used": 55953, "kind information": 27367, "paper evaluate": 39351, "evaluate various": 18516, "new version": 37358, "corpus used": 11454, "used evaluation": 60173, "evaluation campaign": 18585, "features make": 20621, "plain text": 40938, "text format": 56584, "nlp paper": 37507, "presents work": 42111, "work relies": 62804, "fine grained": 20925, "linguistic information": 30772, "information provided": 26035, "existing resources": 19138, "various features": 61342, "different types": 15108, "freely available": 21652, "text information": 56628, "information presented": 26019, "internal representation": 26688, "representation natural": 46561, "subject object": 53554, "answer generation": 3035, "generation based": 22425, "persons organizations": 40772, "proposed algorithms": 43716, "information systems": 26111, "twitter messages": 59038, "using dataset": 60644, "furthermore present": 21832, "using machine": 60782, "interactive web": 26635, "web application": 61877, "tagging using": 54755, "transformation based": 58443, "based learning": 5811, "statistics based": 52777, "developed using": 14640, "critical achieving": 11776, "achieving good": 1406, "results method": 47714, "lexical relations": 30378, "preprocessing step": 41828, "grammatical relations": 23075, "relations sentences": 46056, "sentences use": 49801, "use context": 59851, "parse trees": 39755, "structure language": 53112, "existing studies": 19150, "studies used": 53306, "comparative studies": 9322, "studies using": 53307, "using raw": 60894, "problem use": 42680, "collect corpus": 8939, "particular attention": 39833, "privacy issues": 42440, "corpus statistics": 11438, "60 000": 457, "mandarin chinese": 31697, "step step": 52828, "generate semantic": 22239, "visualization techniques": 61680, "communication information": 9250, "information network": 25990, "social network": 51593, "network analysis": 36695, "space words": 51906, "words related": 62494, "theory based": 57036, "based binary": 5612, "introduced model": 26885, "fully automated": 21713, "automated processing": 5056, "processing language": 42881, "processing natural": 42894, "human brain": 24119, "bridge gap": 7319, "knowledge given": 27494, "given fact": 22743, "aim build": 2141, "given knowledge": 22751, "knowledge state": 27618, "state given": 52698, "present examples": 41907, "translation paper": 58655, "present extension": 41913, "analysis method": 2695, "wikipedia page": 62051, "evaluate method": 18469, "method text": 32683, "text classification": 56465, "increases precision": 25439, "finally provide": 20879, "direct comparison": 15255, "used knowledge": 60220, "reasoning based": 45185, "computer systems": 9893, "sentiment analysis": 49815, "positive negative": 41284, "document paper": 15817, "set human": 50165, "human emotions": 24140, "work model": 62725, "model contains": 33710, "resulting model": 47468, "model compare": 33672, "obtaining significant": 38237, "improvements baseline": 25050, "positive sentiment": 41297, "domains paper": 16281, "recognition asr": 45493, "arabic language": 4000, "work limited": 62711, "present work": 42054, "languages present": 28755, "lexical analysis": 30353, "tokens input": 57327, "sequence tokens": 50012, "context sensitive": 10711, "systems approach": 54432, "high risk": 23797, "space representations": 51893, "tensor product": 56225, "representation space": 46582, "space language": 51870, "language evolution": 28056, "experiments conducted": 19381, "models different": 34914, "different social": 15071, "social communication": 51558, "systems like": 54549, "like natural": 30490, "groups paper": 23281, "paper claim": 39289, "complex tasks": 9669, "success task": 53725, "task oriented": 55253, "oriented dialogue": 38697, "results confirm": 47556, "based grammatical": 5759, "pre processing": 41509, "processing step": 42942, "sentences task": 49792, "information use": 26141, "naive bayesian": 36366, "tagged corpus": 54729, "sentences experiments": 49717, "experiments analysis": 19349, "achieves good": 1327, "good result": 22943, "simple sentences": 51209, "sentences complex": 49693, "complex sentences": 9660, "mimic human": 33268, "semantic distance": 49270, "wordnet based": 62356, "based measures": 5840, "measures human": 32078, "human judgment": 24183, "received little": 45260, "little attention": 30870, "resource poor": 47260, "poor languages": 41138, "attention paid": 4803, "strengths limitations": 52976, "based distributional": 5687, "raw data": 45035, "use knowledge": 59918, "knowledge rich": 27602, "new measures": 37248, "exhaustive comparison": 18997, "significant research": 50918, "research problems": 47098, "lead better": 29256, "gives overview": 22808, "outputs work": 39022, "work attempt": 62577, "multiple new": 36255, "new features": 37203, "pos tagging": 41233, "answering natural": 3085, "language questions": 28458, "problem solve": 42660, "answering qa": 3088, "qa systems": 44458, "systems perform": 54586, "perform information": 40115, "retrieval ir": 47948, "overall performance": 39045, "performance example": 40329, "documents retrieved": 15910, "questions paper": 44797, "text retrieval": 56748, "used evaluate": 60169, "evaluate performance": 18481, "query expansion": 44666, "method data": 32449, "data driven": 12298, "words help": 62429, "difficult questions": 15184, "used improve": 60208, "methods simple": 33043, "correctly predicted": 11495, "possible explanation": 41324, "solve complex": 51677, "ranging simple": 44945, "image processing": 24542, "human spoken": 24242, "convert text": 11073, "text processed": 56711, "world applications": 62927, "spelling errors": 52335, "text especially": 56559, "vocabulary size": 61712, "input speech": 26342, "low quality": 31167, "quality paper": 44560, "paper proposes": 39544, "post editing": 41346, "asr error": 4555, "error correction": 18218, "correction method": 11483, "errors generated": 18239, "asr systems": 4561, "systems proposed": 54606, "error detection": 18219, "detection algorithm": 14457, "generation algorithm": 22417, "algorithm selecting": 2301, "selecting best": 49123, "best candidate": 6753, "dataset contains": 12867, "world data": 62932, "data word": 12775, "word sequences": 62307, "extracted web": 20026, "having large": 23489, "vocabulary experiments": 61702, "different speakers": 15075, "asr errors": 4556, "research improve": 47053, "paper aims": 39262, "shed light": 50524, "especially social": 18301, "networks provide": 36901, "new insights": 37225, "initial experiments": 26214, "experiments machine": 19460, "learning framework": 29651, "framework various": 21626, "various aspects": 61303, "content features": 10523, "paper review": 39568, "translation systems": 58685, "cross language": 11827, "understanding ways": 59417, "information achieves": 25753, "research question": 47107, "way information": 61812, "end develop": 17629, "analysis framework": 2669, "framework build": 21464, "build corpus": 7392, "information able": 25748, "able control": 682, "significant differences": 50863, "use common": 59844, "word choices": 62127, "easy apply": 16558, "apply new": 3343, "new contexts": 37157, "construct large": 10390, "lexical database": 30361, "lexical semantics": 30385, "used extensively": 60182, "propose alternative": 43290, "used computational": 60120, "text experiments": 56566, "experiments performed": 19487, "performed using": 40667, "using known": 60747, "known benchmarks": 27655, "benchmarks results": 6541, "results compared": 47545, "compared systems": 9464, "systems use": 54661, "measuring semantic": 32088, "difficult evaluate": 15165, "information represented": 26055, "optical character": 38522, "paper based": 39279, "source document": 51763, "method detecting": 32459, "real word": 45120, "approach use": 3730, "data set": 12642, "vocabulary word": 61718, "reliable source": 46253, "dictionary based": 14804, "exploit information": 19655, "information extracted": 25858, "set experiments": 50153, "written different": 62996, "future developments": 21869, "output text": 39004, "context based": 10594, "database containing": 12783, "terms word": 56323, "suggest possible": 53827, "process experiments": 42780, "experiments carried": 19368, "significant improvement": 50873, "measures semantic": 32080, "similarity using": 51128, "compare results": 9364, "based similarity": 6036, "similarity measures": 51104, "human judges": 24182, "nlp systems": 37528, "respectively paper": 47375, "lexical knowledge": 30368, "qualitative quantitative": 44478, "present statistical": 42024, "analysis english": 2657, "english texts": 17891, "texts wikipedia": 56945, "address issue": 1762, "language complexity": 27999, "simple english": 51166, "english wikipedia": 17904, "language limited": 28139, "limited vocabulary": 30632, "detailed analysis": 14412, "speech tags": 52304, "simple complex": 51143, "shorter sentences": 50589, "language varieties": 28575, "complexity language": 9679, "finally investigate": 20865, "propose new": 43494, "evaluation metric": 18643, "metric called": 33112, "edit distance": 16592, "inter annotator": 26575, "annotator agreement": 3010, "improvement state": 25026, "art propose": 4361, "propose using": 43696, "evaluate automatic": 18441, "terms human": 56294, "human performance": 24214, "world natural": 62949, "language world": 28585, "web data": 61885, "data paper": 12529, "introduce new": 26831, "new type": 37351, "uses semantic": 60533, "lexical unit": 30393, "overview existing": 39112, "introduce semantic": 26857, "use present": 59978, "architecture approach": 4026, "human readable": 24226, "recent years": 45381, "years growing": 63061, "research nlp": 47082, "nlp tasks": 37531, "tasks particular": 55793, "evaluation systems": 18733, "present new": 41958, "methodology allows": 32716, "automated analysis": 5035, "low cost": 31137, "central idea": 7919, "setting paper": 50338, "scientific research": 48769, "research results": 47117, "confirm effectiveness": 10129, "effectiveness approach": 16766, "spell checker": 52332, "words perform": 62478, "based regular": 5986, "suffer data": 53761, "data sparseness": 12681, "sparseness problem": 51975, "words including": 62435, "proper names": 43253, "specific terms": 52157, "errors text": 18251, "proposes new": 43937, "new context": 37156, "spelling correction": 52334, "digital text": 15215, "documents approach": 15856, "set consists": 50125, "gram word": 23060, "generator based": 22615, "based character": 5614, "character gram": 8203, "gram model": 23057, "model generates": 33933, "conducted set": 10095, "set text": 50264, "documents different": 15870, "study proposed": 53445, "lower computational": 31208, "computational cost": 9839, "aim paper": 2155, "text knowledge": 56636, "domain context": 16031, "context knowledge": 10663, "limited data": 30577, "set domain": 50140, "linguistic data": 30763, "corpus corpus": 11310, "corpus collection": 11295, "collection text": 8986, "used test": 60327, "test data": 56340, "set evaluate": 50146, "evaluate nlp": 18479, "systems available": 54438, "available corpus": 5273, "corpus domain": 11328, "representative corpus": 46795, "corpus evaluation": 11333, "major components": 31505, "knowledge model": 27552, "model evaluation": 33844, "lexical resources": 30381, "straight forward": 52884, "identification extraction": 24388, "framework semantic": 21595, "analysis corpus": 2639, "based context": 5641, "free grammars": 21643, "recently explored": 45425, "use simple": 60017, "simple modification": 51196, "important aspect": 24701, "ranked list": 44957, "number characters": 37987, "current study": 12015, "data best": 12188, "observed data": 38143, "data empirically": 12318, "naive bayes": 36363, "address task": 1803, "task assigning": 54918, "parsing task": 39799, "information propose": 26029, "topic model": 57414, "documents languages": 15891, "languages multilingual": 28735, "latent topics": 29142, "multilingual corpora": 36072, "related documents": 45899, "provides new": 44215, "new framework": 37210, "topic models": 57418, "using topic": 60992, "corpora introduce": 11210, "present ensemble": 41903, "ensemble method": 17976, "method capable": 32410, "text tokens": 56814, "tokens use": 57341, "task solve": 55383, "related high": 45910, "high quality": 23768, "selection task": 49154, "task finding": 55084, "large collection": 28855, "text present": 56704, "parsing accuracy": 39770, "accuracy 97": 929, "based ensemble": 5704, "directly predicting": 15332, "efficiency method": 16845, "method demonstrates": 32455, "confidence predictions": 10116, "provides valuable": 44233, "mental lexicon": 32291, "number syllables": 38041, "relation words": 46001, "words paper": 62475, "tool based": 57359, "psycho linguistic": 44286, "target word": 54856, "nature language": 36480, "existing natural": 19114, "processing methods": 42890, "methods limited": 32929, "limited scope": 30612, "understanding aims": 59322, "understanding language": 59357, "focused understanding": 21232, "language using": 28570, "languages english": 28651, "texts second": 56922, "methods analyze": 32748, "given sentences": 22781, "sentences based": 49684, "based sentence": 6020, "sentence patterns": 49618, "words methods": 62458, "able understand": 730, "learn new": 29403, "new words": 37362, "words addition": 62361, "future work": 21899, "propose general": 43396, "general method": 22068, "contrary previous": 10872, "presented method": 42060, "method does": 32466, "highly structured": 23919, "datasets obtained": 13352, "obtained human": 38213, "human annotation": 24101, "annotation effort": 2945, "unannotated corpus": 59206, "document collection": 15775, "input method": 26297, "input corpus": 26262, "examples include": 18911, "latent dirichlet": 29120, "dirichlet allocation": 15344, "similarity measure": 51102, "measure word": 32065, "method generate": 32515, "types including": 59092, "related word": 45953, "automatically generating": 5179, "represent semantic": 46479, "data extracted": 12354, "language texts": 28528, "novel approach": 37757, "using method": 60801, "semantic lexical": 49295, "analysis text": 2777, "represented using": 46808, "universal language": 59541, "language translation": 28538, "translation method": 58628, "important understanding": 24789, "public opinion": 44325, "paper adopt": 39258, "problems involving": 42704, "goal propose": 22898, "prior work": 42418, "text text": 56807, "text written": 56848, "facilitate study": 20277, "analyses suggest": 2607, "occur frequently": 38269, "popular text": 41193, "work data": 62617, "dynamically generated": 16497, "regular expressions": 45832, "larger corpus": 29071, "yields improved": 63124, "improved performance": 24956, "performance previous": 40496, "requires training": 46955, "training data": 57968, "data allows": 12131, "available https": 5307, "https github": 24058, "github com": 22693, "architecture text": 4090, "unlabeled data": 59564, "data used": 12759, "used learn": 60226, "learn representations": 29415, "representations used": 46782, "features supervised": 20677, "example text": 18882, "text applications": 56436, "high dimensional": 23727, "dimensional space": 15235, "size vocabulary": 51402, "learn low": 29394, "low rank": 31170, "left right": 29998, "right contexts": 48139, "step procedure": 52823, "sample complexity": 48447, "single step": 51339, "efficacy approach": 16831, "representations learned": 46705, "tasks pos": 55800, "superior performance": 53935, "performance neural": 40453, "neural probabilistic": 37085, "probabilistic language": 42463, "gram models": 23058, "long training": 31046, "training times": 58301, "datasets training": 13461, "computationally expensive": 9874, "propose fast": 43386, "simple algorithm": 51133, "algorithm training": 2307, "noise contrastive": 37595, "contrastive estimation": 10896, "penn treebank": 40024, "reduces training": 45699, "order magnitude": 38632, "quality resulting": 44575, "resulting models": 47469, "importance sampling": 24688, "far fewer": 20399, "approach training": 3725, "training neural": 58188, "neural language": 36961, "word corpus": 62134, "word vocabulary": 62342, "obtaining state": 38238, "art results": 4369, "microsoft research": 33233, "sentence completion": 49529, "challenge dataset": 7974, "dataset multilingual": 13002, "multilingual text": 36126, "classification problems": 8521, "languages share": 28782, "labeling cost": 27780, "cost training": 11596, "training classification": 57950, "classification model": 8494, "model individual": 33998, "individual language": 25570, "language important": 28101, "language classification": 27989, "paper develop": 39338, "multi view": 36039, "view learning": 61598, "learning method": 29726, "method cross": 32447, "classification method": 8492, "method built": 32408, "parallel documents": 39651, "empirical study": 17350, "study large": 53404, "classification tasks": 8569, "tasks shows": 55887, "method consistently": 32436, "consistently outperforms": 10301, "methods domain": 32829, "domain adaptation": 15995, "adaptation methods": 1527, "methods multi": 32949, "learning methods": 29730, "fully annotated": 21712, "paper discuss": 39343, "framework evaluating": 21509, "nlp models": 37501, "models particular": 35307, "approach help": 3552, "error analysis": 18213, "intrinsic extrinsic": 26770, "extrinsic evaluations": 20172, "evaluations new": 18764, "novel learning": 37855, "probabilistic generative": 42459, "model composed": 33685, "composed multiple": 9734, "layer layer": 29185, "fine tuning": 20977, "tuning step": 58961, "various state": 61396, "art supervised": 4416, "supervised learning": 53993, "learning algorithms": 29512, "support vector": 54131, "vector machine": 61453, "machine svm": 31342, "maximum entropy": 31968, "entropy model": 18165, "bayes classifier": 6348, "principal component": 42380, "component analysis": 9701, "words given": 62427, "surrounding context": 54196, "context words": 10747, "words knowledge": 62441, "conducted experiment": 10082, "study presents": 53438, "approach based": 3427, "based clustering": 5622, "study uses": 53472, "uses combination": 60497, "search space": 48984, "space work": 51907, "work automatic": 62581, "work based": 62585, "score 82": 48817, "provides powerful": 44220, "usually used": 61073, "sentence structures": 49652, "relations sentence": 46055, "recent advance": 45277, "questions asked": 44774, "similar different": 51037, "turing test": 58979, "new level": 37238, "language approach": 27965, "method inspired": 32545, "algorithm allows": 2261, "average length": 5411, "russian english": 48414, "average word": 5419, "growing rapidly": 23301, "content words": 10573, "words contribute": 62388, "length word": 30039, "desired language": 14348, "using computer": 60616, "people use": 40041, "language english": 28050, "resolve issue": 47201, "input text": 26345, "significant success": 50927, "available natural": 5330, "language parsing": 28374, "focus problem": 21191, "analyze different": 2811, "biomedical domain": 7173, "corpora method": 11222, "level translation": 30227, "large variety": 29044, "words generate": 62424, "overall quality": 39047, "french translation": 21666, "process reducing": 42824, "word form": 62205, "language highly": 28097, "aims provide": 2211, "practical applications": 41458, "applications nlp": 3225, "recent literature": 45318, "analyze challenges": 2807, "survey current": 54204, "current future": 11978, "science research": 48750, "general paper": 22079, "possibility using": 41312, "using linear": 60771, "random fields": 44872, "fields crf": 20778, "corpus present": 11406, "approach detecting": 3484, "makes use": 31642, "allows identify": 2469, "identify words": 24452, "specific word": 52174, "based methods": 5847, "methods automatic": 32761, "grammatical structure": 23078, "number sentences": 38036, "example user": 18883, "important sentences": 24771, "highlight important": 23863, "sentence containing": 49532, "topic sentence": 57429, "information overload": 25999, "meta data": 32329, "multiple dimensions": 36199, "help better": 23553, "better understand": 6986, "paper build": 39284, "build models": 7413, "introducing new": 26901, "corpora consisting": 11184, "consisting million": 10316, "evaluate models": 18474, "models prediction": 35343, "prediction tasks": 41744, "tasks use": 55952, "use model": 59950, "second use": 49028, "sentences best": 49685, "datasets consider": 13191, "model recover": 34287, "evaluation model": 18656, "art approaches": 4214, "scale datasets": 48564, "world datasets": 62935, "datasets introduce": 13303, "introduce model": 26824, "sentiment words": 49867, "words automatically": 62370, "automatically learn": 5187, "aspect specific": 4535, "specific sentiment": 52146, "present study": 42028, "study relationship": 53452, "networks using": 36922, "using novel": 60845, "novel corpus": 37793, "twitter users": 59042, "users prior": 60474, "use linguistic": 59935, "level language": 30144, "based work": 6142, "language does": 28035, "does match": 15958, "classifier model": 8600, "significantly fewer": 50958, "gender language": 22037, "computational methods": 9848, "methods social": 33045, "offers new": 38303, "analysis dataset": 2642, "dataset comprising": 12856, "using latent": 60762, "latent vector": 29147, "autoregressive model": 5221, "model aggregate": 33549, "thousands words": 57081, "identify high": 24424, "united states": 59527, "states model": 52721, "model robust": 34330, "results analysis": 47499, "plays central": 40989, "central role": 7921, "single unified": 51354, "american english": 2536, "english paper": 17853, "paper demonstrate": 39316, "discuss results": 15481, "analysis named": 2701, "entity recognition": 18126, "present findings": 41916, "analyses paper": 2600, "method extracting": 32504, "allows generation": 2467, "translation pairs": 58654, "words source": 62517, "ranking methods": 44972, "based translation": 6112, "based features": 5728, "features used": 20689, "used select": 60295, "select best": 49100, "translation obtain": 58650, "obtain average": 38160, "average precision": 5415, "language pairs": 28366, "pairs english": 39183, "historical documents": 23959, "modern chinese": 35702, "research challenges": 46999, "issues using": 27106, "lexical syntactic": 30389, "semantic pragmatic": 49319, "language information": 28115, "computer scientists": 9892, "prove useful": 43983, "contexts using": 10755, "particularly useful": 39893, "semantic concept": 49253, "work shown": 62822, "model effective": 33797, "effective semantic": 16693, "transition based": 58537, "based dependency": 5676, "dependency parsers": 14129, "different representations": 15051, "use classifier": 59841, "experiments multilingual": 19472, "earlier work": 16508, "low resource": 31172, "support hypothesis": 54120, "vector machines": 61455, "parsing performance": 39791, "size training": 51399, "training set": 58250, "english based": 17776, "based recently": 5981, "recently emerged": 45421, "proposed improve": 43794, "popular approach": 41155, "approach solve": 3698, "need able": 36542, "look ahead": 31064, "way existing": 61801, "methods algorithms": 32745, "specifically designed": 52193, "parsing approach": 39771, "large subset": 29019, "shows approach": 50762, "approach practical": 3642, "statistical machine": 52748, "translation smt": 58678, "parallel corpus": 39644, "pair languages": 39152, "term goal": 56238, "problem study": 42670, "parallel sentences": 39653, "sentences wikipedia": 49806, "using pos": 60864, "main focus": 31441, "syntactic features": 54303, "features languages": 20610, "human evaluation": 24142, "evaluation performed": 18670, "shows promising": 50795, "results comparison": 47552, "comparison baseline": 9491, "baseline language": 6177, "language learning": 28135, "learning language": 29694, "language learn": 28133, "vast number": 61441, "method finding": 32508, "relative frequencies": 46097, "propose way": 43702, "use techniques": 60043, "language speech": 28499, "speech information": 52265, "information included": 25917, "conclude discussion": 9969, "language use": 28567, "use paper": 59970, "perform natural": 40124, "processing tasks": 42946, "processing techniques": 42956, "techniques based": 56065, "based words": 6141, "corpus using": 11455, "using lexicon": 60769, "question based": 44722, "chinese social": 8319, "social media": 51567, "writing systems": 62992, "use visual": 60072, "knowledge understanding": 27638, "understanding current": 59336, "current events": 11977, "analysis popular": 2718, "specific problem": 52130, "problem difficult": 42540, "new ways": 37360, "paper new": 39428, "uses language": 60516, "new efficient": 37179, "efficient method": 16884, "experimental evaluations": 19264, "efficiency improvements": 16843, "compared existing": 9405, "existing techniques": 19156, "technique used": 56049, "used large": 60224, "syntactic analysis": 54292, "solution based": 51651, "based idea": 5777, "idea using": 24375, "groups words": 23284, "parts sentence": 39907, "idea based": 24368, "structure complex": 53093, "set sentences": 50244, "deep learning": 13700, "learning models": 29750, "considerable success": 10236, "success natural": 53712, "processing deep": 42866, "deep architectures": 13683, "representations lead": 46703, "lead improvements": 29263, "improvements various": 25110, "various tasks": 61403, "tasks difficult": 55590, "difficult interpret": 15170, "particularly difficult": 39879, "difficult paper": 15179, "makes sense": 31635, "present analysis": 41844, "known model": 27662, "model produces": 34239, "structural representations": 53083, "representations text": 46770, "certain tasks": 7946, "significantly reduced": 51011, "classification accuracy": 8427, "using human": 60731, "online content": 38356, "methods identify": 32891, "identify entities": 24421, "entities unstructured": 18088, "unstructured text": 59671, "text machine": 56652, "learning knowledge": 29691, "extraction systems": 20117, "web scale": 61892, "massive text": 31888, "corpora present": 11233, "10 times": 52, "times faster": 57249, "nlp pipeline": 37509, "pipeline consists": 40897, "high performance": 23758, "close state": 8691, "art speech": 4411, "speech pos": 52277, "pos tagger": 41231, "based named": 5895, "entity recognizer": 18137, "goal research": 22900, "way present": 61827, "applications demonstrate": 3198, "demonstrate power": 13957, "solving problems": 51705, "problems natural": 42713, "does require": 15968, "large sets": 29010, "sets training": 50309, "used solve": 60308, "problems field": 42701, "field nlp": 20764, "nlp problems": 37515, "problem predicting": 42626, "piece text": 40877, "text based": 56452, "produce text": 43015, "text containing": 56510, "results research": 47811, "predict correct": 41637, "time natural": 57182, "summarization based": 53877, "highly relevant": 23910, "relevant sentences": 46234, "approach solving": 3699, "problems nlp": 42717, "nlp knowledge": 37492, "knowledge bases": 27412, "applications benefit": 3186, "easily accessible": 16532, "relational knowledge": 46010, "lack knowledge": 27896, "knowledge new": 27560, "new entities": 37188, "entities relations": 18078, "relations work": 46065, "large unannotated": 29037, "relationships entities": 46079, "entities based": 18038, "introduce neural": 26829, "neural tensor": 37102, "tensor network": 56223, "model predicts": 34226, "model improved": 33979, "entity representations": 18144, "representations word": 46788, "word vectors": 62339, "vectors learned": 61490, "learned unsupervised": 29487, "unsupervised fashion": 59696, "entities present": 18072, "model generalizes": 33927, "outperforms existing": 38896, "existing models": 19108, "models problem": 35359, "accuracy 75": 909, "work deep": 62621, "learning neural": 29778, "neural networks": 37032, "representations input": 46691, "recent progress": 45337, "progress field": 43097, "efficient effective": 16869, "effective methods": 16673, "method efficient": 32475, "important neural": 24747, "network representations": 36795, "representations method": 46717, "method consists": 32439, "propose novel": 43520, "model architectures": 33577, "continuous vector": 10854, "vector representations": 61462, "representations words": 46791, "large data": 28865, "quality representations": 44572, "similarity task": 51124, "compared previously": 9440, "previously best": 42330, "best performing": 6794, "based different": 5680, "types neural": 59105, "observe large": 38136, "large improvements": 28890, "improvements accuracy": 25045, "learn high": 29378, "quality word": 44599, "billion words": 7122, "provide state": 44133, "art performance": 4324, "performance test": 40596, "test set": 56369, "semantic word": 49377, "word similarities": 62309, "native language": 36402, "problems language": 42706, "review recent": 48037, "sufficient data": 53801, "data learn": 12459, "linguistic input": 30773, "ability learn": 619, "learn linguistic": 29390, "language production": 28444, "form meaning": 21326, "simplicity approach": 51235, "learn specific": 29427, "linguistic constructions": 30759, "al 2010": 2232, "new learning": 37237, "outperform existing": 38793, "distributional models": 15665, "models face": 35012, "nlp text": 37556, "source tools": 51814, "using python": 60886, "makes easy": 31621, "specific tasks": 52155, "tasks long": 55731, "document corpus": 15781, "corpus level": 11371, "analytical results": 2801, "easy use": 16568, "web interface": 61888, "english portuguese": 17858, "portuguese language": 41226, "features text": 20682, "text extraction": 56577, "token frequency": 57290, "text search": 56753, "conversational agent": 11038, "urgent need": 59788, "order test": 38656, "application real": 3177, "real users": 45118, "wizard oz": 62096, "capture interactions": 7685, "understand people": 59308, "rapid development": 44988, "development natural": 14689, "non experts": 37655, "learning paradigm": 29796, "understanding natural": 59370, "classification problem": 8520, "answers questions": 3112, "art domain": 4250, "domain approach": 16021, "approach used": 3731, "language interface": 28120, "describes submission": 14233, "based tree": 6113, "using publicly": 60884, "publicly available": 44334, "accomplish task": 846, "provided training": 44174, "data built": 12196, "translation model": 58630, "language pair": 28365, "approach work": 3741, "carried experiments": 7770, "experiments english": 19427, "english italian": 17826, "italian english": 27109, "urdu language": 59784, "improvement baseline": 24986, "baseline bleu": 6159, "kendall tau": 27285, "detailed description": 14418, "reproduce results": 46823, "results possible": 47768, "possible directions": 41322, "limited languages": 30597, "french spanish": 21665, "problem effectively": 42547, "using comparable": 60613, "multilingual information": 36086, "key issues": 27322, "widely accepted": 61989, "fact different": 20289, "based metrics": 5854, "proposed paper": 43875, "paper method": 39425, "cosine similarities": 11574, "based ranking": 5972, "experiments results": 19514, "performs better": 40698, "better traditional": 6981, "frequency based": 21670, "expressions used": 19812, "used specific": 60309, "specific contexts": 52061, "fields machine": 20782, "translation information": 58620, "retrieval information": 47946, "extraction text": 20122, "text categorization": 56462, "bilingual dictionary": 7107, "translation cross": 58592, "retrieval paper": 47961, "paper addresses": 39255, "addresses issues": 1812, "alignment based": 2366, "based multi": 5888, "multi level": 35983, "approach method": 3600, "method computes": 32429, "candidate sentence": 7579, "words usually": 62542, "enhance performance": 17918, "corpus paper": 11398, "experiment results": 19247, "results multi": 47731, "better performance": 6927, "performance existing": 40330, "existing method": 19090, "graph language": 23145, "network proposed": 36791, "text structure": 56790, "computer assisted": 9887, "language learners": 28134, "types language": 59097, "comprehension tasks": 9777, "field natural": 20762, "challenges faced": 8047, "key success": 27336, "work natural": 62731, "nlp rely": 37520, "represent linguistic": 46476, "linguistic phenomena": 30781, "trees paper": 58769, "graphs using": 23193, "using graph": 60714, "overcome problem": 39070, "problem document": 42543, "important terms": 24782, "terms using": 56321, "paper different": 39342, "different stages": 15076, "stop words": 52866, "unique words": 59518, "documents using": 15926, "using term": 60985, "tf idf": 56990, "based minimum": 5856, "approach reduce": 3669, "selection method": 49144, "accuracy experiments": 973, "present results": 42000, "research goal": 47046, "goal automatically": 22877, "automatically creating": 5156, "available resources": 5364, "resources natural": 47319, "tasks machine": 55734, "spanish language": 51945, "existing english": 19065, "english resources": 17865, "approach consists": 3468, "word senses": 62300, "results comparing": 47551, "wikipedia article": 62042, "extracted wikipedia": 20027, "results using": 47899, "spanish japanese": 51944, "japanese english": 27146, "reports results": 46461, "main goal": 31442, "generality proposed": 22107, "previously applied": 42328, "work extend": 62663, "apply technique": 3355, "experiments showed": 19523, "obtain good": 38173, "tasks important": 55670, "important step": 24775, "classification english": 8459, "previously known": 42336, "achieved using": 1281, "aspects linguistic": 4545, "linguistic contexts": 30761, "identify specific": 24445, "specific lexical": 52104, "complexity task": 9689, "results despite": 47589, "based classification": 5618, "useful tool": 60392, "large language": 28897, "resources required": 47331, "mt systems": 35925, "monolingual bilingual": 35792, "bilingual data": 7105, "data web": 12773, "building blocks": 7439, "building language": 7450, "web documents": 61886, "rich textual": 48126, "textual content": 56952, "parallel data": 39645, "available language": 5317, "language order": 28363, "experiments used": 19552, "using sentence": 60923, "sentence alignment": 49515, "successfully used": 53751, "used domain": 60155, "systems work": 54673, "results experimental": 47625, "work develop": 62631, "class based": 8392, "results approach": 47502, "human effort": 24136, "effort required": 16930, "development language": 14680, "complex problem": 9647, "information classification": 25780, "proposes use": 43944, "use automatically": 59830, "semantic class": 49246, "unsupervised clustering": 59686, "clustering task": 8746, "english results": 17867, "different lexical": 14979, "role information": 48308, "sparse data": 51967, "task achieving": 54878, "holds promise": 23987, "task performance": 55273, "exist paper": 19016, "approach overcome": 3632, "based using": 6124, "number natural": 38021, "languages making": 28726, "additionally approach": 1713, "approach allows": 3416, "allows automatic": 2451, "user friendly": 60414, "multilingual natural": 36102, "work demonstrates": 62627, "existing technologies": 19157, "extension existing": 19848, "languages domains": 28647, "models play": 35323, "play crucial": 40965, "crucial role": 11910, "sensitive changes": 49495, "takes place": 54782, "approach speech": 3701, "speech language": 52268, "model adaptation": 33532, "self training": 49222, "training language": 58144, "model parameters": 34180, "based automatically": 5590, "particularly challenging": 39877, "challenging settings": 8141, "conversational speech": 11053, "work propose": 62771, "propose model": 43462, "model considers": 33701, "errors asr": 18233, "asr output": 4559, "instead using": 26468, "using just": 60744, "improve self": 24923, "demonstrate improved": 13921, "topic based": 57393, "based language": 5802, "language modeling": 28205, "results best": 47525, "training using": 58311, "conversations paper": 11059, "given question": 22775, "question query": 44746, "play important": 40971, "answering question": 3092, "answering questions": 3093, "improves quality": 25154, "candidate answers": 7569, "word graph": 62214, "graph model": 23148, "given document": 22737, "improved version": 24972, "words non": 62468, "words experimental": 62413, "better state": 6969, "art task": 4422, "task answering": 54904, "low level": 31156, "lan guage": 27942, "context question": 10701, "developing methods": 14655, "methods extract": 32859, "extract useful": 20000, "information large": 25943, "large collections": 28857, "collections documents": 8989, "search engines": 48971, "aim answer": 2137, "answer question": 3050, "documents contain": 15865, "correct answer": 11465, "task build": 54942, "improving pre": 25190, "analysis word": 2794, "word ambiguity": 62111, "ambiguity word": 2528, "sense word": 49490, "sentences paper": 49762, "describes model": 14226, "model uses": 34512, "speech tagger": 52299, "supervised unsupervised": 54064, "unsupervised methods": 59711, "methods combined": 32788, "algorithm used": 2309, "efficient accurate": 16858, "word based": 62117, "based domain": 5690, "domain information": 16085, "accuracy work": 1071, "work evaluated": 62649, "finding best": 20898, "translation translation": 58697, "translation natural": 58642, "using automated": 60571, "like india": 30477, "process language": 42800, "language paper": 28369, "paper look": 39418, "various machine": 61358, "languages discuss": 28643, "discuss various": 15483, "various approaches": 61300, "building machine": 7453, "alignment text": 2385, "human written": 24259, "presents challenges": 42076, "text alignment": 56427, "problems including": 42702, "individual words": 25588, "new methods": 37256, "based hidden": 5769, "models specifically": 35526, "problem demonstrate": 42531, "summarization task": 53901, "room improvement": 48340, "contains different": 10493, "different features": 14933, "features including": 20603, "modern standard": 35720, "standard arabic": 52464, "arabic msa": 4001, "research uses": 47139, "used classify": 60115, "article aims": 4447, "order extract": 38618, "extract semantic": 19991, "generation semantic": 22543, "based parsing": 5934, "parsing semantic": 39796, "semantic tagging": 49361, "semantic features": 49278, "syntactic dependencies": 54299, "applied text": 3302, "text types": 56827, "answering systems": 3098, "relations expressed": 46031, "relation labels": 45986, "semantically related": 49388, "paper reports": 39566, "user feedback": 60413, "empirical success": 17352, "compositional semantics": 9749, "consider types": 10225, "types textual": 59123, "models capable": 34802, "capable capturing": 7617, "address shortcomings": 1801, "current models": 11988, "models capture": 34805, "solution propose": 51659, "space representation": 51892, "use representation": 59994, "different variants": 15120, "released open": 46177, "open license": 38437, "creative commons": 11752, "nc sa": 36498, "linguistic research": 30790, "words corpus": 62391, "word forms": 62207, "based text": 6090, "textual resources": 56978, "translation techniques": 58688, "probabilistic context": 42455, "compared supervised": 9463, "version text": 61557, "text patterns": 56695, "learning process": 29821, "process automatic": 42759, "automatic learning": 5101, "learning techniques": 29907, "patterns paper": 39973, "paper apply": 39269, "learning technique": 29906, "based structural": 6063, "polysemous words": 41120, "using types": 61006, "accuracy rates": 1034, "knowledge time": 27630, "problem finding": 42567, "interface allows": 26660, "allows user": 2482, "ask questions": 4519, "language large": 28132, "number applications": 37982, "interaction humans": 26601, "end user": 17724, "ease use": 16523, "use various": 60070, "million people": 33254, "people world": 40044, "available information": 5312, "united nations": 59526, "paper covers": 39309, "brief overview": 7328, "different components": 14871, "advantages disadvantages": 1951, "techniques used": 56148, "used paper": 60259, "fast effective": 20422, "key phrases": 27326, "main concepts": 31428, "document used": 15841, "used purpose": 60281, "paper investigate": 39409, "investigate use": 26993, "use additional": 59815, "features pre": 20643, "improve automatic": 24825, "key phrase": 27325, "phrase extraction": 40839, "features include": 20602, "use signal": 60014, "lead significant": 29270, "accuracy results": 1039, "document pre": 15819, "sentences document": 49707, "main content": 31429, "standard set": 52529, "set labeled": 50177, "labeled documents": 27756, "documents training": 15921, "training evaluation": 58091, "subjective nature": 53565, "standard used": 52539, "amazon mechanical": 2521, "mechanical turk": 32092, "obtain useful": 38198, "improvements performance": 25091, "shallow semantic": 50444, "combination pre": 9047, "scores propose": 48915, "computational framework": 9842, "framework identifying": 21537, "new corpus": 37158, "use evaluate": 59881, "domain independent": 16084, "key components": 27301, "classifier achieves": 8592, "close human": 8687, "performance effective": 40312, "domains use": 16299, "use framework": 59895, "framework study": 21606, "achieve high": 1150, "stack exchange": 52416, "finally apply": 20837, "preliminary analysis": 41800, "management systems": 31691, "learning data": 29578, "data available": 12176, "decision making": 13563, "making easier": 31652, "extraction key": 20073, "provides framework": 44200, "process machine": 42803, "extraction classification": 20052, "design new": 14292, "popular word": 41199, "pointwise mutual": 41083, "information pmi": 26009, "experiments large": 19454, "available datasets": 5279, "best known": 6776, "distributional similarity": 15671, "level document": 30103, "document level": 15803, "level concept": 30079, "combined use": 9087, "use document": 59869, "performance gains": 40357, "distributed word": 15629, "word representations": 62286, "word embeddings": 62156, "embeddings recently": 17201, "competitive performance": 9553, "performance language": 40406, "tasks work": 55968, "work train": 62845, "train word": 57659, "100 languages": 61, "languages using": 28816, "demonstrate utility": 13997, "embeddings using": 17240, "features training": 20687, "languages performance": 28750, "performance competitive": 40261, "near state": 36514, "art methods": 4282, "methods english": 32840, "features captured": 20535, "help researchers": 23587, "applications machine": 3218, "emerging research": 17272, "source language": 51776, "text target": 56803, "overall translation": 39053, "translation propose": 58662, "propose use": 43694, "hindi language": 23941, "language current": 28015, "current research": 12004, "opinion mining": 38501, "web resources": 61891, "discussion forums": 15492, "important problem": 24754, "product reviews": 43045, "produce summary": 43013, "techniques developed": 56077, "key tasks": 27338, "analysis paper": 2711, "morphologically rich": 35847, "rich language": 48106, "general approach": 22045, "used development": 60149, "token based": 57281, "based given": 5754, "information previous": 26020, "determine best": 14555, "evaluation machine": 18635, "important research": 24760, "field machine": 20758, "structure words": 53149, "words proper": 62484, "plays significant": 41005, "significant role": 50922, "role improving": 48307, "improving quality": 25194, "quality machine": 44547, "text english": 56556, "perform automatic": 40070, "automatic analysis": 5068, "specifically collect": 52184, "focus analysis": 21143, "people using": 40042, "using nlp": 60841, "nlp methods": 37498, "methods present": 32988, "present series": 42008, "key insights": 27321, "data multiple": 12500, "area research": 4149, "analyzing language": 2842, "based structure": 6064, "50 000": 420, "words set": 62510, "words based": 62371, "based proposed": 5959, "clustering techniques": 8747, "type data": 59051, "set provided": 50229, "years research": 63072, "new techniques": 37339, "development automatic": 14670, "automatic machine": 5102, "performance increase": 40389, "present evaluation": 41905, "evaluation human": 18624, "automatic evaluation": 5081, "evaluation metrics": 18646, "sentence document": 49543, "latent variable": 29143, "variable model": 61223, "model discover": 33771, "semantic frames": 49279, "analyze model": 2822, "model learns": 34053, "issues including": 27091, "parameter learning": 39672, "learning small": 29882, "accuracy paper": 1019, "paper establish": 39350, "texts contain": 56867, "concepts used": 9946, "sentences containing": 49696, "text according": 56422, "pairs use": 39226, "results demonstrate": 47573, "demonstrate effectiveness": 13893, "effectiveness method": 16789, "clinical domain": 8669, "gained increasing": 21916, "increasing attention": 25444, "vital role": 61692, "decision support": 13569, "sequence labeling": 49933, "representations models": 46719, "models post": 35332, "events using": 18799, "using current": 60641, "art sequence": 4399, "approach create": 3471, "inference based": 25644, "sentences natural": 49756, "abstract concepts": 756, "applied proposed": 3288, "sentences semantic": 49782, "processing algorithms": 42850, "semantics words": 49421, "knowledge used": 27643, "provide valuable": 44151, "public sentiment": 44328, "based machine": 5826, "limited use": 30630, "twitter data": 59034, "data research": 12609, "presented paper": 42061, "end framework": 17674, "twitter corpus": 59033, "approach machine": 3594, "approach implemented": 3560, "framework compared": 21473, "compared using": 9470, "good correlation": 22930, "correlation results": 11527, "approach large": 3582, "large volumes": 29048, "methods need": 32954, "big data": 7089, "data techniques": 12726, "graph theoretic": 23174, "theoretic analysis": 57011, "analysis reveals": 2746, "introduce method": 26821, "method detect": 32458, "given sequence": 22782, "linear time": 30674, "input sequence": 26331, "approaches unsupervised": 3947, "work paper": 62743, "describes approach": 14218, "increase number": 25419, "sampling based": 48499, "based alignment": 5563, "alignment method": 2373, "method approach": 32387, "distribution used": 15656, "leads better": 29306, "better evaluation": 6886, "evaluation results": 18700, "results statistical": 47857, "translation tasks": 58687, "alignment approach": 2364, "new data": 37160, "contain significant": 10473, "source knowledge": 51775, "existing knowledge": 19078, "level individual": 30132, "individual sentences": 25579, "levels sentence": 30248, "100 000": 57, "000 sentences": 11, "subset sentences": 53609, "inconsistent results": 25341, "process extracting": 42783, "word given": 62212, "application natural": 3170, "latest developments": 29157, "algorithms results": 2340, "results text": 47883, "mixed effects": 33404, "modeling linguistic": 34590, "applications using": 3257, "likelihood ratio": 30521, "used text": 60328, "considerable attention": 10228, "attention given": 4753, "emotion analysis": 17286, "limited small": 30617, "used generate": 60197, "generate large": 22215, "large high": 28884, "propose solutions": 43641, "word choice": 62126, "choice question": 8335, "help identify": 23569, "sense level": 49487, "level word": 30231, "higher inter": 23828, "degree semantic": 13814, "processing including": 42875, "including machine": 25272, "manually created": 31771, "automatic method": 5106, "method identify": 32526, "pairs based": 39171, "based hypothesis": 5775, "hypothesis pair": 24346, "strongly related": 53073, "human agreement": 24092, "key features": 27312, "features different": 20560, "present automatic": 41852, "methods paper": 32972, "paper considers": 39304, "estimating quality": 18380, "translation outputs": 58653, "human intervention": 24176, "addressed using": 1806, "various measures": 61360, "quality automatic": 44493, "produce good": 42984, "produce results": 43005, "level paper": 30172, "features extracted": 20582, "extracted input": 20013, "input sentences": 26330, "obtained based": 38203, "based bayesian": 5596, "bayesian inference": 6358, "shared task": 50490, "task total": 55440, "tokens used": 57342, "used shared": 60299, "dependency trees": 14143, "trees using": 58771, "using heuristics": 60725, "morphological analysis": 35839, "analysis provided": 2732, "end users": 17725, "language tasks": 28521, "extracting information": 20032, "based application": 5568, "language analysis": 27960, "generates natural": 22349, "focused using": 21233, "task machine": 55196, "language independent": 28105, "language input": 28116, "syntactically semantically": 54342, "language textual": 28529, "analysis emotion": 2655, "emotion detection": 17289, "sentiment emotion": 49842, "task useful": 55460, "issues like": 27093, "analysis tools": 2781, "digital libraries": 15212, "task classification": 54950, "classification algorithms": 8431, "address specific": 1802, "specific challenges": 52053, "datasets used": 13468, "classification task": 8561, "test approach": 56332, "digital library": 15213, "representations semantic": 46751, "space model": 51876, "model suitable": 34427, "algorithm able": 2259, "able recognize": 719, "highly similar": 23916, "semantic composition": 49251, "best models": 6783, "significantly different": 50953, "tasks model": 55748, "matches performance": 31907, "performance best": 40218, "best previous": 6805, "previous models": 42264, "model consists": 33705, "domain domain": 16051, "various ways": 61417, "model relations": 34299, "paper develops": 39340, "automate process": 5033, "process generating": 42786, "missing word": 33365, "language structures": 28508, "based large": 5805, "large monolingual": 28912, "monolingual data": 35798, "languages small": 28791, "data uses": 12764, "distributed representation": 15623, "linear mapping": 30659, "vector spaces": 61470, "spaces languages": 51910, "languages despite": 28637, "despite simplicity": 14390, "surprisingly effective": 54186, "achieve 90": 1107, "words english": 62408, "method makes": 32570, "learning word": 29943, "representations recently": 46747, "recently seen": 45465, "sequences word": 50030, "word tokens": 62323, "languages word": 28820, "word segmentation": 62292, "non trivial": 37688, "trivial task": 58812, "naturally occurring": 36474, "data propose": 12569, "propose learn": 43437, "learn text": 29437, "text representations": 56738, "representations directly": 46640, "directly raw": 15334, "character sequences": 8225, "sequences training": 50028, "simple recurrent": 51204, "recurrent network": 45620, "network predict": 36786, "network uses": 36821, "hidden layer": 23639, "demonstrate usefulness": 13995, "learned text": 29484, "text embeddings": 56549, "embeddings use": 17237, "character level": 8206, "level text": 30223, "labeling task": 27795, "task recognizing": 55325, "spans text": 51958, "language code": 27991, "using embeddings": 60672, "substantially improve": 53636, "improve baseline": 24826, "baseline uses": 6221, "character grams": 8204, "highly multilingual": 23905, "european union": 18431, "multi label": 35975, "label classification": 27695, "manually labelled": 31786, "labelled data": 27800, "data automatically": 12172, "automatically assign": 5143, "ranking task": 44977, "trained classifiers": 57688, "parallel training": 39656, "data languages": 12453, "document collections": 15776, "document representation": 15825, "consistency human": 10267, "process used": 42837, "feature vector": 20508, "various language": 61350, "tasks including": 55677, "including cross": 25245, "cross lingual": 11828, "plagiarism detection": 40933, "detection sentence": 14521, "sentence selection": 49638, "available large": 5319, "typically used": 59160, "improve speed": 24926, "recognition ner": 45518, "new resource": 37304, "use large": 59926, "news media": 37407, "work automated": 62580, "official languages": 38309, "languages particularly": 28748, "particularly important": 39883, "languages order": 28742, "order capture": 38601, "capture complementary": 7653, "news content": 37393, "extracted information": 20012, "information present": 26018, "present publicly": 41991, "publicly accessible": 44333, "discuss implications": 15469, "able achieve": 668, "representation speech": 46584, "speaker listener": 52000, "novel computational": 37785, "words model": 62459, "representations uses": 46784, "key component": 27300, "information real": 26040, "method create": 32444, "create large": 11705, "used obtain": 60255, "level annotations": 30063, "data quality": 12581, "improve semantic": 24924, "semantic coherence": 49248, "response paper": 47396, "new hybrid": 37220, "algorithm combines": 2266, "character based": 8195, "approaches presented": 3899, "approach extended": 3530, "distance metric": 15548, "importance token": 24691, "token level": 57295, "large arabic": 28848, "arabic dataset": 3996, "dataset experimental": 12916, "results proposed": 47781, "types errors": 59086, "different writing": 15136, "writing styles": 62991, "compared results": 9447, "algorithms using": 2346, "dataset proposed": 13039, "achieving higher": 1410, "relies heavily": 46266, "analysis used": 2788, "collections text": 8990, "occurring words": 38282, "words finally": 62419, "finally compare": 20843, "wider range": 62028, "describes new": 14227, "new freely": 37212, "scale multilingual": 48600, "multilingual news": 36105, "analysis combined": 2630, "20 different": 224, "different scripts": 15059, "used number": 60254, "systems learn": 54546, "translation results": 58670, "details regarding": 14433, "new variant": 37357, "algorithm propose": 2294, "using learned": 60766, "recognition experiments": 45506, "deep neural": 13735, "network dnn": 36733, "slightly better": 51435, "widespread use": 62033, "amounts text": 2557, "words work": 62550, "help people": 23583, "growing number": 23299, "available different": 5281, "different natural": 15001, "discuss approaches": 15460, "automated approaches": 5037, "addition propose": 1636, "new architecture": 37136, "used example": 60175, "perform cross": 40083, "time series": 57210, "linguistic elements": 30766, "people interact": 40030, "words order": 62472, "allow researchers": 2440, "cognitive science": 8898, "level analysis": 30060, "end paper": 17692, "computational efficiency": 9841, "art entity": 4258, "machine translated": 31343, "approach paper": 3633, "types entities": 59084, "nouns verbs": 37746, "statistical model": 52754, "model nlp": 34131, "text analysis": 56430, "like information": 30478, "text speech": 56784, "speech synthesis": 52296, "speech tag": 52298, "approaches proposed": 3903, "pos taggers": 41232, "unigram bigram": 59494, "tag set": 54725, "accuracy 77": 910, "translation research": 58669, "evaluation important": 18626, "mt output": 35923, "based evaluation": 5708, "evaluation english": 18615, "research work": 47143, "using different": 60653, "different machine": 14982, "like google": 30472, "evaluation process": 18682, "process using": 42838, "using approaches": 60562, "approaches human": 3840, "evaluation automatic": 18578, "automatic metric": 5108, "semantic relationship": 49328, "various types": 61413, "words sentences": 62508, "sentences documents": 49708, "according semantic": 868, "knowledge representations": 27592, "representations support": 46766, "intelligent agents": 26543, "human ability": 24087, "comprehensive survey": 9801, "instances based": 26434, "based semantic": 6014, "extensively studied": 19919, "political issues": 41110, "paper analyze": 39265, "written form": 62999, "level features": 30120, "significant results": 50919, "significant changes": 50855, "measured terms": 32070, "usage different": 59800, "different characters": 14860, "different words": 15135, "best knowledge": 6768, "knowledge work": 27649, "weakly supervised": 61859, "learning scenario": 29857, "resource rich": 47268, "guide learning": 23337, "learning languages": 29697, "past approaches": 39929, "gold labels": 22914, "labels training": 27854, "training propose": 58218, "transfer model": 58405, "model uncertainty": 34497, "crf model": 11764, "evaluated standard": 18548, "chinese english": 8307, "german english": 22665, "datasets method": 13328, "f1 scores": 20227, "labeled data": 27738, "accuracy supervised": 1056, "labeled sentences": 27762, "sentences furthermore": 49726, "furthermore combined": 21807, "labeled examples": 27758, "yields significant": 63128, "improvements state": 25100, "supervised methods": 54018, "methods achieving": 32736, "achieving best": 1394, "best reported": 6812, "begin introducing": 6379, "branch natural": 7300, "constrained inference": 10366, "way model": 61821, "model long": 34072, "relationships text": 46084, "integer linear": 26499, "linear programming": 30664, "problem automatically": 42510, "results propose": 47780, "propose extension": 43383, "language present": 28385, "complex syntactic": 9666, "syntax semantic": 54352, "particular case": 39835, "information words": 26163, "general information": 22062, "information theoretic": 26120, "theoretic framework": 57013, "information maximization": 25968, "online user": 38392, "semi automated": 49446, "dataset real": 13053, "knowledge corpus": 27428, "corpus available": 11284, "available real": 5355, "preliminary results": 41804, "results used": 47896, "used baseline": 60100, "research dataset": 47010, "dataset released": 13061, "structured knowledge": 53159, "semantic interpretation": 49290, "interpretation terms": 26736, "terms semantic": 56314, "compute similarity": 9880, "order perform": 38646, "similarity methods": 51105, "structure information": 53109, "approaches additionally": 3757, "additionally present": 1728, "based standard": 6054, "standard benchmarks": 52473, "evaluation measures": 18638, "hundreds millions": 24297, "beneficial downstream": 6555, "downstream natural": 16343, "applications question": 3239, "summarization paper": 53895, "new task": 37333, "detection significantly": 14527, "identifying key": 24460, "large dataset": 28868, "class task": 8411, "task accuracy": 54874, "accuracy 73": 907, "class baseline": 8393, "baseline finally": 6168, "resources developed": 47299, "purpose propose": 44410, "propose study": 43652, "study novel": 53422, "novel supervised": 37932, "supervised approach": 53961, "approach learning": 3586, "annotated training": 2925, "training examples": 58094, "proposed semantic": 43891, "occurrence statistics": 38278, "textual units": 56985, "present efficient": 41896, "efficient algorithm": 16860, "learning semantic": 29866, "models training": 35625, "training sample": 58236, "sufficiently large": 53810, "large unstructured": 29042, "coherent texts": 8919, "texts approach": 56859, "models specific": 35525, "results extensive": 47629, "small large": 51480, "method effective": 32471, "competitive state": 9566, "semantic parsing": 49308, "parsing framework": 39780, "learning inference": 29681, "inference framework": 25659, "mapping natural": 31804, "language formal": 28077, "formal representation": 21351, "representation meaning": 46549, "translation evaluation": 58608, "metrics proposed": 33191, "proposed literature": 43800, "require human": 46862, "human reference": 24232, "compare output": 9352, "accurate results": 1086, "text different": 56536, "paper proposed": 39542, "new human": 37219, "quality text": 44588, "translation text": 58690, "text data": 56522, "process information": 42793, "language context": 28006, "paper try": 39600, "words according": 62360, "paper used": 39602, "way build": 61794, "lexicon using": 30414, "word input": 62218, "novel semantic": 37914, "based phrase": 5936, "source target": 51799, "valued vector": 61211, "representations low": 46713, "low dimensional": 31142, "multi layer": 35980, "layer neural": 29194, "data learning": 12460, "directly optimize": 15326, "end end": 17635, "end machine": 17681, "experimental evaluation": 19262, "tasks english": 55613, "results new": 47740, "model significantly": 34379, "significantly improves": 50972, "improves performance": 25141, "phrase based": 40835, "bleu points": 7209, "discourse representation": 15398, "language statements": 28505, "art technologies": 4426, "technologies natural": 56157, "language syntax": 28515, "given set": 22783, "potential limitations": 41398, "based reasoning": 5978, "generation aims": 22413, "new challenge": 37146, "main topic": 31464, "existing approaches": 19025, "approaches neglect": 3882, "hierarchical topic": 23695, "topic structure": 57432, "news corpus": 37395, "generation paper": 22513, "time dependent": 57143, "generation model": 22494, "detect different": 14438, "topic information": 57410, "structure used": 53146, "used sentence": 60296, "selection based": 49133, "based topic": 6101, "sentences selected": 49781, "considering different": 10258, "systems evaluate": 54489, "evaluate long": 18467, "performance comparison": 40260, "comparison different": 9493, "different systems": 15091, "demonstrates effectiveness": 14029, "effectiveness model": 16794, "model terms": 34451, "rouge metrics": 48352, "metrics word": 33208, "embeddings resulting": 17205, "models shown": 35496, "variety nlp": 61285, "tasks architecture": 55506, "difficult train": 15191, "time consuming": 57127, "instead propose": 26460, "movie review": 35895, "similar better": 51030, "performance deep": 40280, "embeddings provide": 17199, "provide easy": 44057, "embeddings specific": 17217, "tasks paper": 55781, "meaning representations": 32014, "newly emerging": 37377, "emerging field": 17271, "task simple": 55378, "sentence space": 49649, "based baseline": 5594, "interesting challenging": 26649, "challenging problem": 8126, "problem machine": 42600, "learning community": 29564, "distributed representations": 15625, "meaning natural": 32005, "natural way": 36470, "data sparsity": 12683, "sparsity problems": 51983, "information semantic": 26076, "discrete representations": 15427, "proven useful": 43995, "useful nlp": 60378, "tasks recent": 55836, "semantic representations": 49335, "representations successfully": 46764, "applications sentiment": 3248, "work learning": 62707, "learning shared": 29873, "level representations": 30194, "representations languages": 46699, "combine approaches": 9062, "method learning": 32563, "learning distributed": 29596, "multilingual setup": 36121, "learns assign": 29952, "aligned sentences": 2358, "sentence aligned": 49514, "word alignments": 62110, "lingual document": 30698, "document classification": 15770, "task outperform": 55258, "outperform previous": 38809, "previous state": 42282, "multiple language": 36234, "pairs model": 39202, "learns representations": 29972, "representations capture": 46625, "capture semantic": 7706, "recursive neural": 45636, "network models": 36767, "words seen": 62503, "tasks known": 55705, "ability accurately": 592, "accurately capture": 1092, "capture aspects": 7651, "linguistic meaning": 30778, "model new": 34125, "corpus constructed": 11305, "logical reasoning": 30986, "short sentences": 50565, "representations generalize": 46675, "generalize new": 22144, "new types": 37352, "learned representation": 29478, "representation models": 46556, "task generating": 55105, "language nl": 28359, "lexico syntactic": 30404, "pattern matching": 39962, "matching based": 31910, "based techniques": 6086, "light weight": 30454, "lead accurate": 29255, "learning linguistic": 29711, "analysis using": 2790, "dl based": 15752, "framework learning": 21554, "sentences non": 49758, "sentences requires": 49778, "tool called": 57360, "observed significant": 38148, "language query": 28456, "given user": 22800, "language semantic": 28478, "logic based": 30979, "query question": 44675, "paper proposing": 39553, "dataset present": 13033, "flexible framework": 21110, "probability estimates": 42475, "kneser ney": 27381, "training efficient": 58080, "efficient approach": 16861, "approach outperforms": 3622, "outperforms state": 38946, "baselines terms": 6309, "terms perplexity": 56308, "large corpora": 28862, "bleu score": 7213, "translation task": 58686, "task recent": 55321, "learning multilingual": 29771, "multilingual word": 36134, "usually relies": 61061, "use word": 60073, "translated sentences": 58558, "sentences order": 49760, "embeddings different": 17112, "autoencoder model": 5027, "model learning": 34052, "bag word": 5502, "word representation": 62283, "representation given": 46525, "given sentence": 22780, "sentence encoded": 49548, "representation extracted": 46515, "translation evaluate": 58607, "evaluate approach": 18437, "multilingual document": 36079, "classification performed": 8516, "experiments observe": 19483, "compares favorably": 9476, "previously proposed": 42340, "method exploits": 32499, "level alignments": 30059, "learn word": 29444, "mining information": 33315, "information integration": 25928, "variety approaches": 61262, "right wrong": 48144, "certain degree": 7938, "human behavior": 24114, "selecting appropriate": 49122, "task significant": 55375, "significant challenge": 50853, "domain expert": 16066, "approach evaluated": 3520, "comparison human": 9497, "human judgments": 24184, "average performance": 5412, "approach study": 3706, "used extract": 60183, "success rate": 53721, "extraction method": 20080, "documents collected": 15863, "collected different": 8961, "different corpora": 14879, "content different": 10519, "languages text": 28804, "applied small": 3293, "small sets": 51501, "sets languages": 50296, "large self": 29008, "providing training": 44254, "data manually": 12483, "tuning results": 58952, "various multilingual": 61367, "developing natural": 14658, "complex text": 9670, "online news": 38375, "news articles": 37385, "make easier": 31567, "develop highly": 14591, "mining applications": 33310, "users select": 60480, "german italian": 22672, "portuguese spanish": 41227, "turkish english": 58984, "news domain": 37402, "available systems": 5373, "source translation": 51817, "translation named": 58641, "language making": 28148, "making use": 31673, "use separate": 60010, "news titles": 37421, "specific style": 52148, "large volume": 29047, "daily news": 12088, "approach fully": 3541, "large text": 29023, "statistical language": 52744, "approach enable": 3507, "information needed": 25987, "language comprehension": 28000, "inferring semantic": 25711, "free text": 21646, "semi structured": 49453, "user generated": 60415, "especially relevant": 18297, "relevant domain": 46212, "pros cons": 43954, "underlying semantic": 59276, "expert annotations": 19572, "different labels": 14962, "using noisy": 60842, "latent topic": 29141, "model review": 34327, "bayesian model": 6360, "model joint": 34023, "joint inference": 27172, "semantically meaningful": 49387, "evaluations demonstrate": 18756, "demonstrate model": 13939, "model substantially": 34422, "substantially outperforms": 53644, "outperforms alternative": 38865, "single multiple": 51324, "multiple documents": 36202, "capabilities current": 7596, "complex questions": 9652, "questions need": 44796, "factual information": 20321, "information scattered": 26072, "scattered different": 48680, "different documents": 14903, "documents specifically": 15914, "temporal relations": 56192, "complex question": 9651, "novel aspect": 37769, "easily extensible": 16542, "present methodology": 41945, "layer perform": 29201, "evaluated compared": 18524, "compared general": 9412, "better results": 6956, "evaluation measure": 18637, "analysis texts": 2778, "linguistic tools": 30803, "study text": 53468, "text example": 56562, "facilitate research": 20275, "aiming reduce": 2169, "effectively reduce": 16754, "accurately identify": 1095, "set possible": 50219, "possible causes": 41319, "involves identifying": 27019, "investigate approaches": 26942, "approaches exploit": 3820, "automatically constructed": 5152, "using simple": 60943, "simple heuristic": 51177, "second approach": 48996, "identification text": 24400, "learn models": 29399, "using models": 60808, "models label": 35158, "heuristic based": 23627, "approach given": 3548, "given sufficient": 22790, "sufficient training": 53806, "data outperform": 12525, "outperform baseline": 38780, "baseline significantly": 6210, "bilingual parallel": 7113, "words high": 62430, "high probability": 23766, "intermediate representation": 26676, "words introduce": 62439, "word expressions": 62203, "compare performance": 9353, "performance different": 40292, "text segments": 56757, "algorithm paper": 2290, "sentence compression": 49530, "naturally capture": 36473, "capture structural": 7712, "decoding framework": 13631, "model trained": 34467, "large margin": 28903, "framework experimental": 21512, "results sentence": 47823, "bring significant": 7334, "art model": 4292, "model task": 34445, "quality information": 44534, "extraction paper": 20090, "examples domain": 18897, "available paper": 5339, "model predicting": 34222, "model explored": 33861, "experiments demonstrate": 19401, "improve f1": 24852, "language semantics": 28479, "requires access": 46912, "vast amounts": 61437, "common sense": 9197, "world knowledge": 62943, "work field": 62665, "based purely": 5966, "manual efforts": 31738, "method called": 32409, "grained semantic": 23044, "derived wikipedia": 14205, "explicitly represent": 19647, "represent meaning": 46477, "meaning text": 32019, "based concepts": 5634, "evaluate effectiveness": 18454, "results significant": 47843, "improvements previous": 25093, "art tasks": 4423, "use natural": 59957, "human users": 24255, "specific entity": 52080, "paper novel": 39429, "approach proposed": 3657, "proposed identify": 43793, "identified using": 24405, "set syntactic": 50254, "syntactic rules": 54321, "identification results": 24396, "results comparable": 47542, "comparable obtained": 9300, "human efforts": 24138, "text automated": 56448, "automated manner": 5050, "aspects text": 4552, "tasks text": 55931, "approach measuring": 3599, "implicit semantic": 24664, "approach exploits": 3529, "approach introduce": 3576, "new measure": 37247, "measure semantic": 32062, "validate method": 61180, "method evaluate": 32489, "performance semantic": 40549, "similarity relatedness": 51116, "word analogy": 62112, "evaluating performance": 18566, "performance method": 40435, "method measuring": 32574, "text semantic": 56758, "relatedness tasks": 45960, "tasks sentence": 55871, "sentence sentence": 49641, "sentence similarity": 49647, "recognition experimental": 45505, "evaluation shows": 18719, "method outperforms": 32597, "method semantic": 32647, "used data": 60135, "based hybrid": 5774, "hybrid approaches": 24310, "approaches paper": 3890, "describes method": 14225, "small parallel": 51491, "rules based": 48391, "statistical mt": 52757, "extracted sentence": 20019, "conducted using": 10097, "quality improved": 44532, "word translation": 62324, "translation transfer": 58695, "using hand": 60720, "method present": 32619, "entirely unsupervised": 18034, "rules applied": 48390, "argument structure": 4174, "sentence plays": 49620, "plays critical": 40991, "critical role": 11791, "systems semantic": 54627, "semantic dependency": 49268, "pipeline framework": 40900, "real applications": 45099, "maintaining competitive": 31489, "parsing word": 39805, "word pair": 62260, "pair classification": 39147, "problem using": 42683, "using maximum": 60792, "entropy classifier": 18161, "feature space": 20506, "space use": 51902, "achieves state": 1378, "performance evaluation": 40327, "evaluation data": 18602, "task pipeline": 55276, "understanding generation": 59347, "especially resource": 18298, "resource constraint": 47214, "present semantic": 42004, "clustering approach": 8738, "document clusters": 15774, "measure similarity": 32063, "constituent words": 10357, "candidate phrase": 7574, "using vector": 61017, "statistical models": 52756, "point wise": 41052, "wise mutual": 62082, "outperforms competing": 38886, "fully unsupervised": 21748, "unlabeled text": 59582, "standard maximum": 52502, "maximum likelihood": 31971, "markov model": 31847, "task performs": 55275, "inductive bias": 25610, "large model": 28910, "model capacity": 33646, "learning objective": 29785, "non parametric": 37676, "orthographic features": 38755, "rare words": 45000, "develop efficient": 14584, "efficient learning": 16881, "computationally intensive": 9876, "standard training": 52536, "training provide": 58221, "provide open": 44108, "source implementation": 51772, "experiments diverse": 19419, "diverse languages": 15705, "achieve significant": 1191, "compared previous": 9433, "previous methods": 42261, "methods task": 33068, "extraction using": 20130, "information various": 26154, "heuristic rules": 23631, "rules training": 48396, "training machine": 58162, "algorithm called": 2265, "random forest": 44875, "similarity features": 51096, "identification task": 24398, "task approach": 54908, "approach presented": 3649, "used identify": 60206, "running text": 48406, "extraction important": 20071, "applications ranging": 3241, "summarization semantic": 53899, "semantic search": 49342, "document clustering": 15773, "graph based": 23101, "large domain": 28874, "domain training": 16215, "training corpus": 57962, "approaches knowledge": 3851, "online systems": 38388, "systems remains": 54617, "remains largely": 46337, "paper experiment": 39356, "noun phrase": 37741, "analyze performance": 2823, "performance benchmark": 40211, "benchmark datasets": 6446, "methods results": 33023, "results competitive": 47553, "better strong": 6971, "strong unsupervised": 53057, "unsupervised baselines": 59684, "baselines propose": 6288, "propose lexical": 43440, "related event": 45905, "data different": 12281, "languages especially": 28658, "internal structure": 26690, "phenomena like": 40811, "conversational systems": 11054, "systems previous": 54596, "work applied": 62567, "learning approaches": 29522, "acquire new": 1442, "words approaches": 62367, "approaches shown": 3919, "shown promise": 50740, "issues related": 27103, "behavior human": 6392, "human machine": 24205, "psycholinguistic studies": 44289, "studies shown": 53301, "eye movement": 20176, "acquisition process": 1448, "previous unsupervised": 42298, "systems generally": 54512, "domain vocabulary": 16226, "different functions": 14940, "conversation context": 11030, "context important": 10653, "address issues": 1769, "developed new": 14637, "new approaches": 37134, "approaches incorporate": 3848, "approaches context": 3788, "results shown": 47840, "contextual information": 10770, "information significantly": 26086, "performance propose": 40502, "novel language": 37850, "independent approach": 25494, "approach improving": 3566, "improve translation": 24935, "language given": 28092, "limited number": 30600, "taking advantage": 54788, "word order": 62254, "improve word": 24939, "poor language": 41137, "english using": 17897, "absolute gain": 742, "points respectively": 41079, "improvement best": 24990, "approaches using": 3950, "using additional": 60552, "additional data": 1664, "lexical entailment": 30363, "proposed strategy": 43904, "context vectors": 10743, "problem learning": 42594, "relations using": 46062, "using supervised": 60969, "relation classification": 45967, "recent state": 45347, "designed capture": 14311, "contexts word": 10756, "represents word": 46820, "learning training": 29918, "feature vectors": 20509, "additionally introduce": 1722, "introduce approach": 26779, "approach new": 3612, "differences similarities": 14828, "set reference": 50235, "approaches use": 3948, "use vector": 60071, "semantics based": 49399, "extensive evaluation": 19868, "evaluation approaches": 18576, "different datasets": 14889, "datasets proposed": 13381, "performs significantly": 40713, "significantly better": 50939, "approaches datasets": 3794, "datasets dataset": 13207, "dataset significantly": 13087, "significantly worse": 51017, "semantic relation": 49326, "spoken languages": 52363, "languages world": 28825, "world research": 62955, "translation language": 58624, "pair paper": 39156, "paper focuses": 39381, "art chinese": 4232, "popular approaches": 41156, "approaches machine": 3867, "available parallel": 5342, "explore alternative": 19686, "pivot language": 40918, "use english": 59878, "english arabic": 17775, "based smt": 6042, "language chinese": 27988, "direct translation": 15260, "objective work": 38108, "community work": 9277, "work important": 62682, "languages given": 28681, "applications require": 3246, "pay attention": 39990, "arabic text": 4005, "text snippets": 56775, "string based": 52992, "approaches standard": 3924, "topic segmentation": 57428, "shown useful": 50758, "useful natural": 60376, "applications present": 3233, "labeling tasks": 27796, "conversations propose": 11061, "approach extends": 3531, "recent graph": 45313, "methods nlp": 32959, "novel unsupervised": 37946, "unsupervised models": 59713, "models exploit": 34998, "novel graph": 37833, "supervised model": 54020, "model combines": 33667, "topic features": 57406, "random walk": 44892, "models respectively": 35451, "different sources": 15074, "empirical evaluation": 17323, "performed best": 40659, "highly correlated": 23890, "correlated human": 11510, "human annotations": 24103, "learning allows": 29514, "allows use": 2481, "use training": 60055, "data language": 12452, "level alignment": 30058, "sentences parallel": 49766, "corpora work": 11258, "work explore": 62657, "explore use": 19748, "autoencoder based": 5026, "methods cross": 32805, "bag words": 5503, "words representations": 62497, "sentences languages": 49746, "languages fact": 28669, "issues propose": 27099, "propose compare": 43324, "compare different": 9335, "different variations": 15121, "setting propose": 50343, "leads significant": 29325, "empirically investigate": 17363, "problem cross": 42526, "given language": 22755, "generalize different": 22138, "achieving 10": 1389, "percentage point": 40052, "improvements best": 25054, "reported results": 46454, "knowledge resources": 27598, "tasks compared": 55547, "compared monolingual": 9423, "like wordnet": 30514, "bilingual dictionaries": 7106, "typically provide": 59150, "structured information": 53156, "range possible": 44928, "word paper": 62262, "improve quality": 24911, "extraction module": 20084, "chinese data": 8303, "respectively study": 47384, "study focus": 53379, "different real": 15044, "field paper": 20765, "paper represent": 39567, "goal text": 22903, "speech tts": 52313, "particular language": 39850, "natural sounding": 36465, "like hindi": 30474, "task identify": 55121, "quality output": 44558, "speech paper": 52275, "increase efficiency": 25413, "text furthermore": 56589, "comparative study": 9323, "developing countries": 14650, "learning classification": 29555, "identify suitable": 24447, "text demonstrate": 56530, "term frequency": 56236, "achieve comparable": 1121, "comparable performance": 9301, "performance number": 40462, "research finally": 47039, "finally demonstrate": 20849, "order increase": 38628, "increase performance": 25420, "performance accuracy": 40178, "accuracy present": 1029, "new algorithm": 37125, "algorithm model": 2285, "grammatical rules": 23076, "despite fact": 14361, "input data": 26263, "success learning": 53706, "learning model": 29743, "use data": 59859, "common form": 9176, "distributions given": 15676, "representation used": 46600, "used capture": 60112, "capture meaning": 7697, "language utterances": 28572, "recently new": 45445, "gradually increasing": 23016, "increasing number": 25457, "little effort": 30875, "based analysis": 5565, "usage word": 59809, "distributions word": 15678, "word class": 62128, "using twitter": 61004, "tested using": 56400, "usage patterns": 59804, "used create": 60132, "short answer": 50550, "task topic": 55439, "modeling approaches": 34560, "approaches applied": 3764, "performing systems": 40689, "leverage lexical": 30276, "level syntactic": 30219, "syntactic information": 54304, "score given": 48849, "nlp community": 37474, "largest corpus": 29093, "overview methods": 39113, "task using": 55463, "data explore": 12348, "explore extent": 19706, "features contribute": 20548, "scoring task": 48940, "task way": 55468, "manual effort": 31737, "challenging task": 8143, "constraints model": 10375, "performance hand": 40371, "hard constraints": 23441, "popular technique": 41192, "prediction given": 41711, "given existing": 22742, "existing algorithms": 19022, "perform prediction": 40130, "performing inference": 40679, "inference given": 25660, "automatically generate": 5174, "optimization problem": 38553, "problem training": 42678, "training allows": 57931, "obtain substantial": 38197, "substantial gains": 53620, "gains accuracy": 21931, "accuracy new": 1015, "new challenging": 37148, "extraction dataset": 20055, "al 2012": 2234, "frequency inverse": 21674, "inverse document": 26927, "document frequency": 15795, "frequency tf": 21677, "demonstrate use": 13993, "relational data": 46003, "useful identifying": 60367, "identifying relevant": 24464, "relevant words": 46245, "words terms": 62528, "parser based": 39759, "syntax semantics": 54353, "words word": 62547, "vectors generated": 61486, "individual domains": 25566, "domains approach": 16235, "approach general": 3542, "adapted new": 1554, "new domains": 37176, "domains propose": 16286, "texts compared": 56865, "texts experiments": 56879, "experiments methods": 19466, "reliably identify": 46256, "identify different": 24420, "types texts": 59122, "hierarchical structures": 23694, "used methods": 60236, "methods information": 32904, "retrieval natural": 47959, "explore application": 19687, "problems using": 42737, "present novel": 41966, "metric using": 33129, "text matching": 56656, "using arabic": 60564, "test case": 56334, "different sentences": 15064, "classification sentence": 8542, "important information": 24734, "extraction task": 20118, "task applications": 54905, "systems multi": 54565, "methods work": 33104, "work explored": 62660, "new multi": 37262, "event types": 18789, "f1 average": 20181, "problem low": 42598, "results adding": 47490, "adding new": 1596, "single label": 51310, "label multi": 27715, "relationship words": 46075, "researchers developed": 47151, "language tools": 28532, "application area": 3160, "area natural": 4142, "language structure": 28506, "play key": 40974, "key role": 27332, "way solve": 61830, "sentence existing": 49554, "approach suited": 3712, "huge data": 24072, "data machine": 12472, "larger data": 29072, "data better": 12189, "better accuracy": 6844, "training proposed": 58220, "approach takes": 3716, "sentence input": 49571, "dependency relation": 14135, "tree like": 58748, "structure using": 53147, "using hybrid": 60733, "hybrid approach": 24309, "proposed tool": 43915, "quality existing": 44519, "huge number": 24074, "contain rich": 10469, "order analyze": 38594, "time space": 57216, "space paper": 51879, "massive data": 31883, "order verify": 38661, "verify effectiveness": 61535, "manually annotate": 31755, "set conduct": 50123, "results macro": 47709, "plays crucial": 40993, "development machine": 14683, "systems order": 54576, "output human": 38977, "human translation": 24249, "various automatic": 61306, "automatic metrics": 5109, "different metrics": 14991, "metrics used": 33206, "free word": 21647, "adjectives adverbs": 1846, "work adopt": 62560, "complex network": 9641, "perform comparative": 40077, "comparative analysis": 9319, "english polish": 17857, "observed real": 38146, "problem mapping": 42606, "english present": 17859, "aim research": 2159, "research make": 47071, "common semantic": 9196, "abstract syntax": 763, "automatic extraction": 5091, "annotated sentences": 2915, "provide unified": 44147, "method comparing": 32426, "semantic syntactic": 49358, "represent sentences": 46480, "convolutional architecture": 11101, "convolutional neural": 11112, "max pooling": 31950, "pooling operation": 41128, "varying length": 61431, "short long": 50559, "long range": 31018, "range relations": 44931, "binary multi": 7151, "distant supervision": 15557, "network achieves": 36693, "achieves excellent": 1323, "excellent performance": 18954, "performance tasks": 40594, "strongest baseline": 53066, "step text": 52830, "main purpose": 31454, "different grammatical": 14944, "forms word": 21377, "noun adjective": 37740, "uses information": 60514, "metrics measuring": 33180, "applications based": 3185, "corpus training": 11448, "domains require": 16289, "size corpus": 51378, "newly added": 37369, "higher correlation": 23817, "domains different": 16247, "method gives": 32519, "metrics different": 33160, "different data": 14885, "used metric": 60237, "metrics evaluation": 33163, "plays vital": 41007, "manually automatically": 31765, "manual evaluation": 31739, "evaluation time": 18740, "use automatic": 59829, "hindi english": 23938, "english hindi": 17820, "data provided": 12575, "provided input": 44164, "output using": 39008, "using various": 61016, "metrics like": 33177, "like bleu": 30465, "bleu meteor": 7205, "results human": 47663, "human ranking": 24220, "able answer": 674, "long standing": 31030, "standing goal": 52551, "promising progress": 43174, "progress recently": 43115, "recently achieved": 45401, "logical forms": 30984, "approaches effective": 3803, "cost large": 11587, "large amounts": 28832, "amounts human": 2550, "human labeled": 24186, "paper instead": 39398, "learning map": 29721, "feature representations": 20500, "method trained": 32686, "trained new": 57830, "optimization procedure": 38554, "stochastic gradient": 52855, "gradient descent": 23006, "followed fine": 21257, "step using": 52835, "using weak": 61019, "weak supervision": 61850, "supervision provided": 54090, "empirically demonstrate": 17358, "model capture": 33647, "major improvements": 31511, "method able": 32354, "trained similar": 57870, "weakly labeled": 61857, "data present": 12556, "distributional hypothesis": 15663, "multilingual data": 36076, "space embeddings": 51857, "embeddings models": 17176, "models leverage": 35182, "embeddings semantically": 17209, "semantically equivalent": 49386, "models rely": 35430, "rely word": 46305, "number diverse": 37995, "extend approach": 19819, "approach learn": 3585, "representations document": 46642, "models cross": 34874, "tasks outperforming": 55779, "outperforming prior": 38859, "prior state": 42413, "qualitative analysis": 44471, "analysis study": 2770, "method leverage": 32565, "chinese character": 8299, "character embedding": 8200, "similar semantic": 51063, "existing chinese": 19045, "word character": 62124, "paper gap": 39386, "learning continuous": 29571, "continuous representation": 10850, "neural architecture": 36931, "architecture effectively": 4044, "effectively learn": 16746, "chinese word": 8326, "existing embedding": 19062, "embedding learning": 17036, "official language": 38308, "languages spoken": 28794, "texts especially": 56877, "supervised information": 53990, "information form": 25884, "form word": 21341, "word clusters": 62130, "recently neural": 45441, "network based": 36703, "models explored": 35002, "generate highly": 22210, "highly informative": 23902, "words known": 62442, "known word": 27670, "embeddings paper": 17186, "new form": 37208, "leverage information": 30270, "information relevant": 26051, "representations use": 46781, "use neural": 59960, "neural word": 37111, "embeddings achieve": 17077, "achieve state": 1203, "achieves f1": 1325, "f1 score": 20194, "score 90": 48825, "conll 2003": 10168, "better previous": 6945, "public data": 44311, "foreign language": 21297, "language properties": 28447, "language work": 28583, "work provide": 62795, "provide empirical": 44060, "empirical evidence": 17326, "strong correlation": 53022, "structural features": 53078, "english second": 17870, "native languages": 36403, "languages leverage": 28710, "structure directly": 53098, "text perform": 56696, "target languages": 54831, "method achieves": 32360, "prediction task": 41742, "task result": 55344, "highly competitive": 23883, "methods rely": 33013, "develop model": 14598, "level model": 30162, "space using": 51903, "tokens given": 57326, "proposed recently": 43886, "used combination": 60117, "extracted relations": 20018, "using set": 60933, "application method": 3168, "model simultaneously": 34389, "simultaneously learns": 51273, "alignments model": 2391, "model captures": 33648, "semantic context": 49260, "context prior": 10693, "advantage approach": 1938, "approach demonstrated": 3481, "outperform prior": 38815, "published state": 44373, "based systems": 6078, "hand paper": 23395, "semi automatic": 49447, "construction method": 10427, "corpora domain": 11195, "text annotation": 56433, "text fragments": 56586, "word occurrences": 62253, "latent semantics": 29135, "computed using": 9884, "different large": 14972, "scale text": 48630, "hierarchical clustering": 23663, "identify common": 24417, "structures text": 53196, "large size": 29011, "quite limited": 44831, "limited coverage": 30576, "coverage paper": 11651, "approach extracting": 3534, "high coverage": 23723, "crowd sourced": 11881, "providing new": 44251, "new state": 37323, "art performances": 4354, "unsupervised settings": 59732, "tasks using": 55954, "approach experiments": 3527, "media data": 32165, "work carried": 62594, "data base": 12184, "speech text": 52308, "works studied": 62908, "used training": 60336, "training procedure": 58215, "general problem": 22083, "problem approach": 42505, "local optimum": 30948, "search algorithm": 48962, "independent feature": 25498, "new direction": 37172, "basic idea": 6329, "quite simple": 44832, "set results": 50242, "results better": 47526, "language knowledge": 28125, "knowledge paper": 27564, "present comparison": 41868, "occurrence networks": 38275, "occurring sentence": 38281, "terms average": 56269, "shortest path": 50592, "path length": 39948, "furthermore perform": 21831, "perform analysis": 40068, "based results": 5997, "results point": 47766, "used developing": 60148, "challenge developing": 7978, "increasing accuracy": 25442, "accuracy efficiency": 966, "efficiency proposed": 16853, "accurate efficient": 1077, "systems question": 54609, "question answer": 44685, "community members": 9267, "largely unknown": 29066, "factors like": 20311, "present case": 41861, "extract high": 19975, "model predict": 34220, "significantly improving": 50980, "research psychology": 47105, "conclude future": 9970, "open problems": 38440, "problems paper": 42718, "texts complex": 56866, "text level": 56649, "level experiments": 30115, "text number": 56679, "number words": 38056, "obtained results": 38221, "results showed": 47837, "standard approach": 52462, "texts results": 56919, "fixed length": 21076, "features bag": 20529, "despite popularity": 14374, "words features": 62418, "ordering words": 38667, "paragraph vector": 39637, "algorithm learns": 2283, "variable length": 61222, "texts sentences": 56923, "sentences paragraphs": 49765, "represents document": 46817, "dense vector": 14084, "trained predict": 57841, "predict words": 41661, "words document": 62401, "words models": 62460, "models empirical": 34953, "paragraph vectors": 39638, "models techniques": 35588, "techniques text": 56143, "representations finally": 46669, "achieve new": 1174, "classification sentiment": 8543, "analysis tasks": 2775, "scalable method": 48548, "method integrating": 32548, "based probabilistic": 5954, "model approach": 33572, "evaluated context": 18525, "efficient implementation": 16876, "results range": 47796, "range languages": 44921, "languages demonstrate": 28635, "representations perform": 46735, "perform word": 40160, "similarity tasks": 51125, "lead substantial": 29277, "rich languages": 48107, "languages large": 28707, "large vocabularies": 29045, "models obtain": 35271, "obtain improvements": 38178, "improvements bleu": 25055, "relative baseline": 46089, "baseline using": 6222, "using gram": 60713, "models present": 35346, "terms categories": 56274, "using measure": 60795, "wikipedia corpus": 62046, "directed graph": 15267, "provides unique": 44231, "apply method": 3333, "corpus evaluate": 11331, "increase 10": 25403, "10 compared": 37, "compared standard": 9456, "propose unsupervised": 43691, "unsupervised method": 59710, "available form": 5294, "based networks": 5900, "different time": 15100, "time points": 57193, "obtain word": 38199, "conduct thorough": 10067, "thorough evaluation": 57059, "evaluation proposed": 18684, "proposed methodology": 43834, "evaluation indicates": 18629, "correctly identify": 11492, "approach applied": 3419, "like word": 30512, "article investigate": 4453, "network structure": 36808, "window size": 62065, "point increase": 41046, "combinatorial optimization": 9057, "recently applied": 45406, "method practical": 32616, "nlp literature": 37494, "problems machine": 42711, "inference models": 25671, "models significantly": 35506, "graphical models": 23184, "models chinese": 34812, "phonetic information": 40827, "enhance text": 17925, "text model": 56667, "model obtain": 34137, "obtain better": 38162, "standard nlp": 52514, "mining methods": 33318, "methods evaluate": 32844, "linear svm": 30672, "non parallel": 37674, "tasks article": 55509, "transductive learning": 58347, "corpus method": 11379, "method requires": 32639, "requires small": 46951, "small labeled": 51477, "labeled corpus": 27737, "corpus large": 11369, "large unlabeled": 29038, "unlabeled corpus": 59563, "corpus build": 11289, "build high": 7404, "performance classifier": 40235, "corpus experimental": 11337, "results combining": 47538, "method effectively": 32472, "performance machine": 40426, "related information": 45911, "project aims": 43132, "resourced language": 47288, "goal project": 22897, "parts speech": 39910, "model languages": 34039, "resourced languages": 47289, "terms based": 56270, "languages lack": 28702, "able automatically": 677, "quality produced": 44565, "texts based": 56861, "manually written": 31789, "translated texts": 58561, "distinguish types": 15605, "texts similar": 56926, "language quality": 28454, "professional translators": 43061, "limited time": 30626, "available present": 5346, "fluent natural": 21131, "domain experts": 16068, "aim generate": 2148, "generate fluent": 22202, "fluent coherent": 21129, "multi sentence": 36005, "texts multiple": 56904, "multiple languages": 36236, "domain dependent": 16045, "use stage": 60025, "resources available": 47292, "produces significantly": 43034, "resources created": 47297, "multiple natural": 36253, "languages important": 28690, "applications data": 3194, "heterogeneous data": 23617, "data sources": 12678, "factors including": 20309, "linguistic characteristics": 30753, "characteristics paper": 8242, "language languages": 28131, "framework uses": 21621, "based new": 5909, "proposed framework": 43778, "case studies": 7796, "substantial improvements": 53622, "main components": 31427, "constraints used": 10380, "better representation": 6953, "yield better": 63090, "results present": 47774, "novel framework": 37826, "demonstrate capabilities": 13877, "english korean": 17829, "korean language": 27675, "specific prior": 52129, "knowledge training": 27632, "model supports": 34432, "parsing generation": 39781, "generation present": 22521, "human evaluations": 24158, "limited domain": 30582, "explore various": 19753, "using task": 60978, "task definition": 54998, "kernel based": 27289, "computing similarity": 9906, "learning task": 29904, "task semantic": 55352, "semantic knowledge": 49291, "knowledge experiments": 27473, "experiments suggest": 19536, "results various": 47903, "outperforming baselines": 38847, "baselines large": 6275, "comparable current": 9294, "results semantic": 47821, "task understanding": 55455, "understanding meaning": 59363, "individual word": 25587, "meaning word": 32020, "approach semantics": 3682, "semantics word": 49420, "word represented": 62291, "context vector": 10742, "generation problem": 22526, "given context": 22729, "000 000": 2, "potential solutions": 41408, "algorithm generates": 2278, "time paper": 57187, "novel neural": 37882, "network model": 36765, "rnn encoder": 48191, "encoder decoder": 17495, "recurrent neural": 45622, "networks rnn": 36907, "length vector": 30037, "vector representation": 61461, "decoder proposed": 13613, "proposed model": 43842, "model jointly": 34024, "jointly trained": 27222, "trained maximize": 57786, "conditional probability": 10002, "target sequence": 54840, "given source": 22787, "source sequence": 51797, "additional feature": 1668, "log linear": 30973, "linear model": 30660, "model qualitatively": 34263, "semantically syntactically": 49395, "translation applications": 58577, "knowledge language": 27540, "knowledge encoded": 27458, "domain models": 16114, "models approach": 34721, "example based": 18876, "approach topic": 3722, "topic paper": 57421, "paper paper": 39438, "improve accuracy": 24823, "accuracy translation": 1066, "approach improve": 3561, "select appropriate": 49099, "10 million": 46, "computational resources": 9858, "focus natural": 21185, "svm based": 54232, "method experiments": 32497, "results results": 47814, "dependency features": 14120, "features related": 20653, "effects different": 16825, "recent developments": 45303, "given rise": 22778, "build knowledge": 7406, "attempts learn": 4699, "eye tracking": 20177, "user study": 60451, "text source": 56778, "source domain": 51765, "difficult task": 15187, "task key": 55152, "key problems": 27330, "approach automatically": 3426, "proof concept": 43236, "use cases": 59839, "extraction knowledge": 20074, "framework called": 21467, "framework used": 21620, "language documents": 28034, "extracted knowledge": 20014, "text generated": 56591, "approach brings": 3436, "text considered": 56507, "main challenges": 31426, "paper provides": 39558, "translation multilingual": 58640, "different approach": 14839, "non linguistic": 37660, "digital world": 15216, "automated machine": 5048, "long way": 31048, "languages hindi": 28687, "translation approach": 58578, "language research": 28473, "shows significant": 50801, "direction paper": 15272, "paper methods": 39426, "specifically look": 52214, "methods detect": 32819, "texts propose": 56913, "propose methods": 43458, "methods handle": 32883, "specific use": 52168, "analysis results": 2743, "results experiments": 47627, "evaluate impact": 18463, "process paper": 42814, "major problem": 31519, "language makes": 28147, "instead just": 26454, "ongoing work": 38351, "art technique": 4424, "extract information": 19978, "information texts": 26118, "texts provide": 56915, "provide different": 44053, "practical solution": 41474, "social sciences": 51604, "methods tools": 33075, "tools automatically": 57377, "automatically extract": 5168, "information natural": 25983, "texts language": 56897, "fall short": 20374, "fail consider": 20331, "mixed language": 33406, "code switching": 8861, "monolingual english": 35801, "english speakers": 17881, "given new": 22764, "new class": 37150, "model yields": 34551, "validate model": 61181, "present evidence": 41906, "english english": 17800, "task automatic": 54922, "classification text": 8573, "predefined categories": 41624, "problem text": 42674, "studied different": 53222, "different communities": 14869, "processing data": 42865, "retrieval text": 47974, "classification important": 8479, "information management": 25965, "tasks like": 55723, "like topic": 30509, "topic identification": 57409, "language identification": 28098, "performance text": 40598, "patterns used": 39977, "improvement text": 25034, "extraction step": 20114, "structure learning": 53116, "work nlp": 62736, "information phrase": 26007, "answering information": 3075, "detailed comparison": 14417, "emotion recognition": 17293, "speech features": 52263, "used speech": 60310, "features work": 20699, "work evaluate": 62648, "frame level": 21439, "level feature": 30119, "feature extraction": 20485, "features paper": 20637, "ongoing research": 38350, "morphology syntax": 35851, "20 languages": 228, "knowledge particular": 27567, "word units": 62328, "semantic level": 49293, "demonstrate approach": 13865, "rich morphology": 48113, "taking inspiration": 54789, "takes advantage": 54778, "inflected forms": 25718, "available http": 5304, "multilingual corpus": 36073, "corpus study": 11439, "study paper": 53426, "presents overview": 42099, "language developed": 28027, "level information": 30133, "information fusion": 25887, "including lexical": 25267, "range linguistic": 44922, "semantic structures": 49357, "model hmm": 33962, "language natural": 28353, "processing task": 42945, "task speech": 55406, "grammatical features": 23071, "analysis language": 2687, "statistical based": 52738, "probability given": 42476, "corpus linguistic": 11373, "automatically extracted": 5170, "90 accuracy": 552, "based knowledge": 5796, "knowledge graph": 27496, "graph nodes": 23155, "nodes represent": 37593, "various real": 61382, "given data": 22733, "entity linking": 18115, "task mapping": 55206, "entities extracted": 18051, "present knowledge": 41934, "inherently difficult": 26205, "unstructured nature": 59670, "limited context": 30575, "multiple entities": 36210, "task report": 55336, "art systems": 4420, "entity disambiguation": 18102, "inference approach": 25642, "approach compare": 3454, "base population": 5548, "text analytics": 56431, "experimental setup": 19326, "model compute": 33691, "probability distribution": 42473, "generated words": 22335, "representation methods": 46551, "application areas": 3161, "network systems": 36810, "use nlp": 59964, "analysis systems": 2773, "extract classify": 19969, "using small": 60947, "word embedding": 62142, "embedding method": 17040, "supervised tasks": 54056, "maps words": 31812, "words occurring": 62471, "similar contexts": 51034, "embeddings including": 17149, "including recent": 25292, "framework multilingual": 21567, "parsing results": 39795, "embeddings tasks": 17225, "tasks investigate": 55696, "results multilingual": 47732, "embeddings languages": 17159, "languages available": 28604, "available public": 5351, "public use": 44329, "propose approach": 43296, "language family": 28067, "parameters language": 39703, "compared classical": 9391, "based predictive": 5947, "non monotonic": 37663, "order support": 38654, "auxiliary information": 5232, "multiple sets": 36282, "different possible": 15027, "addition new": 1628, "new content": 37155, "tv shows": 59000, "ner systems": 36682, "systems need": 54569, "models new": 35256, "preliminary study": 41808, "focus entity": 21160, "entity type": 18152, "collected twitter": 8969, "evaluation sets": 18713, "entities corresponding": 18042, "final model": 20823, "model shows": 34375, "strong evidence": 53028, "set entities": 50144, "entities training": 18086, "biomedical text": 7177, "text enables": 56550, "language process": 28392, "attempt address": 4678, "address challenge": 1742, "large annotated": 28843, "systems training": 54657, "expert annotators": 19573, "periods time": 40727, "time recent": 57203, "recent studies": 45349, "turk amt": 58981, "generate high": 22206, "quality annotations": 44490, "pubmed abstracts": 44380, "based simple": 6037, "increases number": 25437, "quality results": 44576, "valuable tool": 61206, "shared knowledge": 50474, "widely adopted": 61991, "using monte": 60813, "monte carlo": 35826, "network network": 36772, "widespread adoption": 62031, "results shed": 47832, "language change": 27986, "syntax trees": 54356, "input tokens": 26350, "syntax tree": 54355, "form set": 21334, "infer new": 25637, "new knowledge": 37230, "knowledge present": 27575, "distributional semantic": 15669, "models improves": 35115, "improves existing": 25128, "important ways": 24791, "gold standards": 22921, "development models": 14687, "adjective noun": 1844, "noun verb": 37743, "performance models": 40440, "unlike existing": 59594, "standard evaluations": 52492, "automatic approaches": 5071, "art models": 4293, "models perform": 35309, "future improvements": 21876, "representation learning": 46536, "learning architectures": 29524, "architectures work": 4130, "present application": 41846, "proposed unsupervised": 43920, "keyword extraction": 27350, "extraction algorithm": 20047, "language domain": 28036, "method language": 32557, "set non": 50203, "heavily depends": 23530, "automatic approach": 5070, "statistical properties": 52761, "short text": 50571, "text messages": 56657, "messages social": 32324, "communication channel": 9247, "additional challenges": 1656, "degrade performance": 13806, "performance traditional": 40603, "using real": 60895, "set large": 50181, "media corpus": 32164, "analyze effectiveness": 2813, "effectiveness machine": 16788, "order detect": 38608, "explored different": 19757, "detection using": 14539, "using text": 60988, "text normalization": 56678, "validity proposed": 61200, "baseline approaches": 6155, "approaches provide": 3904, "provide comparative": 44029, "study neural": 53418, "based occurrence": 5924, "different semantic": 15061, "semantic spaces": 49353, "compositional models": 9746, "models test": 35593, "approaches tasks": 3936, "tasks involving": 55698, "additionally evaluate": 1719, "spaces using": 51913, "larger scale": 29087, "paraphrase detection": 39739, "dialogue act": 14764, "provides method": 44213, "method improving": 32536, "contrast previous": 10881, "relatively simple": 46128, "models work": 35687, "work use": 62852, "robust model": 48255, "linear regression": 30667, "approach model": 3602, "model independent": 33997, "texts using": 56941, "using non": 60843, "non standard": 37683, "standard words": 52542, "features non": 20631, "scientific text": 48771, "conducted different": 10080, "used features": 60188, "features second": 20660, "standard deviation": 52485, "features representation": 20656, "algorithms used": 2345, "experiments best": 19367, "results achieved": 47486, "using feature": 60692, "feature set": 20504, "accuracy 87": 919, "features highly": 20597, "experiments neural": 19479, "neural machine": 36967, "unlike traditional": 59611, "traditional statistical": 57547, "translation neural": 58644, "translation aims": 58576, "aims building": 2179, "building single": 7470, "single neural": 51325, "network jointly": 36752, "translation performance": 58656, "models proposed": 35373, "encoder decoders": 17509, "encoder encodes": 17512, "source sentence": 51795, "decoder generates": 13596, "use fixed": 59893, "decoder architecture": 13586, "architecture propose": 4079, "propose extend": 43381, "allowing model": 2446, "model automatically": 33591, "parts source": 39908, "predicting target": 41683, "approach achieve": 3390, "performance comparable": 40246, "existing state": 19146, "based task": 6083, "task english": 55049, "recently introduced": 45434, "systems suffer": 54644, "suffer significant": 53781, "significant drop": 50864, "long sentences": 31024, "sentences unlike": 49800, "way address": 61790, "form final": 21320, "relatively new": 46125, "purely neural": 44397, "networks neural": 36881, "translation models": 58633, "decoder encoder": 13590, "correct translation": 11477, "representation paper": 46564, "translation using": 58701, "models rnn": 35463, "newly proposed": 37379, "network neural": 36773, "relatively short": 46127, "performance degrades": 40283, "length sentence": 30034, "furthermore proposed": 21836, "convolutional network": 11107, "network learns": 36759, "report describes": 46430, "development nlp": 14693, "easy access": 16557, "common nlp": 9187, "nlp data": 37477, "semantic graph": 49281, "triples extracted": 58805, "semantic graphs": 49282, "using information": 60736, "translating natural": 58566, "text describing": 56531, "predictive power": 41778, "related posts": 45925, "posts twitter": 41375, "geographical location": 22649, "tasks language": 55708, "significantly outperform": 50988, "majority class": 31528, "performance improved": 40382, "complex natural": 9638, "topic modeling": 57415, "textual features": 56965, "design implement": 14286, "semantics preserving": 49412, "languages make": 28723, "use unlabeled": 60064, "problem solved": 42661, "present unique": 42049, "algorithms based": 2322, "based extensive": 5721, "extensive empirical": 19862, "empirical analysis": 17319, "text input": 56629, "using pre": 60865, "size required": 51398, "algorithms work": 2347, "best possible": 6801, "text problem": 56710, "simple word": 51227, "presents algorithm": 42069, "processing algorithm": 42849, "algorithm perform": 2291, "perform comprehensive": 40081, "growing concern": 23293, "approaches mainly": 3869, "mainly focus": 31472, "focus developing": 21154, "developing automatic": 14648, "methods help": 32886, "help users": 23593, "work used": 62853, "finally proposed": 20878, "voting scheme": 61743, "different approaches": 14840, "approaches improve": 3842, "improve classification": 24829, "classification performance": 8514, "000 tweets": 13, "users use": 60486, "typically contain": 59139, "nlp tools": 37558, "used english": 60164, "order make": 38637, "make sense": 31595, "individual tokens": 25584, "data presents": 12557, "presents findings": 42085, "techniques applied": 56059, "data twitter": 12746, "approach effective": 3500, "effective tool": 16705, "content important": 10529, "important source": 24773, "data given": 12391, "language patterns": 28377, "specific domain": 52072, "domain data": 16038, "biomedical texts": 7178, "reported performance": 46453, "performance terms": 40595, "terms precision": 56309, "recall score": 45246, "dependency parse": 14125, "investigation reveals": 27006, "play vital": 40980, "degraded performance": 13808, "supervised classification": 53967, "classification models": 8496, "models learned": 35178, "labeled unlabeled": 27770, "classification natural": 8506, "data expensive": 12339, "expensive difficult": 19208, "human experts": 24164, "wide spread": 61977, "classification study": 8558, "explore idea": 19709, "past work": 39937, "political science": 41112, "model texts": 34461, "explicitly model": 19642, "demonstrate benefits": 13875, "approach improved": 3563, "prediction ability": 41689, "ability perform": 630, "corpora paper": 11229, "new tool": 37345, "analysis tool": 2780, "corpus finally": 11342, "study linguistic": 53407, "linguistic variation": 30810, "data common": 12224, "common nouns": 9189, "tree structures": 58760, "method creating": 32446, "applied language": 3276, "dictionary used": 14808, "used define": 60143, "systems text": 54653, "extraction accuracy": 20043, "data use": 12758, "explores use": 19775, "use machine": 59941, "approaches specifically": 3923, "decision tree": 13570, "nearest neighbour": 36524, "na ive": 36358, "documents task": 15917, "task automatically": 54923, "predefined set": 41625, "methods applied": 32752, "applied english": 3272, "studies conducted": 53252, "bangla language": 5524, "documents order": 15900, "methods produce": 32994, "satisfactory performance": 48524, "using variety": 61015, "text resources": 56743, "data easy": 12308, "words appear": 62365, "appear frequently": 3138, "brown corpus": 7371, "natural logic": 36460, "tasks remains": 55851, "remains open": 46342, "open question": 38441, "question possible": 44744, "possible train": 41338, "address question": 1795, "question using": 44757, "using neural": 60832, "models learning": 35179, "learning embeddings": 29614, "tensor networks": 56224, "networks experiments": 36853, "experiments evaluate": 19430, "models ability": 34650, "simulated data": 51260, "positive results": 41294, "results promising": 47779, "promising future": 43166, "representations applied": 46618, "dependency syntax": 14141, "text used": 56833, "better suited": 6972, "sets based": 50283, "selection criteria": 49136, "annotation scheme": 2968, "scheme based": 48728, "semi automatically": 49449, "structure based": 53091, "set machine": 50189, "improved translation": 24969, "words edges": 62404, "sentence new": 49606, "dependencies work": 14115, "work paves": 62745, "paves way": 39986, "translation data": 58595, "main challenge": 31425, "active learning": 1473, "challenge test": 8019, "applying natural": 3370, "information access": 25749, "news text": 37419, "number new": 38022, "new challenges": 37147, "context dependent": 10610, "dynamic nature": 16488, "tagging named": 54744, "recognition entity": 45501, "work new": 62735, "dataset conduct": 12857, "conduct empirical": 10038, "number state": 38038, "improve state": 24927, "manual automatic": 31733, "discuss advantages": 15458, "paradigm task": 39630, "translation nmt": 58645, "translation shown": 58676, "shown promising": 50741, "traditional approaches": 57511, "nmt systems": 37577, "relatively small": 46129, "vocabulary oov": 61707, "oov word": 38406, "effective technique": 16703, "problem train": 42677, "train nmt": 57621, "data augmented": 12169, "output word": 39010, "word alignment": 62109, "word target": 62319, "target sentence": 54838, "corresponding word": 11561, "word source": 62312, "sentence information": 49570, "word using": 62332, "using dictionary": 60652, "task method": 55211, "method provides": 32629, "improvement bleu": 24991, "does use": 15981, "use technique": 60042, "best result": 6814, "result achieved": 47433, "task present": 55287, "inference algorithms": 25641, "significantly reducing": 51013, "reducing computation": 45704, "features sequence": 20664, "high confidence": 23717, "small fraction": 51473, "parameter estimation": 39669, "present experiments": 41911, "run time": 48403, "errors introduced": 18242, "automating process": 5207, "methods investigate": 32908, "combining methods": 9115, "methods using": 33095, "using random": 60891, "random forests": 44879, "performance supervised": 40589, "typically require": 59154, "require training": 46894, "data investigate": 12441, "methods unsupervised": 33092, "requiring large": 46963, "amounts training": 2560, "data experiments": 12346, "experiments reveal": 19515, "small data": 51469, "data sufficient": 12707, "information sources": 26097, "building domain": 7442, "labor intensive": 27863, "process study": 42831, "study present": 53436, "present semi": 42005, "approach building": 3438, "wikipedia articles": 62043, "wide coverage": 61962, "domain ontology": 16123, "media texts": 32184, "texts significant": 56925, "significant information": 50894, "areas including": 4153, "unfortunately existing": 59452, "existing solutions": 19144, "tasks named": 55757, "texts usually": 56942, "perform poorly": 40128, "tweets using": 59026, "annotated data": 2881, "sets experiments": 50293, "better fit": 6893, "recognition performance": 45524, "different settings": 15068, "task given": 55107, "paper suggest": 39583, "suggest method": 53823, "parsing based": 39773, "based supervised": 6068, "learning used": 29928, "algorithm select": 2300, "sentence furthermore": 49562, "results encouraging": 47606, "approach automatic": 3425, "automatic detection": 5078, "develop simple": 14612, "framework capable": 21468, "analyzing texts": 2847, "language embedded": 28041, "samples used": 48493, "related language": 45914, "word2vec model": 62350, "mikolov et": 33240, "attracted great": 4878, "great attention": 23199, "attention recent": 4816, "words learned": 62446, "word2vec models": 62351, "semantic meanings": 49299, "useful various": 60397, "various nlp": 61372, "similar techniques": 51072, "process word": 42840, "embedding models": 17045, "provides detailed": 44192, "models including": 35118, "continuous bag": 10841, "skip gram": 51419, "optimization techniques": 38559, "techniques including": 56100, "including hierarchical": 25260, "negative sampling": 36634, "understanding model": 59364, "lot research": 31119, "words vector": 62543, "distributional approaches": 15662, "number tasks": 38043, "language usually": 28571, "level meaning": 30159, "fundamental task": 21792, "task nlp": 55240, "methods learning": 32924, "linguistic units": 30807, "neural models": 36973, "models suitable": 35561, "semantically rich": 49391, "rich representations": 48115, "representations representations": 46748, "multiple state": 36289, "models apply": 34719, "representations various": 46786, "tasks nlp": 55767, "efficiency paper": 16849, "explore effect": 19703, "better semantic": 6963, "input word": 26358, "positive effect": 41279, "deep models": 13728, "models explore": 35001, "graphical model": 23183, "uses text": 60540, "based dialog": 5678, "model user": 34511, "model infer": 33999, "learn context": 29351, "paper analyse": 39263, "different texts": 15099, "datasets furthermore": 13281, "furthermore compare": 21808, "results existing": 47623, "language similar": 28486, "similar languages": 51050, "languages conclude": 28621, "significant progress": 50915, "semantic parsers": 49306, "representation introduce": 46531, "techniques tackle": 56140, "tackle problems": 54711, "style semantic": 53496, "eliminates need": 16989, "fully exploit": 21725, "order better": 38598, "better guide": 6895, "graph generation": 23140, "structured representation": 53173, "representation input": 46529, "text pre": 56701, "vectors representing": 61497, "vector representing": 61464, "logistic regression": 30994, "regression classifier": 45813, "compare method": 9345, "method constructing": 32441, "training effective": 58077, "disambiguation tasks": 15362, "theoretic approach": 57012, "confounding factors": 10150, "gives rise": 22809, "spurious correlations": 52388, "stylistic features": 53510, "features propose": 20650, "propose test": 43666, "topic conversation": 57398, "problem proposed": 42635, "high scores": 23800, "representation word": 46605, "models use": 35650, "use single": 60018, "study effects": 53366, "using multiple": 60822, "approach offers": 3617, "objective project": 38101, "answering using": 3102, "additional modules": 1689, "questions work": 44816, "network cnn": 36719, "structure data": 53095, "image data": 24534, "structure word": 53148, "accurate prediction": 1084, "prediction instead": 41712, "using low": 60780, "dimensional word": 15240, "vectors input": 61487, "input directly": 26266, "directly apply": 15306, "data leads": 12458, "small text": 51505, "image text": 24547, "convolution layer": 11093, "layer proposed": 29204, "combine multiple": 9069, "higher accuracy": 23812, "comparison state": 9506, "based limited": 5814, "corpus english": 11329, "point view": 41051, "text use": 56832, "tagging task": 54752, "systems focus": 54506, "focus small": 21201, "small set": 51499, "large fine": 28879, "label set": 27726, "dramatic improvements": 16385, "improvements downstream": 25070, "downstream tasks": 16352, "labeled training": 27766, "data existing": 12336, "existing fine": 19069, "systems obtain": 54573, "automatically using": 5205, "entities types": 18087, "depends context": 14162, "generalization propose": 22128, "propose task": 43659, "task context": 54976, "local context": 30931, "context sentence": 10713, "new resources": 37305, "task 12": 54867, "provide baseline": 44012, "data develop": 12277, "language specifically": 28498, "specifically model": 52217, "english model": 17844, "model language": 34036, "word question": 62274, "question model": 44736, "important task": 24777, "task natural": 55233, "processing used": 42964, "applications automatic": 3184, "scale applications": 48554, "applications previous": 3234, "datasets paper": 13360, "build large": 7408, "dataset million": 12995, "challenges real": 8074, "world scenarios": 62957, "cost function": 11582, "learning problem": 29817, "feature learning": 20494, "based deep": 5672, "model complicated": 33682, "novel feature": 37821, "based neural": 5901, "network outperforms": 36776, "outperforms methods": 38908, "features specifically": 20671, "best performance": 6789, "performance model": 40438, "model surpasses": 34433, "baseline significant": 6209, "relative improvement": 46101, "research topics": 47133, "decades ago": 13541, "set languages": 50180, "empirical studies": 17349, "entirely different": 18032, "knowledge transfer": 27633, "purpose language": 44403, "results recent": 47800, "years witnessed": 63081, "based question": 5970, "systems systems": 54646, "web information": 61887, "information produce": 26025, "systems designed": 54475, "answering named": 3083, "analysis approach": 2615, "convert input": 11072, "input question": 26324, "results performance": 47764, "respectively furthermore": 47372, "easily applied": 16537, "applied new": 3286, "new languages": 37233, "time human": 57162, "relational semantics": 46012, "end employ": 17633, "results knowledge": 47687, "base completion": 5541, "representations trained": 46773, "recent works": 45376, "predictive models": 41776, "embeddings trained": 17233, "corresponding words": 11562, "present systematic": 42033, "systematic study": 54404, "study use": 53470, "good performance": 22936, "performance word": 40631, "dimensionality reduction": 15242, "encoding function": 17566, "function used": 21761, "used infer": 60212, "unseen words": 59659, "clear advantage": 8651, "models train": 35601, "train new": 57620, "allocation lda": 2431, "addition proposed": 1639, "proposed use": 43922, "recently developed": 45419, "method unsupervised": 32692, "approach improves": 3564, "improves interpretability": 25134, "allows better": 2452, "computational performance": 9853, "results future": 47643, "applied improve": 3275, "problem word": 42687, "method generating": 32517, "search large": 48974, "approach generating": 3547, "generating word": 22405, "efficient compared": 16865, "boosted performance": 7259, "performance natural": 40451, "tasks usually": 55956, "words multiple": 62462, "negative effect": 36617, "representations language": 46698, "simple model": 51193, "model enables": 33816, "recent techniques": 45358, "vectors represent": 61496, "able effectively": 691, "computationally efficient": 9873, "efficient manner": 16882, "words bow": 62374, "common approach": 9164, "used feature": 60186, "training classifier": 57951, "number features": 38005, "information loss": 25960, "information lost": 25961, "overcome limitation": 39066, "model provide": 34255, "provide good": 44081, "word vector": 62334, "propose average": 43309, "representations obtain": 46727, "obtain representations": 38188, "means clustering": 32039, "semantic concepts": 49254, "model outperforms": 34156, "similar results": 51062, "results traditional": 47886, "model far": 33881, "method integrate": 32546, "lines work": 30689, "work unsupervised": 62851, "semantics semantic": 49413, "relations text": 46059, "consists components": 10321, "semantic role": 49337, "role labeling": 48311, "labeling model": 27787, "given rich": 22777, "rich set": 48122, "syntactic lexical": 54307, "model relies": 34305, "predict argument": 41635, "annotated resources": 2911, "method performs": 32613, "performs par": 40709, "induction methods": 25607, "unlike previous": 59598, "incorporate prior": 25361, "prior linguistic": 42405, "language neural": 28356, "models learn": 35174, "representations embeddings": 46648, "embeddings capture": 17091, "capture rich": 7705, "rich linguistic": 48110, "embeddings learned": 17164, "learned neural": 29470, "models recently": 35410, "model embeddings": 33807, "models outperform": 35283, "monolingual models": 35804, "models tasks": 35586, "require knowledge": 46864, "syntactic role": 54320, "desirable properties": 14344, "languages finally": 28673, "method training": 32687, "neural translation": 37108, "models large": 35164, "vocabulary expansion": 61701, "algorithm results": 2298, "embedding spaces": 17063, "online demo": 38361, "web page": 61889, "analyses indicate": 2598, "based embeddings": 5696, "embeddings used": 17238, "used applications": 60090, "according similarity": 869, "monolingual embeddings": 35800, "embeddings better": 17089, "inter word": 26590, "word relatedness": 62277, "zero shot": 63152, "representations extracted": 46665, "extracted text": 20022, "learn general": 29373, "mapping functions": 31802, "feature spaces": 20507, "vectors used": 61499, "high proportion": 23767, "propose simple": 43631, "simple method": 51191, "leads consistent": 29310, "consistent improvements": 10278, "shot experiments": 50615, "experiments cross": 19393, "image retrieval": 24546, "domains present": 16284, "distributed vector": 15627, "representation based": 46494, "gradient based": 23003, "based training": 6103, "gives state": 22810, "dimension reduction": 15224, "current work": 12028, "provides interesting": 44207, "better capturing": 6861, "dot product": 16318, "cosine similarity": 11575, "decision boundaries": 13560, "density based": 14092, "learning representations": 29842, "gaussian distributions": 22012, "various word": 61418, "investigate ability": 26938, "embeddings model": 17175, "explore novel": 19721, "present hierarchical": 41924, "document model": 15812, "model architecture": 33576, "architecture designed": 4041, "document structure": 15835, "using model": 60806, "model use": 34507, "computer vision": 9894, "identify extract": 24422, "topic relevant": 57426, "sentences introduce": 49740, "evaluation technique": 18736, "automatic sentence": 5123, "consuming human": 10446, "validation data": 61193, "investigate problem": 26977, "relation learning": 45987, "algorithm takes": 2304, "good predictors": 22939, "experiments task": 19540, "specific embeddings": 52076, "quality efficiency": 44513, "space based": 51850, "based best": 5602, "second based": 48998, "faster convergence": 20435, "existing translation": 19166, "model specific": 34401, "framework introduce": 21548, "dimensional feature": 15232, "lexical resource": 30380, "applications paper": 3227, "approach construct": 3469, "applying machine": 3364, "method construct": 32440, "effectiveness proposed": 16803, "competitive result": 9560, "compared english": 9403, "sets respectively": 50305, "generating novel": 22386, "textual description": 56959, "interesting problem": 26653, "vision natural": 61641, "processing paper": 42918, "able generate": 696, "sentences given": 49729, "given sample": 22779, "image model": 24539, "model strong": 34412, "image representation": 24544, "representation generated": 46524, "previously trained": 42353, "trained convolutional": 57696, "phrases used": 40856, "given image": 22747, "simple language": 51183, "model produce": 34238, "given test": 22793, "models achieves": 34683, "achieves comparable": 1312, "comparable results": 9308, "results recently": 47802, "dataset speech": 13098, "topic natural": 57419, "nlp task": 37530, "task language": 55159, "words important": 62432, "systems used": 54662, "used new": 60251, "new applications": 37130, "lexical categories": 30355, "words use": 62539, "class words": 8414, "closed class": 8696, "syntactical features": 54337, "used research": 60291, "ability approach": 594, "knowledge human": 27518, "shows performance": 50791, "used syntactic": 60320, "texts including": 56891, "current challenges": 11965, "data representation": 12602, "inter intra": 26582, "standard data": 52481, "data format": 12371, "new publicly": 37292, "simple powerful": 51202, "set metrics": 50193, "metrics quantify": 33194, "recognition propose": 45527, "novel metrics": 37871, "proposed metrics": 43840, "quantitative analysis": 44616, "analysis based": 2621, "visual information": 61656, "multimodal models": 36153, "representations learning": 46707, "learning predict": 29813, "set words": 50278, "visual representations": 61667, "linguistic visual": 30812, "visual features": 61655, "models achieve": 34666, "performance variety": 40619, "shot setup": 50644, "model training": 34479, "models discover": 34924, "paving way": 39988, "meaning paper": 32009, "paper concerned": 39294, "nearest neighbor": 36518, "neighbor search": 36659, "model ranked": 34269, "space provides": 51889, "provides important": 44204, "information different": 25812, "used word": 60353, "sense induction": 49485, "used determine": 60146, "define set": 13779, "models provide": 35379, "known semantic": 27666, "attracting attention": 4892, "building recent": 7465, "shot learning": 50624, "paying attention": 39992, "data containing": 12249, "implicitly learn": 24668, "achieve better": 1115, "linguistic representation": 30788, "approach performs": 3640, "performs comparably": 40702, "outperforms various": 38959, "significantly improve": 50962, "object recognition": 38083, "languages exhibit": 28660, "lexicon induction": 30411, "research effort": 47026, "research improving": 47054, "level models": 30163, "models similar": 35511, "approach outperform": 3620, "outperform word": 38833, "challenge machine": 7994, "used approach": 60091, "approach apply": 3421, "apply word": 3356, "model lm": 34069, "sentence words": 49673, "best list": 6777, "present methods": 41946, "methods deep": 32814, "visual modalities": 61660, "audio visual": 4933, "study approach": 53327, "uni modal": 59460, "deep networks": 13734, "networks trained": 36917, "trained separately": 57864, "hidden layers": 23640, "deep network": 13733, "fusion model": 21858, "model achieves": 33510, "second present": 49017, "new deep": 37170, "network architecture": 36699, "architecture uses": 4096, "softmax layer": 51632, "class specific": 8410, "rate reduction": 45015, "task develop": 55016, "agglutinative languages": 2070, "structure natural": 53120, "language sentence": 28480, "information word": 26159, "help understand": 23592, "understand language": 59301, "literature survey": 30863, "understand different": 59291, "languages various": 28818, "techniques paper": 56117, "survey research": 54218, "research papers": 47090, "network language": 36753, "train large": 57600, "address questions": 1796, "respect model": 47348, "model size": 34391, "set size": 50247, "computational costs": 9840, "analysis shows": 2758, "relative word": 46112, "word error": 62200, "asr task": 4562, "recently released": 45461, "billion word": 7121, "word language": 62221, "language modelling": 28223, "bleu point": 7208, "prediction language": 41714, "models generate": 35057, "generate target": 22253, "phrases words": 40858, "model dependency": 33749, "solving problem": 51704, "model attempts": 33583, "attempts solve": 4700, "sub problems": 53528, "model determine": 33759, "scale monolingual": 48598, "data order": 12523, "alleviate data": 2402, "sparsity problem": 51982, "experiments chinese": 19371, "english translation": 17893, "using syntactic": 60973, "framework supports": 21610, "syntactic tags": 54332, "language training": 28535, "training texts": 58298, "focus work": 21214, "types semantic": 59115, "including named": 25278, "pre existing": 41502, "described paper": 14214, "significantly outperformed": 50993, "baseline model": 6185, "highest scores": 23857, "scores reported": 48918, "english test": 17889, "supports hypothesis": 54142, "information improve": 25913, "quality based": 44496, "language propose": 28448, "model combination": 33664, "self organizing": 49201, "12 million": 109, "semantic consistency": 49257, "showed high": 50665, "level semantic": 30205, "time periods": 57192, "level method": 30160, "highly scalable": 23914, "results popular": 47767, "popular datasets": 41162, "datasets task": 13452, "paper uses": 39603, "uses natural": 60523, "tree based": 58741, "based regression": 5985, "methods combination": 32786, "algorithm outperforms": 2289, "model addresses": 33543, "sentence embedding": 49546, "hot topic": 24030, "processing research": 42933, "research using": 47140, "using recurrent": 60900, "networks long": 36870, "long short": 31027, "memory lstm": 32258, "lstm cells": 31254, "ability capture": 597, "capture long": 7693, "lstm rnn": 31281, "richer information": 48129, "layer network": 29193, "network provides": 36792, "representation sentence": 46578, "sentence paper": 49612, "supervised manner": 54015, "web search": 61894, "analysis performed": 2715, "works model": 62898, "embedding vector": 17070, "vector used": 61472, "different applications": 14838, "automatic keyword": 5098, "detection topic": 14536, "network perform": 36781, "document retrieval": 15828, "difficult language": 15172, "embedding vectors": 17071, "search task": 48987, "shown significantly": 50751, "generates sentence": 22355, "retrieval tasks": 47972, "tasks comparison": 55550, "method paper": 32607, "significantly outperforms": 50996, "order features": 38620, "extend previous": 19826, "corpus order": 11396, "surface syntactic": 54155, "achieving absolute": 1391, "web text": 61900, "address problems": 1791, "state transducers": 52712, "representation allows": 46489, "use explore": 59885, "classification experiments": 8469, "dataset results": 13068, "compared approaches": 9380, "terms accuracy": 56263, "accuracy recall": 1035, "recall f1": 45240, "models great": 35070, "great progress": 23212, "progress improving": 43100, "models predict": 35341, "predict target": 41656, "target translation": 54854, "translation source": 58679, "context information": 10658, "does depend": 15942, "paper explore": 39361, "prediction propose": 41731, "based convolutional": 5650, "network learn": 36758, "learn sentence": 29421, "sentence semantic": 49639, "representations sentence": 46752, "feature representation": 20499, "feed forward": 20711, "forward neural": 21405, "network better": 36711, "translations using": 58711, "local global": 30938, "global information": 22831, "scale experiments": 48572, "experiments method": 19462, "method obtain": 32591, "strong baseline": 53002, "model augmented": 33588, "augmented neural": 4982, "joint model": 27178, "model superior": 34428, "sequence information": 49932, "information time": 26124, "lstm networks": 31276, "strong results": 53046, "results variety": 47901, "sequence modeling": 49955, "modeling tasks": 34629, "lstm structure": 31283, "linear chain": 30649, "syntactic properties": 54315, "tree lstm": 58749, "tree structured": 58759, "existing systems": 19153, "lstm baselines": 31248, "baselines tasks": 6308, "tasks predicting": 55805, "predicting semantic": 41680, "task sentiment": 55360, "stanford sentiment": 52560, "systems usually": 54664, "usually use": 61072, "use linear": 59934, "linear combination": 30653, "features model": 20623, "model quality": 34264, "quality translation": 44592, "model current": 33733, "propose non": 43518, "non linear": 37659, "interaction features": 26598, "training non": 58195, "linear models": 30661, "models discuss": 34927, "discuss possible": 15478, "learning performance": 29802, "performance experimental": 40335, "features hierarchical": 20595, "method produce": 32622, "complex task": 9668, "paper make": 39421, "make attempt": 31540, "develop general": 14590, "general framework": 22061, "tasks define": 55573, "using measures": 60796, "compare simple": 9365, "learning problems": 29818, "represent input": 46474, "input texts": 26347, "effect performance": 16616, "researchers practitioners": 47163, "formulating problem": 21390, "sequential model": 50046, "based optimization": 5929, "optimization technique": 38558, "models competitive": 34839, "based latent": 5809, "variable models": 61224, "models neural": 35252, "topic classification": 57394, "problems approach": 42695, "black box": 7190, "text require": 56740, "require manual": 46877, "manual tuning": 31752, "knowledge shown": 27610, "tasks approaches": 55504, "variety knowledge": 61274, "knowledge proposed": 27580, "approach robust": 3677, "discussed paper": 15486, "propose regularization": 43600, "regularization terms": 45844, "conduct extensive": 10049, "robustness proposed": 48294, "proposed methods": 43835, "methods experimental": 32850, "demonstrate proposed": 13963, "methods obtain": 32964, "remarkable improvements": 46357, "baselines present": 6287, "task particular": 55270, "automatically generated": 5176, "generated speech": 22320, "alignment using": 2388, "using state": 60960, "art visual": 4438, "deep convolutional": 13688, "technique outperforms": 56041, "based keyword": 5794, "proposed neural": 43868, "devlin et": 14730, "al 2014": 2236, "source context": 51757, "context window": 10744, "achieving state": 1424, "relevant source": 46235, "source information": 51773, "target information": 54819, "context entire": 10625, "unified representation": 59477, "representation target": 46589, "tasks proposed": 55822, "model achieve": 33502, "points average": 41067, "understand meaning": 59304, "explore methods": 19714, "number methods": 38017, "defined word": 13788, "word ordering": 62255, "areas research": 4158, "framework generating": 21530, "data training": 12741, "training model": 58178, "highly effective": 23897, "extraction technique": 20120, "method translation": 32691, "capture context": 7655, "curriculum learning": 12043, "learning strategy": 29897, "strategy train": 52952, "train model": 57606, "model classify": 33661, "phrase sentence": 40845, "level context": 30081, "context using": 10741, "using training": 60995, "approach significantly": 3692, "propose neural": 43488, "response generator": 47394, "decoder framework": 13594, "decoding process": 13640, "process based": 42761, "latent representation": 29131, "encoding decoding": 17564, "conversation data": 11032, "study shows": 53460, "grammatically correct": 23082, "appropriate responses": 3966, "outperforming state": 38860, "state arts": 52696, "retrieval based": 47941, "translation question": 58666, "short texts": 50573, "problem called": 42515, "called deep": 7543, "setting approach": 50317, "structure test": 53140, "matching problem": 31919, "wang et": 61765, "al 2013": 2235, "including using": 25318, "using dependency": 60650, "trees based": 58767, "large margins": 28907, "vanishing gradient": 61219, "network capture": 36716, "range dependencies": 44912, "traditional neural": 57537, "semantic matching": 49297, "model internal": 34014, "internal structures": 26691, "step goal": 52811, "propose convolutional": 43340, "vision speech": 61643, "proposed models": 43862, "patterns different": 39967, "matching tasks": 31923, "tasks different": 55589, "tasks demonstrates": 55581, "demonstrates efficacy": 14032, "efficacy proposed": 16834, "explore usage": 19747, "words second": 62502, "provide robust": 44126, "principled approach": 42386, "statistical structure": 52764, "language remains": 28466, "words frequency": 62421, "jensen shannon": 27153, "shannon divergence": 50447, "suggest future": 53818, "end neural": 17688, "based architectures": 5576, "en fr": 17417, "factors success": 20315, "availability high": 5249, "quality parallel": 44561, "work investigate": 62697, "monolingual corpora": 35793, "corpora neural": 11226, "translation compared": 58589, "based hierarchical": 5770, "resource language": 47232, "task chinese": 54949, "targeted tasks": 54860, "tasks parallel": 55791, "high resource": 23791, "resource languages": 47238, "bleu scores": 7214, "analysis important": 2678, "studies word": 53312, "word structure": 62316, "nlp research": 37521, "analysis techniques": 2776, "techniques popular": 56121, "day day": 13501, "morphological structure": 35844, "structure work": 53150, "based finite": 5737, "model lstm": 34079, "principled way": 42389, "language image": 28100, "text fundamental": 56588, "achieving performance": 1417, "performance better": 40219, "architecture named": 4067, "word sequence": 62306, "sequence prediction": 49967, "different previous": 15033, "work neural": 62734, "modeling generation": 34579, "rnn lstm": 48201, "instead use": 26465, "use convolutional": 59855, "predict word": 41660, "different existing": 14924, "networks language": 36868, "model effectively": 33798, "designed task": 14332, "task argue": 54912, "dependencies model": 14108, "model fast": 33882, "easy train": 16566, "experiments text": 19545, "powerful approach": 41433, "unstructured textual": 59674, "textual data": 56956, "data unstructured": 12756, "electronic medical": 16969, "model single": 34390, "score based": 48835, "common phenomenon": 9191, "structural semantic": 53084, "paper firstly": 39376, "examples finally": 18903, "method apply": 32385, "input words": 26359, "features words": 20698, "related data": 45895, "domain semantic": 16152, "extended version": 19839, "languages work": 28822, "work addresses": 62558, "addresses problem": 1814, "number language": 38014, "spanish portuguese": 51946, "spanish chinese": 51939, "proficiency levels": 43067, "measure based": 32045, "algorithm works": 2312, "unsupervised word": 59748, "embeddings shown": 17215, "problem unsupervised": 42679, "representative models": 46799, "embeddings observe": 17182, "observe consistent": 38130, "improvements languages": 25077, "analyze effect": 2812, "effect various": 16623, "embeddings downstream": 17118, "results paper": 47757, "set methods": 50192, "words like": 62449, "normalization methods": 37706, "learn rich": 29416, "rich semantic": 48119, "recent nlp": 45328, "research developing": 47017, "developing models": 14657, "learn useful": 29443, "representations phrases": 46736, "bridging gap": 7326, "language embedding": 28042, "models effectively": 34947, "general knowledge": 22063, "tasks neural": 55763, "better existing": 6887, "commercial systems": 9155, "systems rely": 54616, "specific engineering": 52078, "results highlight": 47657, "effectiveness neural": 16799, "neural embedding": 36948, "definition based": 13793, "models understand": 35645, "networks dnns": 36846, "significant performance": 50902, "language recognition": 28464, "recognition tasks": 45543, "possible using": 41340, "using single": 60945, "approach shown": 3688, "substantial performance": 53626, "performance improvements": 40386, "recognition task": 45542, "recognition evaluation": 45504, "constituency trees": 10353, "based convolution": 5649, "architecture allows": 4025, "output layer": 38981, "enables effective": 17439, "tasks sentiment": 55873, "analysis question": 2736, "outperforms previous": 38919, "results including": 47673, "existing neural": 19116, "shedding light": 50530, "based style": 6067, "recent times": 45360, "involving human": 27025, "human bias": 24117, "new metric": 37257, "proposed metric": 43839, "different people": 15022, "metric human": 33117, "classification process": 8522, "process experimental": 42778, "performance using": 40617, "novel metric": 37870, "different human": 14950, "human expert": 24162, "paper contributes": 39308, "joint embedding": 27167, "embedding model": 17043, "pair entities": 39151, "texts proposed": 56914, "dimensional vector": 15237, "make accurate": 31539, "accurate predictions": 1085, "performance approach": 40196, "cutting edge": 12068, "experiments model": 19467, "achieves significant": 1364, "relation extraction": 45972, "extraction present": 20095, "work identify": 62681, "identify relevant": 24440, "semantic levels": 49294, "inference text": 25699, "features like": 20616, "common words": 9211, "approaches reported": 3914, "binary classification": 7144, "traditional chinese": 57512, "experiments test": 19543, "individual features": 25568, "interesting results": 26655, "written texts": 63013, "textual properties": 56975, "paper study": 39578, "representation text": 46592, "text graph": 56611, "law distribution": 29173, "distribution experiments": 15638, "metrics correlate": 33154, "authorship attribution": 5010, "particular types": 39869, "words information": 62436, "applications involving": 3214, "better language": 6907, "model propose": 34245, "amounts data": 2546, "smaller training": 51526, "significant reduction": 50916, "text useful": 56834, "useful learning": 60374, "domain text": 16209, "using social": 60951, "useful data": 60359, "space specifically": 51899, "methods developed": 32823, "validated using": 61187, "using high": 60728, "quality datasets": 44507, "state affairs": 52573, "context paper": 10684, "art natural": 4306, "studied paper": 53231, "systems dealing": 54471, "human cognition": 24122, "intelligent systems": 26544, "robust automatic": 48240, "exploit large": 19659, "extremely difficult": 20156, "unsupervised machine": 59706, "probabilistic models": 42467, "models text": 35595, "text recently": 56728, "standard approaches": 52463, "approaches relying": 3912, "difficult scale": 15186, "report present": 46443, "present empirical": 41897, "variational inference": 61248, "scheme applied": 48727, "online inference": 38371, "qualitative results": 44481, "model need": 34120, "accurately model": 1096, "propose self": 43614, "self adaptive": 49173, "sentence model": 49602, "representations suitable": 46765, "suitable task": 53860, "task hand": 55112, "models benchmark": 34765, "sets word": 50312, "unlabelled data": 59587, "data shown": 12659, "shown high": 50717, "paper perform": 39440, "extrinsic evaluation": 20170, "evaluation popular": 18675, "embedding methods": 17041, "sequence labelling": 49941, "task based": 54929, "representations using": 46785, "training instances": 58135, "sufficient achieve": 53799, "achieve competitive": 1125, "competitive results": 9561, "results word": 47912, "embeddings lead": 17162, "oov words": 38407, "words domain": 62403, "little difference": 30873, "tasks consider": 55554, "analysis document": 2653, "used document": 60154, "called emph": 7545, "open data": 38416, "space word": 51904, "nlp especially": 37485, "especially given": 18277, "given recent": 22776, "recent methods": 45319, "work assumes": 62576, "single vector": 51356, "vector word": 61474, "word type": 62325, "tasks present": 55807, "learns multiple": 29967, "multiple embeddings": 36208, "embeddings word": 17246, "performing word": 40693, "learning non": 29783, "context task": 10730, "task demonstrate": 55000, "billion tokens": 7120, "base kb": 5543, "new facts": 37200, "making inferences": 31657, "multi hop": 35969, "presents approach": 42072, "network rnn": 36796, "vector embeddings": 61452, "binary relation": 7153, "unseen training": 59658, "training time": 58299, "time single": 57214, "high capacity": 23709, "predict new": 41648, "compositional model": 9745, "new dataset": 37163, "method improves": 32534, "leveraging pre": 30336, "pre trained": 41520, "trained embeddings": 57716, "related concepts": 45889, "related entities": 45904, "entities given": 18054, "given topic": 22797, "wikipedia text": 62055, "compute semantic": 9879, "given query": 22774, "study examine": 53372, "important entities": 24722, "entities relationships": 18079, "classification used": 8578, "lot attention": 31114, "attention recently": 4818, "recently popular": 45447, "canonical correlation": 7590, "correlation analysis": 11518, "approaches learn": 3858, "learn joint": 29385, "joint representation": 27187, "approaches outperform": 3888, "approaches task": 3935, "task transfer": 55445, "transfer learning": 58374, "approach called": 3440, "learned using": 29488, "approaches recent": 3908, "recent advances": 45280, "map words": 31798, "rich information": 48102, "language representation": 28468, "approach simple": 3695, "work language": 62702, "probability model": 42481, "million sentences": 33257, "yelp reviews": 63085, "user posts": 60433, "temporal evolution": 56186, "constructed using": 10417, "graph structure": 23170, "allows easy": 2459, "benchmark dataset": 6443, "propose concept": 43327, "concept level": 9924, "cause model": 7885, "model utilized": 34518, "event related": 18787, "related tweets": 45950, "results dataset": 47568, "sina weibo": 51280, "baseline methods": 6183, "corpus specifically": 11435, "specifically proposed": 52225, "uses lexical": 60519, "lexical patterns": 30376, "automatically identify": 5181, "compared model": 9420, "evaluated using": 18553, "current standard": 12010, "novel evaluation": 37819, "evaluation set": 18712, "shown good": 50711, "representations natural": 46722, "language vocabulary": 28580, "paper summarizes": 39586, "applying neural": 3372, "models task": 35585, "similarity evaluation": 51094, "depending task": 14158, "task introduce": 55144, "task achieved": 54876, "models previously": 35354, "texts russian": 56921, "national corpus": 36396, "models outperforming": 35290, "larger corpora": 29070, "especially true": 18308, "trained larger": 57772, "performance high": 40372, "semantic vectors": 49375, "learned way": 29491, "way used": 61836, "used variety": 60346, "variety linguistic": 61278, "linguistic tasks": 30800, "exciting field": 18970, "field study": 20770, "matrix multiplication": 31942, "recognition algorithm": 45491, "rewriting systems": 48080, "running time": 48407, "currently best": 12032, "mildly context": 33245, "combinatory categorial": 9059, "approach detection": 3485, "associated text": 4625, "additional information": 1675, "lead improved": 29260, "methods relation": 33011, "using distributional": 60663, "distributional information": 15664, "using approach": 60561, "approach cross": 3473, "validation accuracy": 61192, "accuracy dataset": 956, "dataset improved": 12960, "human labeling": 24188, "labeling results": 27792, "score 86": 48821, "pairs present": 39207, "approach extract": 3533, "information text": 26117, "critically important": 11799, "range domains": 44916, "documents propose": 15905, "based entity": 5706, "comprehensive set": 9798, "set common": 50120, "highly robust": 23913, "text approach": 56438, "finally present": 20875, "rise social": 48155, "identify new": 24432, "new opportunities": 37276, "level approach": 30064, "detection sentiment": 14522, "sentiment polarity": 49854, "linguistically motivated": 30817, "motivated features": 35866, "accuracy furthermore": 980, "furthermore introduce": 21824, "introduce automatic": 26784, "collected annotated": 8954, "dialectal arabic": 14748, "arabic tweets": 4007, "performance levels": 40417, "framework different": 21495, "types features": 59088, "popular social": 41187, "data rich": 12618, "increasing popularity": 25459, "detection algorithms": 14458, "data time": 12734, "users provide": 60476, "systems widely": 54670, "systems literature": 54551, "algorithms including": 2327, "presents results": 42102, "correlate human": 11503, "collected crowdsourcing": 8957, "based weighted": 6132, "multiple domains": 36204, "domains language": 16267, "sequence approach": 49907, "benchmarking datasets": 6508, "tagging accuracy": 54735, "advancing research": 1934, "high cost": 23721, "implemented evaluated": 24648, "crowdsourcing approach": 11889, "approach produce": 3653, "existing corpus": 19049, "agreement human": 2106, "human annotators": 24105, "data annotation": 12136, "annotation guidelines": 2953, "work text": 62841, "generation task": 22558, "task used": 55459, "object categories": 38081, "introduce dataset": 26796, "annotated natural": 2907, "language descriptions": 28022, "learn data": 29356, "textual descriptions": 56960, "method successfully": 32674, "generation previous": 22522, "approach human": 3556, "evaluation task": 18734, "automated metric": 5053, "strongly correlates": 53070, "correlates human": 11514, "framework allows": 21457, "aspects language": 4543, "syntactic structures": 54330, "structures sentence": 53194, "extra information": 19962, "information conveyed": 25793, "current paper": 11995, "paper extend": 39371, "extend framework": 19822, "framework order": 21576, "information using": 26147, "study analyzes": 53325, "european parliament": 18430, "evolved time": 18839, "considering context": 10256, "analyzed using": 2836, "using new": 60837, "modeling method": 34597, "matrix factorization": 31941, "findings suggest": 20917, "paper overview": 39437, "overview shared": 39116, "segmentation speech": 49087, "micro blog": 33222, "dataset shared": 13082, "task consists": 54973, "task sub": 55417, "sub tasks": 53534, "systems different": 54479, "resources introduce": 47307, "dataset task": 13113, "participating systems": 39824, "test results": 56364, "results online": 47751, "available open": 5337, "questions designed": 44783, "designed evaluate": 14313, "evaluate human": 18462, "human intelligence": 24172, "comprehension questions": 9775, "measure human": 32053, "multiple senses": 36279, "especially deep": 18271, "learning technologies": 29908, "quite challenging": 44826, "simply applying": 51247, "applying existing": 3362, "performance mainly": 40430, "complex relations": 9656, "tackle challenges": 54700, "challenges propose": 8071, "framework consisting": 21478, "build classifier": 7389, "specific type": 52166, "novel word": 37956, "method considers": 32435, "type questions": 59067, "questions propose": 44800, "relation representations": 45995, "representations experimental": 46660, "shown proposed": 50744, "framework outperform": 21577, "methods solving": 33047, "study results": 53454, "uses deep": 60503, "step closer": 52803, "closer human": 8710, "article present": 4454, "approaches attempt": 3768, "interpretable way": 26732, "detecting different": 14446, "information capturing": 25776, "knowledge learned": 27547, "related topics": 45948, "possible approaches": 41315, "generation new": 22506, "research explore": 47032, "conducted series": 10093, "similarity sentences": 51120, "comparison results": 9504, "sequence sequence": 49978, "translation methods": 58629, "based generation": 5748, "model recently": 34282, "recently shown": 45466, "results tasks": 47878, "text image": 56621, "image captioning": 24531, "work approach": 62571, "quality terms": 44587, "terms bleu": 56272, "applicability models": 3154, "task input": 55139, "generation approach": 22420, "able significantly": 725, "bi directional": 6999, "directional long": 15281, "lstm neural": 31277, "networks use": 36920, "alignment information": 2370, "conventional approaches": 11000, "propose employ": 43366, "architectures learning": 4115, "modal interactions": 33459, "words question": 62491, "layer learn": 29187, "representation classification": 46498, "demonstrate efficacy": 13905, "qa datasets": 44450, "significantly outperforming": 50994, "data method": 12484, "models words": 35686, "time models": 57180, "used end": 60162, "scores obtained": 48911, "obtained method": 38215, "features combined": 20539, "recent success": 45355, "application neural": 3173, "networks model": 36875, "model various": 34527, "architecture neural": 4068, "layers capture": 29219, "capture important": 7681, "higher order": 23834, "interaction network": 26609, "multitask learning": 36323, "network parameters": 36780, "arabic english": 3999, "english chinese": 17782, "low high": 31153, "present approaches": 41848, "speech signals": 52294, "transition probabilities": 58542, "networks dnn": 36845, "approach combined": 3451, "adaptation method": 1526, "gains natural": 21938, "long texts": 31044, "texts like": 56900, "longer documents": 31050, "documents challenging": 15862, "recurrent networks": 45621, "networks models": 36876, "models paper": 35298, "step generation": 52810, "task training": 55443, "lstm long": 31271, "auto encoder": 5014, "lstm model": 31273, "reconstruct original": 45578, "using standard": 60959, "standard metrics": 52505, "models able": 34653, "preserve syntactic": 42117, "discourse coherence": 15387, "generating coherent": 22367, "coherent text": 8918, "text units": 56831, "footnote code": 21279, "code models": 8834, "networks successfully": 36914, "tasks resulting": 55862, "models difficult": 34919, "building sentence": 7469, "models nlp": 35263, "introduce simple": 26860, "information flow": 25880, "test methods": 56357, "methods sentiment": 33033, "wide applications": 61959, "outperform simple": 38819, "learning distinct": 29595, "models vector": 35672, "representations multi": 46720, "methods proposed": 32996, "understanding tasks": 59408, "introduce multi": 26825, "based chinese": 5616, "embeddings language": 17157, "test performance": 56361, "model speech": 34403, "recognition sentiment": 45535, "analysis semantic": 2749, "relation identification": 45984, "embeddings improve": 17147, "tasks speech": 55904, "various forms": 61345, "information tasks": 26114, "tasks results": 55863, "highlight importance": 23862, "models real": 35401, "quality quantity": 44569, "articles wikipedia": 4483, "varies greatly": 61257, "translation tools": 58692, "specific needs": 52117, "content wikipedia": 10571, "studies rely": 53295, "paper compare": 39291, "compare data": 9334, "data acquisition": 12115, "self reported": 49203, "age gender": 2046, "accuracy text": 1061, "giving best": 22813, "introduce corpus": 26792, "scale corpus": 48560, "corpus annotated": 11274, "annotated using": 2928, "using amazon": 60556, "inherent difficulty": 26202, "annotation task": 2973, "linguistic variables": 30809, "level annotated": 30061, "mental state": 32292, "investigate feasibility": 26959, "experiments present": 19492, "present models": 41948, "models predicting": 35342, "distributional word": 15672, "relations empirical": 46024, "various data": 61320, "results potential": 47769, "potential use": 41410, "embedding words": 17073, "gained lot": 21918, "methods provide": 33000, "provide efficient": 44059, "unclear paper": 59238, "paper argue": 39271, "ranking problem": 44975, "metrics based": 33141, "insight propose": 26384, "attention mechanism": 4770, "robustness noise": 48289, "compared state": 9458, "art word": 4440, "embedding techniques": 17067, "significant margin": 50898, "million tokens": 33259, "performs existing": 40707, "similarity benchmark": 51086, "available general": 5296, "abstract meaning": 760, "representation amr": 46490, "open domain": 38419, "rich semantics": 48121, "fields like": 20781, "event extraction": 18784, "generation typically": 22571, "dictionary lookup": 14807, "robust learning": 48252, "learning stage": 29892, "generalize better": 22137, "previous approach": 42237, "classifier improve": 8599, "improve previous": 24909, "art result": 4368, "end performance": 17695, "used human": 60204, "modeling human": 34582, "language abilities": 27948, "scale neural": 48605, "working memory": 62869, "memory model": 32270, "network takes": 36811, "takes input": 54781, "input neural": 26304, "flow information": 21119, "components neural": 9721, "gating mechanisms": 22007, "capable learning": 7624, "priori knowledge": 42430, "role different": 48304, "based interface": 5791, "using open": 60848, "open ended": 38429, "incremental learning": 25483, "output sentences": 38999, "sentences expressing": 49719, "range language": 44920, "visually grounded": 61687, "textual visual": 56986, "visual input": 61657, "input model": 26300, "gated recurrent": 21996, "recurrent unit": 45627, "uses multi": 60521, "multi task": 36015, "task objective": 55244, "visual representation": 61666, "representations individual": 46689, "visual scenes": 61669, "learns effectively": 29958, "sequential structure": 50051, "structure semantic": 53133, "online social": 38384, "problem classifying": 42520, "sentiment user": 49865, "user comments": 60406, "comments news": 9146, "cover wide": 11648, "domains including": 16261, "particular domain": 39843, "diverse topics": 15723, "holistic view": 23990, "useful applications": 60356, "paper formulate": 39384, "formulate problem": 21385, "problem entity": 42550, "entity specific": 18150, "novel features": 37822, "features specific": 20670, "news comments": 37392, "results models": 47728, "outperform state": 38822, "art baselines": 4222, "neural sequence": 37096, "sequence model": 49952, "task essential": 55053, "based encoder": 5698, "decoder model": 13600, "memory recurrent": 32279, "networks lstm": 36872, "language instructions": 28118, "action sequences": 1457, "based representation": 5993, "model focus": 33903, "focus sentence": 21197, "contrast existing": 10875, "methods model": 32947, "specific annotations": 52043, "achieves best": 1304, "results reported": 47806, "single sentence": 51333, "dataset competitive": 12852, "results limited": 47701, "limited training": 30627, "training multi": 58181, "model series": 34363, "components model": 9719, "high accuracy": 23707, "accuracy model": 1008, "generative models": 22599, "models allowing": 34708, "fast accurate": 20420, "inference propose": 25686, "propose efficient": 43362, "efficient decoding": 16867, "decoding algorithm": 13625, "beam size": 6370, "uncertainty model": 59231, "jointly predicting": 27218, "pos tags": 41236, "model obtains": 34140, "obtains better": 38242, "model performing": 34200, "learning large": 29698, "large unlabelled": 29041, "corpus model": 11382, "distilling knowledge": 15586, "new research": 37301, "research topic": 47130, "performance particularly": 40482, "tasks propose": 55818, "specific knowledge": 52093, "set high": 50164, "dimensional embeddings": 15231, "reduce model": 45671, "model complexity": 33681, "efficiency performance": 16850, "performance experiments": 40337, "experiments tasks": 19541, "tasks reveal": 55865, "directly training": 15339, "structured neural": 53166, "networks encode": 36849, "models best": 34778, "sequence based": 49912, "sequence models": 49958, "models like": 35186, "compositional structure": 9750, "tasks clear": 55539, "data demonstrate": 12271, "artificial data": 4489, "data task": 12724, "lstm based": 31243, "based sequence": 6027, "model learn": 34050, "tree structure": 58758, "large training": 29028, "training sets": 58253, "structure paper": 53126, "method proposed": 32626, "audio features": 4927, "features generated": 20590, "process data": 42767, "valence arousal": 61170, "attributes like": 4908, "training test": 58290, "test sets": 56377, "feature weighting": 20510, "nearest neighbors": 36522, "quality research": 44574, "total number": 57475, "published papers": 44370, "paramount importance": 39736, "paper devise": 39341, "89 accuracy": 546, "systematic analysis": 54390, "successful application": 53733, "methods improve": 32894, "performance identifying": 40377, "artificially generated": 4502, "vision language": 61637, "language long": 28141, "intelligence ai": 26535, "images videos": 24558, "available corpora": 5272, "propose set": 43628, "quality metrics": 44551, "metrics evaluating": 33162, "language datasets": 28018, "datasets using": 13474, "using complex": 60614, "complex language": 9631, "different strengths": 15082, "data attention": 12147, "performance range": 40518, "range tasks": 44937, "image caption": 24530, "adaptation model": 1528, "model used": 34508, "used machine": 60229, "task applied": 54906, "alleviate issue": 2408, "long inputs": 31017, "finally propose": 20876, "adverse drug": 1998, "extraction information": 20072, "crucial task": 11913, "task detecting": 55012, "detecting classifying": 14445, "report analysis": 46426, "analysis complex": 2634, "terms time": 56318, "impact quality": 24605, "quality data": 44505, "data analysis": 12132, "robust language": 48251, "reasonable performance": 45173, "approach promising": 3655, "baseline paper": 6199, "dynamic time": 16492, "demonstrated proposed": 14016, "considerable performance": 10234, "improvement existing": 25004, "certain types": 7949, "types information": 59093, "extraction tasks": 20119, "traditional rule": 57541, "knowledge general": 27490, "traditional text": 57552, "based rules": 6004, "task involving": 55149, "dialogue corpus": 14769, "corpus dataset": 11317, "dataset containing": 12865, "containing million": 10484, "multi turn": 36035, "turn dialogues": 58990, "100 million": 62, "resource research": 47266, "research building": 46996, "models make": 35213, "amounts unlabeled": 2562, "data dataset": 12269, "dataset multi": 13001, "state tracking": 52711, "challenge datasets": 7975, "dataset provide": 13043, "provide benchmark": 44016, "benchmark performance": 6486, "performance task": 40593, "consider task": 10221, "learning control": 29574, "language barrier": 27972, "environments challenging": 18176, "deep reinforcement": 13746, "reinforcement learning": 45864, "framework jointly": 21551, "jointly learn": 27199, "state representations": 52708, "framework enables": 21502, "text descriptions": 56533, "capture semantics": 7710, "baselines using": 6317, "using bag": 60580, "outperforms baselines": 38873, "demonstrating importance": 14054, "manually annotated": 31756, "mining sentiment": 33322, "different sets": 15067, "000 words": 15, "200 words": 235, "annotated resource": 2910, "based tensor": 6087, "query candidate": 44663, "train classifier": 57573, "classifier using": 8609, "vectors using": 61500, "simple features": 51169, "features achieves": 20517, "achieves average": 1303, "average f1": 5406, "score 40": 48792, "dataset comparable": 12848, "count based": 11611, "paper improve": 39396, "performance recurrent": 40524, "rnn language": 48194, "model incorporating": 33992, "incorporating syntactic": 25394, "relevant contexts": 46205, "10 points": 50, "points accuracy": 41066, "achieve results": 1186, "comparable state": 9311, "task consider": 54969, "number text": 38045, "distributional properties": 15666, "properties data": 43258, "approaches allow": 3761, "information high": 25905, "high low": 23751, "space embedding": 51856, "data consider": 12238, "data contrast": 12252, "addition use": 1648, "query based": 44662, "deeper understanding": 13760, "computational approaches": 9834, "opinion analysis": 38499, "model sentiment": 34356, "given different": 22736, "variety languages": 61277, "communication humans": 9249, "provide natural": 44103, "survey existing": 54206, "english end": 17799, "goal article": 22876, "article provide": 4459, "provide common": 44028, "researchers interested": 47160, "design decisions": 14272, "network approaches": 36698, "approaches recently": 3909, "achieved state": 1273, "distance dependencies": 15543, "model exploit": 33858, "exploit various": 19668, "model improves": 33982, "tasks achieves": 55489, "achieves highest": 1337, "work area": 62572, "transfer based": 58353, "developed english": 14629, "little work": 30890, "languages currently": 28630, "focus designing": 21152, "using transfer": 60997, "structure parsing": 53127, "generate text": 22254, "language better": 27979, "require large": 46868, "aligned data": 2355, "data translation": 12745, "available languages": 5318, "languages transfer": 28810, "knowledge languages": 27543, "languages source": 28792, "language target": 28518, "online information": 38372, "make possible": 31585, "possible directly": 41323, "information expressed": 25852, "expressed text": 19801, "order able": 38585, "new natural": 37266, "automatically extracting": 5171, "extracting relevant": 20035, "relevant information": 46220, "pieces information": 40880, "information obtain": 25993, "use unsupervised": 60066, "tasks demonstrate": 55575, "morphological features": 35841, "features compared": 20542, "considered paper": 10250, "concept relation": 9925, "focus task": 21205, "task extracting": 55072, "belong different": 6417, "propose semi": 43619, "supervised method": 54017, "manually defined": 31774, "linguistic patterns": 30779, "automatically learned": 5188, "search results": 48983, "results addition": 47491, "concepts based": 9932, "saliency scores": 48438, "method generates": 32516, "results high": 47654, "addresses question": 1816, "online discussion": 38362, "relative importance": 46100, "task proposed": 55304, "proposed based": 43742, "shows importance": 50783, "importance different": 24681, "community paper": 9268, "method solve": 32662, "answering task": 3099, "task employ": 55042, "model calculate": 33639, "question similarity": 44750, "extracting features": 20030, "propose learning": 43438, "learning rank": 29831, "algorithm train": 2305, "ranking tasks": 44978, "tasks experimental": 55629, "similarity model": 51108, "baseline systems": 6213, "bring improvements": 7331, "answer sentence": 3054, "previous systems": 42295, "systems standard": 54638, "set paper": 50211, "analysis task": 2774, "binary tree": 7156, "using hierarchical": 60727, "fast pace": 20428, "systems based": 54439, "methods order": 32969, "order ensure": 38615, "lattice rescoring": 29165, "models suffer": 35556, "solution use": 51663, "single pass": 51329, "second pass": 49014, "hinge loss": 23946, "beam search": 6365, "approach gives": 3549, "decoding performance": 13637, "instead consider": 26446, "approach inspired": 3573, "structured prediction": 53170, "obtain high": 38175, "model second": 34341, "texts work": 56947, "showed method": 50666, "method learn": 32562, "learn embedding": 29367, "analysis provide": 2731, "provide thorough": 44144, "models document": 34934, "document similarity": 15833, "better methods": 6917, "methods propose": 32995, "embeddings vector": 17243, "useful semantic": 60387, "results date": 47572, "structured documents": 53154, "previous works": 42318, "works focused": 62891, "focused modeling": 21227, "recently methods": 45438, "text specific": 56782, "build general": 7400, "remains important": 46334, "terms model": 56300, "efficiency propose": 16852, "method model": 32578, "framework leverages": 21557, "information learn": 25950, "topic word": 57435, "word distributions": 62140, "topic distributions": 57403, "mining tasks": 33326, "efficient variational": 16909, "inference method": 25668, "em algorithm": 17000, "parameters propose": 39718, "propose large": 43433, "results effectiveness": 47602, "effectiveness efficiency": 16778, "efficiency robustness": 16855, "model state": 34408, "methods document": 32827, "document modeling": 15813, "prediction text": 41746, "semantic embeddings": 49274, "dimensional data": 15228, "classification work": 8583, "learning vector": 29936, "embedding space": 17062, "vectors word": 61501, "better comparable": 6864, "unsupervised text": 59742, "text embedding": 56548, "attracting increasing": 4893, "methods usually": 33096, "learning tasks": 29905, "tasks possible": 55801, "learn representation": 29414, "unsupervised way": 59745, "information available": 25766, "available task": 5375, "task low": 55194, "dimensional representations": 15234, "applicable different": 3156, "tasks particularly": 55794, "tuned task": 58889, "gap proposing": 21979, "supervised representation": 54035, "text labeled": 56639, "levels word": 30251, "occurrence information": 38274, "dimensional embedding": 15230, "words documents": 62402, "particular task": 39865, "task compared": 54960, "compared recent": 9445, "fewer parameters": 20739, "knowledge source": 27612, "existing work": 19173, "structured tables": 53177, "using question": 60890, "answer pairs": 3044, "central challenge": 7917, "domain results": 16149, "results open": 47752, "set relations": 50237, "obtains significant": 38255, "improvements natural": 25084, "baselines evaluation": 6257, "created new": 11730, "world languages": 62945, "language interactions": 28119, "model needs": 34121, "entailment relations": 18006, "models external": 35006, "interactions results": 26620, "model present": 34227, "parsing method": 39784, "based parser": 5933, "lstm recurrent": 31279, "networks learn": 36869, "parsing model": 39786, "model benefits": 33617, "level relation": 30189, "networks cnn": 36837, "feature engineering": 20483, "based cnn": 5623, "cnn based": 8760, "especially long": 18283, "distance dependency": 15544, "pairs paper": 39205, "simple framework": 51173, "based recurrent": 5982, "based model": 5857, "semeval 2010": 49425, "task dataset": 54992, "dataset introduce": 12968, "experiments different": 19414, "rnn based": 48183, "performance relation": 40528, "learning long": 29712, "makes suitable": 31637, "suitable real": 53858, "models model": 35228, "network encoder": 36737, "attention based": 4714, "achieves word": 1387, "rate wer": 45016, "model 10": 33481, "tweets tweets": 59023, "period time": 40724, "tweet sentiment": 59006, "different groups": 14949, "sentences collected": 49689, "different categories": 14857, "bengali text": 6597, "model useful": 34510, "sentences contain": 49695, "sentences different": 49705, "structures semantic": 53193, "classification machine": 8490, "learning information": 29682, "existing research": 19136, "research efforts": 47027, "approached problem": 3749, "problem introducing": 42587, "user queries": 60440, "web using": 61901, "leveraging linguistic": 30331, "using datasets": 60645, "datasets consisting": 13194, "results confirmed": 47557, "develop language": 14592, "question propose": 44745, "focus modeling": 21182, "study impact": 53387, "shared language": 50475, "energy function": 17750, "best strategy": 6825, "shared languages": 50476, "tasks current": 55567, "propose language": 43432, "language style": 28511, "easier process": 16528, "negatively impact": 36644, "conduct studies": 10063, "causal relationship": 7877, "evaluations using": 18771, "models sequence": 35484, "sequence tagging": 50002, "tagging models": 54743, "models include": 35117, "bidirectional lstm": 7077, "lstm bi": 31250, "bi lstm": 7012, "crf layer": 11763, "layer lstm": 29189, "lstm crf": 31257, "layer bi": 29181, "work apply": 62569, "nlp benchmark": 37468, "past future": 39932, "input features": 26279, "use sentence": 60008, "produce state": 43011, "investigate effect": 26953, "analysis sa": 2747, "common language": 9182, "classification using": 8580, "using generated": 60704, "previously generated": 42334, "ive bayes": 27137, "using bidirectional": 60594, "bidirectional lstms": 7079, "traditional word": 57556, "type model": 59062, "model requires": 34313, "fixed set": 21079, "set parameters": 50214, "model despite": 33755, "yield state": 63101, "results language": 47690, "modeling speech": 34625, "processing problem": 42926, "problem model": 42608, "model selection": 34346, "approaches rely": 3911, "grid search": 23248, "coarse grained": 8784, "methods allow": 32746, "allow efficient": 2436, "efficient model": 16886, "gaussian processes": 22018, "tree kernels": 58746, "better prediction": 6942, "prediction performance": 41727, "performance compared": 40249, "search framework": 48972, "framework proposed": 21587, "languages provide": 28761, "provide strong": 44135, "pairs existing": 39186, "models suited": 35562, "model latent": 34045, "likelihood training": 30522, "model address": 33542, "approximate inference": 3976, "model contrastive": 33716, "scales large": 48645, "existing generative": 19072, "models exploiting": 35000, "used automatic": 60097, "gram based": 23050, "does consider": 15938, "entropy based": 18158, "words method": 62457, "health conditions": 23513, "make inferences": 31577, "concept text": 9928, "required achieve": 46898, "achieve propose": 1183, "propose adapt": 43281, "adapt existing": 1502, "evaluate proposed": 18489, "using collection": 60610, "results combination": 47536, "based mt": 5887, "real valued": 45119, "relatively low": 46124, "network named": 36771, "shown encode": 50705, "encode semantic": 17468, "length vectors": 30038, "experimental evidence": 19266, "evidence using": 18825, "specific corpus": 52064, "technique text": 56047, "distributional representations": 15668, "order produce": 38648, "area nlp": 4144, "based rich": 5999, "sentence embeddings": 49547, "jointly learned": 27200, "context furthermore": 10644, "furthermore word": 21843, "process evaluate": 42776, "qualitatively quantitatively": 44484, "effectiveness framework": 16780, "results state": 47855, "mt metrics": 35919, "evaluating different": 18558, "tend produce": 56206, "humans usually": 24292, "recall precision": 45244, "biases present": 7059, "present data": 41881, "data does": 12293, "knowledge types": 27637, "training unlabeled": 58308, "initially trained": 26231, "trained standard": 57881, "adaptation domain": 1523, "problem solving": 42662, "statistical dependencies": 52740, "joint probability": 27185, "key semantic": 27333, "concepts text": 9944, "embedding based": 17014, "networks nlp": 36882, "strategies including": 52906, "embeddings embedding": 17123, "hyperparameter tuning": 24338, "combining different": 9110, "neural nlp": 37081, "models existing": 34987, "existing word": 19170, "methods models": 32948, "methods typically": 33086, "solved using": 51695, "singular value": 51360, "information addition": 25754, "global latent": 22833, "latent factors": 29126, "model generative": 33937, "way incorporate": 61810, "propose generative": 43401, "easy interpret": 16564, "serve basis": 50075, "model inference": 34000, "experiments common": 19375, "common benchmark": 9166, "models language": 35161, "tasks translation": 55944, "sub word": 53537, "improve model": 24870, "model performance": 34185, "help overcome": 23582, "overcome data": 39060, "attain better": 4667, "related words": 45954, "improves models": 25137, "performance provides": 40510, "applied tasks": 3301, "modelling word": 34646, "models automatically": 34745, "list words": 30841, "models limited": 35189, "leads improvements": 29318, "improvements task": 25105, "networks shown": 36909, "shown improve": 50720, "settings paper": 50387, "layers introduce": 29225, "modeling demonstrate": 34569, "demonstrate ability": 13859, "models machine": 35206, "models maintain": 35211, "models natural": 35244, "posterior distribution": 41359, "user trust": 60453, "analysis present": 2723, "method analyze": 32380, "used models": 60241, "sampling algorithm": 48497, "confidence intervals": 10113, "datasets comprising": 13186, "explore impact": 19710, "conduct series": 10060, "series analyses": 50059, "prediction experiments": 41707, "experiments datasets": 19398, "datasets results": 13409, "present general": 41919, "bipartite graph": 7181, "proposed document": 43759, "set using": 50275, "nlp approaches": 37464, "provide limited": 44098, "linguistic rules": 30792, "non expert": 37653, "general categories": 22047, "interactive interface": 26629, "user studies": 60450, "nlp non": 37506, "produce high": 42985, "quality labels": 44541, "release source": 46167, "majority languages": 31531, "languages considered": 28623, "alternative hypothesis": 2503, "context approach": 10584, "instruction following": 26480, "explicitly modeling": 19644, "level compositional": 30078, "level structure": 30217, "diverse set": 15717, "set benchmark": 50114, "benchmark tasks": 6498, "tasks task": 55925, "outperform strong": 38824, "baselines achieve": 6226, "representations useful": 46783, "capturing semantic": 7742, "applied variety": 3308, "tasks especially": 55617, "especially english": 18275, "english work": 17907, "english word": 17905, "similarity text": 51126, "effectiveness models": 16796, "language article": 27968, "present survey": 42032, "field computational": 20752, "aim provide": 2158, "provide comprehensive": 44036, "comprehensive overview": 9795, "use social": 60020, "social interaction": 51564, "demonstrate potential": 13956, "research communities": 47001, "showing large": 50681, "scale data": 48562, "driven methods": 16429, "methods widely": 33100, "complement existing": 9584, "challenge methods": 7995, "open challenges": 38413, "models discrete": 34925, "discrete latent": 15422, "shown beneficial": 50696, "applications work": 3260, "work exploit": 62656, "syntactic dependency": 54300, "used additional": 60082, "information potentially": 26012, "capturing fine": 7733, "fine grain": 20924, "evaluate word": 18518, "recognition semantic": 45534, "observe improvements": 38135, "cases results": 7813, "models advantage": 34698, "nmt models": 37576, "models typically": 35639, "fixed vocabulary": 21084, "open vocabulary": 38466, "problem previous": 42628, "effective approach": 16629, "nmt model": 37575, "model capable": 33644, "subword units": 53688, "based intuition": 5792, "segmentation based": 49080, "byte pair": 7514, "pair encoding": 39149, "models improve": 35110, "english russian": 17869, "exploratory analysis": 19682, "propose end": 43369, "neural encoder": 36950, "joint task": 27190, "surface realization": 54154, "model encodes": 33819, "network utilizes": 36823, "utilizes novel": 61115, "novel coarse": 37783, "coarse fine": 8782, "small subset": 51504, "decoder generate": 13595, "generate free": 22203, "free form": 21640, "selection generation": 49141, "generation results": 22541, "generation benchmark": 22426, "dataset despite": 12893, "despite using": 14403, "using specialized": 60956, "features linguistic": 20618, "resources using": 47339, "perform series": 40138, "model components": 33684, "generalizability model": 22109, "competitive better": 9545, "used derive": 60144, "surface features": 54150, "features document": 20562, "rhetorical structure": 48088, "structure theory": 53143, "level sentiment": 30209, "local information": 30942, "information discourse": 25817, "based sentiment": 6022, "offers significant": 38304, "classification based": 8437, "algorithm improve": 2280, "input query": 26323, "linear interpolation": 30657, "information experiments": 25849, "english respectively": 17866, "respectively experimental": 47366, "achieves consistent": 1320, "art method": 4280, "accuracy speech": 1049, "languages limited": 28715, "speech resources": 52291, "lack data": 27882, "data data": 12267, "language study": 28510, "study develop": 53359, "develop techniques": 14616, "techniques extracting": 56086, "data particular": 12538, "domain corpora": 16033, "different dialects": 14899, "data selection": 12633, "selection strategies": 49152, "cross entropy": 11822, "data improve": 12418, "performance baseline": 40208, "baseline machine": 6181, "report preliminary": 46442, "preliminary experiments": 41803, "experiments using": 19553, "using automatically": 60576, "automatically translated": 5203, "data additional": 12121, "additional training": 1706, "setting multi": 50332, "multi perspective": 35999, "constructed semantic": 10416, "perform detailed": 40087, "analysis evaluation": 2660, "ted talks": 56160, "applications human": 3209, "used task": 60323, "research direction": 47019, "validate quality": 61184, "previous results": 42274, "results supervised": 47870, "additional datasets": 1665, "datasets languages": 13311, "word formation": 62206, "embeddings enable": 17126, "present unsupervised": 42050, "unsupervised approach": 59681, "semantic vector": 49373, "based analyses": 5564, "translation experiments": 58613, "experiments semantic": 19517, "framework developed": 21494, "framework make": 21559, "text form": 56583, "text framework": 56587, "random test": 44890, "possible improve": 41329, "text prior": 56709, "need developing": 36555, "development paper": 14697, "sliding window": 51431, "training process": 58217, "method incorporating": 32540, "conducted compare": 10076, "content paper": 10544, "theoretical understanding": 57026, "embeddings semantic": 17208, "new evaluation": 37191, "evaluation tasks": 18735, "contrast prior": 10885, "random walks": 44893, "embedding algorithms": 17009, "semantic language": 49292, "studied problem": 53232, "document using": 15842, "information document": 25819, "devise method": 14725, "method make": 32568, "make predictions": 31587, "domains like": 16270, "paper analyzes": 39267, "problem specifically": 42665, "assess performance": 4581, "extensive experimental": 19872, "evaluation effectiveness": 18613, "art methodologies": 4281, "pave way": 39984, "select set": 49111, "features order": 20634, "detection problem": 14512, "problem task": 42673, "based twitter": 6114, "problem propose": 42631, "content based": 10514, "widely available": 61994, "available resource": 5363, "resource limited": 47249, "good quality": 22940, "motivates use": 35879, "use statistical": 60029, "statistical translation": 52767, "systems unfortunately": 54660, "quantity quality": 44639, "quality training": 44590, "data limited": 12465, "limited availability": 30571, "especially languages": 18281, "text domains": 56544, "research present": 47095, "various text": 61405, "specific domains": 52073, "approach domain": 3496, "domain neural": 16120, "translation present": 58659, "present research": 41999, "different training": 15104, "training methods": 58175, "translation used": 58699, "medical data": 32200, "parallel text": 39654, "used basis": 60102, "used analysis": 60085, "performs slightly": 40716, "does perform": 15964, "perform significantly": 40140, "segmentation task": 49088, "useful resource": 60385, "research explores": 47034, "mining data": 33312, "corpora task": 11248, "method building": 32407, "corpora wikipedia": 11257, "sentence pairs": 49611, "pairs new": 39204, "effects various": 16829, "various training": 61409, "medical texts": 32211, "texts various": 56943, "models development": 34911, "bleu nist": 7207, "models hierarchical": 35085, "hierarchical models": 23679, "different alignment": 14834, "alignment methods": 2374, "automatic data": 5076, "bilingual evaluation": 7108, "evaluation understudy": 18743, "understudy bleu": 59428, "evaluation perform": 18668, "perform experiments": 40102, "existing evaluation": 19067, "makes existing": 31622, "correlation human": 11520, "human translators": 24251, "word orders": 62256, "accuracy machine": 1003, "systems nlp": 54572, "tools text": 57386, "tasks requiring": 55857, "research proposes": 47102, "text domain": 56543, "semantic text": 49364, "structure analysis": 53089, "compared sentence": 9450, "training settings": 58254, "language various": 28577, "effects data": 16824, "morphological information": 35842, "results neural": 47739, "models powerful": 35335, "powerful tool": 41448, "models generally": 35055, "relies availability": 46265, "diverse training": 15724, "specialised domains": 52026, "obtaining large": 38236, "corpus limited": 11372, "limited range": 30607, "entities knowledge": 18060, "model integrates": 34011, "diverse data": 15697, "information achieve": 25751, "recent models": 45321, "link prediction": 30828, "training labels": 58143, "learning different": 29589, "detailed information": 14427, "information web": 26157, "daily basis": 12084, "common knowledge": 9181, "research shown": 47119, "shown possible": 50734, "obtain similar": 38192, "lines research": 30688, "method combines": 32421, "model successfully": 34424, "information language": 25939, "years neural": 63065, "emerged powerful": 17262, "yielding state": 63111, "image recognition": 24543, "recognition speech": 45538, "processing recently": 42931, "perspective natural": 40775, "input encoding": 26273, "forward networks": 21404, "convolutional networks": 11109, "networks recurrent": 36903, "measured using": 32071, "language means": 28149, "different contexts": 14876, "results applying": 47501, "provide evidence": 44063, "proved effective": 43987, "fixed dimensional": 21075, "space resulting": 51894, "embeddings need": 17178, "word types": 62326, "old new": 38327, "task best": 54935, "best approach": 6748, "pairs train": 39222, "produce embeddings": 42981, "pairs different": 39180, "performs similarly": 40715, "best previously": 6806, "previously published": 42343, "published results": 44372, "introduces novel": 26894, "approach tackle": 3714, "systems currently": 54467, "sentences approach": 49681, "disambiguation problem": 15359, "using concepts": 60617, "enable use": 17430, "extract latent": 19984, "use latent": 59929, "label data": 27701, "silver standard": 51025, "set train": 50268, "classifier predict": 8602, "predict topic": 41659, "topic distribution": 57402, "current sentence": 12006, "sentence experimental": 49555, "results large": 47694, "domain proposed": 16139, "effective predicting": 16687, "shows potential": 50793, "interactive setting": 26632, "common use": 9209, "classifiers model": 8619, "words recently": 62493, "shown perform": 50732, "scenario small": 48689, "small number": 51486, "referring expressions": 45761, "network extract": 36742, "image features": 24536, "positional information": 41274, "information model": 25975, "achieves performance": 1352, "conceptually simpler": 9957, "explore role": 19734, "paper define": 39315, "networks approach": 36831, "networks multi": 36877, "multi party": 35996, "supervised training": 54061, "learn task": 29434, "embeddings words": 17249, "data new": 12510, "words test": 62529, "embeddings task": 17224, "loss objective": 31100, "objective function": 38089, "analysis experimental": 2662, "properties word": 43272, "embeddings proposed": 17197, "controlled experiments": 10984, "relations word": 46063, "demonstrated using": 14025, "model experiments": 33852, "level noise": 30169, "distribution word": 15659, "space defined": 51853, "non zero": 37691, "languages use": 28814, "order words": 38663, "test hypothesis": 56349, "differences human": 14822, "efficient processing": 16892, "gain insights": 21910, "research issues": 47059, "studies utilized": 53309, "sources text": 51841, "shared multiple": 50481, "similar approaches": 51029, "newspaper articles": 37430, "work analyze": 62565, "methods generating": 32878, "based linear": 5815, "interpretable results": 26730, "randomly selected": 44904, "topical structure": 57440, "structure documents": 53101, "documents related": 15909, "method promising": 32624, "learning common": 29563, "representations multiple": 46721, "multiple views": 36310, "data typically": 12749, "using parallel": 60853, "work address": 62555, "world scenario": 62956, "specific downstream": 52074, "downstream applications": 16332, "cross modal": 11863, "performance multilingual": 40447, "available multilingual": 5329, "dataset created": 12872, "largest publicly": 29100, "corpus use": 11453, "previously reported": 42346, "independent evaluation": 25497, "data second": 12631, "evaluate performances": 18485, "performances various": 40652, "bi lstms": 7016, "dataset create": 12871, "predictions multiple": 41765, "multiple models": 36251, "models ensemble": 34970, "performance achieves": 40180, "dataset finally": 12929, "finally discuss": 20852, "discuss future": 15466, "corpus text": 11443, "text quality": 56722, "semantic structural": 49354, "processed using": 42843, "terms quality": 56312, "available web": 5390, "used ground": 60200, "used order": 60257, "order evaluate": 38616, "evaluate results": 18501, "various systems": 61402, "systems experiments": 54499, "data processing": 12565, "used compare": 60119, "quality scores": 44578, "english translations": 17894, "relatively high": 46116, "score 64": 48800, "approach multi": 3605, "multi language": 35979, "given target": 22791, "sequence generation": 49925, "generation models": 22495, "aligned english": 2356, "sentences significant": 49786, "meteor scores": 32351, "scores models": 48909, "trained multiple": 57820, "languages compared": 28618, "corpora propose": 11234, "corpora study": 11247, "text span": 56780, "experiment different": 19237, "perform error": 40098, "analysis component": 2635, "using multi": 60816, "accuracy 74": 908, "held test": 23545, "reveals interesting": 48018, "interesting findings": 26650, "best systems": 6830, "bidirectional long": 7075, "shown effective": 50702, "data speech": 12688, "properties natural": 43268, "study propose": 53442, "set state": 50252, "accuracy achieved": 934, "features approach": 20523, "based sparse": 6050, "relations different": 46022, "different syntactic": 15090, "parameters model": 39709, "network different": 36732, "parameters different": 39691, "different degrees": 14895, "parameters languages": 39704, "determined using": 14560, "approach captures": 3443, "geographical regions": 22650, "approaches primarily": 3900, "primarily focused": 42363, "lexical variation": 30395, "method identifies": 32525, "extend recently": 19830, "approach explicitly": 3528, "explicitly account": 19630, "model study": 34419, "online data": 38358, "tweets twitter": 59024, "spanning different": 51952, "finally using": 20886, "100 years": 66, "rational speech": 45026, "speech acts": 52252, "model treats": 34492, "shown capture": 50698, "knowledge data": 27432, "data address": 12123, "address concerns": 1751, "activation function": 1467, "opens new": 38482, "new application": 37129, "application domains": 3165, "learning effectively": 29609, "expression generation": 19805, "task showing": 55372, "performance achieved": 40179, "statistical parsing": 52759, "adapting models": 1566, "models languages": 35163, "difficult languages": 15173, "make significant": 31597, "leads improved": 29315, "approaches compared": 3786, "dependency parser": 14128, "score compared": 48841, "model f1": 33874, "score 89": 48824, "constituency parsing": 10351, "arabic twitter": 4008, "entity names": 18122, "topic detection": 57400, "detection propose": 14513, "propose approaches": 43298, "approaches tackle": 3933, "tackle issue": 54704, "relevant tweets": 46244, "given entity": 22741, "task shown": 55373, "shown competitive": 50699, "task according": 54873, "according evaluation": 858, "approach sentence": 3685, "formulate task": 21386, "programming ilp": 43085, "techniques proposed": 56126, "introduce novel": 26842, "orders magnitude": 38670, "magnitude faster": 31415, "evaluation demonstrates": 18607, "does degrade": 15941, "single best": 51286, "results introduce": 47683, "sequence learning": 49944, "learning deep": 29582, "output sequence": 39000, "lstm network": 31275, "automatically create": 5154, "features represent": 20655, "learns make": 29964, "make decision": 31564, "used input": 60215, "learning attention": 29525, "natural sentences": 36464, "experiments indicate": 19446, "sentences training": 49797, "outperformed baseline": 38836, "sentences terms": 49793, "based lstm": 5825, "designed predict": 14328, "generation probability": 22525, "time step": 57224, "generated based": 22270, "modeling power": 34611, "achieves results": 1359, "results current": 47566, "achieving competitive": 1400, "effective modeling": 16675, "documents study": 15916, "applied various": 3309, "tagging tasks": 54753, "specific features": 52084, "solution uses": 51664, "uses set": 60535, "set task": 50257, "task independent": 55133, "independent features": 25499, "internal representations": 26689, "representations learnt": 46708, "text tasks": 56806, "explore different": 19698, "different neural": 15005, "network architectures": 36700, "question making": 44735, "regularization methods": 45840, "popularly used": 41209, "used train": 60332, "train deep": 57577, "study different": 53361, "different context": 14875, "performance compare": 40248, "models multimodal": 35240, "computational tools": 9868, "cases using": 7816, "historical information": 23960, "information extract": 25857, "person names": 40750, "information integrated": 25927, "non negative": 37667, "negative matrix": 36625, "held data": 23544, "data able": 12104, "able train": 728, "data important": 12417, "important practical": 24753, "data usually": 12766, "training algorithm": 57929, "algorithm using": 2311, "training experiments": 58101, "results use": 47895, "loss function": 31093, "information perform": 26005, "small amounts": 51462, "thousand words": 57077, "model real": 34275, "data taking": 12719, "data source": 12674, "data improves": 12421, "based relevant": 5992, "estimation method": 18382, "study focused": 53380, "focused automatic": 21216, "automatic identification": 5097, "extracted using": 20025, "based reference": 5984, "lexical diversity": 30362, "model does": 33784, "level lexical": 30150, "recent past": 45332, "studies published": 53291, "demonstrate simple": 13976, "important context": 24714, "hope paper": 24011, "models analysis": 34710, "analysis data": 2641, "data natural": 12503, "translation proposed": 58663, "approaches typically": 3945, "typically use": 59159, "representing information": 46812, "represent information": 46473, "information original": 25997, "entities introduce": 18058, "new test": 37341, "test language": 56353, "unlike standard": 59610, "task predicting": 55283, "frequency words": 21681, "range state": 44935, "different way": 15127, "representations long": 46712, "art neural": 4308, "text encoded": 56551, "single words": 51358, "meaningful information": 32024, "window based": 62064, "self supervision": 49220, "performance paper": 40473, "propose structured": 43651, "label sequence": 27725, "structured input": 53158, "special case": 52014, "structured learning": 53162, "proposed perform": 43877, "aim extract": 2147, "unstructured texts": 59673, "identify main": 24429, "particular text": 39866, "text crucial": 56520, "research studies": 47123, "order determine": 38609, "task goal": 55108, "tweet dataset": 59004, "dataset using": 13130, "data unsupervised": 12757, "topic drift": 57404, "key challenge": 27297, "provide analysis": 44007, "target dataset": 54806, "approaches model": 3875, "level recurrent": 30187, "network gcn": 36747, "training generative": 58114, "accuracy evaluation": 969, "resource english": 47226, "extraction generation": 20070, "approach leverages": 3589, "extract set": 19994, "patterns based": 39965, "based data": 5663, "work studies": 62829, "multimodal data": 36145, "given piece": 22769, "corresponding semantic": 11557, "image video": 24548, "end introduce": 17676, "training objective": 58196, "objective learning": 38092, "learning multimodal": 29772, "present extensive": 41914, "extensive results": 19911, "embeddings various": 17242, "promise approach": 43155, "characters words": 8255, "information provides": 26036, "using joint": 60743, "model target": 34442, "modeled sequence": 34555, "sequence word": 50018, "benefit approach": 6559, "challenges associated": 8033, "languages model": 28730, "results par": 47761, "models complex": 34840, "complex compositional": 9617, "language challenging": 27985, "challenging language": 8106, "provides strong": 44230, "performance underlying": 40612, "underlying models": 59272, "models truly": 35634, "truly understanding": 58826, "visual content": 61652, "multi modal": 35988, "visual question": 61663, "answering vqa": 3103, "questions specifically": 44810, "level semantics": 30206, "dataset language": 12976, "language priors": 28390, "better chance": 6862, "balanced dataset": 5515, "approach matches": 3598, "dataset outperforms": 13022, "dataset recent": 13055, "advances neural": 1918, "deep latent": 13698, "distributions latent": 15677, "inference network": 25673, "discrete text": 15429, "different text": 15097, "standard test": 52532, "test corpora": 56338, "selection model": 49146, "model employs": 33814, "layer attention": 29178, "mechanism extract": 32119, "answer pair": 3043, "answering benchmarks": 3065, "benchmarks model": 6531, "paper exploit": 39360, "language variation": 28573, "key idea": 27315, "novel attention": 37770, "architecture attention": 4027, "review data": 48028, "data transfer": 12743, "setting task": 50351, "task settings": 55367, "tasks example": 55623, "trained language": 57758, "used recognize": 60287, "little training": 30886, "learning cross": 29576, "learning shows": 29876, "abstract features": 757, "features learned": 20615, "learned deep": 29455, "models transfer": 35626, "data distributions": 12291, "data types": 12748, "types model": 59101, "model structures": 34417, "model types": 34496, "review paper": 48034, "highlight potential": 23869, "interesting research": 26654, "research field": 47036, "improvements machine": 25080, "tested method": 56396, "corpora based": 11180, "corpora data": 11189, "corpus sentences": 11427, "refer entities": 45733, "entities domain": 18045, "validation results": 61195, "results accurate": 47484, "explicit implicit": 19615, "implicit relationships": 24663, "data critical": 12261, "perform complex": 40080, "reasoning tasks": 45228, "tasks efficient": 55603, "model report": 34307, "report state": 46447, "answering tasks": 3100, "proved difficult": 43986, "approaches learning": 3859, "learning representation": 29841, "al 2015": 2237, "created large": 11728, "trade offs": 57502, "memory usage": 32286, "reduce memory": 45669, "memory footprint": 32255, "art embedding": 4251, "impact performance": 24604, "representation better": 46496, "better interpretability": 6904, "attention lately": 4761, "impressive performance": 24811, "numerous natural": 38067, "tasks semantic": 55870, "similarity measurement": 51103, "despite success": 14394, "methods consider": 32799, "relational structure": 46013, "semantic lexicons": 49296, "learnt using": 29980, "integrating knowledge": 26523, "knowledge semantic": 27606, "propose joint": 43425, "corpus proposed": 11410, "statistically significantly": 52775, "outperforms previously": 38928, "methods incorporating": 32901, "incorporating semantic": 25393, "datasets semantic": 13414, "computational semantics": 9861, "multi domain": 35954, "words proposed": 62487, "domain texts": 16210, "obtained state": 38224, "pairs using": 39228, "training target": 58283, "translation investigate": 58623, "use monolingual": 59952, "work combines": 62601, "models separately": 35482, "separately trained": 49883, "explore strategies": 19739, "train monolingual": 57612, "data automatic": 12171, "data obtain": 12519, "low resourced": 31197, "results fine": 47635, "tuning domain": 58907, "monolingual parallel": 35809, "topic specific": 57431, "framework built": 21466, "classification methods": 8493, "methods currently": 32808, "wide use": 61978, "intensive methods": 26559, "methods latent": 32921, "mental health": 32290, "documents high": 15884, "set phrases": 50218, "method extended": 32500, "computational time": 9867, "word lists": 62241, "methods literature": 32930, "literature present": 30860, "methods offer": 32966, "unstructured documents": 59668, "knowledge form": 27485, "entities context": 18041, "set training": 50269, "data using": 12765, "context features": 10638, "new patterns": 37280, "evaluated proposed": 18543, "proposed english": 43766, "language dataset": 28017, "documents corpus": 15868, "work leverage": 62708, "extract structured": 19996, "used reduce": 60288, "reduce data": 45657, "data requirements": 12606, "particular demonstrate": 39841, "discover new": 15407, "words vectors": 62544, "data science": 12629, "new york": 37364, "approach natural": 3609, "self contained": 49192, "networks used": 36921, "fundamental natural": 21781, "learning general": 29659, "embeddings based": 17086, "based supervision": 6071, "textual similarity": 56981, "similarity datasets": 51091, "distribution training": 15654, "complex architectures": 9616, "networks perform": 36890, "perform best": 40071, "data domain": 12294, "domain scenarios": 16150, "averaging model": 5427, "model competitive": 33679, "extremely efficient": 20157, "use order": 59968, "conduct experiments": 10042, "experiments supervised": 19537, "supervised nlp": 54027, "classification word": 8582, "pretrained sentence": 42181, "using prior": 60873, "feature extractor": 20489, "leads performance": 29322, "entailment tasks": 18008, "tasks release": 55847, "hope serve": 24013, "new baseline": 37141, "universal sentence": 59546, "used method": 60235, "method computing": 32430, "upper bound": 59773, "theoretical analysis": 57018, "language data": 28016, "data generated": 12384, "ways improve": 61842, "novel type": 37944, "representations make": 46715, "sensitive word": 49505, "demonstrated effective": 14004, "effective capturing": 16634, "unsupervised representation": 59724, "unlabeled corpora": 59562, "corpora learn": 11215, "learn similar": 29423, "similar representations": 51061, "information important": 25912, "important knowledge": 24739, "knowledge help": 27513, "category information": 7862, "information documents": 25820, "representations learn": 46704, "wise manner": 62081, "models word": 35683, "tasks evaluate": 55619, "learned word": 29492, "models demonstrated": 34894, "capable achieving": 7615, "achieving remarkable": 1420, "remarkable performance": 46358, "performance sentence": 40550, "work combine": 62600, "combine strengths": 9074, "novel unified": 37945, "unified model": 59474, "sentence representation": 49631, "cnn extract": 8767, "level phrase": 30176, "phrase representations": 40844, "representations fed": 46668, "network lstm": 36763, "obtain sentence": 38189, "capture local": 7691, "sentence semantics": 49640, "proposed architecture": 43738, "classification question": 8526, "outperforms cnn": 38881, "cnn lstm": 8772, "achieve excellent": 1134, "problem modeling": 42609, "relationship modeling": 46071, "modeling structured": 34626, "prediction problem": 41729, "supervised framework": 53988, "framework learn": 21553, "partially labeled": 39809, "use set": 60013, "knowledge investigate": 27534, "demonstrate framework": 13914, "framework outperforms": 21578, "outperforms competitive": 38887, "competitive baselines": 9542, "translation slt": 58677, "major challenge": 31503, "sentence segmentation": 49637, "downstream language": 16340, "explore problem": 19728, "problem identifying": 42579, "sentence boundaries": 49522, "recognition systems": 45541, "span annotations": 51919, "specific words": 52177, "words characters": 62378, "analyze text": 2829, "languages single": 28790, "single model": 51317, "model small": 34393, "small vocabulary": 51512, "multilingual models": 36098, "results similar": 47848, "training datasets": 58054, "external data": 19932, "sources models": 51836, "problem joint": 42588, "linguistic semantic": 30793, "text provide": 56719, "competitive baseline": 9540, "solve data": 51678, "especially data": 18269, "does exist": 15945, "combined model": 9081, "employ word": 17395, "models additional": 34693, "experimental result": 19267, "result shows": 47451, "method significantly": 32653, "outperforms conventional": 38890, "lexicon model": 30413, "model phrase": 34209, "results chinese": 47532, "using english": 60676, "powerful models": 41437, "models widely": 35681, "widely applied": 61993, "modeling translation": 34632, "notable improvements": 37717, "remains challenge": 46326, "machine translations": 31393, "translations paper": 58710, "model directly": 33769, "prediction experimental": 41705, "various baseline": 61307, "systems automatically": 54437, "automatically recognize": 5198, "essay scoring": 18316, "detection classification": 14467, "new feature": 37202, "accuracy accuracy": 933, "features useful": 20690, "present dataset": 41883, "dataset manually": 12991, "texts order": 56907, "automatic methods": 5107, "relation type": 45998, "type prediction": 59064, "computational analysis": 9832, "given pair": 22766, "negative neutral": 36627, "relationship text": 46073, "embeddings build": 17090, "symbolic knowledge": 54268, "previous neural": 42265, "performance reasoning": 40521, "tasks having": 55662, "unseen entities": 59648, "learn embeddings": 29368, "like human": 30475, "using computational": 60615, "computational linguistic": 9844, "features trained": 20686, "linguistic features": 30771, "features results": 20659, "presents end": 42082, "model named": 34116, "neural generative": 36959, "generative question": 22608, "questions based": 44775, "model built": 33638, "framework sequence": 21597, "trained corpus": 57699, "triples knowledge": 58806, "questions answers": 44772, "demonstrates proposed": 14039, "model outperform": 34152, "based qa": 5967, "qa model": 44451, "model neural": 34123, "dialogue model": 14778, "trained data": 57703, "data real": 12585, "real systems": 45112, "texts particular": 56911, "network methods": 36764, "novel models": 37876, "network representation": 36794, "way proposed": 61828, "relevant topics": 46242, "methods outperform": 32970, "outperform traditional": 38829, "textual representation": 56976, "understand human": 59296, "process human": 42788, "human natural": 24209, "research provide": 47103, "reading comprehension": 45080, "scientific papers": 48766, "different styles": 15085, "models analyze": 34712, "makes difficult": 31619, "people different": 40028, "existing machine": 19087, "designing new": 14341, "paper attempt": 39275, "attempt improve": 4688, "set language": 50179, "czech english": 12078, "english vietnamese": 17901, "settings language": 50379, "data adaptation": 12119, "adaptation techniques": 1543, "techniques employed": 56081, "train language": 57597, "models develop": 34908, "corpora use": 11253, "explored use": 19767, "use domain": 59870, "alignment models": 2376, "models unsupervised": 35649, "used bleu": 60109, "metrics results": 33198, "indicate approach": 25524, "positive impact": 41282, "quality multilingual": 44554, "data systems": 12717, "systems limited": 54550, "improvements current": 25065, "computation time": 9830, "various domains": 61329, "mined data": 33277, "quality propose": 44566, "minimum risk": 33307, "risk training": 48164, "training end": 58085, "unlike conventional": 59592, "likelihood estimation": 30518, "experiments approach": 19355, "approach achieves": 3393, "applied neural": 3285, "novel concept": 37786, "learning generate": 29660, "target corpus": 54804, "implicit relations": 24662, "relations concepts": 46020, "datasets measuring": 13327, "compared prior": 9441, "statistical significance": 52762, "methods study": 33057, "methods measuring": 32943, "task entity": 55051, "entity extraction": 18106, "tool developed": 57362, "model utilizes": 34519, "information like": 25954, "pos tag": 41229, "features enhance": 20571, "submitted runs": 53585, "models based": 34753, "datasets released": 13399, "project develop": 43133, "problem developing": 42537, "text datasets": 56525, "multiple classes": 36183, "question text": 44752, "produces best": 43026, "corpora text": 11249, "sentence representations": 49633, "paper concerns": 39295, "good candidate": 22928, "provide preliminary": 44114, "gaining popularity": 21929, "systematic comparison": 54393, "introduce efficient": 26802, "method popular": 32615, "popular benchmarks": 41159, "speed accuracy": 52320, "provide information": 44089, "limited present": 30606, "present graph": 41923, "based semi": 6018, "automatically construct": 5150, "small seed": 51498, "size high": 51385, "11 languages": 89, "languages addition": 28594, "automatically created": 5155, "features improve": 20600, "performance downstream": 40305, "tagging dependency": 54738, "pair sentences": 39158, "critical issue": 11784, "paraphrase identification": 39741, "work deals": 62620, "individual task": 25581, "task fine": 55086, "tuning specific": 58957, "models sentence": 35480, "manually designed": 31775, "sentences make": 49752, "make contributions": 31554, "applied wide": 3311, "variety tasks": 61292, "modeling sentence": 34622, "propose attention": 43301, "takes consideration": 54779, "sentence pair": 49609, "pair representations": 39157, "specific semantic": 52143, "relations propose": 46053, "approach models": 3604, "directed acyclic": 15264, "acyclic graph": 1492, "mechanism using": 32148, "make sure": 31602, "proposed design": 43757, "design allows": 14262, "validated results": 61186, "task artificial": 54914, "paper target": 39595, "generation framework": 22465, "framework framework": 21524, "framework consists": 21479, "including topic": 25311, "understanding sentence": 59399, "run experiments": 48401, "chinese corpus": 8302, "easily adapted": 16535, "remaining challenges": 46323, "particular linguistic": 39852, "model problem": 34236, "character sequence": 8224, "problem present": 42627, "model solving": 34398, "trained supervised": 57886, "supervised semi": 54039, "supervised settings": 54045, "seven datasets": 50417, "languages achieve": 28590, "generation recent": 22537, "recent language": 45315, "models especially": 34973, "networks rnns": 36908, "possible generate": 41327, "generate natural": 22221, "probability language": 42478, "translation summarization": 58683, "summarization question": 53896, "systems existing": 54495, "typically learn": 59147, "generated texts": 22326, "particular word": 39873, "approaches solve": 3922, "model provided": 34256, "model variants": 34523, "position sentence": 41269, "results generated": 47647, "negative sentences": 36635, "translate english": 58550, "established new": 18358, "sentence translation": 49663, "methods achieve": 32728, "movie reviews": 35896, "document embeddings": 15788, "embeddings methods": 17174, "document vectors": 15844, "predicting words": 41687, "imdb movie": 24570, "review dataset": 48029, "dataset shows": 13085, "shows model": 50789, "robust results": 48265, "model combined": 33666, "models source": 35521, "code model": 8833, "single word": 51357, "pair wise": 39160, "model multi": 34108, "sentence multiple": 49604, "multiple words": 36313, "defined categories": 13783, "combination words": 9054, "correct meaning": 11470, "pair model": 39155, "model recognize": 34284, "entailment contradiction": 18000, "model tree": 34493, "element wise": 16973, "combine information": 9067, "information individual": 25922, "results model": 47720, "existing sentence": 19140, "sentence encoding": 49551, "approaches large": 3854, "systems built": 54444, "systems languages": 54542, "used building": 60111, "building systems": 7473, "systems best": 54441, "type systems": 59071, "resources english": 47300, "english target": 17887, "using language": 60752, "sequence neural": 49961, "potentially useful": 41420, "information introduce": 25931, "function neural": 21758, "neural mt": 36986, "information source": 26094, "target sentences": 54839, "implement model": 24636, "ranking method": 44971, "method introduce": 32550, "models offers": 35275, "consistent performance": 10281, "performance boost": 40222, "standard lstm": 52498, "lstm attention": 31241, "noisy training": 37626, "data way": 12772, "labels example": 27819, "corpus improve": 11359, "multi source": 36006, "model train": 34466, "combination methods": 9043, "provide extensive": 44070, "affects performance": 2026, "performance wide": 40627, "finding optimal": 20900, "tasks benefit": 55522, "simple concatenation": 51144, "learned different": 29456, "additional contribution": 1662, "contribution propose": 10945, "learns word": 29978, "code mixed": 8823, "bengali english": 6593, "english tamil": 17886, "tamil english": 54794, "overall accuracy": 39033, "accuracy 70": 904, "obtains highest": 38251, "highest average": 23850, "tasks understanding": 55946, "challenge paper": 8004, "propose recurrent": 43597, "memory network": 32272, "rnn architecture": 48180, "discover underlying": 15409, "patterns data": 39966, "network large": 36756, "english dataset": 17793, "dataset additionally": 12805, "perform depth": 40086, "depth analysis": 14184, "analysis various": 2791, "various linguistic": 61356, "sentence coherence": 49528, "art large": 4272, "specific architectures": 52045, "used sequence": 60297, "paper enhance": 39349, "model label": 34034, "label dependencies": 27702, "encoded vector": 17485, "initial state": 26218, "methods predict": 32987, "predict label": 41643, "information input": 25924, "slot filling": 51441, "filling task": 20803, "essential component": 18323, "understanding using": 59415, "art f1": 4261, "score 95": 48829, "data consists": 12244, "sequences sentences": 50025, "sentences word": 49807, "word labels": 62220, "translations languages": 58709, "data noisy": 12514, "use source": 60022, "learning new": 29781, "leads significantly": 29328, "performance multi": 40445, "source transfer": 51816, "gaussian noise": 22016, "online learning": 38373, "learning finally": 29644, "present corpus": 41878, "quantitative qualitative": 44623, "including non": 25285, "addition approach": 1601, "features speech": 20672, "based solutions": 6047, "number people": 38026, "closed set": 8699, "human speech": 24241, "mechanism evaluate": 32115, "result poor": 47447, "approach adopted": 3412, "time cost": 57140, "cost paper": 11590, "novel deep": 37802, "model deep": 33742, "network used": 36820, "acoustic features": 1434, "features input": 20604, "aware training": 5475, "framework multi": 21566, "learning domain": 29604, "performance gain": 40356, "approach self": 3681, "training semi": 58244, "automatically classifying": 5148, "adding additional": 1593, "context language": 10664, "tasks training": 55939, "training does": 58070, "does lead": 15956, "lead performance": 29266, "training beneficial": 57944, "step process": 52824, "process developing": 42771, "classifier able": 8590, "able adapt": 672, "adapt new": 1506, "art nlp": 4316, "sequences words": 50031, "words generated": 62425, "model takes": 34439, "grammatical correctness": 23068, "model aims": 33556, "automated approach": 5036, "approach evaluate": 3519, "evaluate quality": 18495, "quality generated": 44524, "evaluation generated": 18623, "designed use": 14336, "learning tools": 29914, "reasoning natural": 45208, "reasoning model": 45204, "models provided": 35380, "detailed analyses": 14411, "users need": 60470, "need know": 36573, "trained human": 57747, "human raters": 24222, "core concepts": 11147, "models important": 35107, "parameters models": 39710, "models easily": 34944, "easily overfit": 16549, "data sparse": 12680, "popular solution": 41189, "alignment paper": 2379, "propose framework": 43392, "framework generalizes": 21527, "performance data": 40275, "examine various": 18871, "gives best": 22803, "nlp information": 37491, "multiple word": 36312, "sparse coding": 51966, "approach applies": 3420, "using variant": 61013, "arora et": 4193, "al 2016": 2238, "used verify": 60349, "surface form": 54151, "form input": 21322, "input outputs": 26311, "description generation": 14243, "generation natural": 22503, "recently received": 45460, "space provide": 51888, "review existing": 48032, "models highlighting": 35089, "image datasets": 24535, "datasets evaluation": 13255, "machine generated": 31299, "future directions": 21871, "task loss": 55193, "learning present": 29814, "standard reference": 52521, "translation experiment": 58610, "improves translation": 25166, "approaches employ": 3805, "feedback learning": 20717, "news events": 37403, "information prior": 26021, "time inference": 57165, "gibbs sampling": 22688, "sampling procedure": 48506, "encouraging results": 17606, "retrieval model": 47955, "clustering method": 8741, "method competitive": 32428, "competitive best": 9544, "extractive summarization": 20139, "summarization aims": 53876, "selecting set": 49129, "sentences source": 49788, "critical issues": 11785, "methods designed": 32818, "designed model": 14325, "far aware": 20396, "motivated observations": 35871, "major contributions": 31507, "methods directly": 32826, "increase diversity": 25412, "set representative": 50239, "relevant given": 46219, "cover important": 11646, "important sub": 24776, "make step": 31600, "step forward": 52808, "document sentence": 15829, "framework enhance": 21506, "empirical evaluations": 17325, "methods tend": 33070, "tend make": 56203, "relationships word": 46086, "clustering algorithm": 8736, "benefit using": 6574, "predict sentiment": 41655, "sentiment given": 49846, "short time": 50574, "information linguistic": 25956, "invariant representations": 26921, "information specifically": 26101, "specifically demonstrate": 52190, "information embedded": 25827, "verify hypothesis": 61540, "achieved high": 1239, "accuracy using": 1068, "structures paper": 53191, "reduction word": 45724, "provide insight": 44092, "mechanism performs": 32135, "parsing approaches": 39772, "domain question": 16141, "ask question": 4518, "paraphrase generation": 39740, "semantic parser": 49305, "evaluation experiments": 18620, "dataset performance": 13026, "improves strong": 25165, "strong baselines": 53008, "like news": 30493, "news article": 37384, "provide quantitative": 44117, "learning experiments": 29636, "experiments showing": 19524, "key task": 27337, "parametric models": 39731, "types linguistic": 59099, "data approaches": 12144, "approaches apply": 3765, "takes form": 54780, "using synthetic": 60974, "synthetic data": 54370, "set real": 50232, "real datasets": 45102, "proposed test": 43913, "types data": 59080, "sequence level": 49949, "better handling": 6897, "input propose": 26321, "propose machine": 43445, "machine reading": 31340, "neural attention": 36934, "single sequence": 51335, "architecture experiments": 4048, "experiments language": 19451, "analysis natural": 2703, "language inference": 28108, "inference model": 25670, "matches outperforms": 31906, "huge volume": 24079, "volume data": 61728, "opinions social": 38506, "social networking": 51595, "networking sites": 36825, "like twitter": 30510, "twitter facebook": 59037, "allow people": 2439, "lot work": 31124, "data survey": 12714, "provide survey": 44140, "comparative analyses": 9318, "approaches evaluation": 3815, "metrics using": 33207, "algorithms like": 2329, "data streams": 12696, "introduces new": 26893, "distribution words": 15660, "languages task": 28800, "task text": 55434, "levels text": 30249, "units meaning": 59533, "development methods": 14686, "independence assumption": 25492, "used prediction": 60267, "analysis propose": 2728, "exhibit better": 19001, "model words": 34542, "words texts": 62531, "study suggests": 53464, "study various": 53474, "words propose": 62486, "accuracy high": 985, "research works": 47144, "level approaches": 30065, "approaches natural": 3879, "handling rare": 23428, "architecture utilizes": 4098, "recurrent layers": 45617, "validate proposed": 61183, "model large": 34040, "tasks compare": 55546, "comparable performances": 9305, "toolkit provides": 57372, "incorporate social": 25364, "social scientists": 51605, "topic modelling": 57417, "raw corpus": 45034, "processing input": 42878, "based vector": 6128, "model conditional": 33694, "unsupervised topic": 59743, "models output": 35293, "output space": 39003, "methods estimating": 32843, "words languages": 62444, "shared embedding": 50466, "estimation methods": 18383, "data require": 12604, "require parallel": 46882, "evaluation method": 18639, "shown correlate": 50700, "correlate better": 11501, "previous ones": 42268, "network trained": 36815, "new loss": 37241, "classification loss": 8487, "obtain promising": 38183, "networks natural": 36879, "inference achieve": 25639, "achieve similar": 1198, "similar performance": 51058, "vector embedding": 61451, "feature embeddings": 20481, "embeddings feature": 17136, "use information": 59912, "approach results": 3676, "embeddings achieved": 17078, "sampling methods": 48503, "methods recent": 33007, "recent empirical": 45309, "research focused": 47041, "learned language": 29463, "data simple": 12665, "range semantic": 44933, "identifying relationships": 24463, "key aspect": 27295, "scientific knowledge": 48763, "knowledge graphs": 27504, "medical domain": 32202, "domain make": 16108, "make language": 31579, "electronic health": 16967, "health records": 23517, "perform knowledge": 40117, "graph completion": 23115, "relationships tokens": 46085, "knowledge reasoning": 27585, "multiple choice": 36180, "choice questions": 8337, "task obtain": 55246, "useful researchers": 60384, "researchers working": 47171, "diverse range": 15712, "extraction question": 20100, "provide solution": 44131, "answer given": 3036, "present unified": 42047, "max margin": 31949, "framework learns": 21555, "shows framework": 50778, "outperforms strong": 38948, "wealth information": 61873, "task domain": 55032, "entity resolution": 18145, "studied literature": 53228, "rank based": 44948, "proposed solutions": 43898, "specialized knowledge": 52034, "current systems": 12016, "systems fine": 54505, "grained entity": 23031, "entity typing": 18155, "type labels": 59058, "entity mentions": 18120, "entity mention": 18118, "define new": 13776, "task label": 55156, "label noise": 27718, "examples given": 18907, "set candidate": 50117, "labels individual": 27833, "entity types": 18154, "unique challenges": 59510, "task propose": 55299, "mentions text": 32310, "text features": 56579, "semantically close": 49381, "training example": 58093, "manner using": 31727, "learned embeddings": 29457, "embeddings text": 17228, "margin based": 31819, "based loss": 5823, "robust noisy": 48260, "noisy labels": 37622, "models type": 35637, "experiments public": 19500, "datasets demonstrate": 13210, "effectiveness robustness": 16811, "improvement accuracy": 24982, "accuracy compared": 948, "best method": 6779, "way using": 61837, "knowledge unsupervised": 27641, "methods identifying": 32892, "sequence data": 49918, "related fields": 45908, "level hierarchical": 30129, "word learning": 62223, "cognitive processes": 8895, "type token": 59073, "token frequencies": 57289, "enables model": 17443, "propose train": 43677, "uni directional": 59459, "paper shows": 39573, "apply natural": 3340, "methods classification": 32781, "prediction target": 41741, "computer aided": 9886, "hand crafted": 23385, "levels abstraction": 30236, "model incorporate": 33990, "incorporate contextual": 25348, "contextual features": 10769, "topics model": 57454, "specific nlp": 52119, "tasks word": 55965, "prediction results": 41737, "corpora english": 11196, "english documents": 17797, "google news": 22955, "models baseline": 34761, "baseline lstm": 6180, "lstm models": 31274, "accuracy improvements": 991, "news dataset": 37398, "demonstrates significant": 14041, "like question": 30494, "dialog systems": 14760, "tasks effective": 55600, "effective way": 16712, "extract meaningful": 19986, "sentences produce": 49770, "matching score": 31921, "inspired success": 26417, "based extracted": 5723, "recognition model": 45513, "successfully identify": 53746, "identify salient": 24441, "demonstrate superiority": 13984, "baselines work": 6321, "text clustering": 56493, "use small": 60019, "design novel": 14293, "novel objective": 37888, "clustering process": 8744, "optimize objective": 38563, "data unlabeled": 12753, "data iteratively": 12442, "representation current": 46502, "current neural": 11991, "results datasets": 47569, "method works": 32710, "better text": 6979, "clustering methods": 8742, "linear discriminant": 30655, "discriminant analysis": 15436, "performance degradation": 40282, "data size": 12667, "presents solution": 42105, "development data": 14672, "data required": 12605, "improved accuracy": 24944, "topics existing": 57450, "given small": 22786, "seed words": 49045, "uses neural": 60525, "related terms": 45944, "built pre": 7489, "methods focus": 32869, "parts input": 39905, "meanings sentences": 32035, "sentences work": 49809, "model represents": 34311, "sentence word": 49671, "cnn model": 8773, "model employed": 33812, "capture features": 7670, "similarity score": 51117, "model gets": 33938, "task achieves": 54877, "event data": 18780, "syntactic level": 54306, "syntactic parse": 54309, "new structure": 37327, "method learns": 32564, "relevant task": 46237, "embeddings generated": 17142, "reach state": 45053, "words annotated": 62364, "types lexical": 59098, "information sentiment": 26080, "training order": 58198, "key challenges": 27299, "challenges natural": 8062, "domains languages": 16268, "investigate robustness": 26984, "detection systems": 14531, "fundamental tasks": 21794, "tasks information": 55686, "corpus task": 11442, "preserve original": 42115, "systematic evaluation": 54396, "rnn architectures": 48181, "outperform best": 38785, "relative error": 46094, "setting achieve": 50315, "performance cross": 40270, "cross domain": 11813, "domain setting": 16156, "traditional methods": 57529, "methods similar": 33042, "similar task": 51070, "task named": 55231, "efficient inference": 16877, "parsing language": 39783, "modeling experiments": 34575, "experiments provide": 19499, "provide better": 44020, "model better": 33624, "profound impact": 43074, "sentence using": 49667, "models auto": 34742, "predicting correct": 41674, "data corpus": 12253, "hope work": 24017, "results based": 47518, "method efficiently": 32476, "empirical comparisons": 17321, "sample data": 48448, "methods studied": 33056, "models developed": 34909, "mainly english": 31471, "european languages": 18429, "multiple corpora": 36188, "models evaluated": 34977, "task knowledge": 55153, "task work": 55472, "vectors trained": 61498, "comparable accuracy": 9288, "single corpus": 51289, "corpus trained": 11447, "trained models": 57793, "combination multiple": 9045, "learning order": 29791, "methods analyzing": 32749, "activation patterns": 1468, "use multi": 59953, "architecture consisting": 4036, "trained predicting": 57842, "visual scene": 61668, "corresponding input": 11553, "predicting word": 41686, "method estimate": 32488, "final prediction": 20827, "sensitive information": 49500, "selective attention": 49163, "input token": 26349, "differently depending": 15148, "furthermore propose": 21833, "time steps": 57226, "term dependencies": 56233, "identified text": 24404, "text word": 56845, "key step": 27334, "ner using": 36685, "word boundary": 62121, "provide richer": 44125, "representations jointly": 46695, "yield significant": 63098, "jointly training": 27223, "absolute improvement": 744, "results main": 47710, "using character": 60601, "combination convolutional": 9035, "based attention": 5579, "based bidirectional": 5607, "bidirectional recurrent": 7081, "results source": 47852, "representations order": 46730, "applying method": 3366, "available pre": 5344, "trained word": 57915, "leads new": 29321, "dataset method": 12992, "downstream task": 16350, "task dialogue": 55019, "dialogue state": 14784, "domains introduce": 16262, "novel simple": 37923, "convolution neural": 11096, "cnn architecture": 8758, "architecture multi": 4066, "embeddings sentence": 17211, "sentence classification": 49525, "input embedding": 26271, "sets model": 50297, "model simpler": 34387, "time furthermore": 57159, "baseline models": 6188, "models article": 34727, "algorithm detect": 2268, "results presented": 47775, "different studies": 15084, "problem studied": 42669, "size paper": 51392, "propose combine": 43323, "combine different": 9065, "supervised classifiers": 53970, "training evaluating": 58090, "restaurant reviews": 47413, "measure robustness": 32061, "methods robust": 33025, "preprocessing steps": 41829, "applications text": 3252, "context aware": 10589, "solve tasks": 51692, "approach make": 3595, "semantic structure": 49355, "output representation": 38996, "variational autoencoder": 61243, "image classification": 24533, "order reduce": 38651, "reduce computational": 45653, "computational complexity": 9838, "optimization method": 38549, "reduction techniques": 45721, "ag news": 2044, "improves classification": 25119, "compared pure": 9443, "achieves competitive": 1316, "advanced methods": 1889, "methods state": 33051, "methods large": 32916, "developments field": 14712, "media analysis": 32158, "combining information": 9112, "media posts": 32180, "users explore": 60463, "multiple criteria": 36189, "environment propose": 18173, "despite lack": 14371, "direct supervision": 15258, "available speech": 5369, "text similar": 56770, "model implemented": 33974, "ground truth": 23251, "achieves 20": 1286, "outperforming previous": 38855, "10 absolute": 35, "require pre": 46883, "pre specified": 41514, "shown great": 50713, "methods require": 33017, "types training": 59124, "class classifier": 8398, "large labeled": 28893, "set based": 50111, "features limited": 20617, "certain domains": 7939, "using linguistic": 60772, "related knowledge": 45913, "novel joint": 37845, "mentions using": 32311, "using representations": 60909, "representations framework": 46673, "crafted features": 11679, "new domain": 37175, "language furthermore": 28080, "specific context": 52060, "context representation": 10705, "representation experiments": 46514, "genres news": 22643, "discussion forum": 15491, "systems trained": 54656, "data results": 12615, "domains general": 16258, "historical texts": 23962, "approach fails": 3537, "paper assess": 39274, "benchmark task": 6497, "evaluate domain": 18452, "unsupervised domain": 59694, "outperforms word": 38961, "methods better": 32772, "data necessary": 12506, "necessary build": 36529, "early detection": 16511, "relevant data": 46207, "data train": 12738, "public health": 44321, "prior systems": 42416, "media twitter": 32185, "effective data": 16641, "data difficult": 12284, "difficult acquire": 15155, "aims address": 2172, "forest classifier": 21301, "data gathered": 12379, "state level": 52701, "level statistics": 30216, "art accuracy": 4209, "level data": 30095, "recent approaches": 45292, "based artificial": 5577, "results short": 47834, "systems leverage": 54547, "texts model": 56903, "dialog act": 14751, "performing tasks": 40691, "support new": 54121, "digital humanities": 15210, "new field": 37204, "task textual": 55437, "tasks textual": 55933, "studies illustrate": 53270, "pretrained word": 42194, "tasks small": 55895, "scale multi": 48599, "agent based": 2054, "step paper": 52819, "novel question": 37904, "mathematical framework": 31933, "explore question": 19730, "automatically predict": 5192, "model features": 33884, "features use": 20688, "extracted features": 20010, "hybrid model": 24319, "parser model": 39760, "25 times": 335, "models integrated": 35137, "data little": 12468, "loss accuracy": 31082, "entailment task": 18007, "task significantly": 55376, "encoding models": 17572, "method jointly": 32555, "jointly learning": 27201, "phrase embeddings": 40838, "types embeddings": 59083, "scoring function": 48933, "jointly optimized": 27211, "embeddings experiments": 17133, "experiments apply": 19353, "joint learning": 27174, "method task": 32679, "human ratings": 24224, "previous best": 42248, "best model": 6781, "ensemble technique": 17981, "improves results": 25156, "tasks existing": 55625, "models focus": 35037, "data directly": 12287, "directly use": 15340, "novel end": 37814, "model source": 34399, "structure model": 53118, "model attention": 33585, "mechanism enables": 32111, "english japanese": 17827, "dataset demonstrate": 12883, "considerably outperforms": 10242, "outperforms sequence": 38941, "favorably state": 20456, "complementary approaches": 9587, "methods supervised": 33061, "current best": 11963, "path based": 39945, "research attention": 46987, "dependency paths": 14134, "methods extend": 32857, "improving state": 25197, "task sentence": 55357, "recognizing textual": 45559, "memory networks": 32274, "tasks similar": 55890, "similar model": 51054, "perspective propose": 40777, "comparing performance": 9483, "performance common": 40243, "convolutional recurrent": 11115, "recurrent attention": 45609, "tasks datasets": 55570, "problem evaluating": 42554, "models propose": 35369, "new datasets": 37168, "currently used": 12040, "introduce unified": 26875, "source software": 51798, "tasks enables": 55610, "trained sentence": 57862, "model set": 34365, "set new": 50199, "dialogue dataset": 14771, "present deep": 41885, "hierarchical recurrent": 23685, "network sequence": 36803, "recurrent units": 45629, "character word": 8226, "tags model": 54758, "independent language": 25500, "extend model": 19825, "task cross": 54986, "joint training": 27191, "results multiple": 47733, "languages benchmark": 28607, "demonstrate multi": 13947, "training improve": 58125, "problem sequence": 42648, "sequence seq2seq": 49975, "seq2seq learning": 49899, "language communication": 27995, "humans tend": 24289, "based seq2seq": 6024, "learning propose": 29823, "new model": 37259, "decoder structure": 13615, "word generation": 62211, "copying mechanism": 11138, "sub sequences": 53531, "sequences input": 50022, "summarization tasks": 53902, "tasks study": 55914, "selection methods": 49145, "number different": 37994, "tagging parsing": 54746, "parsing experiments": 39779, "experiments languages": 19453, "models higher": 35087, "works better": 62879, "final results": 20830, "art languages": 4271, "languages time": 28805, "reducing number": 45712, "recently works": 45476, "domain natural": 16117, "methods word": 33102, "advanced state": 1893, "art various": 4437, "propose scalable": 43611, "present experimental": 41909, "demonstrate performance": 13954, "performance proposed": 40504, "gram method": 23056, "demonstrate better": 13876, "better approach": 6850, "learned representations": 29479, "representations propose": 46743, "pipeline models": 40905, "use hidden": 59906, "test time": 56387, "languages universal": 28812, "universal dependencies": 59538, "art graph": 4266, "generation tasks": 22562, "text conditioned": 56504, "structured unstructured": 53179, "arbitrary number": 4014, "number input": 38010, "effective training": 16706, "using framework": 60700, "framework address": 21451, "problem generating": 42573, "language structured": 28507, "create new": 11711, "corpus demonstrate": 11319, "allows model": 2472, "relatively rare": 46126, "useful research": 60383, "applications study": 3250, "obtaining data": 38232, "makes good": 31623, "research task": 47127, "order build": 38600, "based wikipedia": 6133, "comparable data": 9296, "evaluation quality": 18691, "second method": 49011, "given domain": 22739, "order train": 38657, "past decade": 39930, "scale supervised": 48628, "learning researchers": 29845, "corpora available": 11179, "factoid question": 20296, "evaluated human": 18533, "human evaluators": 24161, "using automatic": 60573, "metrics including": 33174, "translation sentence": 58674, "similarity metrics": 51107, "evaluation criteria": 18600, "question generation": 44729, "baseline furthermore": 6170, "generated questions": 22310, "comparable quality": 9307, "real human": 45103, "human generated": 24167, "standing problem": 52552, "recently researchers": 45464, "word text": 62320, "neural net": 36991, "better capture": 6857, "patterns text": 39975, "lack training": 27922, "data words": 12776, "label propagation": 27721, "demonstrate state": 13977, "results especially": 47615, "semantic textual": 49365, "similarity sts": 51122, "evaluate semantic": 18503, "assessing quality": 4588, "information encoded": 25830, "set evaluating": 50147, "pairs annotated": 39168, "annotated semantic": 2913, "models gram": 35068, "data freely": 12373, "available recent": 5357, "good capturing": 22929, "capturing linguistic": 7738, "linguistic regularities": 30787, "simple linear": 51185, "words different": 62398, "proposed learning": 43799, "learning document": 29602, "document representations": 15826, "structure learned": 53115, "question design": 44727, "new document": 37174, "analogy task": 2584, "semantic regularities": 49324, "models results": 35456, "results reveal": 47815, "based document": 5689, "representations work": 46792, "work better": 62589, "conventional methods": 11007, "work examines": 62652, "demonstrate language": 13927, "driven model": 16430, "data target": 12720, "languages furthermore": 28679, "language typology": 28543, "factors contribute": 20307, "form text": 21338, "semantic patterns": 49317, "sentiment polarities": 49853, "data semantic": 12637, "used achieve": 60079, "achieve goal": 1142, "popular topic": 41195, "purpose study": 44411, "context sentiment": 10715, "systems operate": 54575, "access large": 826, "collection documents": 8983, "documents work": 15930, "explore task": 19740, "incorporating external": 25385, "domains training": 16298, "data scarce": 12623, "extraction new": 20089, "using reinforcement": 60903, "learns select": 29973, "select optimal": 49109, "based contextual": 5643, "reward function": 48067, "demonstrate significantly": 13975, "outperforms traditional": 38955, "methods use": 33093, "best accuracy": 6744, "regression model": 45814, "model actually": 33530, "online health": 38368, "various applications": 61299, "annotated dataset": 2889, "dataset supervised": 13108, "classifiers based": 8612, "cnn models": 8774, "task classifying": 54952, "cnn classifier": 8764, "analysis topic": 2782, "embedding word": 17072, "neural model": 36971, "trained using": 57908, "visual objects": 61662, "given textual": 22796, "multilayer perceptron": 36059, "perceptron mlp": 40058, "embedding layer": 17033, "mentioned text": 32300, "text work": 56847, "work contributes": 62612, "contributes new": 10937, "method select": 32644, "features fine": 20587, "tuning method": 58926, "improves f1": 25129, "f1 measure": 20186, "task semeval": 55354, "semeval 2013": 49426, "attempt solve": 4691, "prepositional phrase": 41820, "motivation work": 35885, "trained english": 57722, "present technique": 42037, "male female": 31681, "generated approach": 22268, "potential errors": 41389, "lines code": 30687, "using generic": 60706, "goal natural": 22892, "text passages": 56694, "understanding work": 59418, "investigate machine": 26965, "machine comprehension": 31298, "limited size": 30616, "dataset neural": 13008, "neural approach": 36929, "simple neural": 51199, "using manually": 60788, "level sentence": 30207, "networks operate": 36886, "embedding representations": 17058, "text trained": 56817, "designed help": 14319, "data parallel": 12535, "model sets": 34366, "sets new": 50298, "feature engineered": 20482, "neural approaches": 36930, "discriminative model": 15446, "single document": 51296, "model selects": 34347, "summary based": 53913, "learned large": 29465, "units text": 59536, "improve cross": 24837, "cross sentence": 11869, "trained end": 57720, "outperforms prior": 38931, "linguistic quality": 30786, "generally considered": 22164, "models outperformed": 35289, "target words": 54857, "set improvement": 50168, "learning multi": 29769, "parallel sentence": 39652, "outperform monolingual": 38804, "monolingual counterparts": 35795, "available test": 5376, "models lms": 35198, "network nn": 36774, "models represent": 35438, "superior performances": 53940, "paper examine": 39353, "chinese speech": 8321, "task visual": 55467, "range nlp": 44926, "tasks visual": 55963, "multimodal tasks": 36156, "tasks image": 55668, "description text": 14249, "augments existing": 4993, "existing multimodal": 19113, "labels propose": 27846, "using textual": 60989, "embeddings textual": 17229, "embeddings perform": 17188, "supervised setting": 54044, "sentence relation": 49629, "relation modeling": 45988, "pairs quality": 39212, "complex semantic": 9658, "challenge propose": 8009, "architecture jointly": 4056, "leverage pre": 30282, "representations inputs": 46692, "lstm learn": 31269, "approach consistently": 3465, "methods standard": 33050, "standard evaluation": 52490, "evaluation datasets": 18605, "datasets natural": 13341, "help language": 23574, "classifiers different": 8614, "different error": 14919, "error types": 18231, "models motivated": 35234, "issues present": 27098, "present neural": 41955, "approach language": 3581, "core component": 11145, "network attention": 36701, "approach dataset": 3477, "noisy user": 37628, "generated text": 22325, "collected english": 8962, "model method": 34099, "f_ score": 20234, "demonstrate training": 13992, "training network": 58187, "used millions": 60239, "millions people": 33264, "share information": 50457, "variety domains": 61268, "tailored specific": 54769, "specific user": 52170, "depend availability": 14098, "existing unsupervised": 19167, "unsupervised semi": 59728, "approaches focused": 3830, "focused identifying": 21224, "contrast work": 10892, "work proposes": 62792, "data achieve": 12110, "performance fine": 40349, "grained analysis": 23021, "discuss limitations": 15472, "word speech": 62314, "domains demonstrate": 16245, "task existing": 55061, "existing ones": 19122, "modern nlp": 35716, "rely heavily": 46286, "engineered features": 17764, "combine word": 9075, "word contextual": 62133, "features combination": 20538, "large numbers": 28928, "reduce parameter": 45678, "parameter space": 39680, "improve prediction": 24906, "furthermore investigate": 21825, "investigate methods": 26966, "tasks relation": 55845, "approaches language": 3853, "uses simple": 60536, "driven approaches": 16421, "training approach": 57934, "approach requires": 3675, "behavior model": 6393, "time compared": 57124, "using existing": 60684, "unknown target": 59558, "easier train": 16529, "train character": 57571, "based ones": 5925, "english czech": 17791, "models handle": 35076, "words best": 62372, "best achieves": 6746, "achieves new": 1348, "20 bleu": 222, "character models": 8220, "models successfully": 35553, "learn generate": 29374, "language complex": 27998, "english source": 17876, "ensemble approach": 17970, "search query": 48980, "based entities": 5705, "entities compared": 18039, "compared traditional": 9466, "keyword based": 27349, "model identify": 33970, "approach instead": 3574, "varying complexity": 61427, "collect real": 8951, "models contextual": 34860, "like wikipedia": 30511, "approach utilizes": 3735, "approach data": 3476, "set created": 50132, "set contains": 50129, "use supervised": 60034, "semantics model": 49407, "trained wikipedia": 57914, "micro averaged": 33220, "score 97": 48830, "approach question": 3662, "question multiple": 44738, "relevance score": 46194, "relevance scores": 46195, "statistical modeling": 52755, "model gives": 33940, "baseline approach": 6154, "traditional nlp": 57538, "asr transcripts": 4564, "essential task": 18335, "models enable": 34958, "joint multi": 27181, "multilingual neural": 36104, "model character": 33652, "dimensional vectors": 15239, "process results": 42828, "based classifier": 5620, "produce human": 42988, "error patterns": 18221, "amr graphs": 2574, "manually crafted": 31769, "method selecting": 32645, "amr graph": 2573, "level neural": 30167, "amr parser": 2575, "semeval 2016": 49428, "represent words": 46486, "words embeddings": 62406, "embeddings machine": 17169, "machine learned": 31305, "learned vector": 29490, "embeddings produced": 17195, "large multilingual": 28914, "higher previous": 23838, "commonsense knowledge": 9234, "deep language": 13695, "received lot": 45262, "hindered lack": 23927, "lack proper": 27907, "proper evaluation": 43251, "evaluation framework": 18621, "framework paper": 21580, "paper attempts": 39276, "problem new": 42616, "cloze test": 8726, "evaluation corpus": 18599, "everyday life": 18804, "story generation": 52880, "generation experimental": 22456, "baselines state": 6302, "high score": 23799, "understanding paper": 59378, "corpora recent": 11237, "based architecture": 5575, "datasets showing": 13424, "showing significant": 50689, "framework neural": 21571, "data scenarios": 12628, "effective low": 16667, "train high": 57595, "model transfer": 34485, "transfer learned": 58373, "learned parameters": 29471, "method improve": 32529, "average bleu": 5403, "word replacement": 62282, "performance low": 40423, "resource machine": 47252, "syntax based": 54347, "additionally using": 1737, "art low": 4277, "translation word": 58703, "ambiguous words": 2530, "text large": 56645, "used approaches": 60092, "approaches perform": 3894, "word used": 62331, "performance work": 40632, "features derived": 20556, "global features": 22828, "embeddings results": 17206, "network classifiers": 36718, "based long": 5821, "accuracy 95": 927, "knowledge extraction": 27480, "graph construction": 23117, "shallow parsing": 50443, "design hierarchical": 14285, "entity knowledge": 18111, "finally experimental": 20856, "results prove": 47787, "prove method": 43981, "effective communication": 16636, "structural properties": 53081, "based framework": 5740, "framework modeling": 21564, "narrative text": 36383, "text modeling": 56668, "network use": 36819, "use sentiment": 60009, "network framework": 36744, "unique characteristics": 59511, "social interactions": 51565, "study problem": 53439, "english code": 17786, "mixed social": 33410, "data developed": 12278, "knowledge attempt": 27398, "bit ly": 7186, "models presenting": 35349, "presenting novel": 42066, "novel multi": 37877, "existing open": 19124, "systems typically": 54658, "conversation propose": 11035, "context candidate": 10596, "aims determine": 2186, "problem nlp": 42617, "applications recently": 3245, "recently deep": 45414, "problem significant": 42655, "improvements achieved": 25046, "propose view": 43701, "idea propose": 24372, "deep architecture": 13682, "constructed capture": 10407, "capture word": 7722, "local interactions": 30943, "score calculated": 48838, "exact matching": 18854, "dynamic programming": 16489, "demonstrate attention": 13872, "grammatical error": 23069, "automated evaluation": 5041, "scientific writing": 48773, "decoder models": 13601, "particularly effective": 39880, "based counterpart": 5656, "highest performing": 23855, "representation important": 46527, "long history": 31014, "work knowledge": 62701, "graph embedding": 23130, "entity classification": 18099, "representation method": 46550, "method knowledge": 32556, "generative process": 22606, "semantic units": 49372, "2016 task": 261, "task 10": 54866, "primarily based": 42361, "based combination": 5625, "combination word": 9053, "works using": 62916, "networks based": 36833, "boost performance": 7256, "selection problem": 49148, "works used": 62915, "used deep": 60141, "methods like": 32928, "rnn cnn": 48190, "end learning": 17680, "similarity metric": 51106, "metric learning": 33119, "tokens proposed": 57334, "model demonstrates": 33746, "qa dataset": 44449, "memory bi": 32243, "networks recently": 36902, "proven successful": 43994, "little known": 30879, "input representations": 26327, "representations target": 46767, "languages data": 28632, "data sizes": 12668, "novel bi": 37779, "auxiliary loss": 5233, "obtains state": 38258, "22 languages": 318, "especially morphologically": 18287, "analysis suggests": 2772, "semeval 2015": 49427, "annotation process": 2961, "shown approach": 50695, "sparsity issues": 51980, "dataset obtain": 13015, "cluster based": 8733, "adaptive training": 1581, "method deep": 32452, "used decoding": 60140, "method large": 32558, "spontaneous speech": 52370, "task evaluated": 55056, "baseline word": 6224, "11 relative": 90, "relative reduction": 46110, "language challenge": 27984, "response problem": 47399, "build training": 7431, "information parallel": 26003, "labelling task": 27806, "rich features": 48100, "features finally": 20586, "labelled training": 27804, "data translating": 12744, "universal schema": 59545, "textual patterns": 56974, "base construction": 5542, "entity pairs": 18124, "relations represented": 46054, "generalization unseen": 22133, "unseen text": 59656, "work step": 62827, "step propose": 52825, "explicit entity": 19614, "entity pair": 18123, "representations instead": 46693, "instead learning": 26455, "representations entity": 46653, "benchmark demonstrate": 6461, "match performance": 31899, "comparable model": 9298, "model explicit": 33856, "attention relation": 4820, "types demonstrate": 59081, "seen training": 49066, "training present": 58214, "performance according": 40177, "text genres": 56609, "annotation automatic": 2936, "rater agreement": 45019, "memory neural": 32276, "attracted wide": 4889, "lstm architecture": 31239, "architecture consists": 4037, "mechanism paper": 32133, "accurately predict": 1097, "cognitive process": 8894, "article study": 4461, "corpus work": 11461, "tasks address": 55494, "words single": 62515, "unrelated words": 59634, "phrase generation": 40840, "alignment word": 2389, "handcrafted features": 23400, "single attention": 51285, "characteristics specific": 8243, "limits effectiveness": 30641, "effectiveness tasks": 16816, "propose architecture": 43299, "architecture based": 4028, "based gated": 5744, "attention pooling": 4809, "determine given": 14556, "given task": 22792, "apply framework": 3329, "support research": 54123, "linguistic cues": 30762, "detection social": 14528, "difficult identify": 15169, "classification approach": 8435, "lda topic": 29251, "approach extracts": 3535, "information topic": 26128, "dataset self": 13076, "collected dataset": 8959, "methods terms": 33071, "averaged f1": 5421, "called word": 7556, "embedding language": 17032, "languages natural": 28736, "dataset collection": 12846, "variety language": 61275, "language families": 28066, "use parallel": 59971, "languages perform": 28749, "12 different": 105, "human subjects": 24246, "model humans": 33966, "similarity languages": 51099, "dialect identification": 14746, "work word": 62859, "task furthermore": 55099, "lstm language": 31265, "basic model": 6332, "model outperforming": 34155, "produce novel": 42995, "fine tune": 20947, "questions ask": 44773, "context self": 10709, "models character": 34811, "model designed": 33753, "overcome problems": 39072, "inter sentence": 26586, "representation level": 46546, "learning scheme": 29859, "order alleviate": 38593, "network end": 36738, "end fashion": 17673, "quantitatively qualitatively": 44633, "act classification": 1452, "capture implicit": 7680, "implicit explicit": 24659, "semantics sentence": 49414, "performance end": 40317, "generating text": 22400, "written using": 63014, "python library": 44440, "finnish english": 21062, "tasks significant": 55888, "obtained best": 38204, "better training": 6983, "various components": 61316, "semantic alignment": 49232, "multiclass classification": 36047, "results algorithm": 47495, "systems terms": 54650, "dataset terms": 13115, "terms overall": 56304, "overall score": 39049, "analysis social": 2761, "important challenging": 24706, "classification data": 8450, "data requires": 12607, "requires modeling": 46944, "modeling various": 34635, "various contexts": 61318, "social context": 51559, "use hierarchical": 59907, "hierarchical lstm": 23676, "rich contexts": 48094, "particularly long": 39884, "range context": 44908, "context experimental": 10633, "perform sentiment": 40137, "single sentences": 51334, "textual context": 56954, "sentences language": 49745, "reducing time": 45713, "time required": 57205, "higher degree": 23821, "present set": 42010, "set relevant": 50238, "relevant aspects": 46199, "set context": 50130, "achieved average": 1218, "related context": 45890, "method evaluated": 32490, "context independent": 10657, "work mainly": 62718, "mainly focused": 31473, "datasets experiment": 13262, "manual annotation": 31731, "data publicly": 12579, "publicly released": 44362, "context existing": 10632, "analysis scientific": 2748, "tools help": 57379, "various textual": 61407, "summarization systems": 53900, "selection approach": 49132, "content input": 10531, "input document": 26268, "maps input": 31811, "important ones": 24750, "ones used": 38344, "used classification": 60114, "features introduce": 20607, "introduce different": 26798, "identify important": 24425, "important concepts": 24713, "select informative": 49107, "informative content": 26170, "extensive evaluations": 19869, "suggest using": 53832, "models alleviate": 34706, "alleviate issues": 2410, "scale chinese": 48557, "english task": 17888, "enhanced model": 17933, "language phenomena": 28380, "requires high": 46932, "time large": 57172, "large memory": 28908, "usage paper": 59803, "issue introducing": 27064, "output vocabulary": 39009, "time memory": 57177, "target vocabulary": 54855, "model bilingual": 33631, "traditional machine": 57526, "model experimental": 33850, "scale english": 48569, "achieves better": 1307, "better translation": 6985, "performance bleu": 40221, "models recurrent": 35416, "sequences different": 50021, "using shared": 60935, "shared representations": 50486, "cross linguistically": 11862, "shared feature": 50471, "intrinsic evaluation": 26768, "evaluation downstream": 18611, "phonetic features": 40826, "models ii": 35101, "higher quality": 23840, "quality learned": 44545, "bayesian optimization": 6361, "representations features": 46667, "ranking function": 44970, "variety downstream": 61269, "entity information": 18110, "large knowledge": 28891, "meaningful semantic": 32027, "framework handle": 21532, "patterns training": 39976, "data test": 12727, "proven difficult": 43992, "limited datasets": 30580, "contrast human": 10877, "human behaviour": 24115, "similar tasks": 51071, "tasks provide": 55826, "feedforward neural": 20719, "network paper": 36779, "does contain": 15939, "new results": 37306, "semantics natural": 49408, "lexical meaning": 30372, "data report": 12601, "health issues": 23516, "limited lack": 30596, "lack large": 27901, "data labeled": 12448, "present large": 41936, "set novel": 50204, "analysis methods": 2696, "methods measure": 32942, "applying techniques": 3379, "datasets focused": 13279, "certain topic": 7947, "datasets contain": 13197, "contain words": 10477, "various categories": 61312, "similarity scores": 51118, "scores words": 48931, "work discuss": 62639, "evaluation procedure": 18680, "humans provide": 24285, "provide list": 44099, "commonalities differences": 9213, "human judgements": 24181, "pairwise similarity": 39240, "believe proposed": 6411, "datasets test": 13456, "model score": 34338, "slightly modified": 51437, "search decoder": 48968, "practical advantages": 41456, "pieces evidence": 40879, "language question": 28457, "based evidence": 5712, "model integrate": 34009, "baselines demonstrate": 6251, "demonstrate benefit": 13874, "comprehension model": 9767, "task small": 55381, "small datasets": 51471, "datasets research": 13404, "new open": 37274, "open dataset": 38417, "news based": 37389, "apply proposed": 3347, "entailment model": 18002, "model similar": 34384, "test questions": 56363, "dataset improve": 12959, "improve neural": 24877, "automatic post": 5114, "problem achieve": 42496, "different models": 14996, "model allowing": 33561, "output source": 39002, "string matching": 52993, "used control": 60128, "translation output": 58652, "data generate": 12383, "unseen test": 59655, "submitted shared": 53587, "task large": 55163, "micro blogging": 33223, "based recent": 5979, "advances deep": 1909, "models detect": 34905, "score 92": 48827, "including social": 25300, "media platforms": 32176, "platforms twitter": 40956, "paper explored": 39368, "speech act": 52251, "set manually": 50190, "method achieved": 32358, "performance average": 40205, "score 70": 48805, "different granularities": 14945, "type specific": 59070, "specific topic": 52162, "perform novel": 40126, "analysis existing": 2661, "existing model": 19107, "model previously": 34234, "previously shown": 42350, "learns identify": 29961, "relations present": 46051, "model exploits": 33859, "analysis model": 2697, "models literature": 35192, "multiple data": 36190, "separate encoders": 49875, "information sentence": 26078, "propose deep": 43351, "architecture model": 4063, "specifically introduce": 52209, "informative features": 26172, "features experiments": 20577, "large datasets": 28869, "communication channels": 9248, "situational awareness": 51370, "processing social": 42938, "availability data": 5246, "human annotated": 24095, "present human": 41925, "corpora collected": 11183, "19 different": 185, "train machine": 57603, "learning classifiers": 29557, "word2vec word": 62352, "variations paper": 61251, "data corresponding": 12254, "data information": 12430, "approach order": 3619, "speaker information": 51999, "experiments proposed": 19493, "method achieve": 32355, "points improvement": 41076, "publicly release": 44359, "data manual": 12482, "annotation propose": 2963, "propose interactive": 43421, "interactive multimodal": 26631, "natural text": 36468, "referential games": 45755, "language need": 28355, "provide promising": 44115, "agents trained": 2065, "trained way": 57910, "analysis annotation": 2611, "text semi": 56760, "automatic annotation": 5069, "annotation tool": 2976, "salient features": 48440, "outperform standard": 38821, "standard models": 52507, "models distinguish": 34930, "model novel": 34133, "novel embedding": 37811, "extraction based": 20050, "dependency path": 14133, "specifically method": 52216, "treated sequence": 58734, "embedding features": 17029, "context dependency": 10609, "extraction experimental": 20065, "results semeval": 47822, "features achieve": 20515, "method incorporates": 32539, "yields better": 63117, "extraction models": 20083, "sentence given": 49566, "propose variational": 43700, "generates target": 22358, "hidden representations": 23645, "representations source": 46758, "source sentences": 51796, "model introduces": 34020, "continuous latent": 10847, "model underlying": 34498, "underlying semantics": 59277, "guide generation": 23334, "generation target": 22557, "perform efficient": 40094, "posterior inference": 41361, "scale training": 48631, "build neural": 7416, "lower bound": 31207, "german translation": 22678, "baselines paper": 6284, "interactions multiple": 26618, "levels granularity": 30240, "words sub": 62524, "attention network": 4795, "learn interactions": 29384, "attention matrix": 4769, "soft attention": 51620, "attention weight": 4851, "convolution based": 11092, "based learned": 5810, "incorporate semantic": 25363, "achieves substantial": 1382, "prediction accuracy": 41690, "component models": 9707, "information multiple": 25980, "multiple systems": 36297, "systems improve": 54526, "approach different": 3489, "cold start": 8928, "lingual entity": 30701, "object detection": 38082, "detection tasks": 14533, "tasks obtain": 55773, "obtain new": 38181, "detection task": 14532, "approach evaluation": 3522, "metrics accuracy": 33134, "accuracy precision": 1026, "nlp evaluation": 37486, "collecting large": 8977, "number human": 38009, "human responses": 24235, "model compares": 33675, "performance human": 40375, "able provide": 716, "performance standard": 40573, "accuracy score": 1041, "systems developed": 54478, "data human": 12410, "translation image": 58618, "generation systems": 22556, "according automatic": 853, "metrics bleu": 33144, "methods detecting": 32820, "domain corpus": 16034, "general language": 22064, "language corpus": 28009, "domain present": 16136, "tools available": 57378, "modeling single": 34623, "multi agent": 35937, "specific data": 52065, "data case": 12199, "receives input": 45270, "structure present": 53129, "convergence speed": 11025, "preference learning": 41790, "model generate": 33929, "asking questions": 4524, "attention model": 4787, "model conditioned": 33695, "model evaluated": 33843, "model help": 33955, "performance measured": 40434, "indicate model": 25527, "architectures using": 4129, "improves performances": 25147, "cloze style": 8724, "documents model": 15896, "gated attention": 21994, "architecture novel": 4070, "mechanism based": 32102, "specific representations": 52138, "results benchmarks": 47523, "benchmarks task": 6545, "cnn daily": 8765, "daily mail": 12087, "news stories": 37417, "dataset effectiveness": 12904, "ablation study": 659, "study comparing": 53342, "code available": 8792, "dominant approach": 16306, "model increases": 33995, "convolutional layers": 11105, "report improvements": 46437, "tasks best": 55524, "processing propose": 42929, "uses attention": 60491, "stanford natural": 52556, "inference snli": 25691, "snli dataset": 51551, "obtain state": 38193, "magnitude fewer": 31416, "order information": 38629, "intra sentence": 26760, "sentence attention": 49518, "order account": 38587, "yields improvements": 63126, "automatically answer": 5142, "questions like": 44794, "provide rich": 44124, "base propose": 5549, "deep recurrent": 13744, "neural embeddings": 36949, "achieves accuracy": 1302, "largest public": 29099, "outperforms current": 38891, "propose enhance": 43371, "decoder neural": 13605, "external memory": 19950, "representation source": 46581, "designed better": 14310, "capture information": 7682, "set neural": 50198, "low frequency": 31151, "sentence propose": 49625, "method alleviate": 32377, "using attention": 60566, "attention vector": 4850, "model select": 34343, "probabilities model": 42471, "methods combine": 32787, "experiments corpora": 19391, "given english": 22740, "automatic alignment": 5067, "suffers data": 53789, "small size": 51502, "data english": 12323, "paper formalize": 39383, "alignment problem": 2380, "based syntax": 6077, "address data": 1752, "experiments verify": 19559, "method english": 32483, "results significantly": 47846, "interactions different": 26616, "consistently performs": 10309, "expectation maximization": 19194, "languages current": 28629, "paid attention": 39142, "play significant": 40978, "data captured": 12198, "measure extent": 32052, "processing semantic": 42936, "data achieved": 12112, "various topics": 61408, "composition model": 9740, "dependency based": 14118, "strong ability": 52999, "art wide": 4439, "completion task": 9613, "report new": 46439, "extraction tool": 20124, "designed extract": 14314, "based sequential": 6029, "sequential labeling": 50044, "automatic manual": 5103, "provide access": 44003, "access training": 830, "compare traditional": 9372, "compare models": 9349, "models traditional": 35600, "distinct tasks": 15595, "tasks sequence": 55877, "treebank ptb": 58764, "corpora results": 11240, "effective paper": 16682, "words learning": 62447, "ability predict": 633, "better word": 6993, "based embedding": 5695, "models help": 35083, "better human": 6898, "research neural": 47080, "language agnostic": 27956, "apply neural": 3342, "task arabic": 54909, "compare standard": 9366, "extensive comparison": 19859, "comparison using": 9510, "various configurations": 61317, "perform comparably": 40075, "domain test": 16204, "world deployment": 62937, "achieved impressive": 1244, "impressive results": 24815, "using little": 60774, "external linguistic": 19948, "learning capability": 29551, "mt models": 35921, "models does": 34935, "does make": 15957, "easily incorporated": 16543, "layer encoder": 29184, "attentional encoder": 4857, "features addition": 20518, "dependency labels": 14124, "labels input": 27834, "english romanian": 17868, "quality according": 44487, "perplexity bleu": 40738, "nlp tool": 37557, "proven effective": 43993, "effective text": 16704, "generation sequence": 22545, "local word": 30953, "work introduce": 62691, "training scheme": 58238, "learn global": 29376, "training loss": 58159, "efficient training": 16904, "highly optimized": 23908, "baselines different": 6253, "different sequence": 15066, "sequence tasks": 50010, "use deep": 59863, "actor critic": 1485, "learning rl": 29851, "domain expertise": 16067, "remove need": 46375, "markov decision": 31845, "decision processes": 13568, "practical deployment": 41462, "data efficiently": 12314, "text understanding": 56829, "understanding machine": 59362, "mapping words": 31807, "aware word": 5477, "remains challenging": 46327, "training large": 58147, "tasks new": 55765, "datasets propose": 13379, "dictionary learning": 14806, "mechanism learn": 32126, "learn good": 29377, "learning phase": 29804, "embeddings extracted": 17134, "tasks test": 55929, "pre training": 41566, "critical information": 11783, "vast majority": 61440, "materials methods": 31927, "methods introduce": 32907, "systems compare": 54454, "systems datasets": 54470, "dataset largest": 12980, "identification dataset": 24385, "performance previously": 40499, "engineering paper": 17769, "effective model": 16674, "approach leverage": 3588, "network shared": 36804, "shared words": 50511, "words enables": 62407, "size model": 51390, "effective use": 16709, "approach standard": 3702, "standard datasets": 52484, "data employ": 12319, "external resources": 19952, "resources knowledge": 47308, "crafted rules": 11682, "systems employ": 54484, "making difficult": 31651, "knowledge achieve": 27387, "introduce data": 26794, "conditional independence": 9995, "train sequence": 57630, "rnn model": 48202, "model structural": 34414, "parsing datasets": 39777, "datasets leading": 13315, "leading new": 29293, "dataset models": 13000, "models comparable": 34834, "task benchmark": 54932, "task reinforcement": 55326, "action space": 1458, "fixed window": 21085, "directional lstm": 15283, "different experimental": 14925, "language essential": 28053, "grounded language": 23261, "present effective": 41895, "representation model": 46552, "conditional language": 9996, "modeling task": 34628, "probing model": 42492, "model output": 34168, "training prediction": 58213, "limited applicability": 30569, "prediction approach": 41694, "encoder trained": 17544, "given training": 22798, "instead model": 26457, "multi aspect": 35940, "aspect sentiment": 4533, "annotated test": 2920, "test cases": 56335, "illustrate method": 24517, "multi way": 36041, "enables zero": 17452, "zero resource": 63151, "multilingual model": 36096, "model translate": 34490, "pivot based": 40917, "learning al": 29510, "networks cnns": 36838, "manually labeled": 31781, "minimal effort": 33287, "tuning task": 58965, "al strategies": 2248, "contrast traditional": 10891, "based uncertainty": 6117, "uncertainty sampling": 59232, "learning discriminative": 29594, "approach document": 3493, "jointly considering": 27193, "representations model": 46718, "stochastic process": 52857, "embeddings best": 17088, "problem question": 42640, "reasoning multiple": 45207, "multiple facts": 36216, "propose query": 43596, "context sentences": 10714, "sentence time": 49658, "time experiments": 57156, "produces state": 43035, "tasks real": 55834, "oriented dialog": 38696, "dataset addition": 12803, "formulation allows": 21393, "time complexity": 57125, "training inference": 58129, "remarkable progress": 46360, "rely parallel": 46296, "usually limited": 61057, "quality coverage": 44503, "especially low": 18284, "training nmt": 58194, "corpora using": 11256, "target target": 54845, "target source": 54842, "models serve": 35486, "language source": 28490, "language experiments": 28061, "dataset approach": 12812, "systems present": 54595, "evaluate new": 18478, "model natural": 34117, "generation nlg": 22507, "current generation": 11979, "generation context": 22439, "context user": 10740, "use standard": 60026, "present information": 41928, "results users": 47898, "length information": 30027, "cognitive load": 8892, "compared base": 9382, "learned policy": 29473, "prior approaches": 42393, "common linguistic": 9184, "sentences multiple": 49755, "multiple target": 36298, "representation work": 46607, "context neural": 10680, "decoder architectures": 13587, "end goal": 17675, "specifically consider": 52187, "consider case": 10208, "available training": 5379, "stage model": 52434, "model converts": 33719, "jointly learns": 27202, "representation evaluate": 46510, "evaluate model": 18472, "model tasks": 34446, "architectures paper": 4119, "end method": 17683, "generating short": 22396, "semantically diverse": 49385, "learning architecture": 29523, "generated content": 22276, "modified version": 35732, "learning context": 29569, "large space": 29015, "larger context": 29069, "simpler models": 51231, "models faster": 35020, "model finally": 33888, "datasets develop": 13226, "high number": 23755, "ill formed": 24510, "instead relying": 26462, "relying solely": 46310, "data samples": 12620, "comprehensive data": 9785, "model created": 33729, "created using": 11734, "denoising autoencoder": 14065, "data providing": 12577, "use high": 59908, "topics using": 57464, "able model": 707, "need pre": 36588, "spam detection": 51917, "algorithms proposed": 2336, "approach achieving": 3404, "97 accuracy": 571, "understanding requires": 59394, "requires deep": 46922, "deep semantic": 13749, "discourse information": 15390, "discriminatively trained": 15450, "trained neural": 57825, "generate embeddings": 22197, "using perplexity": 60857, "helps improve": 23608, "systems complex": 54456, "consider information": 10211, "information traditional": 26129, "traditional media": 57528, "ignoring rich": 24502, "provided user": 44175, "optimization framework": 38548, "framework designed": 21491, "automatic evaluations": 5090, "datasets cover": 13199, "produces informative": 43032, "systems human": 54522, "agreement disagreement": 2105, "detection online": 14508, "online discussions": 38364, "segment level": 49075, "existing general": 19070, "sentiment lexicons": 49851, "performance evaluate": 40325, "tagging model": 54742, "online debates": 38360, "model shown": 34374, "shown outperform": 50731, "datasets example": 13257, "egyptian arabic": 16946, "efforts focused": 16939, "using tools": 60991, "model class": 33658, "features neural": 20628, "model unsupervised": 34504, "ranked second": 44958, "data deep": 12270, "coverage semantic": 11652, "opposite direction": 38518, "position paper": 41268, "dataset evaluate": 12910, "computational models": 9851, "models simply": 35513, "art language": 4268, "novel benchmark": 37776, "challenging test": 8158, "encourage development": 17591, "development new": 14692, "new models": 37261, "context natural": 10678, "tasks explore": 55633, "introduce general": 26808, "various dimensions": 61326, "highly related": 23909, "terms evaluation": 56285, "better baseline": 6852, "optimal results": 38531, "provide qualitative": 44116, "provide set": 44128, "order generate": 38622, "score 67": 48802, "domain general": 16077, "general semantic": 22090, "current approaches": 11960, "approaches largely": 3856, "largely rely": 29064, "additional supervision": 1701, "generalize domains": 22142, "present generative": 41920, "demonstrate application": 13864, "sentence generation": 49565, "generation work": 22581, "novel application": 37755, "prediction present": 41728, "work introduced": 62694, "prediction addition": 41691, "limitations work": 30559, "work examine": 62651, "level embeddings": 30107, "hot encoding": 24029, "large multi": 28913, "class multi": 8407, "demonstrate efficiency": 13908, "performance benefits": 40215, "significant portion": 50912, "performance automatic": 40202, "noisy environments": 37617, "training strategy": 58275, "strategy called": 52929, "multi stage": 36010, "stage training": 52445, "use method": 59946, "training samples": 58237, "methods evaluated": 32845, "end speech": 17710, "wall street": 61762, "street journal": 52968, "journal corpus": 27228, "compared conventional": 9398, "training method": 58173, "core problem": 11154, "space previous": 51885, "work relied": 62803, "dataset increase": 12965, "increase coverage": 25410, "model combining": 33669, "recurrent convolutional": 45611, "highway network": 23924, "directional recurrent": 15285, "network bi": 36712, "outperforms common": 38882, "models cnn": 34819, "cnn rnn": 8775, "task analysis": 54900, "sequence length": 49947, "good representation": 22941, "long text": 31043, "problem requires": 42645, "select correct": 49102, "answering model": 3081, "key insight": 27320, "semantic parses": 49307, "neural baselines": 36941, "users paper": 60472, "challenges involved": 8056, "prediction models": 41720, "novel semi": 37915, "supervised neural": 54024, "set consisting": 50124, "shown superior": 50756, "successfully deployed": 53743, "production systems": 43052, "advantages proposed": 1954, "user engagement": 60410, "proposed task": 43908, "propose systematic": 43656, "analyze behavior": 2806, "behavior models": 6394, "models step": 35536, "models attention": 34734, "attention attention": 4713, "despite recent": 14381, "hidden representation": 23644, "understanding models": 59366, "models studied": 35542, "hidden state": 23647, "noise work": 37607, "visual analysis": 61648, "focus understanding": 21210, "tool allows": 57356, "focus local": 21176, "domain use": 16223, "tool analyzing": 57357, "properties dataset": 43259, "tool used": 57368, "domain different": 16048, "novel hybrid": 37839, "model generalization": 33921, "generalization ability": 22114, "ability neural": 628, "layer model": 29190, "task neural": 55237, "learning recently": 29835, "promising paradigm": 43171, "paradigm machine": 39623, "description paper": 14246, "recently published": 45459, "used neural": 60249, "build systems": 7429, "2016 shared": 259, "shared tasks": 50508, "tasks automatic": 55514, "dimensions word": 15245, "structures natural": 53189, "measure quality": 32060, "just like": 27251, "positive correlation": 41278, "correlation model": 11526, "model downstream": 33790, "downstream semantic": 16349, "evaluation tool": 18741, "space semantic": 51897, "models considered": 34849, "structure propose": 53130, "simple baselines": 51140, "computing power": 9904, "text make": 56654, "essential building": 18322, "learning unsupervised": 29925, "learn semantics": 29420, "words entities": 62409, "aim learn": 2154, "model shared": 34368, "strategy improves": 52937, "unsupervised framework": 59698, "existing domain": 19060, "chen et": 8284, "al 2011": 2233, "specific constraints": 52058, "features evaluate": 20572, "task outperforms": 55260, "outperforms unsupervised": 38957, "baselines existing": 6258, "summarization method": 53890, "rouge score": 48354, "reach competitive": 45045, "models need": 35248, "knowledge distillation": 27439, "models domains": 34938, "standard knowledge": 52495, "level prediction": 30180, "novel sequence": 37919, "eliminate need": 16986, "teacher model": 55992, "model best": 33620, "student model": 53212, "loss performance": 31101, "performance significantly": 40561, "trained knowledge": 57755, "greedy decoding": 23243, "model 13": 33483, "13 times": 128, "times fewer": 57250, "parameters original": 39713, "concepts methods": 9938, "representing words": 46815, "nodes connected": 37590, "semantically similar": 49392, "number studies": 38040, "studies carried": 53251, "pattern recognition": 39963, "using traditional": 60993, "recognition process": 45526, "process addition": 42755, "representations based": 46622, "based bipartite": 5613, "problem approaches": 42506, "approaches consider": 3787, "consider possible": 10216, "context target": 10729, "results revealed": 47817, "excellent results": 18955, "method outperformed": 32596, "small training": 51506, "training dataset": 58052, "dataset available": 12819, "method useful": 32695, "useful improve": 60368, "models popular": 35329, "various semantic": 61388, "semantic phenomena": 49318, "novel probabilistic": 37897, "advances machine": 1915, "learning particular": 29799, "based logical": 5818, "networks finally": 36855, "demonstrate feasibility": 13911, "vocabulary set": 61711, "trained novel": 57835, "novel applications": 37756, "outside nlp": 39025, "words existing": 62412, "training systems": 58282, "network data": 36728, "transfer paper": 58412, "proposed benchmark": 43744, "dataset showing": 13084, "resulting significant": 47476, "representations documents": 46643, "vectors jointly": 61488, "tokens using": 57343, "hierarchical framework": 23670, "model document": 33782, "learn continuous": 29353, "similar documents": 51038, "learning user": 29930, "user specific": 60448, "specific vectors": 52171, "news data": 37397, "indicate proposed": 25531, "outperforming current": 38849, "margin paper": 31822, "models dialogue": 34912, "experiments standard": 19530, "rnn models": 48203, "models state": 35533, "dataset specific": 13096, "architectures used": 4128, "performance benchmarks": 40213, "models close": 34818, "continuous representations": 10851, "challenging requires": 8136, "model given": 33939, "does model": 15960, "directly trained": 15338, "manually engineered": 31778, "perform task": 40152, "analysis performance": 2714, "objective reduce": 38102, "performance does": 40300, "compared common": 9392, "common practice": 9192, "future context": 21864, "specific applications": 52044, "applications real": 3242, "visual data": 61654, "data previous": 12560, "propose probabilistic": 43592, "jointly leveraging": 27205, "leveraging text": 30339, "text images": 56623, "crafted feature": 11678, "design end": 14279, "set existing": 50151, "large gap": 28882, "biomedical clinical": 7172, "texts research": 56918, "research articles": 46986, "subject research": 53557, "extraction process": 20097, "texts existing": 56878, "use manually": 59944, "methods create": 32804, "features fed": 20585, "results methods": 47719, "methods highly": 32889, "highly dependent": 23891, "quality user": 44596, "designed features": 14316, "curse dimensionality": 12048, "work focus": 62668, "focus extracting": 21163, "learn features": 29370, "features automatically": 20528, "reduce dependency": 45658, "manual feature": 31741, "good model": 22935, "model relation": 34298, "clinical text": 8674, "expert knowledge": 19583, "quality features": 44523, "role determining": 48303, "word dependencies": 62136, "sentence work": 49674, "focus reducing": 21194, "propose domain": 43357, "domain invariant": 16090, "particular propose": 39859, "features employ": 20569, "performance obtained": 40465, "models obtained": 35272, "performance classification": 40234, "scores sentence": 48920, "training use": 58310, "scores given": 48902, "compared popular": 9430, "romanian english": 48334, "learning directly": 29592, "feature weights": 20511, "advantage large": 1940, "large body": 28852, "body work": 7242, "work machine": 62716, "interpretable model": 26725, "results small": 47851, "small performance": 51492, "scale sentence": 48623, "sentence length": 49576, "providing fine": 44244, "novel human": 37838, "representation scheme": 46576, "experiment language": 19240, "generating data": 22371, "dialogue history": 14776, "dialogue turn": 14792, "dialogue acts": 14765, "score furthermore": 48846, "furthermore model": 21829, "used original": 60258, "finer granularity": 21039, "particularly problematic": 39887, "topic related": 57424, "approaches work": 3956, "applications need": 3224, "nature texts": 36491, "purpose task": 44412, "computational language": 9843, "statistical approach": 52736, "logical structure": 30990, "linguistic theory": 30802, "questions remain": 44804, "strong performance": 53040, "tasks research": 55858, "performance multiple": 40450, "multiple types": 36307, "prediction based": 41696, "corpora language": 11212, "methods traditional": 33077, "use research": 59996, "research demonstrate": 47014, "effectively used": 16760, "used downstream": 60157, "permutation invariant": 40733, "different prior": 15036, "regression problem": 45816, "problem deep": 42530, "progress deep": 43095, "simple domain": 51147, "method neural": 32588, "supervised domain": 53980, "improving generalization": 25181, "generalization performance": 22126, "performance target": 40591, "target domain": 54812, "domain using": 16225, "using source": 60955, "domain dataset": 16041, "datasets labeled": 13308, "generation existing": 22455, "tune model": 58857, "dataset training": 13122, "training source": 58264, "source dataset": 51761, "dataset design": 12891, "domain target": 16199, "adaptation technique": 1542, "technique proposed": 56044, "trained cross": 57700, "entropy loss": 18162, "datasets performance": 13370, "improvements domain": 25069, "text sources": 56779, "representations word2vec": 46790, "enables users": 17451, "sources like": 51834, "map word": 31797, "capture linguistic": 7690, "concepts like": 9937, "technique uses": 56050, "based observation": 5921, "words representation": 62496, "additionally propose": 1729, "propose represent": 43605, "special tokens": 52022, "continuous vectors": 10856, "reveal proposed": 48012, "proposed approaches": 43737, "quality neural": 44557, "systems significantly": 54633, "create high": 11698, "humans machines": 24280, "promising applications": 43159, "content given": 10527, "challenges developing": 8039, "developing systems": 14663, "work serves": 62817, "advances field": 1911, "annotations used": 3005, "used directly": 60152, "directly paper": 15330, "approach sequence": 3686, "correct errors": 11467, "using explicit": 60686, "000 tokens": 12, "exceeds state": 18950, "resource settings": 47275, "models utilizing": 35665, "novel extension": 37820, "extension work": 19849, "using target": 60977, "results consistent": 47559, "quality language": 44542, "context model": 10674, "model extended": 33864, "generalization capabilities": 22118, "complex reasoning": 9655, "scale knowledge": 48581, "bases kbs": 6325, "relations entities": 46026, "entities entity": 18047, "task achieve": 54875, "relations shared": 46057, "art code": 4236, "code data": 8798, "github io": 22717, "space possible": 51881, "possible learn": 41331, "learn efficient": 29366, "augment existing": 4940, "parsing models": 39787, "global model": 22835, "model non": 34132, "new objective": 37271, "objective encourages": 38086, "tiny fraction": 57262, "accuracy f1": 975, "finds optimal": 20919, "propose effective": 43360, "statistical word": 52768, "models novel": 35268, "alignment training": 2386, "additional signal": 1698, "decoder network": 13603, "network novel": 36775, "mt quality": 35924, "general domain": 22052, "speech translation": 52311, "systems outperforms": 54578, "order control": 38604, "use multiple": 59956, "level sequence": 30210, "task studies": 55415, "words current": 62393, "addresses issue": 1811, "task datasets": 54993, "developed machine": 14631, "models data": 34880, "sentence language": 49575, "representation enables": 46507, "text context": 56513, "compared text": 9465, "model context": 33711, "final outcome": 20824, "using recent": 60897, "outperform text": 38828, "data text": 12729, "performs best": 40697, "sensitivity analysis": 49507, "style reading": 53495, "data greatly": 12395, "present chinese": 41865, "comprehension datasets": 9764, "datasets consist": 13192, "dataset propose": 13037, "problem aims": 42501, "attention words": 4853, "words query": 62490, "baselines public": 6291, "public datasets": 44314, "comprehension task": 9776, "type based": 59049, "line research": 30646, "representations linguistic": 46711, "models lack": 35159, "required train": 46906, "share parameters": 50460, "methods enable": 32837, "enable zero": 17432, "words training": 62535, "embeddings neural": 17179, "applications particular": 3231, "models classify": 34816, "media messages": 32171, "service providers": 50093, "30 different": 356, "achieve accuracy": 1109, "accuracy 85": 917, "using lstm": 60781, "traditional techniques": 57551, "vary different": 61421, "models deployed": 34899, "customer support": 12058, "meta information": 32335, "information diverse": 25818, "sentiment text": 49863, "information features": 25873, "problems present": 42720, "tasks evaluation": 55622, "evaluation word": 18748, "embeddings outperform": 17184, "architectures based": 4103, "achieving new": 1415, "systems natural": 54567, "tasks ranging": 55833, "modeling language": 34588, "paper simple": 39574, "simple baseline": 51139, "baseline achieves": 6152, "51 relative": 432, "improvement compared": 24998, "model datasets": 33737, "yields competitive": 63121, "results second": 47819, "second dataset": 49002, "dataset study": 13104, "study investigates": 53399, "investigates use": 27003, "sequence features": 49921, "reduce manual": 45668, "effectiveness compared": 16771, "set baseline": 50112, "representation approach": 46492, "performance unsupervised": 40614, "demonstrate significant": 13972, "improvements terms": 25107, "using unsupervised": 61011, "approaches literature": 3864, "captured existing": 7725, "allows effectively": 2461, "effectively leverage": 16747, "meaning language": 32004, "methods open": 32967, "models arbitrary": 34724, "information making": 25964, "models combine": 34826, "significantly improved": 50970, "model provides": 34257, "using mean": 60794, "mean field": 31991, "entailment based": 17999, "networks proven": 36899, "effective natural": 16678, "answering machine": 3079, "technical report": 56019, "present detailed": 41889, "process input": 42794, "limited dependence": 30581, "syntactic tree": 54334, "structured model": 53165, "middle ground": 33236, "mechanism applied": 32099, "showing model": 50682, "model achieved": 33507, "different nlp": 15009, "art recurrent": 4363, "continuous word": 10857, "tasks popular": 55799, "popular models": 41173, "train models": 57610, "appear training": 3139, "data evaluate": 12329, "representations different": 46639, "tasks comparing": 55549, "representations vectors": 46787, "research literature": 47067, "challenges understanding": 8079, "understanding previous": 59383, "information collected": 25781, "collected data": 8958, "promising research": 43175, "novel methodology": 37868, "capable extracting": 7619, "extracting meaningful": 20034, "study role": 53456, "bilingual word": 7115, "performance second": 40547, "language similarity": 28487, "similarity target": 51123, "language additionally": 27954, "languages results": 28775, "numerous applications": 38065, "hot topics": 24031, "results time": 47885, "ability recognize": 640, "content web": 10570, "propose paper": 43579, "cross document": 11812, "turn used": 58991, "performances tasks": 40649, "demonstrating potential": 14055, "potential approach": 41382, "problems learning": 42708, "learning search": 29862, "imitation learning": 24577, "training highly": 58121, "processing existing": 42870, "pairs propose": 39210, "propose label": 43431, "tease apart": 56015, "improve robustness": 24919, "key ingredient": 27318, "performance furthermore": 40355, "network layers": 36757, "increasing model": 25454, "complexity inference": 9677, "inference time": 25700, "probabilistic modeling": 42466, "number parameters": 38024, "mechanism automatically": 32101, "automatically learns": 5190, "embeddings particular": 17187, "learning procedure": 29819, "improvement word": 25039, "syntactic tasks": 54333, "parsing using": 39804, "joint models": 27180, "development online": 14695, "online communication": 38354, "information support": 26109, "extract relevant": 19988, "goal develop": 22881, "based experiment": 5714, "specific aspects": 52047, "frame task": 21441, "task multi": 55224, "investigate performance": 26973, "different classification": 14863, "evaluate different": 18451, "different architectures": 14841, "hierarchical approach": 23654, "approach leads": 3584, "superior results": 53942, "model makes": 34086, "cognitive linguistics": 8891, "consists set": 10328, "relevant context": 46204, "analyze effects": 2814, "mechanism neural": 32131, "results natural": 47736, "input representation": 26326, "model works": 34546, "growing research": 23302, "research automated": 46988, "text news": 56676, "able identify": 699, "time event": 57152, "time time": 57232, "annotated documents": 2892, "documents domain": 15873, "domain news": 16122, "research focuses": 47042, "utilizes context": 61113, "learning specifically": 29889, "generate dialogue": 22192, "using rnn": 60911, "information dialogue": 25811, "conversational agents": 11039, "sentence ordering": 49608, "critical task": 11795, "focused improving": 21225, "collect large": 8945, "driven approach": 16420, "source codes": 51754, "dataset paper": 13023, "tweets model": 59017, "lstm encoder": 31260, "decoder trained": 13616, "trained model": 57791, "using methods": 60802, "generated model": 22299, "representations generated": 46677, "method presented": 32620, "presented used": 42063, "model time": 34463, "scientific publications": 48768, "higher complexity": 23816, "including vocabulary": 25320, "introduced new": 26887, "terms vocabulary": 56322, "analyze dataset": 2809, "dataset composed": 12853, "year period": 63047, "generating synthetic": 22398, "models methods": 35224, "generation multiple": 22502, "inference process": 25685, "hierarchical classification": 23662, "similar domains": 51040, "selection propose": 49150, "sets results": 50306, "baseline comparison": 6163, "comparison existing": 9495, "language environment": 28051, "despite large": 14372, "cross cultural": 11810, "linguistic theories": 30801, "learning offers": 29788, "achieve human": 1158, "human like": 24199, "data enable": 12320, "building effective": 7443, "data computational": 12230, "models address": 34695, "privacy preserving": 42441, "evaluated different": 18529, "different linguistic": 14980, "linguistic levels": 30776, "machines humans": 31398, "external sources": 19954, "present current": 41880, "existing literature": 19085, "specific components": 52057, "order promote": 38649, "theory practice": 57038, "improve attention": 24824, "accuracy neural": 1014, "task model": 55215, "art traditional": 4431, "primary goal": 42371, "bias language": 7029, "focus particular": 21189, "theoretical empirical": 57019, "primary task": 42373, "task unsupervised": 55457, "unsupervised grammar": 59699, "grammar induction": 23063, "induction task": 25608, "extract salient": 19990, "particularly hard": 39882, "hard problem": 23450, "help improving": 23572, "model build": 33635, "build model": 7412, "algorithm efficiently": 2272, "examine effectiveness": 18863, "methods particularly": 32977, "current language": 11981, "models significant": 35505, "ability encode": 606, "factual knowledge": 20322, "acquire knowledge": 1441, "knowledge provided": 27582, "knowledge related": 27587, "performance generating": 40365, "smaller number": 51524, "art cnn": 4235, "models good": 35066, "purpose work": 44414, "work empirically": 62643, "empirically study": 17367, "semantically coherent": 49382, "model observe": 34136, "observation propose": 38123, "using clustering": 60606, "problem used": 42681, "datasets experimental": 13263, "usefulness approach": 60400, "performance close": 40237, "methods semantic": 33030, "methods proven": 32998, "useful tasks": 60390, "nlp natural": 37504, "corpus word": 11459, "word2vec glove": 62349, "results corpus": 47562, "range different": 44915, "attracted research": 4886, "recently gained": 45427, "gained popularity": 21921, "embeddings typically": 17236, "rare unseen": 44998, "propose improve": 43412, "embeddings incorporating": 17150, "word features": 62204, "embeddings directly": 17113, "framework word": 21627, "prior distribution": 42397, "distribution latent": 15643, "corpus approach": 11278, "approach yields": 3744, "poses major": 41249, "languages significant": 28785, "differences word": 14831, "approach utilizing": 3736, "art statistical": 4413, "clusters based": 8751, "hierarchical structure": 23692, "models generating": 35060, "used nlp": 60252, "tasks high": 55664, "high computational": 23713, "greedy algorithm": 23242, "clustering algorithms": 8737, "sub optimal": 53526, "ability produce": 635, "quality human": 44529, "human understandable": 24252, "preprocessing feature": 41826, "feature generation": 20492, "generation steps": 22552, "tuning parameters": 58939, "parameters used": 39727, "number clusters": 37989, "yielded significant": 63105, "performance resulting": 40538, "words concepts": 62384, "dimensional spaces": 15236, "addition existing": 1614, "model extend": 33863, "certain conditions": 7937, "content multiple": 10540, "standard methods": 52504, "query translation": 44679, "embeddings method": 17173, "method captures": 32412, "similar context": 51033, "use dictionary": 59866, "various methods": 61361, "difficult obtain": 15178, "resource scarce": 47271, "retrieval evaluation": 47944, "proposed word": 43926, "google translate": 22959, "fail detect": 20333, "relevant documents": 46211, "proposed address": 43712, "performance superior": 40588, "approaches present": 3898, "query document": 44665, "word mover": 62249, "mover distance": 35892, "measures proposed": 32079, "method relies": 32638, "helps identify": 23607, "document method": 15811, "data approach": 12143, "mean average": 31989, "world dataset": 62934, "dataset collected": 12845, "combine semantic": 9072, "method leads": 32561, "directional attention": 15276, "forward backward": 21402, "memory component": 32247, "model implicitly": 33976, "capture high": 7676, "12 languages": 108, "showing proposed": 50687, "scores languages": 48906, "languages introduce": 28697, "dataset evaluation": 12912, "evaluation resource": 18698, "semantic category": 49243, "research existing": 47031, "existing large": 19083, "compare human": 9343, "automatic systems": 5127, "huge gap": 24073, "gap human": 21963, "distributional representation": 15667, "models substantial": 35547, "substantial differences": 53617, "models overcome": 35296, "systems article": 54434, "results case": 47528, "recognition ocr": 45522, "models possible": 35331, "results evaluated": 47618, "methods recently": 33009, "corpus consisting": 11302, "models tested": 35594, "individual models": 25574, "corpora including": 11209, "manual transcription": 31751, "cultural heritage": 11937, "robust word": 48268, "word processing": 62271, "propose word": 43706, "robust performance": 48262, "furthermore demonstrate": 21813, "experiment human": 19239, "human reading": 24229, "model domain": 33788, "adaptation approaches": 1520, "domain shared": 16158, "transfer knowledge": 58369, "explore multi": 19717, "adaptation multiple": 1529, "multiple tasks": 36299, "tasks simultaneously": 55892, "representations better": 46624, "generalize domain": 22141, "framework domain": 21497, "tasks chinese": 55537, "experiments multi": 19471, "adaptation task": 1540, "tasks social": 55896, "correct output": 11471, "technique language": 56037, "language vision": 28578, "problem problem": 42630, "language emerging": 28044, "human understanding": 24253, "test model": 56358, "model diverse": 33781, "diverse domains": 15700, "generic language": 22630, "causal language": 7875, "work opens": 62742, "opens door": 38481, "usually require": 61064, "text aligned": 56426, "aligned word": 2359, "hypothesis propose": 24347, "propose scheme": 43612, "trained source": 57877, "trained target": 57889, "using adversarial": 60554, "results discuss": 47594, "lingual sentence": 30726, "language known": 28126, "adapted languages": 1553, "human rights": 24236, "language adaptation": 27952, "different genres": 14943, "typical text": 59133, "scale natural": 48603, "understanding task": 59407, "task publicly": 55311, "available dataset": 5278, "task predict": 55282, "reading text": 45090, "articles task": 4481, "task contains": 54975, "contains rich": 10503, "classification extraction": 8470, "extraction sub": 20116, "tasks making": 55739, "end models": 17686, "models deep": 34888, "compare various": 9376, "classification information": 8480, "answering models": 3082, "models supporting": 35566, "performing model": 40681, "accuracy 71": 905, "current nlp": 11993, "systems propose": 54604, "propose graph": 43405, "human curated": 24130, "art automatic": 4218, "systems evaluation": 54491, "languages experiment": 28663, "training classifiers": 57952, "results information": 47682, "information pos": 26010, "data reveals": 12617, "novel strategy": 37930, "processing analyze": 42851, "online conversations": 38357, "medical conditions": 32199, "learning work": 29946, "relevant content": 46203, "data exists": 12338, "length sentences": 30035, "common methods": 9186, "methods include": 32898, "averaging word": 5428, "hidden states": 23648, "networks lstms": 36873, "sentence vectors": 49669, "tasks pre": 55802, "training context": 57959, "context deep": 10606, "information capture": 25774, "encoded representations": 17484, "content word": 10572, "ability train": 645, "using representation": 60908, "analyzing different": 2841, "analysis sheds": 2755, "sheds light": 50532, "relative strengths": 46111, "resulting representations": 47475, "data analytics": 12133, "used widely": 60352, "explosive growth": 19780, "data challenges": 12204, "process large": 42801, "data finally": 12363, "efficient methods": 16885, "improve query": 24913, "usage large": 59802, "lower memory": 31217, "memory requirements": 32282, "large neural": 28916, "systems task": 54647, "translation use": 58698, "training improving": 58128, "deep model": 13727, "memory efficient": 32254, "understanding context": 59334, "increasingly important": 25473, "information nlp": 25992, "resources languages": 47310, "3rd workshop": 391, "metrics use": 33205, "use cross": 59857, "universal speech": 59548, "approaches results": 3916, "new strategies": 37325, "open information": 38433, "making processes": 31665, "diverse datasets": 15699, "datasets analysis": 13149, "completely different": 9606, "employ different": 17377, "different methodologies": 14988, "krippendorff alpha": 27677, "used measure": 60233, "graph models": 23149, "used social": 60306, "lot progress": 31117, "evaluate approaches": 18440, "learn deep": 29357, "train test": 57646, "test models": 56359, "using crowdsourcing": 60640, "demonstrate models": 13945, "text compared": 56500, "released public": 46181, "public access": 44303, "useful training": 60394, "training word": 58314, "effective framework": 16653, "framework automatic": 21459, "based universal": 6119, "universal dependency": 59539, "framework effective": 21499, "human scores": 24239, "based contexts": 5642, "time results": 57207, "english training": 17892, "training setup": 58255, "outperform previously": 38813, "proposed context": 43746, "results context": 47561, "humans read": 24286, "attempts explain": 4697, "architecture combines": 4035, "combines neural": 9099, "encoding input": 17567, "words possible": 62482, "corpus showing": 11429, "accurately predicts": 1099, "features human": 20598, "able detect": 687, "interactions paper": 26619, "sentence sequence": 49645, "words instead": 62438, "sequence sentence": 49973, "sentence proposed": 49626, "difficult learn": 15174, "problem work": 42689, "prove effectiveness": 43980, "evaluate tasks": 18511, "including word": 25321, "model powerful": 34216, "word document": 62141, "weight matrix": 61919, "embedding input": 17031, "model offer": 34142, "offer new": 38294, "methods lead": 32922, "variety neural": 61284, "models finally": 35024, "reduce size": 45680, "spoken content": 52351, "text content": 56512, "content difficult": 10520, "difficult time": 15189, "highly attractive": 23880, "develop machine": 14594, "key information": 27317, "english propose": 17861, "architecture task": 4089, "initial results": 26216, "shown word": 50760, "level attention": 30066, "robust sentence": 48266, "attention task": 4833, "field research": 20769, "consists multiple": 10324, "exponential growth": 19782, "modeling approach": 34559, "high scoring": 23801, "techniques propose": 56125, "end present": 17697, "particular application": 39832, "study provides": 53448, "provides novel": 44216, "time resource": 57206, "analysis automatic": 2619, "models derived": 34901, "observed results": 38147, "face challenge": 20240, "benchmarks demonstrate": 6514, "benchmark corpora": 6438, "performance drops": 40310, "limited set": 30613, "english newswire": 17852, "current practice": 11997, "practice training": 41486, "data single": 12666, "single domain": 51297, "data non": 12515, "non obvious": 37672, "data combining": 12222, "robust models": 48256, "daily lives": 12086, "number documents": 37996, "language detection": 28025, "texts important": 56889, "algorithms paper": 2332, "specific information": 52090, "detection results": 14519, "choose best": 8344, "detection short": 14526, "approaches include": 3846, "svm logistic": 54235, "based modified": 5885, "model include": 33987, "goal improving": 22889, "approaches evaluated": 3813, "non latin": 37658, "algorithm evaluated": 2274, "language conduct": 28001, "conduct case": 10029, "conversational text": 11055, "distantly supervised": 15561, "like language": 30480, "addition analyze": 1600, "analyze quality": 2824, "existing language": 19081, "like text": 30508, "release new": 46158, "corpus tweets": 11450, "tweets containing": 59012, "create word": 11720, "related tasks": 45943, "size data": 51379, "algorithm does": 2270, "does need": 15962, "need training": 36597, "training able": 57921, "far know": 20402, "results nlp": 47743, "collect dataset": 8941, "labeled set": 27763, "propose generate": 43398, "using sequence": 60930, "f1 improvement": 20183, "improvement non": 25010, "point improvement": 41045, "generation explore": 22459, "models identify": 35097, "reddit posts": 45644, "key aspects": 27296, "predictive model": 41775, "model analyze": 33564, "evidence suggests": 18821, "users different": 60459, "statistical measures": 52751, "similarity based": 51083, "based human": 5772, "representation results": 46575, "representation semantic": 46577, "results incorporating": 47674, "improves correlation": 25120, "various different": 61325, "used recent": 60286, "different learning": 14974, "model sentences": 34355, "selection training": 49157, "training compared": 57954, "models pre": 35338, "method shows": 32651, "different benchmarks": 14852, "datasets exhibit": 13258, "process obtain": 42812, "use graph": 59902, "degrades performance": 13810, "parameter optimization": 39674, "level convolutional": 30089, "datasets relatively": 13397, "relatively large": 46118, "conneau et": 10170, "effective learning": 16665, "representations unlike": 46779, "learning parameters": 29798, "words obtained": 62469, "modeling based": 34561, "popular language": 41166, "evidence based": 18808, "significantly reduces": 51012, "highly beneficial": 23881, "objective paper": 38098, "common type": 9207, "effectively represent": 16756, "clinical practice": 8672, "showed proposed": 50670, "available various": 5389, "popular task": 41190, "processing work": 42968, "work goal": 62676, "goal predict": 22895, "train simple": 57633, "addition present": 1634, "methods deal": 32813, "common problem": 9193, "traditional linguistic": 57525, "research methods": 47073, "speakers paper": 52008, "progress language": 43101, "specific types": 52167, "types social": 59118, "relatively little": 46121, "little evidence": 30876, "evidence support": 18822, "important resource": 24765, "people make": 40031, "make informed": 31578, "prior works": 42427, "review text": 48042, "problem detecting": 42534, "review different": 48031, "results generally": 47646, "generally outperform": 22168, "measures used": 32081, "used prior": 60274, "method extends": 32501, "customer reviews": 12056, "sentential context": 49811, "aspect based": 4527, "analysis modeling": 2698, "outperforms non": 38915, "non hierarchical": 37656, "art multilingual": 4304, "multilingual multi": 36100, "domain datasets": 16042, "hand engineered": 23392, "features external": 20580, "task use": 55458, "point scale": 41050, "embeddings contain": 17101, "sentiment information": 49847, "classification ranking": 8528, "negative sentiment": 36636, "propose improvements": 43414, "order address": 38590, "extraction multi": 20085, "results languages": 47693, "domain pairs": 16127, "present computational": 41875, "methods establish": 32842, "main results": 31458, "language translations": 28540, "language characteristics": 27987, "different original": 15014, "original ones": 38721, "supervised text": 54058, "domain trained": 16214, "evaluation scenarios": 18707, "highly accurate": 23878, "task suggest": 55424, "method determining": 32460, "use labels": 59921, "improving accuracy": 25172, "suggest simple": 53830, "mixed domain": 33403, "domain related": 16145, "related features": 45907, "original translated": 38737, "reasonable accuracy": 45172, "complex multi": 9636, "multi faceted": 35957, "great value": 23221, "practice paper": 41485, "approach developing": 3488, "use concept": 59849, "academic papers": 793, "topic words": 57436, "lexical similarity": 30386, "perspective model": 40774, "used model": 60240, "studies evaluate": 53260, "directions research": 15300, "research lack": 47061, "lack parallel": 27904, "important challenge": 24704, "common solution": 9200, "quality translations": 44593, "examine use": 18870, "quality phrase": 44562, "limited parallel": 30602, "constraints based": 10372, "direct model": 15256, "process automatically": 42760, "emerged new": 17261, "new paradigm": 37277, "learning paper": 29793, "sequence using": 50016, "decoder attention": 13588, "proposed encoder": 43762, "achieve significantly": 1195, "significantly higher": 50960, "efficiently train": 16920, "train neural": 57616, "word distribution": 62139, "time approach": 57117, "approach reduces": 3670, "reduces computational": 45688, "particularly suited": 39892, "word approach": 62113, "achieving accuracy": 1392, "com facebookresearch": 9011, "generation produce": 22530, "language previous": 28387, "language problem": 28391, "language introduce": 28122, "introduce task": 26868, "specific methods": 52110, "approach neural": 3611, "output neural": 38988, "main problems": 31453, "able handle": 698, "reduce training": 45682, "time systems": 57228, "different outputs": 15016, "based nmt": 5915, "setup work": 50411, "work investigates": 62699, "style topic": 53502, "online communities": 38355, "content style": 10562, "hybrid word": 24323, "model topic": 34464, "specific topics": 52163, "growing demand": 23294, "led great": 29989, "limited supervision": 30621, "approaches extract": 3822, "learning setting": 29870, "approach applying": 3422, "supervision cross": 54079, "approach graph": 3550, "graph representation": 23161, "sentences extract": 49721, "extract features": 19974, "features multiple": 20626, "accuracy robustness": 1040, "learn accurate": 29343, "sentence relations": 49630, "sentiment topic": 49864, "york times": 63140, "ethical concerns": 18416, "negative impact": 36620, "time automatic": 57119, "provided model": 44165, "need labeled": 36574, "data form": 12370, "propose iterative": 43424, "text classifier": 56491, "based transfer": 6104, "additionally demonstrate": 1716, "benefits proposed": 6588, "evaluation multiple": 18659, "datasets different": 13228, "provide systematic": 44141, "design features": 14283, "language design": 28023, "framework provides": 21589, "challenges evaluation": 8044, "complexity human": 9676, "way work": 61838, "task spoken": 55407, "using sets": 60934, "extracted speech": 20021, "used form": 60194, "representations speech": 46761, "encode information": 17464, "paper construct": 39305, "encodes information": 17561, "data make": 12477, "technique known": 56036, "viable alternative": 61570, "alternative model": 2506, "prediction score": 41738, "neural end": 36952, "present number": 41981, "networks evaluate": 36850, "performance similar": 40562, "use external": 59886, "external language": 19946, "model decoding": 33741, "researchers investigated": 47161, "problem extracting": 42563, "mining techniques": 33327, "proposing new": 43946, "process models": 42806, "leverage unsupervised": 30297, "little human": 30877, "human involvement": 24179, "automatically label": 5185, "use case": 59838, "demonstrates usefulness": 14049, "usefulness proposed": 60401, "software library": 51639, "integrated existing": 26513, "achieves promising": 1354, "trained parallel": 57837, "used translate": 60343, "test sentences": 56368, "propose dynamic": 43359, "fine tunes": 20974, "work small": 62825, "data obtained": 12520, "similarity search": 51119, "sentence extensive": 49558, "demonstrate method": 13934, "performance especially": 40323, "similar sentences": 51066, "sentences available": 49683, "available new": 5332, "online forums": 38367, "set known": 50175, "approach knowledge": 3580, "based small": 6041, "set initial": 50170, "effective detecting": 16644, "time new": 57184, "lda based": 29249, "model social": 34395, "data like": 12463, "nature natural": 36484, "researchers applying": 47149, "product review": 43044, "making challenging": 31647, "modeling target": 34627, "target specific": 54843, "approaches propose": 3902, "new formulation": 37209, "prior information": 42402, "model utilizing": 34520, "existing public": 19131, "data conduct": 12236, "million tweets": 33260, "provides useful": 44232, "representations fine": 46670, "grained word": 23048, "implicitly learned": 24669, "text resulting": 56746, "questions does": 44785, "climate change": 8666, "complex relationships": 9657, "refers task": 45763, "task converting": 54980, "possible ways": 41341, "context study": 10726, "written spoken": 63010, "ranking model": 44973, "language universal": 28565, "purpose multilingual": 44407, "multilingual semantic": 36117, "tagger using": 54732, "using deep": 60646, "uses word": 60543, "character representations": 8223, "includes novel": 25232, "semantic tags": 49362, "prior results": 42412, "results english": 47609, "information considered": 25786, "constructing knowledge": 10421, "plain texts": 40939, "contexts entities": 10751, "dynamically select": 16499, "informative sentences": 26176, "sentences corresponding": 49699, "corresponding entities": 11550, "propose sequential": 43626, "multiple sentences": 36281, "sentence entity": 49552, "network encode": 36736, "model measure": 34097, "build text": 7430, "representations entities": 46652, "method tasks": 32680, "indicates method": 25539, "information knowledge": 25936, "word problems": 62270, "word problem": 62269, "added existing": 1590, "existing datasets": 19054, "datasets make": 13324, "enable accurate": 17420, "used metrics": 60238, "future evaluations": 21873, "amr text": 2577, "generation generate": 22468, "meaning given": 32002, "essential step": 18334, "approaches heavily": 3838, "heavily rely": 23535, "rely hand": 46283, "features domain": 20563, "specific resources": 52140, "difficult collect": 15159, "reason paper": 45169, "neural architectures": 36933, "text experimental": 56564, "task benchmarks": 54933, "model representation": 34309, "construction model": 10429, "outperforms recent": 38937, "work low": 62715, "data potentially": 12550, "endangered languages": 17732, "languages training": 28808, "step making": 52814, "using dynamic": 60670, "trained jointly": 57753, "jointly using": 27225, "using expectation": 60685, "extremely low": 20163, "resource scenario": 47272, "model performs": 34201, "baseline introduce": 6176, "training decoding": 58055, "output different": 38967, "models instead": 35135, "attention weights": 4852, "generation experiments": 22458, "gains baseline": 21933, "learning utilize": 29934, "utilize pre": 61100, "currently exist": 12034, "usage social": 59807, "paper release": 39564, "basic language": 6330, "resource training": 47284, "training development": 58063, "development test": 14706, "data resources": 12611, "report baseline": 46427, "baseline results": 6206, "data resource": 12610, "web services": 61897, "qa task": 44459, "relevant concepts": 46202, "answers given": 3109, "recall measure": 45243, "achieved second": 1267, "require manually": 46878, "models performances": 35319, "requires expert": 46926, "expensive recent": 19217, "systematic way": 54406, "near optimal": 36508, "compared random": 9444, "models yield": 35691, "yield best": 63089, "best performances": 6793, "sequence characters": 49914, "predicted model": 41669, "demo available": 13846, "visual textual": 61671, "textual representations": 56977, "quality models": 44552, "models standard": 35531, "standard semantic": 52524, "sequential models": 50047, "outperform recent": 38817, "including ones": 25287, "best configuration": 6758, "representations deep": 46635, "data released": 12597, "80 accuracy": 521, "higher accuracies": 23811, "model easily": 33796, "additional context": 1659, "approach makes": 3596, "use sequence": 60011, "conversation threads": 11036, "work addressing": 62559, "different assumptions": 14845, "twitter datasets": 59036, "datasets collected": 13177, "non sequential": 37682, "introducing novel": 26902, "novel way": 37952, "networks achieved": 36829, "hybrid architecture": 24311, "architecture proposed": 4080, "study using": 53473, "performance rnn": 40540, "task experimental": 55065, "performance datasets": 40277, "does outperform": 15963, "outperform models": 38803, "used domains": 60156, "domains existing": 16251, "read sentences": 45067, "different weights": 15130, "sentences end": 49709, "end propose": 17699, "attention models": 4788, "derived using": 14204, "reading time": 45091, "methods significantly": 33039, "art sentence": 4397, "sequence transduction": 50014, "based generative": 5749, "sentences generate": 49727, "parsing tasks": 39800, "domains limited": 16271, "limited access": 30562, "data extend": 12349, "synthetically generated": 54388, "forms work": 21379, "annotation tools": 2978, "annotation projection": 2962, "applicable wide": 3158, "languages provides": 28762, "external information": 19937, "demonstrate validity": 13998, "great success": 23217, "tasks previous": 55810, "work investigated": 62698, "generate concise": 22186, "length paper": 30031, "results learning": 47698, "based input": 5785, "work improving": 62685, "improving efficiency": 25178, "efficiency neural": 16848, "models adopted": 34697, "candidates given": 7586, "based selection": 6010, "decoding time": 13650, "single cpu": 51291, "words input": 62437, "conventional machine": 11004, "agent learns": 2057, "make decisions": 31565, "setting experiments": 50322, "experiments state": 19532, "baselines language": 6274, "pairs demonstrate": 39178, "generic text": 22633, "text representation": 56737, "training parameters": 58205, "representation new": 46563, "new text": 37343, "work information": 62687, "benchmark paper": 6485, "use rich": 60001, "events paper": 18796, "paper outlines": 39435, "outperforms models": 38910, "basic units": 6334, "training small": 58263, "selectional preferences": 49160, "sets different": 50288, "vector models": 61458, "variety features": 61272, "error prone": 18222, "require domain": 46850, "greatly reduce": 23237, "reduce effort": 45660, "framework leverage": 21556, "leverage large": 30273, "problem limited": 42596, "limited labeled": 30592, "task experiments": 55067, "better compared": 6866, "features using": 20692, "using unlabeled": 61009, "tweets labeled": 59016, "classifiers use": 8626, "present quantitative": 41995, "text contains": 56511, "datasets small": 13433, "step direction": 52805, "times larger": 57253, "larger training": 29089, "training new": 58192, "model original": 34151, "recent attempts": 45294, "version dataset": 61552, "human baseline": 24113, "baseline provided": 6204, "human study": 24244, "despite advances": 14355, "advances natural": 1916, "short informal": 50556, "information needs": 25988, "online fashion": 38366, "models using": 35655, "twitter dataset": 59035, "smt neural": 51544, "translation directions": 58602, "efficient neural": 16889, "demonstrate current": 13886, "linguistic expressions": 30769, "systems model": 54562, "relations hold": 46034, "representations generate": 46676, "semantics different": 49402, "model generation": 33935, "generation component": 22436, "collected online": 8965, "classifier based": 8594, "based fuzzy": 5743, "used analyze": 60086, "alternative methods": 2505, "classification social": 8554, "categories used": 7850, "date paper": 13493, "propose innovative": 43418, "based expert": 5718, "novel concepts": 37787, "outlier detection": 38772, "help detect": 23556, "annotation errors": 2947, "improve overall": 24882, "diverse real": 15713, "sets demonstrate": 50287, "embeddings demonstrated": 17109, "embeddings obtained": 17183, "salient information": 48441, "results benchmark": 47520, "outperform original": 38808, "medical text": 32210, "poses challenges": 41245, "fast growing": 20425, "challenging lack": 8105, "lack labeled": 27897, "labeled dataset": 27754, "sources external": 51829, "external knowledge": 19938, "knowledge multiple": 27555, "token representations": 57304, "representations single": 46757, "real application": 45098, "score performance": 48864, "representation techniques": 46591, "overcome challenges": 39059, "propose data": 43346, "training technique": 58288, "nn model": 37580, "best score": 6820, "art average": 4219, "average score": 5417, "argue need": 4165, "experiments sentiment": 19519, "approach text": 3720, "level understanding": 30228, "developing automated": 14647, "automated tools": 5064, "dominant paradigm": 16308, "models parameters": 35306, "train better": 57568, "systems recent": 54613, "mobile devices": 33448, "access internet": 825, "limited resources": 30610, "high memory": 23753, "languages remains": 28770, "big challenge": 7088, "context machine": 10671, "deal problem": 13519, "standard machine": 52499, "obtains best": 38241, "neural based": 36938, "classification results": 8538, "work large": 62704, "large research": 28955, "research project": 47100, "decoder based": 13589, "context use": 10738, "use contextual": 59852, "contextual data": 10760, "framework method": 21562, "models investigate": 35146, "investigate behavior": 26944, "work semantic": 62816, "linking model": 30835, "language gap": 28081, "particular present": 39858, "doctor patient": 15763, "suitable training": 53861, "variety methods": 61280, "fully automatic": 21714, "augmenting training": 4989, "points absolute": 41065, "network approach": 36697, "approach predicting": 3647, "networks directly": 36844, "performing better": 40673, "better feature": 6891, "models previous": 35353, "reduce accuracy": 45648, "accuracy gap": 983, "model providing": 34258, "success neural": 53714, "systems especially": 54488, "new theoretical": 37344, "structure results": 53132, "theoretical foundation": 57020, "encoding sentence": 17575, "true false": 58820, "selection experimental": 49137, "baselines achieves": 6228, "investigate task": 26989, "automatically identifying": 5182, "multimodal information": 36150, "propose predictive": 43590, "using long": 60778, "analyze results": 2826, "based real": 5976, "new simple": 37316, "sentiment label": 49849, "model keeps": 34028, "nlp techniques": 37553, "measures degree": 32076, "proposed evaluated": 43769, "based metric": 5853, "metric proposed": 33123, "studies model": 53282, "metrics paper": 33186, "paper empirically": 39345, "empirically explore": 17362, "explore effects": 19705, "skip connections": 51418, "present comprehensive": 41870, "comprehensive experiments": 9791, "using gated": 60701, "based novel": 5919, "successfully train": 53749, "new instances": 37226, "languages develop": 28638, "extensively used": 19920, "learn better": 29348, "processing models": 42892, "information syntactic": 26110, "auxiliary task": 5240, "data come": 12223, "trained text": 57893, "learns predict": 29968, "features given": 20591, "data modalities": 12487, "visual context": 61653, "context given": 10648, "lower layers": 31213, "higher layers": 23829, "used support": 60319, "multilingual nlp": 36106, "multilingual tasks": 36125, "systematic survey": 54405, "inspire future": 26402, "research natural": 47077, "agents able": 2062, "lm using": 30915, "glove word": 22860, "idea training": 24374, "unit gru": 59522, "used lstm": 60228, "prediction network": 41723, "network designed": 36731, "tends produce": 56216, "produce coherent": 42976, "use lstm": 59940, "trained solely": 57876, "using external": 60687, "improvements predicting": 25092, "art parsers": 4323, "improvement previous": 25017, "corpus machine": 11375, "reasoning models": 45205, "shown remarkable": 50746, "context end": 10623, "end trainable": 17717, "promising performance": 43172, "performance simple": 40563, "tasks multi": 55752, "remain challenging": 46313, "complex interactions": 9630, "family models": 20389, "current progress": 12002, "field computer": 20754, "learning perspective": 29803, "learned end": 29458, "supervision signal": 54093, "experiments significant": 19527, "challenging tasks": 8156, "tasks 20": 55484, "dataset use": 13125, "dataset datasets": 12880, "datasets model": 13334, "developed nlp": 14638, "commercial use": 9156, "based decoding": 5670, "drop replacement": 16443, "main contribution": 31430, "experimental analysis": 19258, "works different": 62884, "min max": 33273, "variety existing": 61271, "kl divergence": 27378, "entailment datasets": 18001, "highest performance": 23854, "performance combined": 40242, "combined sentence": 9084, "achieved great": 1237, "challenges model": 8060, "recurrent architecture": 45607, "overall semantic": 39050, "groups different": 23280, "network sentiment": 36802, "information better": 25769, "models publicly": 35386, "available document": 5282, "analysis datasets": 2643, "datasets online": 13353, "systems large": 54543, "number training": 38049, "source framework": 51771, "framework data": 21486, "data preparation": 12554, "evaluation methodology": 18641, "ultimate goal": 59191, "foster research": 21412, "achieves overall": 1351, "f_1 score": 20237, "set evaluation": 50148, "vietnamese language": 61592, "typically employ": 59140, "acoustic models": 1438, "models compared": 34836, "gaussian mixture": 22013, "mixture models": 33422, "based acoustic": 5555, "resource constrained": 47212, "different layers": 14973, "study demonstrates": 53357, "fewer model": 20737, "network using": 36822, "number model": 38018, "adaptation data": 1522, "data result": 12613, "free approach": 21638, "save time": 48534, "time effort": 57148, "improve text": 24933, "standard neural": 52513, "models bring": 34793, "improvements tasks": 25106, "context used": 10739, "improves recall": 25155, "perform qualitative": 40131, "models lower": 35204, "lower perplexity": 31220, "human reasoning": 24231, "reasoning paper": 45212, "based memory": 5843, "memory augmented": 32240, "approach involves": 3578, "neural semantic": 37092, "accuracy previous": 1030, "obtained single": 38223, "datasets explore": 13266, "languages compare": 28617, "increase bleu": 25406, "used languages": 60223, "systems results": 54625, "extensive experimentation": 19876, "spanning multiple": 51955, "model hierarchical": 33958, "hierarchical representation": 23689, "single embedding": 51298, "character character": 8199, "ended questions": 17738, "intelligent agent": 26542, "recent open": 45329, "semantic understanding": 49370, "generating plausible": 22387, "novel task": 37933, "task answer": 54903, "list candidate": 30839, "experiment various": 19256, "including neural": 25282, "high recall": 23788, "performs competitively": 40703, "rarely seen": 45006, "core idea": 11149, "idea design": 24369, "method leverages": 32566, "character model": 8219, "correct translations": 11478, "linguistics cognitive": 30820, "raises questions": 44863, "development advanced": 14667, "explore effectiveness": 19704, "improves baseline": 25115, "bengali hindi": 6595, "general domains": 22057, "domains multi": 16276, "work demonstrate": 62625, "inference steps": 25695, "examples different": 18896, "performance benefit": 40214, "reasoning process": 45217, "models compare": 34835, "established methods": 18356, "methods represent": 33015, "research fields": 47038, "fields including": 20780, "approaches adapt": 3755, "tasks perform": 55795, "methods including": 32899, "lack annotated": 27872, "task particularly": 55271, "network applied": 36696, "previously established": 42333, "set texts": 50265, "new high": 37218, "english indian": 17824, "training testing": 58295, "representation sentences": 46579, "sentences important": 49736, "important text": 24783, "tasks involve": 55697, "propose series": 43627, "series novel": 50067, "learning latent": 29701, "latent representations": 29132, "representations sentences": 46753, "sentence inter": 49572, "ways using": 61845, "sampling method": 48502, "computational power": 9854, "achieve fine": 1140, "make code": 31548, "code publicly": 8850, "text systems": 56800, "complex data": 9620, "techniques data": 56072, "natural human": 36411, "language common": 27994, "common way": 9210, "way human": 61806, "human human": 24168, "users interact": 60468, "general data": 22050, "methods finally": 32865, "opportunities future": 38512, "research progress": 47099, "progress text": 43118, "task requiring": 55341, "broader context": 7363, "comprehension models": 9768, "models constrained": 34854, "context improve": 10654, "knowledge needed": 27558, "present submission": 42029, "task corpus": 54981, "results shared": 47830, "low performance": 31163, "need develop": 36554, "extracting entities": 20029, "types text": 59121, "text important": 56624, "entity relation": 18139, "relied human": 46262, "corpora training": 11251, "pipeline systems": 40906, "systems require": 54619, "require additional": 46841, "additional human": 1673, "human expertise": 24163, "joint extraction": 27170, "context agnostic": 10582, "poses unique": 41255, "challenges task": 8078, "novel domain": 37807, "algorithm extract": 2276, "joint optimization": 27182, "problem learn": 42593, "capture cross": 7659, "relations experiments": 46030, "domains news": 16279, "news biomedical": 37390, "improvement f1": 25005, "entities short": 18084, "model make": 34084, "learning strategies": 29896, "methods public": 33001, "movies tv": 35900, "methods adapt": 32737, "adapt different": 1501, "types entity": 59085, "outperform current": 38789, "methods trained": 33080, "tasks introduce": 55694, "introduce word": 26878, "corpus created": 11314, "freely accessible": 21651, "related text": 45945, "text explore": 56569, "vectors capture": 61481, "parameter tuning": 39681, "embeddings competitive": 17098, "results outperform": 47753, "performance release": 40531, "release corpus": 46148, "corpus data": 11316, "available hope": 5303, "used future": 60195, "future studies": 21896, "paper demonstrates": 39317, "new arabic": 37135, "dataset trained": 13121, "process known": 42799, "rely human": 46288, "leverage knowledge": 30271, "explore method": 19713, "method incorporate": 32538, "improves state": 25163, "identification systems": 24397, "submitted results": 53584, "languages mixed": 28729, "mixed english": 33405, "tagging techniques": 54754, "work languages": 62703, "novel problem": 37898, "knowledge specific": 27616, "end approach": 17612, "answer options": 3042, "finally approach": 20838, "structured query": 53172, "question human": 44732, "historical data": 23958, "semantically annotated": 49380, "features train": 20684, "demonstrate viability": 14000, "overall approach": 39034, "dimensional convolutional": 15227, "representation encoder": 46508, "receptive field": 45479, "attains state": 4675, "quadratic time": 44465, "statistical power": 52760, "power neural": 41428, "symbolic reasoning": 54269, "neural symbolic": 37100, "model maps": 34092, "presents challenge": 42075, "challenge community": 7971, "given large": 22757, "general text": 22094, "generated using": 22332, "near future": 36504, "experiments data": 19396, "different rnn": 15054, "errors produced": 18248, "achieve level": 1166, "level accuracy": 30056, "huge amounts": 24069, "amounts annotated": 2544, "annotated text": 2923, "model open": 34144, "source data": 51760, "providing novel": 44252, "novel data": 37796, "used work": 60354, "data neural": 12509, "progress past": 43109, "bilingual sentence": 7114, "needed training": 36605, "training human": 58122, "data bottleneck": 12193, "dual learning": 16461, "learning mechanism": 29723, "dual task": 16462, "dual tasks": 16463, "generate informative": 22213, "train translation": 57654, "mechanism use": 32146, "task agent": 54889, "output model": 38986, "reconstruction error": 45582, "using policy": 60861, "policy gradient": 41094, "gradient methods": 23010, "data 10": 12101, "task common": 54957, "way train": 61832, "resulting increased": 47467, "framework investigate": 21550, "investigate different": 26951, "different choices": 14861, "translation accuracy": 58573, "little impact": 30878, "detection natural": 14505, "detection approach": 14459, "fixed size": 21080, "method fully": 32513, "encode sentence": 17469, "sentence fragment": 49561, "size representation": 51396, "entity label": 18112, "tasks methods": 55747, "traditional sequence": 57543, "presents empirical": 42081, "empirical comparison": 17320, "published date": 44369, "related unrelated": 45951, "poor performance": 41140, "second subtask": 49025, "multiple semantic": 36278, "models called": 34801, "inspired work": 26418, "models simple": 35512, "using fewer": 60694, "learning settings": 29871, "corpus results": 11422, "various domain": 61328, "documents large": 15892, "large document": 28871, "used perform": 60260, "standard dataset": 52483, "evaluated multiple": 18539, "performance dataset": 40276, "performance sentiment": 40552, "models treat": 35633, "allows models": 2474, "models create": 34871, "mentions entities": 32304, "dialogue generation": 14773, "discourse context": 15388, "words experiments": 62415, "selection text": 49156, "text entailment": 56557, "framework performs": 21582, "level matching": 30158, "using convolutional": 60628, "particularly focus": 39881, "focus different": 21155, "datasets evaluate": 13252, "functions based": 21769, "better standard": 6968, "present domain": 41894, "network train": 36814, "learning network": 29776, "speech dataset": 52257, "parameters trained": 39724, "trained network": 57824, "datasets train": 13459, "different characteristics": 14859, "multiple source": 36283, "methods research": 33020, "propose memory": 43449, "models incorporate": 35121, "memory mechanism": 32269, "continuous data": 10843, "function based": 21752, "method baseline": 32400, "including sentiment": 25297, "question type": 44753, "type classification": 59050, "large pool": 28937, "documents paper": 15901, "lack standard": 27913, "make publicly": 31590, "provide gold": 44080, "set entity": 50145, "entity related": 18138, "related articles": 45887, "articles propose": 4475, "directional lstms": 15284, "encode entire": 17462, "compared recurrent": 9446, "temporal dependencies": 56184, "wmt 16": 62099, "translation achieve": 58574, "competitive accuracy": 9537, "accuracy state": 1052, "results wmt": 47911, "task models": 55220, "obtain accuracy": 38159, "deep lstm": 13726, "wmt 14": 62098, "14 english": 138, "accuracy strong": 1055, "language task": 28520, "counter intuitive": 11614, "task challenging": 54948, "challenging humans": 8100, "candidate words": 7582, "used conjunction": 60125, "level tasks": 30222, "tasks unlike": 55950, "dependency information": 14123, "relationships sentences": 46083, "embeddings fixed": 17139, "discrimination tasks": 15441, "training approaches": 57935, "relatively unexplored": 46135, "work particular": 62744, "word classification": 62129, "contrastive loss": 10912, "siamese network": 50818, "network training": 36817, "models addition": 34692, "use recurrent": 59990, "models unlike": 35647, "direct models": 15257, "models produce": 35362, "produce outputs": 42996, "output distribution": 38968, "distribution using": 15657, "decoder experimental": 13591, "outperform direct": 38792, "significantly benefit": 50938, "texts key": 56895, "nlp problem": 37514, "build evaluate": 7398, "sequence framework": 49923, "strongly outperforms": 53072, "prior methods": 42408, "task novel": 55243, "scientific articles": 48755, "furthermore work": 21844, "work shows": 62823, "useful text": 60391, "representations obtained": 46728, "level logical": 30153, "art pre": 4357, "methods sentence": 33032, "data context": 12251, "gold data": 22912, "time use": 57235, "similarity different": 51092, "based observations": 5923, "manual inspection": 31743, "approaches suffer": 3930, "suffer shortcomings": 53780, "utilize large": 61097, "order minimize": 38639, "words resulting": 62500, "simple mechanism": 51190, "copy mechanism": 11133, "able exploit": 693, "handle vocabulary": 23419, "algorithm exploits": 2275, "systems submitted": 54643, "linking el": 30834, "consists modules": 10323, "candidate generation": 7572, "best achieved": 6745, "achieved f1": 1231, "focus text": 21206, "platforms paper": 40954, "model incorporates": 33991, "online debate": 38359, "macro average": 31402, "data significantly": 12662, "model design": 33752, "accuracy english": 968, "models showing": 35495, "regardless language": 45796, "suggest new": 53826, "using gold": 60708, "goal improve": 22888, "achieve higher": 1154, "performance measure": 40433, "understanding long": 59361, "past decades": 39931, "models depend": 34898, "level annotation": 30062, "words able": 62359, "simple models": 51195, "obtain competitive": 38166, "study attempt": 53329, "attempt build": 4682, "corpus arabic": 11280, "corpus includes": 11360, "news sources": 37416, "source input": 51774, "source texts": 51809, "score experiments": 48844, "correlation coefficient": 11519, "label space": 27729, "use target": 60039, "ranking algorithm": 44965, "metrics automatically": 33140, "automatically select": 5200, "learning text": 29910, "unified semantic": 59478, "model entity": 33830, "relation embeddings": 45971, "including entity": 25256, "entity prediction": 18125, "prediction relation": 41735, "relation prediction": 45992, "significantly consistently": 50949, "consistently improve": 10294, "compared baselines": 9387, "models achieved": 34674, "achieved success": 1277, "models gain": 35049, "globally normalized": 22851, "crf models": 11765, "models mainly": 35209, "prediction work": 41753, "compare model": 9347, "model different": 33765, "models known": 35156, "tasks experiments": 55631, "previously unseen": 42355, "novel architecture": 37768, "alternative word": 2511, "mechanism model": 32129, "proposed attention": 43740, "number trainable": 38047, "trainable parameters": 57665, "tackle challenge": 54697, "identify useful": 24450, "text end": 56555, "propose knowledge": 43428, "knowledge enhanced": 27463, "hybrid neural": 24321, "model fuses": 33914, "knowledge word": 27648, "representations knowledge": 46697, "units gru": 59531, "network generate": 36748, "model extends": 33865, "extends existing": 19843, "global context": 22823, "sentences evaluation": 49712, "matching models": 31916, "models particularly": 35308, "use recent": 59988, "advances representation": 1923, "overall task": 39051, "model end": 33821, "end differentiable": 17630, "documents similar": 15913, "measure improve": 32056, "relevant tasks": 46238, "tasks document": 55594, "answering paper": 3087, "documents topic": 15919, "novel mechanism": 37861, "research social": 47121, "models solve": 35519, "order understand": 38658, "evaluate representations": 18499, "extent model": 19923, "model properties": 34244, "multiple classifiers": 36185, "nlp previous": 37513, "works mainly": 62896, "using pipeline": 60859, "approach address": 3410, "problem uses": 42682, "pointer network": 41058, "alleviate error": 2407, "error propagation": 18223, "propagation problem": 43246, "utilize contextual": 61087, "information experimental": 25847, "paper available": 39277, "contains approximately": 10492, "style information": 53488, "important topic": 24786, "quality large": 44543, "different corpus": 14880, "corpus analysis": 11273, "frequency analysis": 21669, "data aim": 12127, "present state": 42022, "methods build": 32774, "analyses examine": 2596, "examine effect": 18862, "necessary sufficient": 36534, "achieving high": 1408, "high classification": 23710, "generative modeling": 22598, "language state": 28503, "performance investigate": 40400, "linguistic perspective": 30780, "model data": 33734, "explicit modeling": 19621, "crucial achieving": 11894, "performance attention": 40201, "providing support": 44253, "data feature": 12361, "designing better": 14339, "tasks recently": 55841, "development deep": 14674, "model model": 34104, "representation document": 46504, "model generating": 33934, "models terms": 35591, "experiments analyze": 19351, "key points": 27328, "including model": 25274, "types tasks": 59120, "tasks argue": 55508, "generate good": 22205, "introduce joint": 26814, "existing document": 19059, "model recurrent": 34288, "learning setup": 29872, "learn complex": 29350, "despite usefulness": 14402, "models affected": 34701, "study effect": 53364, "shown strong": 50754, "task similar": 55377, "model building": 33636, "smaller models": 51523, "learning helps": 29668, "research linguistics": 47066, "length text": 30036, "models usually": 35660, "features feature": 20583, "classification compared": 8443, "tasks specifically": 55903, "highest accuracy": 23849, "classification fine": 8472, "grained classification": 23026, "news social": 37414, "single language": 51312, "focuses specific": 21243, "manual curation": 31735, "major languages": 31514, "model detecting": 33757, "high frequency": 23739, "results number": 47746, "direct use": 15261, "use input": 59913, "word information": 62217, "leaf nodes": 29333, "better representations": 6954, "learning emerged": 29615, "active research": 1478, "document embedding": 15787, "classification document": 8457, "process produce": 42818, "contributions paper": 10955, "method named": 32584, "background information": 5491, "increasing importance": 25452, "automatically recognized": 5199, "specific texts": 52159, "methods best": 32771, "best suited": 6828, "main reasons": 31456, "reasons lack": 45236, "tool support": 57366, "open datasets": 38418, "datasets average": 13164, "processing time": 42958, "time experimental": 57154, "comparison reveals": 9505, "best average": 6750, "datasets available": 13162, "best methods": 6780, "new encoder": 37183, "decoder approach": 13585, "model learned": 34051, "map input": 31794, "vector using": 61473, "including sentence": 25296, "sentence prediction": 49621, "hierarchical encoder": 23667, "predict multiple": 41647, "training models": 58179, "sentence encoder": 49549, "superiority proposed": 53953, "competing methods": 9530, "evaluated large": 18535, "datasets present": 13374, "complex words": 9672, "test samples": 56366, "space efficient": 51855, "relations knowledge": 46039, "representation knowledge": 46533, "entities entities": 18046, "information entities": 25837, "models encode": 34962, "valuable information": 61202, "text description": 56532, "gating mechanism": 22006, "unified architecture": 59467, "experiments models": 19470, "tasks source": 55899, "available github": 5297, "learning bias": 29548, "words low": 62451, "early stages": 16515, "written non": 63006, "received increasing": 45258, "years number": 63066, "number annotated": 37981, "approaches limited": 3863, "consuming expensive": 10445, "expensive work": 19224, "propose utilize": 43697, "utilize unlabeled": 61105, "detection models": 14502, "negative training": 36638, "data introduce": 12440, "introduce attention": 26782, "use reinforcement": 59992, "learning learn": 29703, "predicted using": 41672, "benefit learning": 6565, "phrases different": 40850, "different conventional": 14878, "abstractive summarization": 772, "previous sequence": 42276, "seq2seq models": 49902, "single decoder": 51294, "seq2seq model": 49901, "model fuse": 33913, "specific vocabulary": 52172, "final output": 20825, "datasets result": 13408, "approaches terms": 3938, "connectionist temporal": 10182, "temporal classification": 56181, "allow model": 2437, "model allows": 33562, "generating natural": 22383, "languages particular": 28747, "proposed novel": 43871, "novel approaches": 37767, "text sequences": 56766, "addressing problem": 1822, "problem long": 42597, "range dependency": 44914, "detection human": 14490, "focus identifying": 21169, "content internet": 10532, "understanding content": 59333, "key steps": 27335, "using available": 60579, "available annotated": 5261, "annotated datasets": 2890, "datasets like": 13317, "models fail": 35015, "fail generalize": 20338, "differ significantly": 14815, "domains large": 16269, "training robust": 58234, "recognition models": 45514, "models key": 35152, "adapt models": 1505, "available domains": 5283, "methods effectively": 32833, "effectively adapt": 16720, "domains using": 16300, "using distributed": 60662, "analyze linguistic": 2821, "identify key": 24426, "linguistic insights": 30774, "performance domains": 40304, "methods capture": 32777, "capture domain": 7666, "global semantics": 22842, "knowledge learn": 27546, "ner models": 36679, "previous baselines": 42246, "baselines domain": 6254, "approach identify": 3558, "multilingual context": 36070, "language expression": 28064, "extraction pipeline": 20093, "pos tagged": 41230, "regular expression": 45831, "false positives": 20383, "demonstrated effectiveness": 14005, "tasks tasks": 55927, "study possible": 53433, "models multiple": 35241, "embeddings finally": 17137, "use previous": 59981, "work uses": 62854, "small corpus": 51468, "new neural": 37269, "using negative": 60831, "compare proposed": 9360, "improving previous": 25193, "success deep": 53697, "reasoning requires": 45222, "requires complex": 46919, "recent neural": 45324, "approaches attempted": 3769, "typically limited": 59148, "synthetic tasks": 54383, "framework integrates": 21547, "non differentiable": 37647, "parsing dataset": 39776, "task requires": 55339, "requires significant": 46950, "text wikipedia": 56844, "wikipedia knowledge": 62049, "effective tasks": 16702, "memory representations": 32281, "mimic iii": 33269, "networks predict": 36895, "models improved": 35112, "bidirectional encoder": 7067, "encoder attention": 17491, "attention decoder": 4733, "tokens sentence": 57335, "adequacy fluency": 1831, "approaches entity": 3810, "entity identification": 18109, "boundary detection": 7283, "types natural": 59103, "level representation": 30192, "frame problem": 21440, "architecture performs": 4076, "models bilstm": 34789, "linearly number": 30685, "expensive data": 19207, "learning aims": 29509, "reduce cost": 45655, "confidence scores": 10118, "likelihood based": 30517, "based active": 5556, "methods shown": 33036, "understood work": 59424, "learning end": 29622, "reduce number": 45676, "number samples": 38034, "random sampling": 44887, "inspired recent": 26412, "explore ways": 19754, "model highly": 33961, "language level": 28136, "maintaining performance": 31496, "processing word": 42967, "languages high": 28684, "able correctly": 684, "present training": 42043, "data sample": 12619, "semantically correct": 49383, "model fits": 33900, "literature propose": 30861, "benchmarks approach": 6510, "typically requires": 59156, "accuracy result": 1038, "good margin": 22934, "analysis work": 2795, "daily life": 12085, "linguistic markers": 30777, "methods text": 33072, "classification topic": 8576, "modeling text": 34631, "analysis applied": 2614, "personal stories": 40759, "techniques word": 56152, "crowd sourcing": 11882, "combined word": 9089, "resources wordnet": 47340, "intrinsic evaluations": 26769, "applications word": 3259, "community question": 9272, "yes questions": 63087, "stage framework": 52431, "framework perform": 21581, "leverage existing": 30267, "pu learning": 44301, "positive unlabeled": 41300, "unlabeled examples": 59573, "binary classifier": 7148, "using distant": 60659, "learning help": 29667, "answers using": 3113, "social platforms": 51599, "information multi": 25978, "turn dialogue": 58989, "specifically propose": 52222, "propose hierarchical": 43409, "softmax classifier": 51630, "classification evaluate": 8464, "classification real": 8529, "method capture": 32411, "capture contextual": 7656, "systems recently": 54615, "encoding bpe": 17563, "paper presented": 39467, "work applying": 62570, "main idea": 31443, "improve performances": 24904, "performance highly": 40374, "models computing": 34843, "representations specifically": 46760, "monolingual multilingual": 35805, "allows perform": 2475, "perform unsupervised": 40158, "training embeddings": 58082, "semantic compositionality": 49252, "trained unsupervised": 57907, "multilingual embeddings": 36081, "multiple variants": 36309, "information methods": 25971, "methods process": 32993, "process context": 42764, "methods incorporate": 32900, "methods fine": 32867, "modeling process": 34614, "global local": 22834, "report accuracy": 46424, "improved classification": 24945, "model future": 33916, "using global": 60707, "based order": 5930, "achieve improvement": 1164, "english social": 17874, "media websites": 32190, "strategy using": 52955, "release dataset": 46151, "finally identify": 20863, "recently attention": 45407, "plays key": 41001, "score function": 48845, "decoding step": 13646, "model greatly": 33947, "greatly increases": 23235, "complexity paper": 9686, "proposing novel": 43947, "attention framework": 4751, "model step": 34411, "step experiments": 52806, "conventional attention": 11001, "networks attention": 36832, "attention mechanisms": 4784, "different dimensions": 14900, "dataset perform": 13025, "uses large": 60518, "set linguistic": 50185, "morphological complexity": 35840, "linguistics research": 30824, "tense aspect": 56220, "everyday language": 18803, "language usage": 28566, "paper aim": 39260, "aim analyze": 2136, "vocabulary used": 61717, "200 000": 234, "based tool": 6099, "media users": 32188, "derived large": 14201, "labeling framework": 27785, "representations proposed": 46744, "accuracy trained": 1064, "approach train": 3723, "language systems": 28516, "learning relies": 29839, "study learning": 53405, "word meanings": 62246, "better reflect": 6951, "simple strategy": 51212, "document aligned": 15766, "address challenges": 1747, "challenges applying": 8031, "solve challenges": 51676, "multiple translations": 36306, "selection models": 49147, "pairs experimental": 39187, "models multilingual": 35237, "predictions language": 41761, "contrast propose": 10887, "using continuous": 60623, "improve inference": 24864, "inference language": 25664, "multilingual language": 36088, "approaches automatic": 3771, "manner paper": 31721, "provide depth": 44047, "evaluation existing": 18616, "carefully designed": 7762, "experiments explore": 19435, "based evaluations": 5710, "evaluations results": 18769, "metrics work": 33209, "international conference": 26695, "score 79": 48814, "systems participated": 54584, "media content": 32163, "substantial research": 53630, "ensemble learning": 17975, "information detect": 25809, "accuracy significantly": 1047, "majority baseline": 31526, "emotions expressed": 17303, "applied natural": 3284, "come cost": 9128, "interpretability paper": 26717, "effects model": 16826, "set input": 50171, "model decision": 33738, "comprehensive analysis": 9782, "multiple nlp": 36256, "linguistic feature": 30770, "prediction proposed": 41732, "methodology offers": 32719, "model decisions": 33739, "analysis neural": 2706, "learning sequence": 29869, "unsupervised task": 59739, "task supervised": 55425, "final layer": 20822, "task auxiliary": 54926, "task architecture": 54910, "shows improvements": 50786, "percentage points": 40053, "points f1": 41072, "problem computational": 42522, "training corpora": 57961, "grained fine": 23035, "score 76": 48811, "neural method": 36969, "method transfer": 32689, "learning source": 29887, "tasks aspects": 55510, "target labels": 54822, "class labels": 8406, "labels documents": 27817, "select relevant": 49110, "applied target": 3299, "adversarial training": 1988, "different baselines": 14849, "baselines model": 6279, "rapid growth": 44990, "growth social": 23310, "need automated": 36547, "limited work": 30634, "work reported": 62806, "model developed": 33761, "rich source": 48123, "increase accuracy": 25404, "taken account": 54773, "does directly": 15943, "experiment model": 19242, "model english": 33823, "data german": 12390, "data jointly": 12443, "jointly modeling": 27207, "additionally investigate": 1724, "finally study": 20881, "structured attention": 53152, "attention neural": 4799, "models incorporating": 35122, "structural information": 53079, "information propagation": 26028, "treebank dataset": 58763, "model persian": 34208, "persian english": 40745, "persian language": 40746, "language best": 27977, "hyper parameters": 24327, "persian dataset": 40744, "enhance word": 17928, "alignment model": 2375, "entity entity": 18105, "entity embeddings": 18104, "investigate state": 26986, "aware model": 5461, "entity level": 18114, "complementary information": 9589, "baseline fine": 6169, "information entity": 25838, "entity descriptions": 18100, "improves multi": 25138, "entities paper": 18070, "response generation": 47392, "learning especially": 29628, "framework propose": 21585, "evaluation propose": 18683, "models online": 35276, "human judgement": 24180, "results modeling": 47727, "sentence matching": 49599, "phrase representation": 40843, "single framework": 51304, "data evaluation": 12331, "standard practice": 52516, "method build": 32406, "training experiment": 58098, "demonstrate modeling": 13944, "crowd workers": 11883, "level quality": 30185, "quality domain": 44512, "data scale": 12622, "supervision propose": 54089, "learning mtl": 29767, "tasks shared": 55883, "task learn": 55167, "learn mapping": 29396, "mtl model": 35932, "model tested": 34454, "various levels": 61355, "asr model": 4558, "model results": 34321, "especially useful": 18311, "novel decoding": 37801, "decoding approach": 13627, "based continuous": 5646, "using gradient": 60711, "right right": 48142, "right left": 48140, "leads substantial": 29331, "models typical": 35638, "end conduct": 17621, "evaluation compare": 18591, "produced state": 43021, "systems language": 54541, "language directions": 28029, "error categories": 18217, "systems neural": 54570, "models conditional": 34844, "perform tasks": 40153, "analysis speech": 2765, "nlp researchers": 37523, "called textit": 7555, "detection identify": 14491, "category based": 7861, "preliminary work": 41809, "limited task": 30623, "compared simple": 9452, "simple bag": 51138, "based skip": 6039, "higher probability": 23839, "accuracy time": 1062, "model distinguish": 33779, "resources present": 47327, "goal identify": 22887, "improvements multiple": 25083, "multiple baselines": 36171, "use specific": 60023, "representations improves": 46686, "identifying important": 24459, "representations especially": 46654, "matching approach": 31909, "efficient transfer": 16905, "methods training": 33081, "network long": 36761, "learning schemes": 29860, "small user": 51511, "user data": 60407, "methods especially": 32841, "dialogue data": 14770, "methods successfully": 33058, "aspects paper": 4550, "different domain": 14904, "modeling using": 34634, "using fine": 60695, "ensemble models": 17980, "similar gains": 51043, "able outperform": 709, "understanding nlu": 59373, "nlu tasks": 37571, "tasks shallow": 55882, "semantic slot": 49351, "current deep": 11969, "labeling problem": 27789, "labels paper": 27843, "alternative approach": 2497, "promote development": 43190, "resources language": 47309, "availability large": 5252, "large parallel": 28932, "report performance": 46441, "use text": 60046, "ones obtained": 38340, "pearson correlation": 40005, "analysis indicates": 2682, "based interactions": 5790, "based specific": 6051, "task related": 55328, "based distribution": 5686, "approaches data": 3792, "difficulty task": 15202, "task lack": 55158, "competing approaches": 9529, "sentiments expressed": 49870, "address propose": 1794, "propose augment": 43304, "datasets systems": 13450, "use traditional": 60052, "features perform": 20640, "handle complex": 23407, "structured data": 53153, "benefit various": 6575, "benefits using": 6590, "text relations": 56733, "new cross": 37159, "participants asked": 39813, "based english": 5703, "computer generated": 9889, "corpus provided": 11413, "provided dataset": 44160, "contain errors": 10461, "hurt performance": 24304, "different input": 14956, "effectively improve": 16739, "translation errors": 58606, "additionally method": 1725, "knowledge target": 27624, "modal attention": 33453, "different parts": 15020, "decoder hidden": 13597, "different strategies": 15081, "features compare": 20541, "impact adding": 24589, "models report": 35436, "evaluated data": 18526, "domain dialogue": 16047, "trained produce": 57843, "indistinguishable human": 25561, "dialogue utterances": 14795, "cast task": 7823, "problem jointly": 42589, "jointly train": 27221, "train systems": 57642, "generated ones": 22305, "number potential": 38028, "adversarial evaluation": 1968, "evaluation demonstrate": 18606, "adversarially trained": 1994, "baselines introduce": 6273, "simple general": 51174, "generate outputs": 22227, "length model": 30030, "based token": 6097, "token generation": 57292, "summarization machine": 53888, "bleu rouge": 7211, "rouge scores": 48355, "given collection": 22726, "semantically relevant": 49390, "annotations model": 2994, "word categories": 62123, "aims make": 2204, "comparable models": 9299, "english corpora": 17789, "benchmark corpus": 6439, "trained sequence": 57868, "quality proposed": 44567, "users social": 60482, "purpose training": 44413, "hate speech": 23480, "speech detection": 52259, "training phase": 58208, "corpora build": 11181, "build accurate": 7385, "candidate selection": 7578, "measured bleu": 32068, "increase quality": 25422, "important various": 24790, "various fields": 61343, "fields natural": 20783, "processing recent": 42930, "corpora proposed": 11235, "corpus experiments": 11339, "apply state": 3353, "art techniques": 4425, "techniques different": 56078, "way combine": 61797, "produce better": 42975, "propose different": 43353, "models examine": 34981, "novel combination": 37784, "models various": 35669, "models experiment": 34992, "learning ssl": 29891, "understanding slu": 59400, "model adapt": 33531, "utilizing knowledge": 61124, "graph document": 23128, "descent sgd": 14210, "selection techniques": 49155, "chi square": 8287, "attempt explore": 4686, "research method": 47072, "contextual similarity": 10783, "importance word": 24695, "lack resources": 27911, "research word": 47142, "generate word": 22263, "testing different": 56405, "hong kong": 23999, "demonstrate importance": 13920, "media language": 32169, "popular natural": 41174, "nlp aims": 37460, "text task": 56805, "supervised based": 53964, "based conditional": 5635, "order tackle": 38655, "english bengali": 17779, "able successfully": 727, "labels given": 27830, "given code": 22725, "sentence experiments": 49557, "pairs domains": 39182, "addresses task": 1817, "generation leveraging": 22484, "training graph": 58117, "using heuristic": 60724, "generate output": 22226, "sentences evaluated": 49711, "far paper": 20404, "multilingual cross": 36074, "lingual data": 30696, "based assumption": 5578, "documents written": 15931, "new tasks": 37336, "tasks respectively": 55860, "main ideas": 31444, "relations document": 46023, "leverage multilingual": 30279, "multilingual resources": 36115, "target entities": 54816, "remains difficult": 46331, "available facilitate": 5293, "information analysis": 25760, "provide important": 44088, "important insights": 24736, "quality control": 44500, "based user": 6122, "interface provides": 26661, "provides overview": 44218, "human knowledge": 24185, "knowledge recent": 27586, "learning ml": 29739, "ml natural": 33432, "learning networks": 29777, "networks paper": 36888, "survey aims": 54202, "texts present": 56912, "corpus introduce": 11363, "probabilistic topic": 42468, "challenges posed": 8068, "specific example": 52082, "methods understanding": 33091, "understanding nature": 59372, "attention networks": 4798, "structural dependencies": 53077, "end training": 17720, "training work": 58316, "work experiment": 62654, "chain conditional": 7957, "model models": 34105, "networks outperform": 36887, "models variety": 35668, "synthetic real": 54380, "way learn": 61816, "attention propose": 4814, "modal data": 33456, "model advantage": 33547, "languages improve": 28691, "function training": 21760, "sentence ranking": 49628, "task additional": 54885, "strong improvements": 53032, "temporal characteristics": 56180, "additional knowledge": 1678, "used discover": 60153, "sources work": 51842, "entities mentioned": 18064, "articles present": 4474, "similar text": 51073, "resources like": 47311, "trained based": 57679, "gap language": 21966, "limitations existing": 30548, "approach adapting": 3407, "adapting existing": 1564, "method work": 32709, "noisy data": 37614, "reduce noise": 45675, "information terms": 26116, "comparable better": 9290, "art benchmark": 4226, "representations encode": 46649, "task jointly": 55151, "products services": 43058, "task joint": 55150, "user ratings": 60444, "data compared": 12226, "presents simple": 42104, "simple robust": 51205, "shows competitive": 50769, "results respect": 47812, "domain based": 16023, "available given": 5301, "student network": 53214, "better learning": 6909, "teacher network": 55994, "sentence neural": 49605, "smaller model": 51521, "process demonstrate": 42769, "data filtering": 12362, "filtering method": 20812, "knowledge teacher": 27628, "training leads": 58153, "performance given": 40367, "search strategy": 48986, "reach better": 45044, "decoding speed": 13644, "driven models": 16431, "models excel": 34984, "task test": 55433, "called cross": 7542, "proposes neural": 43936, "shows promise": 50794, "architectures task": 4124, "model external": 33869, "study based": 53334, "cloze task": 8725, "linear classifier": 30651, "context addition": 10580, "model predictions": 34224, "reaches state": 45057, "demonstrate different": 13890, "different task": 15093, "way people": 61825, "revolutionized field": 48062, "widely explored": 61996, "handle various": 23418, "invariant features": 26918, "representative nlp": 46800, "study popular": 53432, "weighting schemes": 61936, "optimal performance": 38530, "set used": 50273, "best match": 6778, "weighting scheme": 61935, "scheme used": 48731, "best overall": 6787, "especially used": 18310, "performance general": 40361, "layer recurrent": 29205, "model temporal": 34448, "knowledge input": 27527, "input signal": 26335, "carry depth": 7775, "semantic aspects": 49237, "method obtaining": 32592, "accurate models": 1081, "fail fully": 20337, "large context": 28861, "run parallel": 48402, "entire documents": 18024, "parameter sharing": 39678, "training procedures": 58216, "accuracy comparable": 947, "entire document": 18023, "important goal": 24729, "embeddings evaluated": 17129, "focus word": 21213, "propose evaluation": 43377, "focus data": 21151, "data efficiency": 12312, "available data": 5276, "comprehensive evaluation": 9788, "complete picture": 9600, "brings new": 7342, "new insight": 37224, "unsupervised language": 59703, "agnostic method": 2093, "substantial amounts": 53616, "health related": 23518, "information social": 26091, "provides opportunity": 44217, "opportunity study": 38514, "particular study": 39862, "identify potentially": 24437, "employ simple": 17390, "simple rule": 51206, "supervised classifier": 53969, "hand annotated": 23383, "using user": 61012, "available sources": 5368, "graph representations": 23163, "end model": 17685, "learns latent": 29963, "objective experiments": 38088, "models performance": 35318, "ensemble model": 17978, "standard english": 52489, "translation dataset": 58596, "recognition text": 45545, "constructed large": 10412, "different dataset": 14888, "ground truths": 23258, "make datasets": 31562, "datasets publicly": 13388, "advantage neural": 1944, "weight sharing": 61920, "model compression": 33686, "knowledge neural": 27559, "consistently yields": 10313, "compared baseline": 9383, "models proven": 35377, "result models": 47441, "black boxes": 7194, "learned patterns": 29472, "demonstrate new": 13951, "text attributes": 56443, "challenging text": 8160, "level deep": 30098, "used compute": 60121, "end manner": 17682, "using propagation": 60878, "maximize likelihood": 31957, "propose variants": 43699, "limited understanding": 30629, "automatically detects": 5160, "time span": 57218, "chat logs": 8261, "different users": 15119, "benefit future": 6562, "future exploration": 21874, "detection methods": 14500, "finally obtain": 20871, "overall f1": 39039, "corpus recent": 11416, "evidence humans": 18811, "factors affect": 20305, "affect human": 2015, "sense knowledge": 49486, "model estimates": 33839, "second study": 49024, "syntactic representation": 54318, "applications recent": 3244, "limited english": 30584, "dependency graphs": 14122, "handling complex": 23424, "forms language": 21376, "multilingual evaluation": 36084, "languages datasets": 28634, "datasets english": 13248, "ability represent": 641, "negation scope": 36613, "investigate possibility": 26974, "logic representation": 30980, "using universal": 61008, "similar texts": 51074, "representations evaluate": 46656, "entity semantic": 18147, "correlation scores": 11529, "rate compared": 45013, "entity embedding": 18103, "representations addition": 46614, "accuracy scores": 1042, "methods operate": 32968, "representation using": 46601, "performance reported": 40534, "specific natural": 52115, "comparison approaches": 9490, "concepts entities": 9933, "contains million": 10499, "person organization": 40751, "different mentions": 14987, "evaluated performance": 18542, "performance based": 40207, "based concept": 5633, "approaches performance": 3895, "higher state": 23846, "propose hybrid": 43411, "approach encourages": 3510, "incorporate linguistic": 25358, "prior training": 42417, "developed deep": 14628, "task question": 55314, "new question": 37295, "main task": 31461, "achieving better": 1396, "set approach": 50108, "produces higher": 43030, "engineering approaches": 17766, "approaches state": 3925, "linear transformation": 30675, "using dictionaries": 60651, "pairs improve": 39195, "set composed": 50121, "robust noise": 48259, "languages achieving": 28593, "set finally": 50158, "finally extend": 20859, "extend method": 19824, "based cross": 5660, "annotations english": 2989, "meaning preserving": 32011, "preserving semantic": 42126, "consists main": 10322, "main steps": 31460, "segmentation text": 49089, "based discourse": 5681, "trained semi": 57860, "new parallel": 37278, "developing evaluating": 14652, "language proficiency": 28445, "original text": 38731, "systems corpus": 54462, "identifying specific": 24469, "need new": 36585, "features deep": 20554, "architectures achieve": 4102, "achieve robust": 1188, "representations provide": 46745, "validate hypothesis": 61179, "features additionally": 20520, "low medium": 31159, "medium high": 32217, "challenging real": 8132, "relative improvements": 46102, "proposed features": 43776, "shown effectiveness": 50703, "effectiveness using": 16821, "segmentation models": 49085, "rely large": 46292, "data effective": 12309, "resource datasets": 47220, "insufficient training": 26494, "propose transfer": 43679, "corpora train": 11250, "model high": 33959, "use learned": 59931, "learned knowledge": 29462, "train student": 57640, "model low": 34076, "resource data": 47219, "data experiment": 12342, "results work": 47913, "work significantly": 62824, "datasets machine": 13322, "translated sentence": 58557, "sets compared": 50285, "set accuracy": 50101, "accuracy challenging": 941, "individual data": 25565, "data points": 12546, "data point": 12545, "examine impact": 18864, "methods human": 32890, "patterns experiments": 39968, "experiments natural": 19477, "inference nli": 25674, "examples neural": 18918, "task data": 54991, "sparsity issue": 51979, "issue paper": 27070, "tackle data": 54703, "baseline neural": 6193, "extra linguistic": 19963, "study proposes": 53446, "containing multiple": 10485, "model identifying": 33971, "labeling approach": 27777, "dataset consists": 12861, "open access": 38410, "annotation model": 2957, "rnn long": 48199, "representations computed": 46628, "input layer": 26291, "feature rich": 20501, "model furthermore": 33912, "extraction scientific": 20106, "corpus recently": 11417, "address different": 1755, "paper possible": 39442, "human designed": 24134, "able obtain": 708, "style analysis": 53481, "fake news": 20371, "presents large": 42088, "articles manually": 4469, "corpus contains": 11308, "meta learning": 32337, "news detection": 37400, "results important": 47668, "presents systematic": 42108, "results additional": 47492, "model addition": 33539, "requires manual": 46941, "settings present": 50390, "efficiency accuracy": 16838, "responses given": 47405, "dialogue models": 14780, "models experiments": 34995, "diverse outputs": 15711, "explore simple": 19736, "solution multi": 51656, "architecture training": 4092, "simply concatenate": 51249, "sentences form": 49725, "source languages": 51780, "provide insights": 44093, "information scenario": 26073, "train supervised": 57641, "high reliability": 23789, "investigate ways": 26996, "optimize model": 38562, "achieve desired": 1132, "value pairs": 61209, "train recurrent": 57624, "generate textual": 22256, "generates sentences": 22356, "achieves bleu": 1310, "baseline human": 6175, "human preference": 24216, "evaluation suggests": 18731, "manual analysis": 31730, "classification ctc": 8448, "labeling methods": 27786, "models models": 35229, "paper train": 39597, "encoder used": 17548, "used pretrain": 60268, "learning joint": 29688, "specialized domain": 52032, "present systems": 42035, "complexity proposed": 9687, "proposed tasks": 43909, "worst case": 62977, "crucial understand": 11916, "requires considerable": 46920, "considerable human": 10230, "human supervision": 24247, "near real": 36511, "specifically focus": 52204, "increased accuracy": 25428, "baseline method": 6182, "learning computational": 29567, "computational approach": 9833, "approach investigate": 3577, "learning mechanisms": 29724, "sensitive different": 49497, "bias model": 7034, "model knowledge": 34030, "learning scenarios": 29858, "learning examples": 29631, "examples paper": 18920, "task word": 55471, "models despite": 34904, "able recover": 721, "information single": 26090, "vector model": 61457, "broad set": 7354, "generate training": 22259, "generated training": 22328, "set labels": 50178, "major drawbacks": 31508, "use hand": 59904, "mentions context": 32303, "micro f1": 33224, "model dataset": 33736, "systems approaches": 54433, "transferring knowledge": 58436, "knowledge improve": 27521, "high overall": 23757, "extend state": 19831, "yields improvement": 63125, "propose information": 43416, "spanish tweets": 51949, "lexical level": 30370, "tend use": 56209, "obtained different": 38207, "work suggests": 62833, "languages low": 28717, "subword level": 53685, "substantially better": 53632, "models highly": 35090, "target training": 54853, "combining multiple": 9117, "multiple related": 36274, "compensate lack": 9520, "learning proposed": 29825, "model relatively": 34302, "network recurrent": 36793, "recurrent layer": 45616, "human learning": 24195, "learning single": 29881, "single feature": 51303, "non recurrent": 37678, "recurrent models": 45619, "models capturing": 34806, "requires use": 46957, "paradigm shift": 39629, "limited paper": 30601, "features outperforms": 20636, "outperforms complex": 38889, "complex neural": 9643, "models detecting": 34906, "models offer": 35274, "rich contextual": 48095, "contextual semantics": 10781, "occurrence patterns": 38276, "provide effective": 44058, "open research": 38444, "research questions": 47108, "propose solution": 43639, "models joint": 35149, "latent space": 29136, "generated synthetic": 22323, "inspired observation": 26408, "selection process": 49149, "goal work": 22906, "end design": 17628, "making process": 31664, "approaches widely": 3954, "significant accuracy": 50846, "accuracy improvement": 990, "improvement especially": 25003, "cnns used": 8780, "used existing": 60176, "layers deep": 29220, "capture human": 7679, "residual connections": 47187, "combination different": 9040, "chat data": 8260, "compared widely": 9473, "respectively present": 47377, "robust approach": 48239, "pairs used": 39227, "used single": 60304, "manual evaluations": 31740, "able consistently": 681, "consistently outperform": 10298, "increasingly used": 25478, "datasets shows": 13426, "networks achieve": 36828, "achieve accurate": 1110, "accurate language": 1079, "classification performances": 8515, "text best": 56458, "popular method": 41170, "local semantic": 30949, "accuracy rate": 1033, "obtained training": 38227, "investigate using": 26994, "soft labels": 51623, "improve generalization": 24859, "training deep": 58056, "tuning approach": 58899, "labels provided": 27847, "true label": 58821, "improved generalization": 24949, "nli task": 37455, "label training": 27733, "performance baselines": 40210, "reasoning understanding": 45231, "teacher student": 55995, "machines understand": 31400, "understand text": 59314, "sentence text": 49657, "observed text": 38149, "learning experimental": 29634, "input prior": 26318, "uses pipeline": 60527, "pipeline method": 40902, "proposes approach": 43930, "approach identifies": 3557, "jointly solve": 27220, "solve issue": 51679, "conversations present": 11060, "knowledge information": 27524, "forms words": 21378, "common concepts": 9169, "explicitly stated": 19648, "access data": 819, "knowledge guide": 27510, "guide model": 23338, "medical terms": 32209, "aim work": 2162, "based importance": 5779, "new unsupervised": 37356, "heterogeneous information": 23622, "topic coherence": 57397, "levels semantic": 30247, "semantic types": 49369, "expert annotated": 19569, "identification performance": 24394, "metrics performance": 33188, "performance relatively": 40530, "learning robust": 29854, "readily applied": 45077, "applied domains": 3270, "use terms": 60044, "importance scores": 24690, "speed training": 52325, "research mainly": 47070, "algorithms propose": 2335, "propose generic": 43403, "easy implement": 16562, "learning research": 29844, "use pre": 59975, "vocabulary tokens": 61716, "architectural choices": 4018, "final performance": 20826, "systematically explore": 54412, "provide recommendations": 44120, "area paper": 4145, "resources improve": 47305, "augmented training": 4983, "corpus vocabulary": 11456, "detailed error": 14422, "hindi marathi": 23942, "improve coverage": 24836, "works paper": 62900, "method propose": 32625, "designed test": 14333, "approach comparing": 3459, "target entity": 54817, "tweet text": 59007, "stance detection": 52455, "perform classification": 40074, "detection accuracy": 14455, "finally perform": 20874, "model construct": 33707, "word selection": 62294, "language easily": 28039, "languages having": 28683, "significant challenges": 50854, "reasonable results": 45174, "present solution": 42019, "using lexical": 60767, "results achieve": 47485, "kind data": 27366, "difficult propose": 15183, "used augment": 60095, "explicit memory": 19620, "apply model": 3336, "tasks achieve": 55487, "benchmarks including": 6527, "including cnn": 25244, "qa tasks": 44460, "examples task": 18935, "model encode": 33817, "people paper": 40033, "semantic differences": 49269, "groups people": 23282, "apply approach": 3321, "different regions": 15046, "systems today": 54655, "models long": 35201, "suffer problem": 53777, "propose lightweight": 43442, "learn robust": 29417, "datasets low": 13320, "environment paper": 18172, "approaches methods": 3873, "methods explored": 32856, "scheme uses": 48732, "discriminative features": 15443, "positive samples": 41296, "samples training": 48492, "using nearest": 60830, "using ground": 60718, "data effectiveness": 12311, "artificial language": 4495, "experimental data": 19260, "complex context": 9618, "micro level": 33226, "macro level": 31409, "vectors language": 61489, "similarity propose": 51114, "detection method": 14499, "using range": 60893, "contrast state": 10889, "completely unsupervised": 9607, "experiments publicly": 19504, "compared strong": 9460, "strong supervised": 53053, "supervised baselines": 53966, "baselines approach": 6233, "paper work": 39607, "systems robust": 54626, "systems effectively": 54483, "human robot": 24237, "method help": 32522, "work consider": 62608, "software development": 51637, "broad coverage": 7352, "used language": 60222, "demonstrate effect": 13892, "model prove": 34253, "systems deployed": 54474, "expensive train": 19222, "prohibitively expensive": 43128, "scale analysis": 48551, "report empirical": 46431, "novel insights": 37842, "release open": 46162, "enables researchers": 17447, "researchers easily": 47153, "novel techniques": 37937, "networks task": 36915, "labels available": 27810, "propose extensions": 43384, "dynamic memory": 16487, "proposed extensions": 43772, "art end": 4255, "single task": 51342, "research previous": 47096, "based pattern": 5935, "approach parsing": 3635, "scale study": 48627, "structure called": 53092, "efficient framework": 16875, "massive corpora": 31881, "generates high": 22342, "supervised baseline": 53965, "using features": 60693, "encoding strategies": 17577, "performed better": 40660, "accuracy 72": 906, "ranking 3rd": 44963, "official evaluation": 38307, "study natural": 53416, "linguistic competence": 30754, "fundamental question": 21789, "article introduce": 4451, "arguments given": 4180, "modular framework": 35745, "sequence attention": 49911, "significantly accurate": 50932, "define novel": 13778, "based formulation": 5739, "random noise": 44884, "data considered": 12239, "self paced": 49202, "high noise": 23754, "study contributes": 53349, "recent development": 45302, "development large": 14682, "research end": 47028, "increasingly complex": 25470, "neural baseline": 36940, "development neural": 14691, "building high": 7446, "high performing": 23761, "models argue": 34726, "categories including": 7844, "data problem": 12563, "difficult maintain": 15175, "sequence generative": 49930, "generative adversarial": 22584, "sub models": 53522, "generator discriminator": 22616, "discriminator generator": 15452, "aims generate": 2195, "generate sentences": 22243, "human translated": 24248, "generated sentences": 22316, "sentences human": 49733, "generation high": 22472, "evaluate generated": 18461, "model consistently": 33702, "art transformer": 4432, "german chinese": 22663, "2017 task": 269, "types knowledge": 59096, "order enhance": 38614, "results entity": 47613, "entity retrieval": 18146, "combined using": 9088, "achieved best": 1219, "accuracy average": 936, "code mixing": 8831, "switching languages": 54261, "does necessarily": 15961, "speaker aware": 51996, "speech technologies": 52307, "based social": 6043, "propose context": 43333, "used score": 60294, "rank correlation": 44950, "competitive existing": 9548, "existing baseline": 19038, "reported literature": 46451, "truth data": 58835, "provide best": 44019, "narrative texts": 36384, "corpus 000": 11262, "10 different": 40, "different scenarios": 15058, "respectively additionally": 47360, "coreference information": 11159, "corpus shows": 11430, "rich lexical": 48109, "knowledge natural": 27556, "popular research": 41183, "powerful tools": 41449, "learning current": 29577, "chit chat": 8330, "using end": 60675, "end architectures": 17615, "introduce deep": 26797, "grounded task": 23263, "oriented dialogues": 38698, "gradient algorithm": 23002, "tested dataset": 56394, "model performed": 34199, "semeval 2017": 49430, "lingual semantic": 30725, "employs attention": 17404, "similarity paper": 51112, "spanish arabic": 51937, "similarity dataset": 51090, "dataset best": 12828, "knowledge dataset": 27433, "recent papers": 45331, "shown neural": 50730, "networks obtain": 36885, "specific feature": 52083, "tasks large": 55713, "source task": 51807, "target task": 54847, "available annotations": 5262, "studied tasks": 53237, "lstm rnns": 31282, "task like": 55188, "like speech": 30506, "perform large": 40118, "training framework": 58109, "layer wise": 29213, "wise training": 62086, "moving average": 35902, "successfully trained": 53750, "outperform deep": 38791, "shallow model": 50440, "model recognition": 34283, "trained proposed": 57844, "proposed training": 43917, "compared original": 9428, "original model": 38720, "minimum bayes": 33303, "bayes risk": 6353, "small dataset": 51470, "dataset outperform": 13020, "change detection": 8169, "dialog modeling": 14758, "modeling paper": 34609, "differs existing": 15152, "audio based": 4925, "various scenarios": 61387, "attention experimental": 4746, "propose supervised": 43654, "supervised algorithm": 53960, "type embeddings": 59053, "embeddings algorithm": 17081, "achieves near": 1347, "task outperforming": 55259, "manually curated": 31773, "finally use": 20885, "use embeddings": 59873, "mechanism used": 32147, "decoder uses": 13620, "sentence parts": 49617, "customer feedback": 12055, "like social": 30504, "prediction model": 41719, "model objective": 34134, "classification approaches": 8436, "approaches study": 3928, "study aims": 53322, "manual labelling": 31746, "time propose": 57200, "context generation": 10647, "representations attention": 46620, "performance evidence": 40328, "dataset bias": 12830, "common space": 9201, "using speech": 60958, "vice versa": 61573, "use image": 59911, "words multi": 62461, "labels train": 27853, "able predict": 713, "making effective": 31654, "dataset allows": 12806, "allowing researchers": 2448, "data enables": 12321, "work hope": 62679, "certain linguistic": 7942, "examples text": 18938, "topics natural": 57455, "important issue": 24737, "improvement traditional": 25035, "expensive time": 19220, "tend perform": 56205, "additional linguistic": 1685, "performance makes": 40432, "popular metrics": 41172, "performances different": 40641, "methods yield": 33105, "higher performance": 23835, "finally introduce": 20864, "performance metrics": 40437, "workshop shared": 62922, "robust different": 48244, "quality estimation": 44516, "using reference": 60902, "processing pipeline": 42922, "written english": 62998, "analysis dependency": 2649, "errors automatic": 18234, "focus learning": 21174, "similarity function": 51097, "errors propose": 18249, "approach task": 3717, "task lexical": 55187, "using subset": 60968, "speech corpus": 52255, "task methods": 55212, "defined task": 13786, "methods new": 32958, "core tasks": 11157, "areas natural": 4156, "different evaluation": 14920, "advent large": 1959, "data access": 12107, "researchers paper": 47162, "sensitive data": 49496, "pragmatic reasoning": 41495, "reasoning framework": 45195, "framework experiments": 21514, "classifiers built": 8613, "cases model": 7808, "newly collected": 37371, "collected corpus": 8956, "corpus human": 11357, "recognition important": 45508, "dataset named": 13005, "collected multiple": 8964, "dataset used": 13126, "gender age": 22033, "largest dataset": 29094, "networks significantly": 36910, "significantly faster": 50957, "perplexity using": 40742, "using significantly": 60940, "parameters paper": 39714, "resource intensive": 47231, "mixed data": 33401, "existing monolingual": 19110, "resources training": 47337, "produce significantly": 43008, "baseline present": 6202, "mixed tweets": 33412, "multilingual speakers": 36123, "great promise": 23214, "output decoder": 38966, "conditional variational": 10008, "variational autoencoders": 61246, "discourse level": 15392, "encoder model": 17525, "using greedy": 60717, "developed novel": 14639, "novel variant": 37950, "knowledge better": 27418, "performance finally": 40347, "loss proposed": 31103, "models validated": 35666, "novel cross": 37794, "lingual transfer": 30734, "transfer method": 58403, "art monolingual": 4302, "use labeled": 59920, "shot shot": 50645, "developed dataset": 14627, "task called": 54944, "simultaneously propose": 51275, "available twitter": 5383, "big personality": 7093, "personality traits": 40762, "task scientific": 55348, "scientific paper": 48765, "requires large": 46936, "corpus automatic": 11282, "automatic feature": 5092, "work conducted": 62607, "using 10": 60545, "evaluation conducted": 18595, "set annotated": 50107, "embeddings effective": 17119, "better overall": 6924, "overall classification": 39037, "works shown": 62907, "synthetic parallel": 54378, "generated translation": 22330, "effective various": 16711, "various neural": 61370, "issues study": 27105, "data efficient": 12313, "pseudo parallel": 44280, "synthetic examples": 54375, "pairs experiments": 39189, "czech german": 12079, "german french": 22668, "surface forms": 54152, "performed experiments": 40662, "methods various": 33099, "translation datasets": 58597, "previous word": 42301, "information event": 25842, "information seeking": 26074, "empirically test": 17368, "news texts": 37420, "motivate development": 35858, "label large": 27713, "task train": 55441, "data compare": 12225, "performance domain": 40301, "specific classifiers": 52056, "classifiers trained": 8625, "given news": 22765, "data domains": 12295, "varies depending": 61255, "annotators provide": 3016, "challenge existing": 7980, "parsing techniques": 39801, "uses novel": 60526, "set features": 50156, "ability handle": 611, "graph structures": 23172, "structures languages": 53186, "models potentially": 35334, "posterior probabilities": 41362, "improvements baselines": 25051, "models applications": 34717, "like sentiment": 30502, "representations particular": 46734, "traditional feature": 57518, "methods high": 32887, "particular words": 39874, "extensive study": 19914, "embedding size": 17061, "interesting insights": 26651, "specifically approach": 52181, "financial news": 20892, "news headlines": 37406, "architecture used": 4095, "challenge task": 8018, "performance present": 40493, "submitted systems": 53589, "use syntax": 60037, "unsupervised supervised": 59736, "supervised way": 54070, "best run": 6819, "ranked 1st": 44952, "annotations paper": 2996, "using iterative": 60742, "problem evaluate": 42553, "language available": 27971, "models input": 35133, "model leads": 34049, "10 relative": 51, "propose multi": 43473, "variational encoder": 61247, "model labeled": 34035, "learning generative": 29662, "discrete continuous": 15420, "features data": 20551, "framework effectively": 21500, "benchmark model": 6478, "outperforms single": 38943, "languages explore": 28667, "explore ability": 19685, "models uses": 35654, "uses different": 60506, "evaluating models": 18563, "useful insights": 60371, "approach modeling": 3603, "media using": 32189, "graph structured": 23171, "structure experiments": 53104, "architecture different": 4043, "benefit model": 6566, "al 2018": 2240, "level granularity": 30127, "document specific": 15834, "based previous": 5951, "corpus annotations": 11276, "al 2017": 2239, "english corpus": 17790, "liu et": 30892, "al 2021": 2245, "al 2022": 2246, "terms number": 56302, "simulated annealing": 51259, "search algorithms": 48963, "95 f1": 568, "research propose": 47101, "new annotated": 37126, "hope useful": 24016, "community present": 9270, "present contribution": 41877, "typologically different": 59166, "level reasoning": 30186, "expressed natural": 19799, "expressions like": 19808, "strategies model": 52910, "art attention": 4216, "task additionally": 54886, "additionally provide": 1732, "tackle task": 54713, "task efficiently": 55038, "makes unsuitable": 31641, "content ugc": 10566, "brazilian portuguese": 7308, "techniques work": 56153, "propose technique": 43662, "word relationships": 62280, "vectors based": 61480, "features present": 20646, "embeddings approach": 17083, "approach obtains": 3616, "time sequence": 57209, "algorithm generate": 2277, "prediction paper": 41726, "propose perform": 43581, "later stage": 29151, "bi lingual": 7011, "correct incorrect": 11468, "train set": 57632, "set containing": 50128, "performance instead": 40396, "instead single": 26463, "neural nets": 36992, "work aims": 62564, "aims reduce": 2213, "practice work": 41487, "set techniques": 50259, "reducing dimensionality": 45706, "network performs": 36783, "step model": 52815, "emotional state": 17299, "analysis models": 2699, "existing deep": 19056, "data evaluated": 12330, "using pretrained": 60871, "pretrained model": 42168, "evaluation score": 18709, "lstm gru": 31264, "results related": 47803, "end architecture": 17614, "process work": 42841, "empirically evaluate": 17361, "tasks conduct": 55552, "thorough investigation": 57063, "representations data": 46634, "quantitative evaluation": 44618, "light important": 30451, "aspects neural": 4549, "participation semeval": 39827, "task multilingual": 55228, "multilingual knowledge": 36087, "submission semeval": 53574, "work builds": 62593, "lingual language": 30707, "dependent information": 14147, "require considerable": 46847, "data produced": 12567, "use larger": 59928, "shows method": 50788, "method previous": 32621, "languages aim": 28597, "representation independent": 46528, "capture underlying": 7718, "lingual similarity": 30730, "measure compare": 32046, "sentences provide": 49774, "provide experimental": 44066, "evidence sentences": 18819, "sentences close": 49688, "close embedding": 8684, "languages chinese": 28614, "coherence modeling": 8910, "processing field": 42872, "need feature": 36564, "features capture": 20534, "based current": 5662, "model specifically": 34402, "identifying entity": 24457, "existing strong": 19148, "achieved promising": 1258, "number target": 38042, "propose select": 43613, "tokens training": 57340, "japanese chinese": 27145, "proved effectiveness": 43988, "translation baseline": 58583, "represented single": 46807, "method enables": 32480, "translate source": 58552, "sentences proposed": 49773, "models shot": 35493, "properties text": 43271, "using bayesian": 60584, "known data": 27656, "dataset question": 13052, "question pairs": 44742, "architectures furthermore": 4111, "model pretrained": 34229, "noisy dataset": 37615, "dataset automatically": 12818, "automatically collected": 5149, "structure neural": 53122, "rely graph": 46282, "graph convolutional": 23123, "networks gcns": 36860, "produce representations": 43004, "paper model": 39427, "based visual": 6131, "qualitative analyses": 44470, "analyses demonstrate": 2593, "resulting embeddings": 47465, "gradient updates": 23011, "method combined": 32420, "different configurations": 14873, "grained sentiment": 23046, "analysis problem": 2726, "techniques pre": 56122, "lexical based": 30354, "embeddings able": 17075, "score 69": 48804, "sub task": 53533, "new large": 37234, "scale dataset": 48563, "dataset called": 12835, "comprehension question": 9773, "released datasets": 46174, "generate question": 22232, "existing question": 19134, "conduct human": 10053, "evaluation test": 18738, "proposed dataset": 43752, "dataset serve": 13080, "serve benchmark": 50076, "years seen": 63074, "scale evaluation": 48570, "datasets snli": 13434, "inference problem": 25683, "advance research": 1882, "evaluation end": 18614, "data major": 12476, "spanish russian": 51948, "set baselines": 50113, "lingual word": 30737, "best scores": 6821, "average accuracy": 5402, "research multilingual": 47076, "variety text": 61293, "sentences train": 49795, "labels based": 27811, "based manual": 5831, "manual annotations": 31732, "learning classifier": 29556, "using labeled": 60749, "use trained": 60054, "trained classifier": 57687, "baseline task": 6214, "requires little": 46939, "syntax morphology": 54350, "sentence order": 49607, "present brief": 41860, "learning focus": 29650, "extract common": 19970, "common task": 9204, "task invariant": 55146, "features existing": 20574, "shared features": 50472, "propose adversarial": 43287, "adversarial multi": 1976, "tasks publicly": 55830, "available url": 5385, "url http": 59792, "data knowledge": 12445, "important resources": 24766, "variety natural": 61282, "tasks suffer": 55918, "model emph": 33808, "sparse attention": 51964, "concepts relations": 9942, "concepts learned": 9936, "baselines use": 6316, "media communication": 32162, "distinct representations": 15594, "detection challenging": 14465, "task apply": 54907, "conventional classification": 11002, "features particular": 20639, "experiments real": 19508, "collected social": 8967, "demonstrate deep": 13888, "based rnn": 6001, "mechanism effectively": 32109, "relevant parts": 46228, "investigate neural": 26971, "tagging problem": 54749, "including multi": 25275, "learning natural": 29774, "relation detection": 45969, "component nlp": 9712, "including knowledge": 25264, "base question": 5550, "residual learning": 47188, "results evidence": 47621, "detection performance": 14510, "qa benchmarks": 44446, "benchmarks paper": 6536, "learning particularly": 29800, "challenges arise": 8032, "state representation": 52707, "world events": 62940, "processing computational": 42860, "based existing": 5713, "existing texts": 19160, "quality applications": 44491, "especially large": 18282, "questions text": 44812, "experiment using": 19255, "users particular": 60473, "particular model": 39854, "model consider": 33699, "complex models": 9635, "models furthermore": 35047, "instance model": 26428, "sophisticated models": 51716, "generates fluent": 22341, "usually better": 61038, "promising direction": 43163, "combine advantages": 9061, "framework leveraging": 21558, "points best": 41068, "best single": 6822, "single output": 51327, "text collection": 56495, "novel form": 37824, "missing data": 33361, "dependency graph": 14121, "using efficient": 60671, "layer perceptron": 29199, "art semantic": 4396, "code open": 8839, "source available": 51742, "translation automatic": 58581, "automatic question": 5118, "handle long": 23411, "long documents": 31011, "difficult use": 15193, "irrelevant information": 27040, "underlying model": 59271, "gradient method": 23009, "method train": 32684, "benchmarks different": 6518, "analysis news": 2708, "accuracy recent": 1036, "works explored": 62888, "speech representation": 52289, "way investigate": 61814, "investigate role": 26985, "using siamese": 60939, "siamese networks": 50819, "different information": 14954, "information particular": 26004, "setting introduce": 50327, "different combinations": 14867, "present qualitative": 41992, "based audio": 5584, "systems method": 54561, "best case": 6755, "addition introduce": 1622, "achieve bleu": 1119, "reducing memory": 45709, "study new": 53419, "adversarial networks": 1980, "networks gans": 36858, "employ adversarial": 17373, "training architecture": 57936, "model human": 33965, "goal create": 22879, "language bridge": 27981, "diverse language": 15704, "theoretical guarantees": 57022, "language demonstrate": 28019, "scheduled sampling": 48719, "exposure bias": 19789, "new training": 37349, "provide informative": 44090, "points previous": 41078, "addition using": 1649, "detection research": 14518, "research date": 47012, "performance publicly": 40514, "pairs various": 39230, "baseline state": 6211, "attracted lot": 4884, "crucial component": 11897, "task design": 55008, "task focus": 55091, "based datasets": 5667, "right answer": 48137, "results attained": 47510, "models human": 35094, "datasets particular": 13366, "information question": 26038, "task inspired": 55140, "inspired propose": 26411, "dataset visual": 13135, "task extensive": 55069, "models datasets": 34884, "datasets methods": 13332, "problem provide": 42638, "provide fine": 44074, "lexical constraints": 30359, "incorporate additional": 25344, "parameters training": 39725, "lexically constrained": 30399, "constrained decoding": 10364, "conducting experiments": 10101, "adaptation neural": 1530, "provide large": 44096, "user input": 60422, "significant gains": 50867, "gains performance": 21940, "adaptation scenarios": 1537, "played important": 40983, "propose sequence": 43624, "modeling objective": 34606, "useful improving": 60369, "accuracy different": 959, "range datasets": 44911, "datasets covering": 13200, "improvements benchmark": 25052, "requiring additional": 46960, "additional annotated": 1651, "unannotated data": 59207, "data fundamental": 12376, "learned features": 29460, "useful knowledge": 60372, "wikipedia pages": 62052, "categories according": 7842, "annotated labels": 2901, "labels used": 27856, "used information": 60213, "recognize important": 45550, "challenge set": 8015, "hand designed": 23391, "present english": 41902, "use analyze": 59820, "neural systems": 37101, "analysis provides": 2733, "presents attempt": 42073, "language written": 28586, "covering various": 11659, "various areas": 61302, "topics paper": 57457, "proposes simple": 43942, "simple machine": 51188, "sources data": 51827, "74 accuracy": 498, "accuracy classifying": 945, "styles paper": 53508, "interesting observations": 26652, "known facts": 27658, "experiments support": 19538, "algorithm implemented": 2279, "discuss potential": 15479, "failure modes": 20353, "suggest directions": 53816, "directions future": 15291, "video captioning": 61581, "promising improvements": 43169, "improvements recent": 25097, "models accurately": 34665, "task remains": 55334, "given lack": 22754, "lack sufficient": 27915, "sufficient annotated": 53800, "sharing knowledge": 50516, "encoder representations": 17538, "representations present": 46740, "present multi": 41949, "model shares": 34370, "encoders decoders": 17554, "art standard": 4412, "using diverse": 60664, "automatic human": 5094, "joint modeling": 27179, "based classifiers": 5621, "methods joint": 32910, "languages experiments": 28666, "work release": 62802, "unsupervised model": 59712, "modeling inter": 34585, "knowledge useful": 27644, "multiple topics": 36304, "inspired previous": 26410, "task modeling": 55219, "approach useful": 3732, "useful predicting": 60381, "predicting missing": 41678, "achieved notable": 1255, "success machine": 53707, "summarization dialog": 53881, "model query": 34265, "order enable": 38612, "testing model": 56407, "model introduce": 34018, "new query": 37294, "summarization dataset": 53880, "dataset building": 12833, "clearly outperforms": 8658, "outperforms vanilla": 38958, "representations effective": 46646, "models match": 35219, "predictive accuracy": 41774, "model access": 33496, "heavily relies": 23534, "vectors different": 61484, "different target": 15092, "weighted sum": 61931, "decoder states": 13614, "new source": 37321, "way obtain": 61823, "propose variant": 43698, "current input": 11980, "input previous": 26317, "challenging issue": 8104, "text feature": 56578, "extraction techniques": 20121, "techniques using": 56149, "techniques proven": 56127, "useful tools": 60393, "generation text": 22566, "knowledge largest": 27545, "computational study": 9865, "study performed": 53431, "extend existing": 19821, "require linguistic": 46875, "component neural": 9711, "trained relatively": 57852, "little labeled": 30880, "context embeddings": 10620, "bidirectional language": 7073, "model standard": 34407, "additional labeled": 1679, "learning supervised": 29900, "settings introduce": 50378, "models finding": 35025, "relations paper": 46049, "participated task": 39822, "post evaluation": 41347, "better random": 6948, "random baseline": 44869, "systems cross": 54464, "lingual model": 30711, "method predicting": 32618, "language parallel": 28372, "corpora provide": 11236, "limiting applicability": 30636, "approaches address": 3758, "improvements competitive": 25062, "benchmark methods": 6477, "methods quality": 33002, "corpora low": 11217, "resulting poor": 47472, "poor translation": 41146, "data augmentation": 12149, "augmentation approach": 4948, "words generating": 62426, "generating new": 22385, "new sentence": 37311, "words new": 62467, "settings method": 50382, "quality bleu": 44497, "consider different": 10210, "learn multiple": 29401, "dirichlet process": 15346, "representations able": 46612, "word models": 62248, "lexical substitution": 30388, "task indicating": 55135, "describes participation": 14230, "continuous scale": 10852, "tackled problem": 54716, "using number": 60846, "memory blstm": 32246, "improvement using": 25037, "model reflect": 34294, "metrics recent": 33195, "based predictions": 5946, "tasks related": 55844, "years automatic": 63050, "attention paper": 4804, "paper particularly": 39439, "datasets constructed": 13196, "problem construct": 42523, "ms coco": 35912, "learning syntactic": 29901, "encoder learns": 17524, "able improve": 700, "yields best": 63115, "performance significant": 40560, "techniques approach": 56061, "task linking": 55190, "applicable task": 3157, "task different": 55020, "different general": 14941, "entities including": 18056, "built models": 7488, "models outperforms": 35291, "clean data": 8643, "tasks despite": 55584, "trial error": 58785, "understand role": 59311, "systematically evaluate": 54411, "evaluate effect": 18453, "consists parts": 10326, "bayes logistic": 6349, "predictions different": 41757, "different sub": 15086, "3rd place": 390, "applied sequence": 3292, "features previous": 20647, "ability propose": 636, "simple technique": 51218, "technique called": 56029, "enhance learning": 17914, "rnn layer": 48197, "language main": 28146, "point processes": 41048, "distinct languages": 15593, "software based": 51635, "state machines": 52703, "accuracy 99": 931, "attentional sequence": 4859, "new standard": 37322, "challenge models": 7996, "increase training": 25425, "efficient baseline": 16864, "decoder output": 13607, "second propose": 49019, "gru lstm": 23314, "lstm layer": 31267, "fully connected": 21716, "connected layers": 10176, "architecture achieves": 4022, "achieves similar": 1373, "similar accuracy": 51028, "recurrent model": 45618, "fraction training": 21431, "combining techniques": 9124, "100 words": 65, "best published": 6809, "accuracy speed": 1050, "uses convolutional": 60501, "network consists": 36726, "convolutional layer": 11104, "connected layer": 10175, "inputs different": 26362, "label given": 27710, "lingual text": 30732, "categories paper": 7847, "model distillation": 33778, "originally proposed": 38745, "corpus documents": 11326, "documents train": 15920, "train classifiers": 57574, "technique applied": 56026, "applied model": 3283, "training reduce": 58225, "unlabeled target": 59579, "attention layer": 4762, "task reading": 55317, "candidates generated": 7585, "results improve": 47669, "using contrastive": 60624, "negative samples": 36633, "data key": 12444, "learning requires": 29843, "requires annotated": 46915, "hard obtain": 23449, "task limited": 55189, "stack overflow": 52418, "difficult humans": 15168, "mitigation strategies": 33397, "finally based": 20839, "kb entities": 27271, "train proposed": 57623, "relevant text": 46239, "corpus texts": 11444, "entity annotations": 18095, "evaluated model": 18537, "important nlp": 24748, "tasks code": 55540, "code trained": 8862, "available academic": 5259, "academic research": 794, "results study": 47860, "study model": 53412, "seen significant": 49062, "significant rise": 50920, "novel dataset": 37799, "10 000": 28, "annotated task": 2919, "trained deep": 57706, "multimodal model": 36152, "introduce architecture": 26781, "networks compared": 36840, "input length": 26293, "attention module": 4789, "wu et": 63022, "translation order": 58651, "faster speed": 20441, "great potential": 23210, "information largely": 25945, "largely overlooked": 29061, "overlooked existing": 39098, "features produced": 20649, "called sentence": 7554, "language aim": 27958, "aim study": 2160, "summarize existing": 53905, "existing works": 19176, "carried different": 7769, "build existing": 7399, "powerful paradigm": 41440, "paradigm natural": 39624, "methods building": 32775, "rely high": 46287, "applied domain": 3269, "mechanism encode": 32112, "encode input": 17465, "sequence vectors": 50017, "help reduce": 23586, "test method": 56356, "data provides": 12576, "used enhance": 60165, "large quantities": 28949, "design different": 14274, "data need": 12507, "text translation": 56825, "trained generic": 57741, "recently data": 45413, "train data": 57575, "unclear extent": 59236, "challenging data": 8087, "support development": 54116, "development evaluation": 14678, "evaluation comparison": 18593, "new exciting": 37198, "processing computer": 42862, "techniques present": 56124, "various datasets": 61321, "datasets models": 13337, "types non": 59109, "attention deep": 4734, "models fit": 35035, "provide directions": 44054, "reduces human": 45690, "efforts building": 16935, "tasks promising": 55816, "promising technique": 43187, "affect model": 2016, "paper deep": 39314, "noise training": 37604, "thoroughly evaluate": 57068, "approach wide": 3738, "consistently improves": 10296, "extraction results": 20105, "results outperforms": 47755, "various evaluation": 61336, "techniques analyze": 56058, "method existing": 32493, "existing tools": 19162, "good accuracy": 22925, "dataset experiment": 12915, "accuracy 94": 926, "baseline future": 6171, "future development": 21868, "success existing": 53701, "tasks effectiveness": 55602, "achieved good": 1234, "performance short": 40557, "short input": 50557, "output sequences": 39001, "attention attends": 4712, "generated output": 22306, "supervised word": 54071, "rl models": 48176, "step training": 52831, "standard word": 52541, "prediction training": 41748, "shown exhibit": 50706, "large benchmark": 28851, "dataset semantic": 13077, "difficult measure": 15176, "related problems": 45929, "analysis design": 2650, "annotation protocol": 2964, "spoken conversations": 52352, "propose evaluate": 43375, "development process": 14699, "selection relevant": 49151, "relevant features": 46218, "features high": 20596, "systems struggle": 54641, "work seek": 62815, "problem proposing": 42636, "dialogue agent": 14766, "key value": 27340, "retrieval mechanism": 47952, "model dialogue": 33763, "underlying knowledge": 59267, "metrics present": 33189, "jointly model": 27206, "representations shared": 46755, "problem extensive": 42562, "resulting new": 47470, "models https": 35092, "limits performance": 30643, "performance consistency": 40265, "exploit structure": 19665, "performance experiment": 40334, "effectiveness methods": 16793, "way training": 61833, "training character": 57949, "performing multi": 40684, "use limited": 59933, "data effectively": 12310, "languages recent": 28765, "high performances": 23760, "need trained": 36596, "dataset user": 13128, "learning address": 29505, "dataset dataset": 12879, "number labels": 38013, "using bi": 60592, "performance nlp": 40457, "use computational": 59848, "computational social": 9862, "social science": 51602, "data lack": 12451, "lack interpretability": 27895, "modeling global": 34580, "use representations": 59995, "training techniques": 58289, "models experimental": 34993, "interpretability models": 26716, "underlying corpus": 59264, "directly model": 15324, "human intuition": 24177, "improve existing": 24851, "information explicit": 25850, "human feedback": 24166, "embeddings instead": 17152, "leverage semantic": 30289, "learning evaluation": 29630, "best answer": 6747, "quality using": 44597, "base model": 5546, "retrieval techniques": 47973, "appropriate context": 3963, "extract appropriate": 19968, "pure text": 44393, "corpora resulting": 11239, "ability generalize": 609, "generalize paper": 22146, "explore models": 19716, "comprehensive ablation": 9780, "ablation studies": 657, "conceptually simple": 9955, "architecture outperforms": 4073, "multimodal approaches": 36143, "established benchmarks": 18353, "using basic": 60583, "encode word": 17473, "model support": 34431, "al 2003": 2229, "given model": 22760, "trained contextual": 57692, "contextual word": 10787, "embeddings input": 17151, "deep natural": 13731, "models named": 35242, "neighbor information": 36656, "embeddings furthermore": 17140, "contextual embeddings": 10765, "information improves": 25915, "performance cases": 40229, "random initializations": 44883, "downstream performance": 16348, "research including": 47055, "documents multiple": 15897, "characteristics different": 8236, "texts investigate": 56894, "investigate cross": 26946, "methods language": 32915, "representations end": 46651, "representations open": 46729, "use tree": 60062, "fully differentiable": 21722, "eliminating need": 16991, "easily trained": 16550, "compared various": 9472, "lstm architectures": 31240, "reverse dictionary": 48022, "structure aware": 53090, "additional annotations": 1654, "drawing inspiration": 16406, "recent efforts": 45306, "bias propose": 7040, "encode document": 17461, "automatically inducing": 5184, "rich structural": 48125, "use attention": 59826, "evaluation different": 18610, "results document": 47596, "problem automatic": 42509, "detection use": 14538, "systems study": 54642, "specific labels": 52098, "labels using": 27858, "using recently": 60898, "biomedical entities": 7174, "event event": 18783, "methods current": 32807, "rely complex": 46276, "extract higher": 19976, "present sentence": 42006, "experiments shown": 19525, "shown achieve": 50694, "level event": 30114, "nlp machine": 37495, "techniques help": 56093, "uses supervised": 60538, "media user": 32187, "decision process": 13567, "tool provides": 57365, "provides efficient": 44196, "efficient way": 16910, "specific attention": 52048, "mechanism improves": 32123, "improves overall": 25139, "word list": 62240, "automatic semi": 5121, "different periods": 15024, "fail capture": 20330, "propose latent": 43434, "learn underlying": 29441, "learning goal": 29664, "enable development": 17422, "set vectors": 50276, "simple techniques": 51219, "techniques like": 56106, "does provide": 15965, "topic sentiment": 57430, "straightforward approach": 52886, "directly predicts": 15333, "component end": 9702, "previous attention": 42243, "model capability": 33643, "achieves improvement": 1341, "score state": 48874, "art baseline": 4220, "generate appropriate": 22179, "planning surface": 40945, "produce natural": 42993, "model extensively": 33868, "extensively evaluated": 19918, "domains results": 16290, "achieved better": 1222, "domains compared": 16239, "critical applications": 11777, "define task": 13780, "task able": 54872, "makes task": 31638, "experiments multiple": 19473, "multiple deep": 36196, "architectures learn": 4114, "annotated tweets": 2927, "f1 points": 20192, "different techniques": 15096, "benefit multiple": 6567, "multiple different": 36198, "models difficulty": 34920, "understanding systems": 59406, "specific rules": 52141, "far apart": 20395, "large gains": 28881, "long tail": 31035, "sequence architectures": 49910, "news translation": 37423, "algorithms automatic": 2321, "research opportunities": 47085, "larger set": 29088, "paper illustrates": 39393, "challenge automatic": 7969, "methods analysis": 32747, "propose multiple": 43483, "different factors": 14930, "relational reasoning": 46011, "central component": 7918, "relation networks": 45990, "plug play": 41026, "challenging dataset": 8088, "curated dataset": 11949, "code switched": 8858, "cnn used": 8777, "effective nlp": 16681, "despite great": 14362, "effective domain": 16646, "tasks spoken": 55907, "label embeddings": 27708, "models increasingly": 35126, "learning use": 29927, "unsupervised neural": 59717, "training unsupervised": 58309, "ability combine": 599, "work does": 62641, "important natural": 24745, "perform specific": 40144, "domains study": 16294, "study introduce": 53395, "introduce domain": 26799, "similarity calculation": 51087, "prove proposed": 43982, "corpus methods": 11380, "trained domain": 57712, "use lexical": 59932, "approaches ranging": 3905, "results test": 47880, "surprisingly good": 54187, "results challenge": 47530, "embeddings standard": 17219, "correct answers": 11466, "example pairs": 18879, "dataset evaluating": 12911, "embeddings demonstrate": 17108, "pose significant": 41241, "challenges current": 8036, "methods common": 32789, "knowledge required": 27594, "neural natural": 36989, "nlu systems": 37569, "systems knowledge": 54538, "knowledge acquired": 27388, "time introduce": 57168, "nlu models": 37564, "specific text": 52158, "text inputs": 56630, "representations task": 46768, "representations experiments": 46662, "experiments document": 19421, "document question": 15823, "approach analysis": 3417, "exploit knowledge": 19657, "appropriate way": 3969, "context surrounding": 10727, "token embeddings": 57287, "embeddings represent": 17203, "simple efficient": 51161, "token embedding": 57286, "embeddings large": 17160, "text evaluate": 56560, "trained smaller": 57875, "smaller amounts": 51515, "embeddings consistently": 17100, "set sizes": 50248, "models obtaining": 35273, "obtaining better": 38231, "parameter count": 39666, "perform given": 40111, "networks applied": 36830, "translation introduce": 58622, "architecture inspired": 4053, "architectural changes": 4017, "removing need": 46380, "convolution operation": 11099, "reduces number": 45695, "neighborhood information": 36661, "adjacent sentences": 1841, "sentences evaluate": 49710, "tasks include": 55676, "classification benchmarks": 8440, "quantitative comparison": 44617, "tasks addition": 55492, "model perform": 34184, "accurate classification": 1076, "confusion matrix": 10161, "90 f1": 553, "research suggests": 47126, "order obtain": 38643, "child directed": 8289, "directed speech": 15268, "negative log": 36623, "log probability": 30976, "understanding social": 59402, "study human": 53385, "build automatic": 7386, "social contexts": 51560, "contexts paper": 10753, "perform study": 40146, "corpus freely": 11345, "magnitude larger": 31417, "larger previously": 29086, "use corpus": 59856, "corpus perform": 11401, "reveal underlying": 48014, "gender information": 22036, "information problem": 26022, "automatically predicting": 5194, "number relevant": 38032, "linguistic complexity": 30755, "original context": 38706, "context previous": 10692, "selection framework": 49140, "results empirical": 47605, "learning platform": 29807, "learning dl": 29599, "approaches domains": 3802, "datasets exist": 13259, "systems applied": 54431, "work adapt": 62553, "large open": 28929, "dataset squad": 13099, "based state": 6055, "embeddings novel": 17181, "rely domain": 46278, "expensive create": 19206, "systems achieve": 54420, "learning low": 29714, "gained attention": 21914, "entity representation": 18143, "different representation": 15050, "users easily": 60460, "models implement": 35102, "used rank": 60283, "query extract": 44667, "based complex": 5628, "performing models": 40683, "mechanism propose": 32137, "architecture transformer": 4094, "transformer based": 58452, "based solely": 6045, "tasks models": 55750, "models superior": 35563, "superior quality": 53941, "requiring significantly": 46965, "time train": 57233, "task improving": 55128, "improving existing": 25180, "existing best": 19043, "model establishes": 33835, "establishes new": 18360, "art bleu": 4230, "training costs": 57964, "reporting bias": 46459, "knowledge does": 27447, "knowledge acquisition": 27389, "extract knowledge": 19982, "performance study": 40581, "layers model": 29227, "grained level": 23040, "vocabulary sizes": 61713, "words target": 62525, "new sentences": 37312, "words address": 62363, "based source": 6048, "external word": 19957, "simple novel": 51201, "candidate pool": 7575, "speedup compared": 52329, "explore challenges": 19690, "domain mismatch": 16112, "mismatch training": 33352, "data rare": 12583, "improvements quality": 25096, "advances state": 1927, "art text": 4428, "learning extract": 29642, "classify sentences": 8631, "sentences express": 49718, "propose stage": 43644, "stage neural": 52435, "model tackle": 34437, "answers question": 3111, "model copy": 33722, "extraction model": 20082, "entity tagging": 18151, "approaches demonstrate": 3795, "documents based": 15860, "certain properties": 7944, "language case": 27983, "tabular data": 54694, "data structure": 12698, "wide array": 61960, "status quo": 52780, "does fully": 15947, "despite strong": 14393, "web applications": 61878, "good representations": 22942, "mixture model": 33421, "weights different": 61938, "representation ability": 46488, "acl anthology": 1431, "reference corpus": 45737, "produce reasonable": 43002, "challenge human": 7984, "human automated": 24109, "automated methods": 5052, "methods previous": 32990, "scale large": 48589, "networks extract": 36854, "demonstrate learned": 13931, "higher precision": 23837, "retrieval methods": 47954, "significantly increased": 50983, "suggest promising": 53828, "order facilitate": 38619, "annotated experts": 2897, "using naive": 60824, "approaches training": 3944, "mini batch": 33280, "processing speed": 42941, "length based": 30025, "strategies work": 52922, "presents analysis": 42071, "analysis impact": 2677, "floating point": 21115, "classification quality": 8525, "different classifiers": 14865, "source toolkit": 51813, "standard attention": 52466, "visualization tool": 61681, "english datasets": 17795, "training significantly": 58259, "success models": 53711, "models relies": 35429, "learning given": 29663, "discriminative models": 15447, "match accuracy": 31895, "art generative": 4265, "models easy": 34945, "models require": 35441, "introduced models": 26886, "produced model": 43020, "features computed": 20544, "makes model": 31628, "powerful generative": 41434, "models unable": 35642, "work aim": 62562, "aim developing": 2145, "selection using": 49158, "representations key": 46696, "document understanding": 15840, "demonstrate methods": 13938, "methods bring": 32773, "include multiple": 25224, "analysis confirms": 2637, "model builds": 33637, "supervision model": 54086, "wise relevance": 62084, "relevance propagation": 46193, "input space": 26340, "classification decisions": 8455, "based bi": 5603, "task evaluate": 55055, "evaluate resulting": 18500, "used previous": 60271, "work previous": 62767, "order avoid": 38597, "maximum posteriori": 31974, "translation trained": 58693, "model solve": 34396, "gumbel softmax": 23361, "effective generating": 16655, "result different": 47437, "russian language": 48415, "language shown": 28484, "research various": 47141, "research recently": 47113, "automated text": 5062, "text summarisation": 56796, "techniques task": 56141, "resource development": 47221, "development future": 14679, "corpora news": 11228, "challenging current": 8086, "ignoring fact": 24500, "corpus manually": 11377, "important features": 24727, "features task": 20680, "local contexts": 30932, "jointly optimizes": 27212, "enhance training": 17926, "training stage": 58268, "inference stage": 25694, "number recent": 38031, "works proposed": 62904, "proposed techniques": 43912, "human interpretable": 24175, "supervision paper": 54087, "present sequence": 42007, "negative results": 36631, "achieve near": 1172, "near perfect": 36509, "scale open": 48607, "domain qa": 16140, "fine tuned": 20957, "overall results": 39048, "performance recently": 40523, "large proportion": 28945, "chinese translation": 8325, "novel memory": 37862, "inference experiments": 25656, "vocabulary paper": 61710, "used datasets": 60138, "poses new": 41250, "reference texts": 45749, "lexical richness": 30382, "discourse phenomena": 15395, "learning dataset": 29580, "establish baseline": 18341, "difficulties associated": 15196, "success failure": 53702, "set diverse": 50136, "lack reliable": 27910, "reliable automatic": 46250, "scarcity high": 48665, "corpora address": 11173, "current evaluation": 11975, "motivating need": 35882, "second problem": 49018, "quality corpus": 44502, "training study": 58276, "baselines unsupervised": 6315, "model handle": 33951, "short length": 50558, "based objective": 5920, "used state": 60312, "state theart": 52710, "task systems": 55426, "difficult determine": 15164, "deployed real": 14172, "world use": 62965, "relatively easy": 46114, "higher bleu": 23815, "conduct depth": 10035, "compare relative": 9363, "relative gains": 46099, "results develop": 47591, "learning machine": 29716, "novel stage": 37926, "given high": 22746, "aims answer": 2173, "squad dataset": 52395, "dataset challenging": 12837, "dataset achieve": 12797, "achieve f1": 1137, "approaching performance": 3960, "models f1": 35011, "approach generate": 3545, "generate synthetic": 22251, "generate data": 22191, "related source": 45938, "report experimental": 46434, "generated data": 22282, "points using": 41081, "data shows": 12660, "used provide": 60278, "data standard": 12691, "models computationally": 34841, "expensive requires": 19218, "longer sequences": 31053, "attention scores": 4826, "scores demonstrate": 48899, "significant potential": 50913, "suffer poor": 53776, "especially problematic": 18294, "problem firstly": 42570, "sample efficient": 48451, "models employ": 34955, "policy learning": 41098, "learning experience": 29633, "improve sample": 24922, "sample efficiency": 48450, "pre train": 41515, "models prior": 35356, "demonstrate practical": 13958, "rl based": 48174, "author profiling": 5000, "language variety": 28576, "output multiple": 38987, "svm classifiers": 54234, "word grams": 62213, "evaluate using": 18514, "dataset provided": 13044, "approach achieved": 3392, "written languages": 63003, "accuracy language": 996, "united kingdom": 59525, "spatial temporal": 51987, "finally analyze": 20836, "hierarchical attention": 23656, "achieved remarkable": 1261, "performance document": 40299, "language multilingual": 28352, "considered training": 10254, "language transfer": 28537, "single multilingual": 51323, "propose multilingual": 43481, "document structures": 15836, "shared encoders": 50470, "label sets": 27727, "news documents": 37401, "word propose": 62272, "sequence s2s": 49972, "style model": 53490, "target model": 54832, "model possible": 34212, "generation strategy": 22554, "approach superior": 3713, "superior state": 53944, "accuracy human": 987, "measuring performance": 32087, "complexity data": 9675, "time order": 57186, "document text": 15838, "text segment": 56755, "existing metrics": 19106, "improvements existing": 25072, "metrics new": 33183, "applied sentence": 3291, "achieved excellent": 1230, "present context": 41876, "external corpus": 19931, "topic level": 57413, "result deep": 47436, "hierarchical representations": 23690, "text sentence": 56761, "layers network": 29229, "sentence important": 49568, "user preferences": 60435, "current methods": 11985, "use structured": 60031, "text known": 56638, "language explanations": 28062, "predicting user": 41685, "features design": 20558, "term memories": 56243, "generates text": 22359, "large real": 28953, "metrics shows": 33201, "prediction method": 41717, "prediction time": 41747, "systems commonly": 54453, "trained evaluated": 57724, "datasets recently": 13396, "researchers started": 47166, "text preprocessing": 56703, "step pipeline": 52822, "potential impact": 41392, "impact final": 24596, "performance despite": 40288, "investigate impact": 26960, "simple text": 51220, "neural text": 37103, "evaluation standard": 18724, "benchmarks text": 6546, "analysis experiments": 2664, "experiments simple": 19528, "preprocessing techniques": 41830, "comparing different": 9479, "provides insights": 44206, "mitigate problem": 33389, "produce final": 42982, "able select": 724, "arguments relations": 4182, "text previous": 56707, "area focused": 4140, "analysis real": 2738, "learning using": 29931, "paradigms language": 39633, "interactive learning": 26630, "role social": 48322, "algorithms perform": 2333, "document vector": 15843, "captures semantic": 7728, "embeddings word2vec": 17248, "generating high": 22376, "classification semantic": 8541, "enables training": 17449, "time model": 57179, "model efficient": 33802, "classifier attention": 8593, "analysis classification": 2626, "result indicates": 47439, "goal task": 22902, "task classify": 54951, "consuming process": 10452, "process recent": 42823, "supervision used": 54099, "larger datasets": 29074, "order create": 38605, "domain cross": 16035, "methods obtained": 32965, "approach addition": 3409, "representing text": 46814, "tasks main": 55736, "makes data": 31618, "architecture learns": 4060, "efficiently model": 16919, "outperforms comparable": 38885, "comparable previous": 9306, "mining task": 33325, "gained significant": 21922, "years paper": 63068, "including text": 25309, "health care": 23512, "quantitative methods": 44621, "exploratory study": 19683, "related specific": 45939, "social economic": 51562, "time frame": 57157, "model separately": 34358, "model obtained": 34139, "shared information": 50473, "information sentences": 26079, "using ensemble": 60677, "progress natural": 43105, "used standard": 60311, "standard benchmark": 52470, "paper revisit": 39570, "problem end": 42549, "introduce large": 26817, "dataset extracted": 12923, "corpus obtained": 11395, "retrieval systems": 47970, "learning procedures": 29820, "use state": 60027, "techniques large": 56104, "potential solution": 41407, "key contributions": 27305, "based output": 5931, "concludes discussion": 9974, "practical use": 41476, "use results": 59999, "automatic tools": 5131, "text particular": 56692, "preserving semantics": 42127, "evaluate popular": 18486, "interact users": 26592, "combination various": 9052, "recognition natural": 45516, "results demonstrated": 47586, "procedure used": 42745, "context effective": 10619, "way capture": 61795, "challenge work": 8023, "example word": 18884, "analysis approaches": 2616, "context feature": 10637, "just using": 27254, "size language": 51387, "hundreds thousands": 24298, "spelling variations": 52337, "paper challenges": 39288, "cross genre": 11826, "target documents": 54811, "target document": 54810, "establish strong": 18348, "shows improvement": 50785, "study measure": 53409, "improvement classification": 24994, "results illustrate": 47667, "characteristics language": 8238, "blog posts": 7226, "benefit downstream": 6560, "word2vec doc2vec": 62346, "simultaneously learn": 51271, "training employ": 58083, "text instead": 56631, "newly created": 37374, "model usually": 34516, "learning study": 29899, "study interpretability": 53393, "available models": 5328, "automated detection": 5040, "non relevant": 37680, "task detection": 55013, "detection semantic": 14520, "choice word": 8339, "models word2vec": 35685, "glove word2vec": 22862, "dataset human": 12954, "task non": 55242, "make comparison": 31553, "propose dataset": 43349, "lexical grammatical": 30365, "research automatic": 46989, "asr text": 4563, "paper step": 39576, "allows train": 2479, "knowledge deep": 27435, "trained labeled": 57756, "data state": 12692, "extraction approaches": 20049, "level labels": 30143, "consuming costly": 10442, "life tasks": 30441, "tasks make": 55737, "labels usually": 27859, "desired output": 14349, "end e2e": 17632, "based pointer": 5939, "pointer networks": 41061, "trained directly": 57710, "raw input": 45036, "corpus compare": 11296, "compare neural": 9351, "use token": 60048, "model focuses": 33904, "global semantic": 22841, "rare word": 44999, "word encoding": 62199, "oov problem": 38405, "models english": 34967, "tasks does": 55595, "non canonical": 37640, "submission shared": 53575, "editing task": 16597, "performs particularly": 40710, "particularly low": 39885, "sentence provide": 49627, "improve results": 24918, "results training": 47888, "dataset important": 12958, "documents belonging": 15861, "belonging different": 6419, "propose extract": 43385, "collection texts": 8987, "unlabeled datasets": 59569, "compare methods": 9346, "dataset scientific": 13074, "trending topics": 58780, "time specifically": 57220, "pairwise ranking": 39239, "structure prediction": 53128, "pairwise comparisons": 39238, "translation framework": 58614, "loss functions": 31096, "create model": 11708, "highest score": 23856, "limited computational": 30574, "models establish": 34974, "establish new": 18344, "meaningful representations": 32025, "models making": 35215, "making possible": 31662, "representation existing": 46511, "encoded word": 17486, "problems introduce": 42703, "fully leverage": 21736, "leverage rich": 30287, "information help": 25900, "analysis demonstrate": 2645, "performances benchmark": 40638, "networks represent": 36905, "hypothesis space": 24348, "networks demonstrate": 36842, "utility approach": 61080, "spoken dialog": 52353, "recognition neural": 45521, "problems large": 42707, "allows generate": 2466, "enable effective": 17423, "generation step": 22551, "settings furthermore": 50375, "score sentence": 48872, "level instead": 30135, "instead directly": 26448, "suggest model": 53824, "operations used": 38495, "variety models": 61281, "tasks improved": 55673, "text models": 56669, "outperform non": 38806, "sub character": 53515, "language method": 28150, "units called": 59529, "information difficult": 25815, "difficult access": 15154, "improvement strong": 25028, "naive approach": 36361, "annotations language": 2993, "corpus train": 11446, "classifier identify": 8598, "trained non": 57834, "does improve": 15952, "level rnn": 30197, "quality compared": 44499, "aware language": 5454, "model achieving": 33528, "data generally": 12382, "order learn": 38630, "architecture learn": 4058, "space test": 51900, "task study": 55416, "graphs kgs": 23188, "novel reinforcement": 37906, "graph embeddings": 23131, "approach includes": 3567, "accuracy diversity": 960, "ranking based": 44968, "learning datasets": 29581, "selecting optimal": 49126, "performance little": 40420, "design choices": 14267, "different network": 15004, "event detection": 18781, "different setups": 15069, "large impact": 28888, "lstm layers": 31268, "embedding pre": 17054, "word contexts": 62132, "embeddings pre": 17190, "results error": 47614, "data additionally": 12122, "predictions word": 41771, "systems tend": 54649, "bridges gap": 7324, "unsupervised knowledge": 59702, "providing interpretable": 44249, "hotel reviews": 24034, "particular train": 39867, "train text": 57650, "analyze large": 2820, "score using": 48881, "using established": 60680, "provides effective": 44195, "languages best": 28609, "knowledge study": 27623, "relational information": 46009, "information english": 25834, "classes results": 8417, "proposed cross": 43748, "transfer approach": 58351, "approach sets": 3687, "explored work": 19769, "level long": 30154, "grams features": 23084, "evaluated models": 18538, "models dataset": 34883, "dataset consisting": 12860, "applying classical": 3359, "classical machine": 8423, "close performance": 8690, "discrete features": 15421, "networks nns": 36883, "context modeling": 10675, "strategy training": 52953, "algorithm developed": 2269, "developed work": 14642, "evaluated terms": 18550, "lms perform": 30921, "faster inference": 20437, "inference efficiency": 25653, "downstream nlp": 16345, "applications order": 3226, "dataset provides": 13045, "existing dataset": 19053, "addition conduct": 1602, "different encoders": 14915, "mechanism experiments": 32118, "experiments new": 19481, "analyses different": 2595, "able generalize": 695, "generalize unseen": 22149, "art unsupervised": 4435, "arguments appear": 4179, "investigate effectiveness": 26955, "features state": 20673, "terms f1": 56289, "analysis widely": 2792, "languages non": 28739, "non english": 37648, "designed generate": 14317, "set tools": 50266, "learning human": 29673, "improve current": 24839, "translation training": 58694, "expensive human": 19209, "algorithm improves": 2281, "large action": 28830, "level machine": 30155, "high variance": 23808, "problem language": 42591, "field recently": 20768, "use character": 59840, "attention capture": 4727, "capture intra": 7686, "based newly": 5910, "newly published": 37380, "comprehensive benchmark": 9783, "benchmark contains": 6437, "compared performance": 9429, "performance popular": 40486, "propose methodology": 43457, "analysis order": 2710, "knowledge perform": 27568, "best combination": 6757, "combination proposed": 9048, "problem especially": 42552, "number entities": 38000, "teams participated": 56008, "methods employed": 32836, "constructed dataset": 10409, "tweets annotated": 59009, "multilingual representations": 36114, "languages need": 28737, "model lexicon": 34061, "minimize distance": 33298, "embeddings learning": 17165, "context evaluate": 10627, "method compare": 32424, "classification shown": 8550, "translation different": 58600, "increase model": 25417, "proposed far": 43774, "evaluate existing": 18457, "approaches introduce": 3850, "additionally explore": 1721, "including deep": 25247, "attention used": 4844, "evaluation carried": 18586, "single gpu": 51305, "proposed architectures": 43739, "obtain best": 38161, "average improvement": 5410, "release code": 46144, "joint entity": 27168, "classification relation": 8532, "extraction particular": 20091, "types relations": 59114, "algorithm search": 2299, "faster training": 20443, "retrieve relevant": 47979, "space existing": 51859, "model improve": 33977, "shown significant": 50749, "work suggest": 62832, "difficult data": 15162, "task investigate": 55147, "investigate effective": 26954, "particular introduce": 39849, "analyzing performance": 2845, "neural generation": 36956, "generation methods": 22492, "methods experiments": 32852, "produce fluent": 42983, "fluent text": 21134, "exceed performance": 18945, "models metrics": 35225, "systems focused": 54507, "user needs": 60432, "retrieving relevant": 47994, "relationship model": 46070, "algorithmic framework": 2316, "framework task": 21612, "application task": 3180, "neural representations": 37091, "2017 shared": 267, "kernel learning": 27290, "speech transcripts": 52310, "dimensional representation": 15233, "audio recordings": 4928, "task organizers": 55252, "ridge regression": 48135, "development set": 14701, "art nli": 4315, "results goal": 47650, "based string": 6062, "task despite": 55010, "transcripts speech": 58342, "models reached": 35399, "macro f1": 31406, "score 87": 48822, "score 93": 48828, "speech input": 52266, "unsupervised speech": 59733, "corpus demonstrated": 11320, "spearman correlation": 52011, "methods perform": 32978, "performing baseline": 40670, "language tags": 28517, "improvement automatic": 24984, "automatic language": 5100, "analysis human": 2673, "models commonly": 34832, "communication platforms": 9251, "detection entity": 14477, "order provide": 38650, "additional text": 1705, "text state": 56788, "mentioned entities": 32299, "tweets propose": 59020, "contextual knowledge": 10771, "use models": 59951, "truth dataset": 58836, "addition standard": 1643, "dataset publicly": 13048, "research new": 47081, "domain research": 16148, "based joint": 5793, "features textual": 20683, "released research": 46182, "demonstrate strong": 13979, "using multimodal": 60821, "new self": 37308, "using predefined": 60868, "approach capable": 3441, "process provide": 42821, "benchmarks language": 6528, "tasks approach": 55503, "fast efficient": 20423, "achieving comparable": 1398, "performance relative": 40529, "complex problems": 9648, "problems require": 42729, "constraints training": 10379, "based inference": 5782, "model weights": 34533, "inference procedure": 25684, "study efficacy": 53367, "constraints semantic": 10378, "network state": 36806, "capture deeper": 7660, "italian language": 27111, "hyper parameter": 24325, "accurate word": 1090, "provide accurate": 44004, "provide automated": 44010, "scientific community": 48756, "languages dialects": 28640, "corpus construct": 11304, "analysis introduce": 2684, "results novel": 47745, "task organized": 55251, "task encourage": 55046, "encourage research": 17599, "human references": 24233, "propose translation": 43685, "estimate quality": 18374, "evaluation setup": 18716, "german data": 22664, "automatically predicted": 5193, "model acoustic": 33529, "studies paper": 53287, "apply data": 3323, "quickly identify": 44822, "contains 10": 10490, "dataset explore": 12919, "effective general": 16654, "general features": 22060, "level evaluation": 30113, "semantic indexing": 49283, "alignment task": 2383, "pairs training": 39223, "string similarity": 52994, "errors training": 18252, "data argue": 12145, "language provide": 28452, "models know": 35153, "subject matter": 53553, "learn syntax": 29433, "languages knowledge": 28701, "knowledge extracted": 27479, "knowledge existing": 27470, "languages build": 28611, "information missing": 25973, "access information": 824, "studied decades": 53221, "recently studies": 45469, "usually considered": 61040, "studies applied": 53246, "particular target": 39864, "explicitly implicitly": 19636, "study investigate": 53397, "investigate possible": 26975, "report evaluation": 46433, "results ner": 47738, "detection experiments": 14484, "using named": 60826, "set tweets": 50271, "tweets results": 59021, "tweets paper": 59018, "non deterministic": 37646, "performance scores": 40546, "based multiple": 5892, "using fixed": 60699, "control model": 10968, "systems important": 54525, "methods aim": 32744, "solely rely": 51644, "properties words": 43273, "properties language": 43263, "used pre": 60262, "present alternative": 41843, "input language": 26288, "output method": 38985, "method obtains": 32593, "languages investigate": 28698, "investigate techniques": 26990, "translation existing": 58609, "small domain": 51472, "number techniques": 38044, "techniques reduce": 56130, "reduce overfitting": 45677, "regularization techniques": 45842, "l2 regularization": 27685, "novel regularization": 37905, "regularization technique": 45841, "techniques combination": 56068, "combination neural": 9046, "amounts domain": 2547, "data needed": 12508, "data gain": 12378, "pretrained large": 42161, "large supervised": 29020, "models pretrained": 35351, "vectors paper": 61493, "trained machine": 57780, "adding context": 1594, "application deep": 3162, "facilitate effective": 20267, "domain given": 16079, "based rl": 6000, "module model": 35766, "model free": 33909, "use entity": 59880, "transfer multi": 58407, "11 absolute": 83, "users express": 60464, "using short": 60937, "user experience": 60411, "models integrate": 35136, "user reviews": 60445, "prediction existing": 41704, "works consider": 62880, "framework named": 21569, "terms learning": 56297, "studied extensively": 53224, "work exploring": 62662, "nlp perspective": 37508, "introduce notion": 26841, "lack available": 27876, "use test": 60045, "techniques automatic": 56063, "proved useful": 43990, "specific corpora": 52063, "attracted attention": 4874, "high demand": 23726, "beginning end": 6382, "design makes": 14288, "training efficiency": 58079, "art data": 4244, "substantially lower": 53642, "introduce dynamic": 26800, "original data": 38707, "systems english": 54487, "layer normalization": 29196, "architectures different": 4107, "big models": 7092, "terms training": 56319, "corpus size": 11431, "neural dependency": 36947, "model initialization": 34004, "competitive current": 9547, "combination text": 9051, "yielded better": 63104, "better performances": 6938, "work multi": 62728, "introduce additional": 26777, "knowledge end": 27460, "training natural": 58186, "tasks able": 55486, "able leverage": 705, "performance individual": 40392, "task analyze": 54902, "analyze impact": 2818, "training training": 58303, "sharing tasks": 50520, "tasks defined": 55574, "information named": 25982, "improved using": 24971, "task task": 55429, "english sentence": 17872, "techniques learn": 56105, "learn effectively": 29365, "adaptation using": 1547, "task aims": 54894, "aims evaluate": 2191, "quality representation": 44571, "inference task": 25696, "model equipped": 33831, "helps achieve": 23602, "performance addition": 40183, "popularity social": 41205, "mining social": 33323, "political news": 41111, "perspective paper": 40776, "used online": 60256, "semantic gap": 49280, "accurate robust": 1087, "available social": 5365, "sample sizes": 48456, "privacy concerns": 42439, "crucial natural": 11903, "require language": 46867, "process resulting": 42827, "task argument": 54913, "solution task": 51662, "models reveal": 35460, "fundamental component": 21778, "words directly": 62399, "directly applicable": 15304, "vectors model": 61492, "words compared": 62382, "models evaluate": 34976, "lm based": 30905, "work terms": 62839, "success recent": 53723, "infrequent words": 26189, "tasks demonstrated": 55580, "nmt baseline": 37574, "compared competitive": 9395, "competitive methods": 9549, "highly subjective": 23920, "challenges proposing": 8073, "method outperform": 32595, "level cross": 30094, "loss training": 31106, "level task": 30221, "achieving significant": 1422, "baseline based": 6157, "based automatic": 5588, "metrics human": 33169, "multiple datasets": 36193, "reward model": 48069, "model overall": 34170, "sequential sentence": 50050, "encoder multi": 17527, "encoder based": 17492, "uses encoder": 60508, "relationship sentences": 46072, "sentence encoders": 49550, "encoders achieve": 17552, "achieve strong": 1205, "model result": 34319, "classification different": 8456, "classification datasets": 8452, "byte level": 7513, "provides best": 44183, "methods employ": 32835, "results domains": 47598, "domains recently": 16288, "recently variety": 45473, "model designs": 33754, "learning related": 29838, "related models": 45918, "numerous nlp": 38069, "compare contrast": 9333, "detailed understanding": 14430, "past present": 39933, "learning nlp": 29782, "traditional natural": 57535, "model extract": 33871, "corpus compiled": 11298, "enhanced using": 17941, "performance final": 40346, "using f1": 60691, "accuracy improved": 989, "features sentence": 20662, "task make": 55201, "dataset model": 12996, "attentive recurrent": 4865, "evaluations model": 18761, "discuss different": 15464, "use meta": 59945, "information proposed": 26032, "non textual": 37687, "methods tackle": 33065, "quality improvements": 44533, "vocabulary large": 61705, "respectively results": 47382, "cost effective": 11579, "explore state": 19737, "method improved": 32533, "user embeddings": 60409, "main approaches": 31424, "approaches automatically": 3772, "work argue": 62573, "graphs propose": 23191, "lexicon expansion": 30409, "tasks order": 55777, "auxiliary tasks": 5242, "tasks deep": 55571, "various cross": 61319, "cross corpus": 11809, "gains using": 21947, "networks popular": 36891, "outperforms deep": 38893, "publish code": 44365, "dimensional dense": 15229, "present word": 42053, "tweet data": 59003, "data combination": 12221, "data general": 12380, "data consist": 12241, "experiments demonstrating": 19411, "performing natural": 40685, "embeddings learn": 17163, "representations large": 46700, "fashion paper": 20416, "respect word": 47354, "summary generation": 53914, "information generate": 25891, "novel challenging": 37781, "challenging research": 8137, "largely unexplored": 29065, "topics discussed": 57448, "generate summaries": 22250, "analysis identifying": 2676, "propose models": 43468, "models enhance": 34968, "features directly": 20561, "character embeddings": 8201, "paper created": 39311, "temporal dynamics": 56185, "term long": 56240, "proposed state": 43902, "corpora large": 11214, "large diverse": 28870, "compared methods": 9419, "nlp recently": 37519, "applications various": 3258, "components natural": 9720, "current trends": 12024, "entities events": 18048, "media paper": 32174, "surprisingly simple": 54191, "approach open": 3618, "novel set": 37920, "problem specific": 42664, "features significantly": 20668, "significantly boost": 50944, "classifier accuracy": 8591, "extraction language": 20076, "level embedding": 30106, "parameters proposed": 39720, "prediction methods": 41718, "learning efficient": 29612, "propose types": 43687, "tasks time": 55935, "lstm approach": 31238, "approach shows": 3689, "studies based": 53249, "text contain": 56509, "syntactic constructions": 54295, "time data": 57141, "corpus release": 11418, "release data": 46149, "task trained": 55442, "order assess": 38596, "meaning preservation": 32010, "task evaluating": 55057, "arabic word": 4009, "arabic dialects": 3998, "segmentation results": 49086, "using limited": 60770, "context attention": 10586, "movie subtitles": 35898, "learn distinguish": 29360, "pilot study": 40884, "study observe": 53423, "attention patterns": 4808, "paper problem": 39487, "context prediction": 10690, "deep bidirectional": 13684, "features classification": 20537, "classification network": 8508, "corpus achieves": 11267, "task improve": 55126, "incorporated model": 25369, "typically involves": 59146, "softmax function": 51631, "approach alleviate": 3415, "words play": 62481, "play role": 40977, "using optimal": 60850, "train different": 57579, "reduction training": 45722, "different standard": 15077, "recommendation systems": 45565, "text social": 56776, "objective task": 38106, "users tweets": 60483, "information user": 26145, "using support": 60971, "task process": 55296, "cast problem": 7822, "introduce semi": 26858, "methods neural": 32956, "builds recent": 7479, "domain introduce": 16087, "introduce graph": 26810, "extraction performance": 20092, "semeval task": 49444, "task sequence": 55362, "improved leveraging": 24951, "leveraging unlabeled": 30342, "form language": 21325, "model work": 34543, "fusion method": 21856, "leverages pre": 30309, "able better": 678, "better utilize": 6992, "better generalization": 6894, "transfer new": 58410, "extracting knowledge": 20033, "literature review": 30862, "combination bidirectional": 9034, "lstm convolutional": 31256, "attention natural": 4793, "processing community": 42858, "traditional models": 57533, "models main": 35208, "main advantages": 31423, "large raw": 28952, "addressing issue": 1819, "models second": 35472, "used automatically": 60098, "achieved competitive": 1227, "models range": 35395, "tasks hand": 55661, "information complementary": 25784, "compared models": 9421, "paper systematically": 39589, "systematically investigate": 54413, "fundamental nlp": 21783, "tasks based": 55516, "benchmarks state": 6543, "comparable best": 9289, "structure source": 53138, "translation multi": 58639, "investigate data": 26947, "tuning model": 58928, "level fine": 30121, "tuning data": 58904, "model ensemble": 33827, "data fine": 12364, "works best": 62878, "best training": 6833, "model incrementally": 33996, "ensemble different": 17974, "models performed": 35320, "better data": 6874, "tedious time": 56162, "representation modeling": 46555, "implement evaluate": 24633, "potential application": 41380, "generated target": 22324, "words especially": 62410, "dependency relationship": 14137, "words design": 62396, "systems crucial": 54465, "tasks summarization": 55920, "vary significantly": 61423, "systems end": 54485, "architecture enables": 4045, "structure input": 53110, "approach discuss": 3491, "content social": 10558, "develop test": 14617, "learning fine": 29645, "understanding information": 59353, "answering text": 3101, "test compare": 56337, "specific datasets": 52067, "enables learning": 17442, "finer grained": 21038, "grained knowledge": 23038, "improvement baselines": 24987, "relations events": 46028, "learn topic": 29439, "mainly driven": 31470, "causal relations": 7876, "source learning": 51781, "time demonstrate": 57142, "datasets extracted": 13269, "narrative understanding": 36385, "chronological order": 8354, "discourse relation": 15396, "collecting human": 8976, "accuracy best": 940, "data work": 12777, "study task": 53467, "annotate data": 2871, "grained domain": 23030, "cover different": 11644, "different properties": 15039, "context learning": 10667, "supervised models": 54021, "effectiveness data": 16774, "need improve": 36572, "humans easily": 24276, "information useful": 26144, "useful language": 60373, "datasets generated": 13284, "evaluation resources": 18699, "present language": 41935, "benchmarks used": 6548, "best approaches": 6749, "including large": 25266, "crowdsourcing study": 11890, "study involving": 53400, "years researchers": 63073, "methods question": 33003, "approaches achieved": 3753, "closed domain": 8697, "domain settings": 16157, "2016 dataset": 256, "pre selected": 41513, "passage answer": 39919, "corpus wikipedia": 11458, "model reads": 34274, "generate answer": 22178, "domain performance": 16133, "new pipeline": 37283, "learns rank": 29969, "jointly trains": 27224, "based reinforcement": 5987, "art multiple": 4305, "datasets given": 13285, "modeling problem": 34613, "problem chinese": 42518, "propose explicitly": 43378, "explicitly incorporate": 19637, "chinese nlp": 8315, "modeling word": 34637, "segmentation model": 49084, "task relevant": 55332, "relevant semantic": 46233, "framework aims": 21455, "aims learn": 2202, "logical inference": 30985, "close gap": 8686, "models evaluating": 34978, "demonstrate promising": 13962, "open problem": 38439, "paper explain": 39358, "probabilistic graphical": 42461, "version model": 61553, "techniques improve": 56096, "growth online": 23309, "query model": 44673, "multiple hops": 36224, "unsupervised setting": 59731, "task real": 55318, "recommender systems": 45572, "information usually": 26148, "longer texts": 31056, "end solution": 17709, "task deep": 54994, "outperforms classical": 38880, "sets present": 50301, "model adapts": 33536, "little effect": 30874, "words natural": 62463, "field model": 20761, "construct new": 10394, "allow better": 2434, "representation task": 46590, "plays pivotal": 41003, "pivotal role": 40920, "nlp application": 37462, "tagging methods": 54741, "surpass state": 54166, "performance analysis": 40192, "target tokens": 54852, "performance learning": 40415, "rely manually": 46295, "relevant contextual": 46206, "avoid explicit": 5431, "characteristics data": 8235, "data known": 12446, "comes cost": 9131, "datasets experiments": 13265, "experiments focus": 19438, "focus important": 21170, "important contextual": 24715, "features easily": 20567, "analyze various": 2833, "investigate automatic": 26943, "automatic quality": 5117, "errors using": 18253, "good bad": 22926, "methods non": 32961, "translation current": 58594, "approaches focus": 3829, "large documents": 28873, "documents like": 15893, "task framework": 55098, "aim automatically": 2138, "identify classify": 24416, "consistent human": 10276, "documents key": 15889, "key contribution": 27304, "contribution research": 10946, "apply machine": 3331, "scholarly articles": 48737, "hope dataset": 24006, "nlp communities": 37473, "modeling introduce": 34587, "introduce cross": 26793, "metric word": 33130, "effective means": 16669, "different vector": 15123, "linguistic similarity": 30794, "languages machine": 28720, "translation demonstrate": 58598, "limited word": 30633, "observed using": 38151, "using 50": 60548, "track progress": 57493, "paragraph level": 39636, "model detects": 33758, "identifying relations": 24462, "approach suffers": 3711, "words study": 62523, "using benchmark": 60587, "different relation": 15048, "optimization process": 38555, "resulting performance": 47471, "written natural": 63004, "effect different": 16613, "study addresses": 53319, "embedding approaches": 17012, "output layers": 38982, "layers neural": 29230, "mechanism proposed": 32138, "addition construct": 1603, "model outperformed": 34154, "furthermore experiments": 21821, "demonstrate dynamic": 13891, "help model": 23578, "entities input": 18057, "embeddings training": 17235, "clustering word": 8748, "varying lengths": 61432, "using common": 60612, "prediction evaluate": 41703, "performance building": 40227, "building models": 7455, "level datasets": 30097, "highly specific": 23918, "information guide": 25899, "translation translate": 58696, "financial domain": 20891, "statistical neural": 52758, "significant advantage": 50848, "selected sentences": 49119, "domain model": 16113, "optimal number": 38529, "efficient information": 16878, "does involve": 15955, "rapid progress": 44991, "systems extract": 54501, "text existing": 56563, "approaches make": 3871, "answer spans": 3057, "propose instead": 43419, "achieves second": 1363, "second highest": 49006, "stanford question": 52558, "answering dataset": 3067, "attention flow": 4749, "augmentation method": 4961, "semantically valid": 49397, "present joint": 41933, "simultaneously train": 51277, "learn latent": 29388, "clinical notes": 8671, "single layer": 51313, "limited success": 30620, "success method": 53708, "modern day": 35703, "systems information": 54533, "benchmark models": 6479, "types models": 59102, "extraction framework": 20068, "framework automatically": 21460, "methods datasets": 32812, "achieves high": 1331, "research despite": 47015, "successful applications": 53734, "fields crfs": 20779, "model local": 34070, "terms inference": 56295, "integrating external": 26522, "incorporate information": 25356, "improvements strong": 25102, "models lstm": 35205, "experimented using": 19337, "using monolingual": 60811, "models according": 34661, "introduce open": 26852, "research model": 47074, "variety problems": 61287, "based content": 5640, "embedding multi": 17049, "multi relational": 36001, "task empirical": 55039, "generally applicable": 22162, "tasks furthermore": 55650, "embeddings embeddings": 17124, "types paper": 59110, "response selection": 47402, "party conversations": 39914, "understanding multi": 59367, "conversations challenging": 11057, "multiple speakers": 36287, "problem experimental": 42557, "build reliable": 7423, "models handcrafted": 35075, "models study": 35544, "neural framework": 36955, "framework extract": 21520, "knowledge hidden": 27515, "level knowledge": 30139, "knowledge contained": 27426, "embeddings character": 17092, "aware neural": 5464, "guide language": 23336, "model key": 34029, "comparing previous": 9484, "model conduct": 33696, "training different": 58065, "framework does": 21496, "knowledge self": 27605, "information training": 26130, "effectiveness leveraging": 16787, "efficiency training": 16856, "score 91": 48826, "using extra": 60690, "knowledge grounded": 27508, "contain multiple": 10466, "entities propose": 18073, "propose fully": 43394, "fully data": 21720, "capable generating": 7620, "entities appear": 18037, "dynamic knowledge": 16486, "entities different": 18043, "according different": 857, "enabling model": 17455, "collect human": 8944, "human conversation": 24126, "annotations proposed": 2998, "translation corpus": 58591, "use output": 59969, "carry extensive": 7777, "extensive feature": 19906, "speak different": 51994, "information communication": 25782, "human translations": 24250, "previous researches": 42273, "future researchers": 21894, "tasks capture": 55533, "local dependencies": 30935, "recently attracted": 45408, "modeling dependencies": 34570, "mechanism attention": 32100, "multi dimensional": 35951, "self attention": 49175, "proposed learn": 43798, "attention rnn": 4824, "temporal order": 56190, "prediction quality": 41733, "test accuracy": 56331, "accuracy sentence": 1044, "encoding methods": 17570, "shows state": 50805, "multi genre": 35959, "shot approach": 50598, "language generator": 28091, "generator learn": 22619, "score highly": 48851, "linguistic categories": 30752, "tweets contain": 59011, "knowledge external": 27478, "texts improve": 56890, "framework achieves": 21449, "human annotator": 24104, "area computational": 4137, "models useful": 35652, "models consider": 34847, "models translating": 35632, "approaches face": 3824, "face problem": 20243, "problem data": 42528, "generate unseen": 22261, "original training": 38733, "effective solution": 16696, "data social": 12671, "information essential": 25840, "difficult process": 15182, "fast evolving": 20424, "potential using": 41411, "news reports": 37412, "chinese words": 8328, "new perspectives": 37282, "received relatively": 45264, "attention field": 4748, "model accurately": 33501, "paper makes": 39423, "skip grams": 51422, "words approach": 62366, "model operate": 34145, "model classification": 33659, "witnessed rapid": 62094, "discrepancy training": 15417, "problems model": 42712, "training address": 57928, "process guided": 42787, "receiving increasing": 45273, "step fine": 52807, "text difficult": 56537, "process domain": 42774, "integrate information": 26506, "labeling using": 27798, "performance specifically": 40571, "performance differences": 40291, "information directly": 25816, "especially case": 18264, "data insufficient": 12437, "information jointly": 25933, "model algorithm": 33557, "information extensive": 25853, "using meta": 60798, "despite ubiquity": 14400, "resource constraints": 47215, "makes challenging": 31616, "dnn models": 15757, "traditional language": 57523, "magnitude smaller": 31418, "baseline performance": 6200, "accuracy 90": 922, "art non": 4320, "non neural": 37670, "analogical reasoning": 2580, "reasoning knowledge": 45198, "embeddings despite": 17111, "remains unclear": 46351, "embeddings conduct": 17099, "pairs word": 39233, "empirically verify": 17371, "general applicability": 22043, "user profiles": 60437, "certain topics": 7948, "relation based": 45966, "concepts paper": 9939, "study machine": 53408, "work applies": 62568, "problem large": 42592, "slightly lower": 51436, "lower accuracy": 31206, "method terms": 32682, "inference speed": 25692, "room future": 48338, "future improvement": 21875, "incorporating pre": 25391, "model introduced": 34019, "jointly optimizing": 27213, "generative discriminative": 22589, "learned latent": 29466, "latent codes": 29119, "generalization model": 22121, "unsupervised manner": 59709, "predictive performance": 41777, "text sequence": 56765, "baselines especially": 6255, "speech transcription": 52309, "set 10": 50099, "arabic dialect": 3997, "features vector": 20695, "features shared": 20666, "teams submitted": 56012, "methods fall": 32862, "fall categories": 20373, "benchmark results": 6490, "representations compared": 46627, "hand labeled": 23394, "datasets work": 13486, "obtained large": 38214, "dataset constructed": 12863, "method use": 32693, "approach directly": 3490, "dataset second": 13075, "approach learns": 3587, "evaluating automatic": 18557, "fashion experiments": 20414, "missing words": 33366, "optimization methods": 38550, "methods largely": 32920, "largely improve": 29057, "improve efficiency": 24848, "widely known": 61998, "end asr": 17616, "models successful": 35552, "hybrid deep": 24314, "model process": 34237, "features natural": 20627, "question arises": 44718, "corpus called": 11292, "ai research": 2121, "explore performance": 19724, "art retrieval": 4394, "semantic accuracy": 49231, "linguistically informed": 30815, "building block": 7438, "models employed": 34956, "convolutional filters": 11103, "document set": 15831, "generation mechanism": 22489, "mechanism introduced": 32125, "outperforms standard": 38945, "standard cnn": 52476, "cnn attention": 8759, "methodology results": 32720, "computational processing": 9855, "use low": 59939, "based strategies": 6060, "achieve best": 1112, "set best": 50116, "best f1": 6763, "score overall": 48863, "team ranked": 56003, "sentiment tendency": 49862, "svm models": 54237, "sentence training": 49660, "sentence vector": 49668, "improves perplexity": 25148, "quality outputs": 44559, "according human": 863, "extended new": 19837, "automatically process": 5195, "especially important": 18280, "generate novel": 22225, "knowledge structured": 27621, "rely simple": 46299, "spurious associations": 52385, "does capture": 15936, "capture different": 7662, "increase f1": 25414, "building large": 7451, "dataset includes": 12963, "15 years": 154, "unlike prior": 59607, "prior studies": 42415, "studies focus": 53266, "propose dual": 43358, "dual encoder": 16459, "encoder approach": 17488, "approach word": 3740, "level encoder": 30108, "learns representation": 29971, "representation context": 46499, "encoder learn": 17523, "gru based": 23313, "learning remains": 29840, "amounts textual": 2559, "data large": 12454, "nlp algorithms": 37461, "datasets order": 13356, "algorithms nlp": 2331, "tasks discuss": 55592, "framework current": 21485, "domain dependency": 16044, "domain sentiment": 16155, "similarity prediction": 51113, "useful task": 60389, "challenging large": 8107, "investigate models": 26968, "identifying sentences": 24467, "results furthermore": 47642, "process allows": 42757, "suggests future": 53846, "directions improvement": 15295, "effective neural": 16680, "art works": 4442, "works focus": 62890, "tasks identifying": 55667, "tackle tasks": 54714, "pipeline methods": 40903, "tree propose": 58754, "attention methods": 4786, "encourage model": 17595, "experimentally demonstrate": 19332, "attentive neural": 4863, "architectures proposed": 4121, "studies demonstrated": 53258, "tasks empirical": 55606, "different document": 14902, "task addressing": 54888, "applications propose": 3237, "end deep": 17625, "approach detect": 3483, "typically rely": 59152, "results strong": 47858, "sentences translation": 49799, "investigate deep": 26949, "demonstrate consistent": 13884, "knowledge sentence": 27607, "level classification": 30075, "intent classification": 26564, "classification slot": 8552, "order utilize": 38660, "potential benefits": 41384, "propose jointly": 43427, "level label": 30141, "semantic relevance": 49331, "classification proposed": 8524, "introduce evaluation": 26804, "evaluation scheme": 18708, "collect annotate": 8938, "includes tasks": 25235, "user intent": 60425, "task divided": 55028, "divided sub": 15747, "paper publish": 39561, "data study": 12702, "information develop": 25810, "contributes better": 10936, "present initial": 41929, "grained information": 23037, "trained real": 57848, "agent trained": 2060, "train evaluate": 57587, "human dialogue": 24135, "better trade": 6980, "learning agent": 29508, "ability process": 634, "dialogue turns": 14793, "findings paper": 20910, "dataset text": 13117, "collected large": 8963, "better parameter": 6926, "tuning paper": 58936, "objective functions": 38090, "problem alleviated": 42502, "selected based": 49117, "particular using": 39872, "sentences achieve": 49677, "language inputs": 28117, "reasoning language": 45199, "attention existing": 4745, "existing data": 19051, "simple task": 51216, "language visual": 28579, "visual reasoning": 61665, "data nlp": 12513, "model higher": 33960, "context input": 10659, "propose attentive": 43303, "convolution network": 11094, "features word": 20696, "contexts experiments": 10752, "context particular": 10686, "qa data": 44448, "data extract": 12353, "texts train": 56937, "models efficient": 34949, "selection strategy": 49153, "strategy based": 52928, "extrinsic task": 20173, "questions similar": 44809, "similar questions": 51060, "processing large": 42882, "corpora corpora": 11187, "corpora contain": 11185, "report presents": 46444, "set source": 50251, "language groups": 28094, "associated target": 4624, "standing challenge": 52550, "systems performing": 54589, "humans paper": 24283, "art english": 4257, "using logistic": 60776, "combines domain": 9094, "models substantially": 35548, "models knowledge": 35154, "improve human": 24863, "pipeline used": 40907, "used multi": 60243, "results shows": 47842, "nlp studies": 37527, "related task": 45942, "models implicitly": 35105, "input paper": 26312, "paper implement": 39394, "different hypotheses": 14953, "relative performance": 46104, "shown using": 50759, "additional languages": 1681, "help improve": 23570, "tasks multilingual": 55754, "primarily used": 42367, "difficult compare": 15160, "test splits": 56380, "hope help": 24009, "research purposes": 47106, "present manually": 41940, "provides evidence": 44198, "solutions problem": 51669, "words neural": 62466, "inner product": 26244, "module jointly": 35763, "rest model": 47410, "achieve improvements": 1165, "level discourse": 30102, "discourse aware": 15386, "various existing": 61338, "positively correlated": 41303, "common crawl": 9170, "sentences linguistic": 49748, "corpus yields": 11462, "smaller corpora": 51516, "models suggest": 35559, "related distinct": 45898, "task making": 55204, "making accurate": 31644, "research study": 47124, "speech model": 52269, "semantics use": 49418, "text labels": 56640, "labels introduce": 27835, "text query": 56723, "matches human": 31905, "extensive analysis": 19856, "model resulting": 34320, "representations neural": 46724, "remain open": 46316, "open questions": 38443, "performance pretrained": 40494, "empirically using": 17369, "specifically compare": 52185, "pretrained embeddings": 42154, "outperform random": 38816, "ones large": 38339, "embeddings useful": 17239, "information reference": 26047, "experiment benchmark": 19231, "metrics shared": 33199, "analyze understand": 2831, "network evaluate": 36740, "including fine": 25257, "metric correlates": 33113, "par state": 39617, "fewer resources": 20740, "documents automatic": 15857, "documents provided": 15907, "data particularly": 12539, "label document": 27706, "approaches document": 3800, "label information": 27712, "approaches lexical": 3861, "models efficiently": 34950, "efficiently learn": 16918, "results predicting": 47772, "develop systems": 14615, "previous efforts": 42255, "valuable resource": 61205, "presidential election": 42131, "source news": 51788, "resource setting": 47274, "low resources": 31200, "representations like": 46709, "graph graph": 23141, "using translation": 61003, "additional input": 1676, "applications introduce": 3213, "fraction model": 21429, "submissions shared": 53578, "task multimodal": 55229, "image captions": 24532, "shared different": 50465, "performance english": 40319, "datasets achieves": 13143, "previously available": 42329, "public domain": 44318, "domain new": 16121, "corpus pre": 11404, "pre processed": 41508, "workshop asian": 62920, "available non": 5333, "non commercial": 37641, "available english": 5288, "web content": 61882, "features identify": 20599, "given tweet": 22799, "methods method": 32944, "method simple": 32660, "simple fast": 51168, "fast train": 20430, "require extensive": 46853, "sentences present": 49769, "challenging setting": 8140, "evaluate strong": 18509, "strong neural": 53038, "task translating": 55449, "generalizes different": 22155, "benchmarks english": 6520, "english dutch": 17798, "different attention": 14846, "attention distributions": 4739, "product attention": 43042, "output tokens": 39006, "tokens work": 57345, "present strategies": 42025, "strategies using": 52920, "different translation": 15107, "weak correlation": 61848, "confidence score": 10117, "score human": 48852, "set synthetic": 50255, "languages simple": 28788, "single source": 51338, "parser trained": 39761, "language including": 28104, "results target": 47876, "corpus speech": 11436, "problem usually": 42684, "data supervised": 12710, "baseline large": 6178, "features global": 20592, "models designed": 34903, "models tend": 35589, "local model": 30945, "methods feature": 32864, "indicative words": 25547, "word identification": 62215, "use ensemble": 59879, "words furthermore": 62423, "furthermore analyze": 21803, "lexical complexity": 30358, "analysis machine": 2693, "learn non": 29404, "general word": 22097, "framework generate": 21528, "results framework": 47639, "datasets provide": 13386, "representations information": 46690, "classification dataset": 8451, "sentences sentence": 49784, "objective method": 38095, "releasing dataset": 46188, "dataset help": 12946, "algorithms task": 2341, "need better": 36549, "network classifier": 36717, "dev set": 14567, "set pre": 50221, "sentences experiment": 49714, "segmentation errors": 49082, "result significant": 47452, "known problem": 27664, "approach second": 3679, "understanding semantics": 59398, "despite potential": 14375, "algorithm state": 2303, "art classification": 4233, "model create": 33728, "information downstream": 25823, "detection datasets": 14472, "datasets particularly": 13367, "fine granularity": 20946, "research focus": 47040, "code public": 8849, "computational understanding": 9869, "understanding human": 59349, "especially high": 18279, "short stories": 50566, "approach enables": 3508, "humans learn": 24278, "languages jointly": 28699, "task ability": 54871, "ability understand": 648, "visual modality": 61661, "based natural": 5897, "systems provide": 54607, "large general": 28883, "general corpus": 22049, "results question": 47795, "adapted domain": 1552, "describes systems": 14236, "performance certain": 40230, "cnn bi": 8762, "micro average": 33218, "f1 metrics": 20189, "tags using": 54759, "parsing natural": 39788, "end using": 17726, "framework includes": 21541, "efficient scalable": 16896, "fast inference": 20426, "text uses": 56835, "tune parameters": 58859, "corpora perform": 11231, "model showing": 34373, "evidence method": 18814, "model case": 33650, "scores test": 48925, "important semantic": 24770, "description length": 14245, "high effectiveness": 23732, "accuracy classifier": 944, "corpus 17": 11265, "medical information": 32207, "intent detection": 26567, "learning introduced": 29687, "structured semantic": 53175, "queries model": 44654, "model extracts": 33873, "23 relative": 324, "best baseline": 6751, "methods different": 32824, "human emotion": 24139, "human rated": 24221, "predict fine": 41639, "contribution work": 10950, "mutual benefit": 36343, "emotion prediction": 17292, "text available": 56451, "obtain representation": 38187, "representation similarity": 46580, "similarity matrix": 51101, "achieved model": 1251, "trained simultaneously": 57871, "shared representation": 50485, "results preliminary": 47773, "direction future": 15271, "research recent": 47111, "recent advancements": 45279, "information technology": 26115, "information generation": 25894, "systems exist": 54494, "users search": 60479, "create novel": 11714, "document pair": 15815, "model retrieval": 34325, "spread information": 52377, "media news": 32172, "diverse sources": 15719, "content news": 10541, "languages leveraging": 28711, "new findings": 37205, "findings reported": 20914, "comparison present": 9502, "model multilingual": 34111, "based traditional": 6102, "end trained": 17719, "systems compared": 54455, "technique significantly": 56046, "lack clear": 27877, "clear understanding": 8654, "understanding problem": 59384, "human biases": 24118, "contrast humans": 10878, "ability shot": 642, "little data": 30872, "shown powerful": 50736, "identify patterns": 24435, "representations shown": 46756, "application tasks": 3181, "differently different": 15149, "trained huge": 57746, "framework train": 21614, "individual users": 25586, "use application": 59823, "direct assessment": 15254, "quality automatically": 44494, "metrics comparing": 33152, "used evaluating": 60172, "human assessment": 24106, "rate quality": 45014, "video text": 61586, "generation techniques": 22565, "provides good": 44201, "consuming laborious": 10449, "user intents": 60426, "user utterances": 60456, "produced using": 43023, "dataset observe": 13014, "scores experiments": 48901, "experiments synthetic": 19539, "synthetic dataset": 54373, "english parallel": 17855, "corpus covering": 11311, "domain conversational": 16032, "conversational dialogue": 11045, "dataset kind": 12973, "number novel": 38023, "techniques model": 56112, "improve training": 24934, "estimate model": 18372, "sequential features": 50041, "lstm lm": 31270, "tasks achieved": 55488, "methods lack": 32914, "lack understanding": 27925, "clustering results": 8745, "aggregate information": 2072, "information analyze": 25761, "method demonstrated": 32454, "extract important": 19977, "automatically generates": 5178, "method novel": 32590, "learning traditional": 29915, "features proposed": 20651, "compared unsupervised": 9469, "best supervised": 6829, "methods achieved": 32732, "achieved overall": 1256, "work design": 62630, "swiss german": 54252, "widely spoken": 62002, "input order": 26307, "best solution": 6824, "testing data": 56402, "practical problem": 41467, "techniques require": 56132, "strong cross": 53024, "corpora model": 11223, "trained monolingual": 57813, "based reading": 5974, "significant advances": 50847, "robust machine": 48253, "generation network": 22505, "architectures including": 4112, "including long": 25269, "ability generate": 610, "generate sentence": 22242, "manner experimental": 31715, "understanding complex": 59332, "propose treat": 43686, "problem develop": 42536, "data experimental": 12344, "scale parallel": 48610, "tens thousands": 56218, "model widely": 34536, "datasets language": 13310, "data consisting": 12243, "models aimed": 34704, "aimed identifying": 2164, "dialog context": 14752, "models question": 35391, "using cross": 60635, "propose mixed": 43461, "self critical": 49194, "derived word": 14206, "word overlap": 62257, "objective improve": 38091, "performance question": 40516, "question types": 44754, "types input": 59094, "requires ability": 46911, "exact match": 18850, "accuracy 83": 915, "accuracy 86": 918, "86 f1": 537, "f1 paper": 20190, "language recent": 28461, "complete sentence": 9601, "based query": 5969, "yield different": 63094, "optimization model": 38551, "model answer": 33567, "new questions": 37297, "robust evaluation": 48247, "final answer": 20817, "good balance": 22927, "datasets framework": 13280, "framework significantly": 21599, "multiple strong": 36293, "virtual assistants": 61625, "given short": 22784, "nature text": 36490, "make prediction": 31586, "text language": 56642, "processing pipelines": 42923, "pipelines paper": 40911, "classifier distinguish": 8596, "detection error": 14478, "reproducible research": 46829, "testing datasets": 56404, "datasets code": 13173, "african languages": 2040, "humans understand": 24290, "set simple": 50246, "shot generalization": 50619, "solve task": 51691, "humans use": 24291, "learned classifiers": 29453, "new concept": 37153, "concepts language": 9935, "used pretraining": 60270, "learning results": 29849, "text editing": 56547, "settings models": 50384, "models linguistic": 35191, "models access": 34659, "linguistic context": 30760, "context recent": 10702, "standard automatic": 52467, "proposed multi": 43865, "previous sentence": 42275, "encoder models": 17526, "coherence cohesion": 8905, "compared non": 9426, "non contextual": 37644, "performance novel": 40461, "strategy multi": 52943, "leads best": 29305, "similar structures": 51068, "current document": 11972, "large database": 28867, "support wide": 54134, "tasks domain": 55596, "domain examples": 16061, "examples provided": 18926, "better quality": 6947, "scarce paper": 48658, "proposed extract": 43773, "english persian": 17856, "systems shown": 54631, "corpus consists": 11303, "art feature": 4262, "scarcity labeled": 48668, "issue using": 27081, "different multi": 15000, "labeling srl": 27793, "mtl models": 35933, "makes predictions": 31632, "predictions using": 41770, "deeper analysis": 13757, "require massive": 46879, "propose construct": 43332, "adopt multi": 1864, "limited fixed": 30586, "propose directly": 43354, "tasks performance": 55796, "performance loss": 40422, "architecture learning": 4059, "lexical overlap": 30374, "learning multiple": 29773, "tasks learning": 55719, "study computational": 53344, "task single": 55380, "language time": 28530, "tasks languages": 55712, "languages simultaneously": 28789, "languages benefit": 28608, "advances nlp": 1919, "large extent": 28877, "minority languages": 33333, "specific research": 52139, "questions posed": 44798, "task aimed": 54892, "task provide": 55307, "lessons learned": 30047, "model leverages": 34058, "standard recurrent": 52520, "cost human": 11584, "expert annotation": 19571, "annotation paper": 2958, "model loss": 34074, "network experiments": 36741, "models synthetic": 35573, "explore approaches": 19688, "model robustness": 34331, "robust training": 48267, "approaches neural": 3883, "generated outputs": 22307, "lower latency": 31212, "autoregressive transformer": 5226, "transformer network": 58503, "demonstrate substantial": 13981, "validate approach": 61173, "non autoregressive": 37633, "scale human": 48578, "human created": 24129, "test dataset": 56342, "high school": 23798, "requires deeper": 46924, "attention span": 4829, "including language": 25265, "performance gap": 40359, "limited ability": 30561, "term context": 56232, "methods compute": 32796, "semantics given": 49404, "vision based": 61634, "present end": 41900, "problem code": 42521, "active area": 1471, "acoustic linguistic": 1436, "developing effective": 14651, "based applications": 5569, "ability existing": 607, "language technologies": 28522, "inter sentential": 26588, "intra sentential": 26761, "work studied": 62828, "problem context": 42524, "features effective": 20568, "monolingual language": 35802, "lm trained": 30913, "code switch": 8857, "larger number": 29083, "parameters evaluate": 39693, "models speech": 35528, "systems achieved": 54422, "performance systems": 40590, "transcribed speech": 58334, "key problem": 27329, "method particular": 32608, "existing training": 19163, "unsupervised adaptation": 59679, "different topics": 15102, "thorough understanding": 57066, "applying models": 3369, "subjective information": 53564, "modeling methods": 34598, "networks proposed": 36898, "supervised trained": 54060, "trained contrastive": 57695, "model layer": 34046, "model compared": 33673, "evaluation analysis": 18574, "low efficiency": 31149, "command line": 9136, "annotation quality": 2965, "multiple annotators": 36165, "proposed reduce": 43887, "reduce annotation": 45649, "annotation time": 2975, "existing annotation": 19024, "successful natural": 53736, "relationships different": 46077, "pieces text": 40881, "parameters perform": 39715, "locality sensitive": 30955, "sensitive hashing": 49499, "significantly reduce": 51010, "explicit representation": 19623, "labels evaluate": 27818, "extraction datasets": 20056, "datasets observe": 13350, "observe significant": 38141, "art classifiers": 4234, "mechanism address": 32096, "address mismatch": 1780, "area curve": 4138, "encoding method": 17569, "method attention": 32389, "proposed deep": 43755, "model code": 33663, "paper summarize": 39585, "automatically extracts": 5172, "model identifies": 33969, "topics different": 57446, "people opinions": 40032, "furthermore use": 21841, "use evaluation": 59882, "models recent": 35406, "perform par": 40127, "task require": 55338, "memory time": 32285, "time training": 57234, "unsupervised document": 59692, "approaches require": 3915, "require complex": 46845, "difficult parallelize": 15180, "enables train": 17448, "structure document": 53100, "results public": 47791, "public benchmarks": 44308, "fraction computational": 21428, "queries natural": 44655, "training sequence": 58249, "problem existing": 42556, "learning limited": 29710, "sequence set": 50000, "set model": 50194, "prior art": 42394, "task major": 55200, "data complex": 12227, "data cleaning": 12209, "potential future": 41390, "digital technologies": 15214, "systematic review": 54402, "gap paper": 21970, "outperform conventional": 38788, "difficult tasks": 15188, "paper conduct": 39297, "furthermore discuss": 21817, "generates new": 22351, "demonstrate advantage": 13861, "regularization method": 45839, "input perturbations": 26315, "tagging performance": 54747, "task dependency": 55004, "helps model": 23611, "model generally": 33928, "generally effective": 22165, "model dynamic": 33794, "dynamic fusion": 16484, "fusion network": 21861, "comprehension mrc": 9769, "questions answer": 44770, "multi step": 36011, "step reasoning": 52827, "reasoning module": 45206, "generating answers": 22365, "reasoning steps": 45225, "detailed empirical": 14421, "analysis demonstrates": 2646, "mrc models": 35906, "models explicit": 34996, "models reason": 35403, "models build": 34797, "tasks sequential": 55880, "evaluation language": 18631, "models producing": 35364, "domain chinese": 16028, "dataset designed": 12892, "designed address": 14307, "datasets data": 13206, "manually generated": 31780, "provides rich": 44224, "dataset far": 12927, "experiments human": 19443, "performance current": 40273, "community make": 9266, "posted online": 41356, "encourage exploration": 17592, "models release": 35424, "baselines popular": 6285, "popular recent": 41181, "recent approach": 45291, "models extract": 35007, "usually extract": 61049, "questions require": 44805, "generated existing": 22287, "ensemble techniques": 17982, "approaches combine": 3783, "tuned training": 58890, "evaluated text": 18552, "comparison methods": 9498, "method directly": 32464, "directly learns": 15322, "learns relation": 29970, "language requires": 28472, "causal effects": 7871, "text neural": 56674, "complements existing": 9597, "transformers model": 58526, "model updates": 34506, "model reason": 34277, "information understanding": 26136, "representations existing": 46658, "learning shown": 29874, "successful tasks": 53738, "experiments investigate": 19447, "learned source": 29482, "dataset target": 13112, "qa models": 44452, "target datasets": 54807, "examples available": 18889, "lstm units": 31285, "languages semantic": 28779, "encoder obtain": 17530, "obtain final": 38172, "task dependent": 55005, "performance conventional": 40266, "increasingly popular": 25476, "shown language": 50722, "language representations": 28471, "type language": 59059, "various stages": 61395, "multimodal representations": 36155, "outperform single": 38820, "single modality": 51316, "input modalities": 26298, "motivated human": 35868, "concept representations": 9926, "based semantics": 6017, "interpretation methods": 26735, "model handles": 33952, "improved word": 24974, "small sized": 51503, "seven languages": 50420, "data scarcity": 12624, "similar language": 51049, "language evaluate": 28054, "improves current": 25122, "baseline score": 6207, "models solving": 35520, "hierarchically structured": 23700, "resources form": 47303, "manually annotating": 31763, "explore techniques": 19741, "techniques incorporate": 56101, "dataset knowledge": 12974, "bases kb": 6324, "existing information": 19077, "new relations": 37299, "methods traditionally": 33078, "traditionally used": 57559, "practical task": 41475, "datasets significantly": 13430, "sentence set": 49646, "dataset 000": 12787, "datasets including": 13300, "work best": 62588, "model complex": 33680, "applications research": 3247, "method better": 32405, "future works": 21901, "task encoder": 55045, "encoder network": 17528, "model respectively": 34317, "findings indicate": 20908, "limited knowledge": 30591, "knowledge intensive": 27530, "ai tasks": 2123, "tasks open": 55775, "existing end": 19063, "entire text": 18029, "linear text": 30673, "synthetic text": 54384, "latent structure": 29139, "qa pairs": 44455, "journal articles": 27227, "text order": 56681, "extracting structured": 20040, "structured representations": 53174, "materials science": 31928, "approaches extracting": 3823, "models extracting": 35009, "nature data": 36477, "exciting new": 18971, "recognition relation": 45530, "difficult problem": 15181, "sets paper": 50300, "level dataset": 30096, "quality dataset": 44506, "methods solve": 33046, "models conduct": 34845, "provide baselines": 44014, "com lancopku": 9018, "ner dataset": 36677, "dataset work": 13137, "building automatic": 7437, "represent real": 46478, "alignment approaches": 2365, "iteratively improve": 27130, "improve data": 24840, "understanding performance": 59381, "tasks analysis": 55499, "understanding recently": 59391, "effective representations": 16690, "event level": 18786, "semantic interactions": 49289, "generation method": 22490, "method produces": 32623, "produces better": 43027, "subtle differences": 53676, "focus evaluating": 21161, "evaluating quality": 18568, "systems introduce": 54534, "perform evaluation": 40100, "evaluation state": 18725, "english neural": 17849, "evaluation confirms": 18596, "effective identifying": 16659, "role modern": 48315, "grained control": 23029, "control information": 10966, "information retained": 26060, "multiple benchmarks": 36174, "community based": 9260, "potential applications": 41381, "based textual": 6092, "achieve satisfactory": 1189, "satisfactory results": 48525, "sufficient information": 53803, "modality data": 33475, "data inspired": 12432, "strong semantic": 53050, "semantic correlation": 49262, "representations training": 46774, "pooling layer": 41125, "promising result": 43178, "clearly demonstrate": 8657, "use train": 60053, "user satisfaction": 60446, "knowledge make": 27550, "make difficult": 31566, "use available": 59832, "available knowledge": 5314, "user preference": 60434, "new tools": 37346, "use topic": 60051, "complex model": 9634, "called multi": 7549, "order models": 38642, "effectively efficiently": 16730, "poor target": 41145, "domain specifically": 16194, "specifically existing": 52199, "target domains": 54814, "learn shared": 29422, "unified framework": 59472, "combining sentence": 9123, "interaction based": 26596, "model extensive": 33866, "competing models": 9531, "networks work": 36925, "random initialization": 44882, "especially considering": 18268, "significant difference": 50862, "learn perform": 29408, "perform reasonably": 40132, "translation approaches": 58579, "better automatic": 6851, "automatic translations": 5134, "promising way": 43188, "results systematic": 47874, "enhance quality": 17919, "introduces additional": 26891, "collection data": 8980, "data specific": 12686, "downstream classification": 16335, "community detection": 9261, "collecting data": 8973, "annotation experiments": 2950, "data drawn": 12297, "task does": 55031, "providing high": 44246, "existing classification": 19046, "approaches improving": 3845, "detection investigate": 14494, "task processing": 55297, "step investigate": 52813, "performs task": 40720, "develop approach": 14572, "test multiple": 56360, "multiple hypotheses": 36225, "predictions results": 41767, "box neural": 7293, "does scale": 15978, "contribute better": 10927, "words form": 62420, "vocabulary knowledge": 61704, "knowledge order": 27563, "interaction model": 26606, "methods effective": 32832, "learning introduce": 29686, "method embedding": 32477, "context specific": 10724, "posterior distributions": 41360, "applications example": 3204, "based variational": 6126, "generated large": 22297, "want know": 61768, "traditional supervised": 57548, "work attempted": 62578, "number models": 38020, "patterns human": 39969, "perform thorough": 40155, "analyses showing": 2606, "text current": 56521, "target text": 54849, "order handle": 38624, "handle issue": 23409, "issue propose": 27075, "set target": 50256, "words semantically": 62505, "build new": 7417, "new layer": 37236, "model estimated": 33838, "manner experiments": 31717, "analysis presented": 2724, "crucial tasks": 11915, "current studies": 12014, "surface level": 54153, "multiple information": 36226, "present compositional": 41869, "post processed": 41350, "model adapted": 33533, "automatically detected": 5158, "compare analyze": 9328, "machine interaction": 31304, "data models": 12493, "models combined": 34827, "sentiment classifiers": 49837, "vis vis": 61630, "improvements paper": 25090, "identify strengths": 24446, "broad categories": 7350, "comments given": 9145, "extremely large": 20161, "datasets tend": 13454, "expert domain": 19578, "knowledge embeddings": 27457, "building previous": 7463, "analysis challenging": 2625, "proposed automatic": 43741, "content context": 10515, "microblog posts": 33229, "trained separate": 57863, "testing phase": 56409, "embedding similarity": 17060, "outperforming best": 38848, "life applications": 30437, "european language": 18428, "languages set": 28781, "results known": 47688, "present challenges": 41864, "best worst": 6839, "worst scaling": 62978, "required number": 46903, "significantly affect": 50935, "simple heuristics": 51178, "created dataset": 11725, "include various": 25226, "dataset analyze": 12808, "impact individual": 24598, "solutions based": 51667, "quantitative results": 44628, "results terms": 47879, "utmost importance": 61131, "product paper": 43043, "considered special": 10251, "corpus date": 11318, "automatic discovery": 5080, "dual attention": 16458, "qa pair": 44454, "challenges addressed": 8029, "vaswani et": 61445, "solely using": 51647, "encoder proposed": 17535, "understand context": 59289, "distance based": 15542, "based self": 6011, "order model": 38641, "shows good": 50779, "nli data": 37450, "additionally model": 1726, "paper identify": 39392, "novel qa": 37902, "url https": 59793, "https www": 24062, "design neural": 14291, "network called": 36714, "supervised attention": 53963, "network san": 36800, "supervised sequence": 54042, "baselines neural": 6282, "model applies": 33569, "document represented": 15827, "node edge": 37584, "document graph": 15798, "topic text": 57433, "level supervision": 30218, "combines advantages": 9091, "document document": 15786, "approach various": 3737, "datasets compare": 13182, "compare state": 9367, "approach relying": 3674, "output language": 38980, "architecture takes": 4088, "roman urdu": 48330, "make following": 31572, "following contributions": 21264, "correctly predict": 11494, "predict sentences": 41654, "achieving bleu": 1397, "serve baseline": 50073, "work domain": 62642, "outperformed previous": 38839, "art benchmarks": 4227, "code released": 8852, "openly available": 38478, "models freely": 35043, "available propose": 5349, "fully end": 21723, "text encoder": 56552, "batch normalization": 6341, "new attention": 37138, "analysis pipeline": 2716, "kg embeddings": 27359, "embeddings specifically": 17218, "specifically explore": 52200, "using entity": 60679, "maintaining comparable": 31487, "reasoning machine": 45201, "unique feature": 59513, "improves robustness": 25157, "comprehension dataset": 9763, "used convert": 60129, "setting work": 50356, "algorithm proposed": 2295, "use efficient": 59872, "rl framework": 48175, "framework recent": 21591, "model available": 33593, "metric task": 33125, "reducing computational": 45705, "framework training": 21616, "self attentional": 49189, "different model": 14993, "incorporate new": 25360, "new ideas": 37221, "overall best": 39035, "used experiments": 60178, "apache license": 3128, "capable predicting": 7626, "propose zero": 43707, "method involves": 32553, "embedding sentence": 17059, "model generalize": 33924, "classifiers learn": 8617, "accuracy test": 1059, "models generalize": 35054, "new unseen": 37354, "cases models": 7809, "given growing": 22745, "ability provide": 638, "methods generally": 32876, "social data": 51561, "published works": 44376, "10 languages": 45, "leverage external": 30268, "models requires": 35444, "single reference": 51331, "blind test": 7217, "performance 17": 40168, "baselines task": 6307, "released dataset": 46173, "dataset annotated": 12809, "uses character": 60496, "78 accuracy": 512, "accuracy identifying": 988, "challenge lies": 7993, "discussed text": 15487, "methods given": 32879, "text research": 56742, "white box": 61954, "adversarial examples": 1970, "neural classifier": 36943, "decrease accuracy": 13667, "accuracy method": 1005, "method perform": 32610, "training makes": 58167, "proposes novel": 43940, "training text": 58297, "good trade": 22947, "set available": 50110, "span multiple": 51927, "decoding methods": 13634, "using separate": 60928, "specific design": 52069, "monolingual word": 35814, "method comparable": 32423, "classification algorithm": 8430, "achieved accuracy": 1217, "use approach": 59825, "using best": 60591, "best worlds": 6838, "gradient boosting": 23005, "learning state": 29894, "art machine": 4278, "experiments report": 19510, "parameter settings": 39677, "settings recent": 50393, "answer based": 3031, "leveraging external": 30323, "additional source": 1699, "task automated": 54921, "questions answered": 44771, "20 datasets": 223, "datasets commonly": 13180, "learning applications": 29517, "improves prediction": 25149, "approach applicable": 3418, "specific case": 52052, "legal scientific": 30008, "model interpretability": 34015, "proposed existing": 43771, "methods suffer": 33059, "leverages knowledge": 30305, "knowledge entity": 27466, "existing baselines": 19039, "answering forums": 3073, "forums social": 21400, "language form": 28076, "language focus": 28075, "largely neglected": 29059, "conduct large": 10055, "world online": 62951, "prevalent social": 42227, "social biases": 51555, "model suffers": 34425, "problems propose": 42722, "source token": 51810, "token prediction": 57301, "prediction module": 41721, "headline generation": 23506, "token wise": 57314, "present best": 41858, "important implications": 24732, "models derive": 34900, "context meaning": 10672, "distance measure": 15546, "model applying": 33571, "current literature": 11984, "results joint": 47684, "attracted considerable": 4876, "structure long": 53117, "empirical experiments": 17328, "topic analysis": 57389, "languages hand": 28682, "content work": 10574, "include new": 25225, "pose challenges": 41239, "challenges using": 8080, "study language": 53402, "improve language": 24866, "motivates research": 35878, "methods approaches": 32755, "15 000": 146, "domain sentences": 16154, "sentences labeled": 49744, "baseline experiments": 6165, "experiments experiments": 19434, "data include": 12423, "related phenomena": 45924, "input contexts": 26261, "need annotated": 36546, "quality high": 44528, "annotation costs": 2940, "costs work": 11608, "design experiments": 14282, "terms text": 56317, "consider multiple": 10215, "multiple text": 36301, "data applications": 12139, "embeddings corpus": 17105, "corpus generation": 11350, "corpora demonstrate": 11190, "models lead": 35171, "rely pre": 46297, "text learning": 56647, "leading performance": 29294, "performance bottleneck": 40226, "sequential order": 50049, "position information": 41267, "information encoder": 25831, "decoder experiments": 13593, "experiments shows": 19526, "learning significantly": 29879, "fact based": 20287, "text contribute": 56514, "applications goal": 3207, "domain propose": 16138, "input training": 26352, "understanding knowledge": 59356, "word matching": 62244, "systems utilize": 54666, "generation propose": 22531, "set questions": 50230, "evaluated domain": 18530, "end proposed": 17704, "models led": 35181, "enormous data": 17958, "based components": 5630, "augment data": 4939, "cover diverse": 11645, "continual learning": 10822, "neural conversational": 36946, "tasks data": 55568, "efficacy method": 16832, "support domain": 54117, "text preserving": 56706, "semantics using": 49419, "semantic preservation": 49320, "model unlike": 34502, "languages multi": 28734, "upper layers": 59775, "representations design": 46638, "experiments popular": 19488, "parameter size": 39679, "performance additionally": 40185, "highly interpretable": 23903, "recent researches": 45344, "textual knowledge": 56972, "knowledge concepts": 27424, "learn knowledge": 29386, "knowledge wikipedia": 27647, "performance 91": 40172, "datasets achieving": 13144, "study evaluate": 53371, "demonstrate competitive": 13882, "highly challenging": 23882, "challenging identify": 8101, "systems incorporate": 54530, "low accuracy": 31132, "multiple ways": 36311, "syntactically correct": 54340, "architecture called": 4031, "95 accuracy": 567, "generalization capability": 22119, "accurate automatic": 1075, "adversarial attacks": 1965, "short paper": 50560, "model parameter": 34179, "language dependent": 28020, "new representation": 37300, "instead words": 26470, "simple automatic": 51137, "summarization language": 53887, "structure different": 53097, "knowledge knowledge": 27535, "extracting semantic": 20037, "like named": 30488, "context single": 10721, "art sota": 4404, "sota results": 51731, "challenging practical": 8124, "research problem": 47097, "extractive method": 20135, "semantic embedding": 49273, "tasks attention": 55512, "extensive set": 19912, "advantage proposed": 1945, "essential tasks": 18337, "processing machine": 42885, "million sentence": 33256, "pairs collected": 39173, "corpus experiment": 11336, "lifelong learning": 30443, "past tasks": 39935, "help future": 23564, "classification particular": 8512, "particular proposed": 39860, "task motivated": 55223, "need large": 36576, "answering dialogue": 3070, "leading significant": 29298, "automatically annotated": 5141, "model auxiliary": 33592, "auxiliary training": 5243, "training objectives": 58197, "model guided": 33949, "performance strong": 40577, "data annotated": 12135, "questions dataset": 44781, "customer service": 12057, "usually employed": 61047, "answering datasets": 3069, "available study": 5372, "study behavior": 53335, "models convolutional": 34865, "cost high": 11583, "computational requirements": 9856, "provide summary": 44138, "learning environments": 29626, "applied large": 3278, "modeling used": 34633, "researchers explore": 47155, "distant supervised": 15556, "approach scale": 3678, "extraction large": 20077, "relational facts": 46006, "text recent": 56727, "progress task": 43116, "sentences low": 49750, "syntax information": 54349, "syntax aware": 54346, "tree sentence": 58757, "level entity": 30111, "finally combine": 20842, "combine sentence": 9073, "embedding entity": 17027, "classification conduct": 8444, "experiments widely": 19562, "used real": 60284, "languages address": 28595, "method reduce": 32635, "effective mechanism": 16670, "work english": 62646, "processing involves": 42879, "languages supervised": 28797, "algorithm leverages": 2284, "demonstrate advantages": 13862, "advantages approach": 1948, "structured inference": 53155, "features associated": 20525, "information users": 26146, "art solutions": 4403, "art algorithms": 4211, "online shopping": 38383, "voice text": 61724, "incorporate context": 25347, "multi round": 36003, "latent random": 29130, "model translation": 34491, "complex dependencies": 9623, "translations different": 58707, "order deal": 38606, "models quickly": 35393, "new labeled": 37231, "alternative way": 2510, "lower cost": 31209, "lower quality": 31221, "experts paper": 19591, "approach performing": 3639, "adversarial learning": 1972, "create data": 11694, "ner tasks": 36684, "tasks domains": 55597, "domains experimental": 16253, "results achieves": 47488, "driving force": 16439, "linear relationships": 30668, "individual predictions": 25576, "changes underlying": 8183, "prediction using": 41751, "inductive transfer": 25613, "approaches nlp": 3885, "require task": 46892, "training scratch": 58240, "propose universal": 43690, "model fine": 33891, "effective transfer": 16707, "applied task": 3300, "introduce techniques": 26870, "tuning language": 58919, "tasks reducing": 55843, "reducing error": 45707, "performance training": 40605, "data open": 12522, "pretrained models": 42169, "models code": 34820, "code paper": 8841, "crawled web": 11687, "generally better": 22163, "models process": 35360, "novel text": 37938, "work different": 62637, "different traditional": 15103, "reduction methods": 45720, "input sequences": 26333, "following text": 21271, "tasks shown": 55886, "data achieves": 12113, "produced data": 43018, "great importance": 23206, "promote research": 43192, "words embedding": 62405, "space extensive": 51863, "concept based": 9921, "based multilingual": 5890, "multilingual embedding": 36080, "key concepts": 27302, "inputs outputs": 26365, "subject predicate": 53555, "answer span": 3056, "interaction scenarios": 26611, "supervised dataset": 53976, "generative approach": 22587, "capture structure": 7713, "structure output": 53125, "introduce hierarchical": 26811, "generates words": 22361, "designed measure": 14324, "results approaches": 47508, "question aware": 44721, "approaches incorporating": 3849, "benchmark evaluation": 6466, "annotated different": 2891, "ensure quality": 17990, "trained reinforcement": 57850, "user questions": 60443, "generated sequence": 22317, "quality vector": 44598, "measure performance": 32058, "analysis yields": 2796, "learning semantics": 29867, "models bilingual": 34788, "handling long": 23425, "models fall": 35017, "results improvements": 47672, "mt model": 35920, "model having": 33954, "relies solely": 46268, "yield improvements": 63097, "suffer lack": 53770, "context models": 10676, "words specifically": 62520, "develop neural": 14604, "neural non": 37082, "context current": 10604, "method wide": 32704, "models demonstrate": 34892, "systems low": 54553, "propose leverage": 43439, "information build": 25771, "obtain large": 38179, "model operates": 34146, "select important": 49106, "matching word": 31924, "softmax based": 51629, "differentiable neural": 15140, "systems word": 54672, "trained natural": 57822, "language corpora": 28008, "corpora models": 11224, "popularity recent": 41203, "discuss key": 15471, "time text": 57231, "spoken english": 52358, "word importance": 62216, "score word": 48882, "training automatic": 57938, "role knowledge": 48309, "traditional deep": 57515, "average pooling": 5414, "learn attention": 29346, "results relation": 47804, "new solutions": 37318, "supervision training": 54098, "dependencies tokens": 14112, "global dependencies": 22827, "hard attention": 23440, "select subset": 49113, "paper integrate": 39399, "soft hard": 51622, "attention context": 4730, "reward signals": 48072, "facilitate training": 20278, "encoding model": 17571, "solely based": 51642, "graphs kg": 23187, "tasks end": 55611, "task complex": 54962, "factual questions": 20323, "ii use": 24506, "complex real": 9653, "world settings": 62959, "models reported": 35437, "massive amounts": 31879, "multilingual machine": 36094, "shared parameters": 50483, "model creates": 33730, "language space": 28493, "according language": 864, "open door": 38428, "driven language": 16425, "research deep": 47013, "achieve highly": 1157, "vision tasks": 61644, "architectures like": 4116, "tend suffer": 56208, "tasks inspired": 55691, "densely connected": 14088, "model benchmark": 33614, "datasets sentence": 13415, "obtain significant": 38190, "model promising": 34241, "unstructured data": 59667, "different modalities": 14992, "information captured": 25775, "process neural": 42810, "parent child": 39747, "original problem": 38724, "propose step": 43645, "wide margin": 61963, "including approaches": 25238, "recent deep": 45300, "adversarial network": 1978, "text high": 56615, "produce diverse": 42979, "based discriminator": 5683, "better distinguish": 6877, "generation dialogue": 22446, "baselines code": 6243, "features set": 20665, "previously selected": 42349, "texts used": 56940, "corpus identify": 11358, "evaluation approach": 18575, "systems build": 54443, "differences performance": 14827, "types different": 59082, "language compare": 27997, "different paradigms": 15018, "use long": 59937, "especially effective": 18274, "explore multiple": 19719, "attention layers": 4763, "practical application": 41457, "cognitive modeling": 8893, "performance evaluated": 40326, "emotional information": 17298, "propose automated": 43305, "automated framework": 5044, "accuracy 88": 920, "produce correct": 42977, "questions using": 44815, "paper mainly": 39420, "constantly evolving": 10345, "applications large": 3216, "legal documents": 30004, "consider semantic": 10220, "work kind": 62700, "methods automated": 32760, "inference tasks": 25697, "tasks lack": 55707, "serve training": 50083, "construct corpus": 10383, "combination domain": 9041, "knowledge provide": 27581, "knowledge construct": 27425, "text training": 56818, "build end": 7396, "paper tries": 39599, "monolingual corpus": 35794, "corresponding english": 11549, "alignment quality": 2381, "nli dataset": 37451, "performing text": 40692, "learning labeled": 29692, "examples tasks": 18936, "examples language": 18915, "lingual learning": 30709, "problem method": 42607, "training yields": 58319, "unlabeled document": 59570, "concept set": 9927, "stage use": 52446, "supporting documents": 54137, "languages tested": 28803, "using wikipedia": 61026, "test collections": 56336, "serve input": 50079, "dataset improves": 12962, "performance original": 40468, "highlights importance": 23876, "datasets better": 13169, "ai systems": 2122, "generates human": 22345, "capable producing": 7628, "work tackle": 62836, "problem sentence": 42647, "sentence boundary": 49523, "using trained": 60994, "results general": 47644, "accuracy models": 1009, "f1 metric": 20188, "encoded embeddings": 17477, "behave like": 6387, "years significant": 63075, "improvements language": 25076, "challenges field": 8049, "language phenomenon": 28381, "challenge lack": 7988, "creating large": 11742, "step better": 52801, "positive examples": 41281, "approach classification": 3445, "methods consistently": 32800, "translated text": 58560, "language help": 28096, "deep contextualized": 13686, "contextualized word": 10812, "easily added": 16536, "challenging nlp": 8120, "analysis showing": 2757, "downstream models": 16342, "semi supervision": 49470, "supervision signals": 54094, "signals paper": 50835, "languages target": 28799, "lexical representation": 30379, "utilize lexical": 61098, "higher resource": 23842, "sentences compared": 49692, "18 bleu": 179, "multilingual training": 36127, "dataset fine": 12930, "tuning pre": 58943, "trained multi": 57814, "lingual zero": 30740, "shot setting": 50642, "setting present": 50342, "architecture address": 4024, "premise hypothesis": 41811, "model relationship": 34300, "inference introduce": 25663, "final predictions": 20828, "results improved": 47670, "results achieving": 47489, "art scores": 4395, "sentences long": 49749, "research related": 47114, "methodology applied": 32717, "dataset tested": 13116, "highest correlation": 23851, "similar models": 51055, "learning signal": 29877, "deep generative": 13692, "posterior probability": 41363, "investigate model": 26967, "tasks achieving": 55490, "including natural": 25280, "text similarity": 56771, "architectures shown": 4123, "model machine": 34080, "models end": 34965, "models investigated": 35147, "pairs similar": 39217, "focus end": 21157, "distant language": 15554, "data examples": 12335, "network structures": 36809, "end encoder": 17634, "task experiment": 55064, "approach provide": 3658, "provide significant": 44129, "learning automatic": 29526, "approach small": 3697, "studied language": 53225, "data suitable": 12709, "linguistic diversity": 30765, "using weighted": 61024, "texts challenging": 56862, "better current": 6873, "techniques perform": 56119, "model question": 34266, "generation knowledge": 22480, "generating questions": 22389, "mechanism generate": 32120, "art zero": 4443, "leading state": 29299, "performance key": 40401, "representations train": 46772, "models downstream": 34940, "evaluate pre": 18487, "showing strong": 50690, "detection techniques": 14534, "classification framework": 8473, "gap present": 21974, "event specific": 18788, "documents domains": 15874, "release annotated": 46141, "attention modules": 4790, "ability extract": 608, "scratch using": 48946, "training speed": 58266, "various sizes": 61391, "models implicit": 35104, "predictions model": 41763, "task example": 55060, "performance reducing": 40526, "models identifying": 35098, "entities sentence": 18082, "sentence does": 49545, "interaction graph": 26599, "facilitate evaluation": 20268, "created datasets": 11726, "covering diverse": 11656, "evaluations proposed": 18766, "methods natural": 32952, "approach capture": 3442, "demonstrate joint": 13925, "embeddings compared": 17097, "using relevant": 60907, "subset data": 53607, "data related": 12594, "related methods": 45917, "methods furthermore": 32873, "demonstrate used": 13994, "used address": 60083, "natural questions": 36463, "http github": 24048, "information valuable": 26152, "systems create": 54463, "systems help": 54518, "explore feasibility": 19707, "end create": 17624, "annotated named": 2906, "lstm word": 31288, "module learns": 35765, "informative ones": 26174, "data provide": 12574, "provide details": 44052, "resulting data": 47463, "data recently": 12590, "significant attention": 50851, "approaches generate": 3834, "work proposed": 62791, "proposed generate": 43787, "address research": 1797, "research gap": 47044, "gap presenting": 21975, "generated humans": 22293, "easily understood": 16552, "propose ways": 43703, "aware models": 5462, "significant reductions": 50917, "model sizes": 34392, "model layers": 34047, "best hypotheses": 6766, "recognition results": 45533, "text structured": 56791, "study end": 53369, "unified approach": 59466, "achieve reasonable": 1184, "demonstrated model": 14013, "work extends": 62664, "models fully": 35045, "fully exploits": 21727, "competitive models": 9550, "changes meaning": 8178, "desired properties": 14350, "challenge recent": 8012, "works use": 62914, "sequence target": 50008, "target label": 54821, "ones experiments": 38338, "experiments effectiveness": 19425, "generate plausible": 22228, "diverse sentences": 15716, "parsing methods": 39785, "methods problem": 32992, "goal enable": 22883, "expert written": 19586, "millions users": 33265, "users share": 60481, "media sites": 32181, "data public": 12578, "challenging reasons": 8135, "figurative language": 20790, "problem challenging": 42517, "unlike previously": 59605, "supervised deep": 53978, "data makes": 12479, "detection benchmark": 14462, "benchmark new": 6483, "mentions multiple": 32307, "data small": 12669, "massively multilingual": 31892, "language embeddings": 28043, "differences language": 14823, "task instance": 55141, "high accuracies": 23706, "label spaces": 27730, "learning transfer": 29919, "data auxiliary": 12174, "sequence classification": 49915, "single multi": 51321, "task baselines": 54931, "embeddings document": 17114, "abstractive text": 774, "summarization methods": 53891, "methods adopt": 32740, "representations fail": 46666, "information carried": 25777, "rouge points": 48353, "used dataset": 60137, "clean dataset": 8644, "approach generates": 3546, "performance new": 40455, "based structured": 6065, "entities document": 18044, "classification regression": 8531, "work employs": 62645, "exact inference": 18849, "propose bidirectional": 43315, "inference algorithm": 25640, "absolute accuracy": 738, "accuracy popular": 1024, "reading understanding": 45092, "major research": 31521, "problem field": 42565, "nlp work": 37560, "using knowledge": 60745, "knowledge large": 27544, "attention architecture": 4711, "novel dual": 37808, "models information": 35131, "documents experiments": 15879, "additionally develop": 1717, "techniques demonstrate": 56075, "span text": 51934, "text single": 56773, "single entity": 51302, "response propose": 47400, "efficient self": 16898, "attention encoder": 4741, "perform multi": 40122, "multi instance": 35973, "instance learning": 26425, "datasets achieve": 13141, "dataset order": 13019, "larger existing": 29077, "existing human": 19076, "task difficult": 55021, "years deep": 63054, "results sentiment": 47825, "approach current": 3475, "arabic corpus": 3995, "applying different": 3361, "accuracy sentiment": 1045, "available arabic": 5263, "sentiment dataset": 49839, "prior research": 42411, "fully utilize": 21749, "proposed sentence": 43894, "optimal transport": 38533, "multi scale": 36004, "models supervised": 35564, "supervised semantic": 54038, "training based": 57941, "text pair": 56684, "dataset extensive": 12920, "proposed hierarchical": 43791, "cnn long": 8770, "paper design": 39335, "com neulab": 9021, "describes semeval": 14232, "semeval 2018": 49432, "2018 task": 277, "knowledge use": 27642, "model interactions": 34013, "question answers": 44717, "incorporate commonsense": 25345, "augment input": 4941, "relation embedding": 45970, "official test": 38312, "data code": 12210, "provide high": 44084, "different subsets": 15087, "role understanding": 48324, "set concepts": 50122, "grained semantics": 23045, "built large": 7486, "academic commercial": 790, "results end": 47607, "make model": 31580, "closes gap": 8714, "cross lingually": 11860, "chinese paper": 8316, "propose build": 43316, "corpora limited": 11216, "build sentence": 7426, "tokens based": 57323, "corpus propose": 11408, "local attention": 30929, "alignment translation": 2387, "paper bring": 39283, "representation proposed": 46572, "similarity used": 51127, "provide explanations": 44068, "model increase": 33993, "mechanism transformer": 32145, "results machine": 47706, "does explicitly": 15946, "inputs work": 26369, "relative positions": 46108, "bleu bleu": 7203, "relation aware": 45965, "aware self": 5469, "portion data": 41221, "possible identify": 41328, "model correctly": 33725, "correctly classify": 11491, "learn model": 29398, "modeling different": 34571, "output sentence": 38998, "representation train": 46595, "train using": 57657, "models recognizing": 35415, "sentences produced": 49771, "model experiment": 33849, "results given": 47649, "quality diversity": 44511, "diversity generated": 15735, "facilitate development": 20265, "geometric properties": 22655, "end text": 17715, "search word": 48988, "embedding dimension": 17024, "novel class": 37782, "like neural": 30492, "answering cqa": 3066, "explore new": 19720, "new problem": 37289, "information external": 25855, "structured outputs": 53168, "structured output": 53167, "develop large": 14593, "network multi": 36770, "accuracy sequence": 1046, "model scores": 34339, "output label": 38978, "exploit hierarchical": 19654, "hierarchical information": 23672, "model structured": 34416, "success nlp": 53715, "work compare": 62602, "poses significant": 41252, "features best": 20531, "setting proposed": 50344, "work help": 62677, "automatically evaluating": 5167, "features semantic": 20661, "gain better": 21904, "annotation study": 2972, "text methods": 56659, "speakers different": 52004, "shows possible": 50792, "sound change": 51735, "syntactic differences": 54301, "using newly": 60839, "learns different": 29957, "despite having": 14365, "poor results": 41143, "encourage researchers": 17600, "purposes paper": 44417, "manual work": 31753, "standard corpus": 52478, "various metrics": 61363, "corpus language": 11367, "incorporate entity": 25353, "expensive obtain": 19213, "based contrastive": 5647, "method alleviates": 32378, "alleviates need": 2424, "need data": 36552, "domain demonstrate": 16043, "ner model": 36678, "proposed feature": 43775, "work work": 62860, "work systematically": 62834, "generated word": 22334, "information generated": 25892, "generating embeddings": 22373, "quality embeddings": 44514, "introduce framework": 26806, "multiple experimental": 36214, "models employing": 34957, "provide greater": 44082, "sampled data": 48461, "code https": 8818, "general public": 22084, "achieves 30": 1288, "corpus furthermore": 11347, "achieve substantial": 1208, "substantial gain": 53619, "requires reasoning": 46948, "reasoning using": 45232, "similar datasets": 51036, "datasets focus": 13278, "knowledge specifically": 27617, "results substantial": 47863, "validation set": 61196, "accuracy task": 1058, "validation test": 61197, "fully neural": 21737, "forward network": 21403, "yields higher": 63123, "accuracy approach": 935, "language barriers": 27973, "human parity": 24211, "require significant": 46887, "significant amounts": 50850, "furthermore training": 21840, "evaluate standard": 18506, "tasks question": 55831, "called hybrid": 7547, "human intuitions": 24178, "multiple aspects": 36169, "attention text": 4834, "text question": 56724, "predictions experimental": 41758, "accuracy 84": 916, "languages rich": 28776, "purpose evaluation": 44401, "datasets based": 13165, "corpus russian": 11424, "substantially outperform": 53643, "outperform competitive": 38786, "previous years": 42324, "years based": 63051, "studies semantic": 53299, "similarity analysis": 51082, "different english": 14916, "studied languages": 53226, "successful approaches": 53735, "approaches english": 3809, "models directly": 34922, "score systems": 48876, "development novel": 14694, "semantic aware": 49240, "text complex": 56501, "studies current": 53255, "analysis application": 2612, "approaches semantic": 3918, "work training": 62846, "models available": 34747, "parallel dataset": 39648, "translating text": 58570, "text code": 56494, "input natural": 26302, "input languages": 26290, "model facilitate": 33876, "method benchmark": 32402, "survey recent": 54216, "generation introduce": 22478, "techniques compare": 56070, "properties models": 43267, "common problems": 9194, "generation diversity": 22450, "finally conduct": 20845, "known datasets": 27657, "systems common": 54452, "systems known": 54539, "method analyzing": 32381, "common framework": 9177, "modern neural": 35715, "learning speech": 29890, "research research": 47116, "group based": 23271, "models automatic": 34744, "text usually": 56837, "post edit": 41345, "systems addition": 54424, "limited available": 30572, "synthetic corpus": 54369, "translating source": 58569, "source publicly": 51791, "text dialogue": 56535, "level addition": 30057, "specific discourse": 52070, "like structures": 30507, "multiple low": 36242, "regression task": 45821, "approach english": 3513, "english data": 17792, "competition results": 9534, "support researchers": 54125, "researchers want": 47170, "build novel": 7419, "framework makes": 21560, "core semantic": 11155, "al 2005": 2230, "intelligence paper": 26538, "task mining": 55214, "received significant": 45266, "task targets": 55428, "automated classification": 5038, "models annotated": 34713, "approach demonstrates": 3482, "demonstrates superior": 14047, "classification current": 8449, "datasets recent": 13394, "equally important": 18191, "context introduce": 10662, "produces accurate": 43025, "unsupervised sentence": 59730, "representations classification": 46626, "unsupervised state": 59734, "research multi": 47075, "lingual cross": 30693, "analysis focused": 2667, "reach performance": 45052, "datasets supervised": 13449, "annual conference": 3020, "combine models": 9068, "models conventional": 34863, "retrieval models": 47956, "match human": 31897, "available existing": 5289, "use annotated": 59821, "corpora languages": 11213, "supervised speech": 54050, "recognition work": 45547, "unseen languages": 59651, "combining existing": 9111, "trained single": 57872, "additional improvements": 1674, "language finally": 28070, "pairs available": 39170, "challenges adapting": 8027, "evaluate simple": 18505, "simple unsupervised": 51223, "models varying": 35671, "varying degrees": 61430, "discuss challenges": 15462, "classifiers using": 8628, "detection data": 14470, "unigrams bigrams": 59496, "proposed data": 43750, "endangered language": 17731, "language documentation": 28033, "neural multi": 36988, "source model": 51783, "datasets multi": 13339, "terms memory": 56299, "small memory": 51483, "autoencoder architecture": 5025, "ones experimental": 38336, "using binary": 60598, "30 times": 358, "learning provides": 29828, "methods applications": 32751, "learning applied": 29518, "longstanding challenge": 31062, "challenge language": 7989, "work formulate": 62673, "based dataset": 5666, "generalizes unseen": 22157, "develop automatic": 14575, "indo aryan": 25593, "aryan languages": 4509, "languages india": 28695, "accuracy 96": 928, "used corpora": 60130, "based study": 6066, "attention method": 4785, "method encoding": 32481, "apply self": 3352, "mitigate issues": 33387, "hybrid models": 24320, "explicit control": 19613, "model approaches": 33573, "approaches strong": 3927, "attention heads": 4757, "key observation": 27323, "provides significant": 44226, "dataset machine": 12987, "dataset uses": 13129, "gap performance": 21971, "performance 20": 40169, "20 f1": 225, "performance varies": 40618, "temporal reasoning": 56191, "past tense": 39936, "quantify extent": 44608, "strong correlations": 53023, "simple extension": 51167, "learning better": 29547, "new end": 37185, "input addition": 26253, "addition traditional": 1647, "small improvements": 51476, "long document": 31010, "encoding long": 17568, "text encoders": 56553, "lead higher": 29259, "baselines including": 6271, "based single": 6038, "single encoder": 51299, "temporal information": 56188, "approach perform": 3638, "verb phrase": 61511, "new events": 37195, "performance single": 40565, "multi speaker": 36009, "handle unseen": 23417, "sample text": 48457, "text articles": 56441, "task main": 55198, "articles paper": 4473, "different modules": 14998, "retrieved knowledge": 47986, "meta learner": 32336, "continuously update": 10859, "specifically target": 52228, "performance diverse": 40298, "transfer tasks": 58424, "models allow": 34707, "transfer task": 58423, "learning pretrained": 29815, "use transfer": 60057, "learning sentence": 29868, "model bias": 33628, "bias pre": 7037, "faceted search": 20257, "networks domain": 36847, "models bidirectional": 34786, "improved overall": 24954, "scores compared": 48896, "previous benchmarks": 42247, "12 f1": 106, "previous machine": 42259, "customer experience": 12054, "scarcity training": 48676, "features relevant": 20654, "large speech": 29017, "dataset 10": 12788, "lot recent": 31118, "words trained": 62534, "used general": 60196, "problem recent": 42642, "effective multi": 16677, "framework sentence": 21596, "inductive biases": 25611, "sources multiple": 51837, "multiple training": 36305, "sentences extensive": 49720, "settings using": 50402, "able use": 731, "english grammar": 17818, "reading writing": 45093, "need automatic": 36548, "suggesting potential": 53840, "potential directions": 41386, "efficient robust": 16895, "time applications": 57116, "approaches different": 3798, "limitations approach": 30543, "crowdsourced dataset": 11886, "poorly task": 41151, "remaining errors": 46324, "analysis code": 2627, "mixed text": 33411, "sentiment positive": 49855, "contrastive learning": 10898, "accuracy 10": 893, "embedding deep": 17023, "outstanding performance": 39030, "performance approaches": 40197, "incorporate word": 25366, "classification propose": 8523, "results clearly": 47534, "proposed scheme": 43888, "improvements 10": 25043, "information crucial": 25797, "text evaluation": 56561, "evaluation dataset": 18603, "released pre": 46179, "datasets source": 13437, "weighted average": 61924, "final result": 20829, "word2vec embeddings": 62347, "ensemble approaches": 17971, "approach highly": 3554, "previous statistical": 42284, "depend heavily": 14100, "leveraging transfer": 30340, "learning train": 29916, "models multi": 35235, "title generation": 57270, "focus low": 21177, "usage language": 59801, "external factors": 19935, "face face": 20242, "time social": 57215, "speakers language": 52006, "considered low": 10249, "corpora evaluate": 11197, "existing corpora": 19048, "used benchmark": 60103, "benchmark future": 6469, "subtasks semeval": 53672, "historical text": 23961, "proposed evaluation": 43770, "evaluation practices": 18677, "provide clear": 44025, "rigorous evaluation": 48149, "evaluation including": 18627, "effective method": 16671, "previously seen": 42348, "process specifically": 42830, "sentence use": 49666, "pairs source": 39219, "similar input": 51048, "based similarities": 6035, "knowledge propose": 27579, "annotated gold": 2898, "case using": 7803, "work represents": 62807, "applied languages": 3277, "training multilingual": 58183, "performance monolingual": 40443, "capable performing": 7625, "speech speech": 52295, "numerous studies": 38070, "detection approaches": 14460, "problem social": 42658, "input instance": 26286, "false positive": 20381, "tweets posted": 59019, "posted users": 41357, "better utilization": 6991, "switchboard corpus": 54255, "15 times": 152, "tasks makes": 55738, "sacrificing quality": 48423, "media increasingly": 32168, "needed order": 36602, "overview research": 39115, "research data": 47009, "mining natural": 33319, "different areas": 14842, "problem lack": 42590, "corpus languages": 11368, "pairs extracted": 39191, "extracted open": 20017, "main problem": 31452, "ability make": 623, "formal informal": 21347, "advantage method": 1942, "make corpus": 31555, "informal style": 25742, "generated corpus": 22281, "million comments": 33251, "explore relationship": 19731, "training signal": 58257, "signal training": 50830, "shared model": 50479, "particular style": 39863, "output distributions": 38969, "textual input": 56970, "models consistently": 34851, "style language": 53489, "generation capabilities": 22429, "results publicly": 47793, "methods tasks": 33069, "vital task": 61693, "task better": 54939, "machine understanding": 31394, "texts difficult": 56874, "novel effective": 37810, "expert human": 19582, "task attempt": 54919, "require prior": 46884, "methods word2vec": 33103, "user based": 60404, "user profile": 60436, "results user": 47897, "capsule network": 7644, "model way": 34532, "strong search": 53049, "notoriously difficult": 37737, "german language": 22674, "work progress": 62770, "output quality": 38995, "time possible": 57194, "costs paper": 11607, "non negligible": 37669, "study examines": 53373, "specifically use": 52232, "student learning": 53211, "field using": 20773, "data boost": 12192, "boost model": 7253, "data reach": 12584, "data respectively": 12612, "approach predict": 3646, "using sentiment": 60927, "forum posts": 21398, "art deep": 4246, "learning time": 29912, "series models": 50066, "special focus": 52019, "random sample": 44885, "term pairs": 56253, "false negatives": 20380, "similar images": 51046, "previous attempts": 42242, "knowledge inference": 27523, "called semantic": 7553, "use commonsense": 59845, "developed neural": 14636, "systems outperform": 54577, "existing multi": 19111, "modal fusion": 33457, "fusion methods": 21857, "video understanding": 61588, "multiple modalities": 36249, "modalities different": 33468, "rarely explored": 45005, "modal representations": 33465, "task finally": 55083, "results widely": 47909, "representations additional": 46615, "systematically compare": 54410, "compare popular": 9357, "popular neural": 41176, "integrating word": 26526, "features outperform": 20635, "second best": 48999, "provides additional": 44180, "10 f1": 41, "summarization models": 53893, "short documents": 50554, "new hierarchical": 37217, "models discourse": 34923, "linguistic processes": 30783, "samples non": 48484, "possible achieve": 41314, "provide useful": 44148, "learning self": 29864, "learning leveraging": 29708, "enhancing model": 17948, "model performances": 34198, "model confidence": 33698, "instance selection": 26429, "automatically based": 5144, "terms better": 56271, "evaluation phase": 18672, "score 75": 48810, "frequently occurring": 21686, "use recently": 59989, "serve starting": 50080, "aggregating information": 2076, "multiple mentions": 36246, "mentions entity": 32305, "approaches automated": 3770, "results conll": 47558, "fluent sentences": 21133, "present real": 41996, "world application": 62926, "methods improving": 32897, "improving neural": 25187, "commerce platform": 9151, "simulation experiments": 51264, "experiments paper": 19485, "work real": 62800, "thorough analysis": 57055, "improve task": 24932, "datasets target": 13451, "different underlying": 15115, "building state": 7471, "languages challenging": 28613, "data extremely": 12357, "settings propose": 50391, "approaches need": 3881, "augmentation methods": 4963, "finally explore": 20858, "explore cross": 19693, "train single": 57634, "model related": 34297, "tree learning": 58747, "sentence syntactic": 49653, "work models": 62726, "like sentence": 30501, "ability models": 626, "single correct": 51290, "needs learn": 36609, "task current": 54989, "framework utilize": 21624, "data addition": 12120, "methods exploit": 32854, "predicted labels": 41667, "labels unlabeled": 27855, "samples based": 48465, "based prediction": 5945, "prediction confidence": 41698, "augment training": 4944, "sampling bias": 48500, "explore data": 19695, "select high": 49104, "unlabeled samples": 59577, "suffers low": 53792, "resource scenarios": 47273, "obtained pre": 38217, "work perform": 62747, "embeddings help": 17145, "tasks embeddings": 55605, "piece information": 40876, "learning objectives": 29786, "integrating domain": 26521, "auto completion": 5013, "task specifically": 55405, "specifically address": 52179, "queries work": 44659, "work improve": 62683, "goal building": 22878, "focus using": 21212, "time information": 57166, "information study": 26108, "datasets previous": 13375, "methods accuracy": 32727, "approach novel": 3614, "novel reward": 37911, "reward functions": 48068, "including human": 25261, "dataset strong": 13102, "lack robustness": 27912, "robustness propose": 48293, "significantly increases": 50984, "model making": 34089, "learning capabilities": 29550, "based adversarial": 5559, "types adversarial": 59077, "task recently": 55324, "samples target": 48489, "better predictions": 6943, "german russian": 22675, "nature neural": 36485, "theoretically sound": 57033, "constraints present": 10377, "present algorithm": 41842, "model bleu": 33632, "implementation available": 24638, "classification systems": 8560, "domain agnostic": 16016, "monolingual cross": 35796, "lingual multilingual": 30715, "languages german": 28680, "results monolingual": 47730, "combining machine": 9113, "approaches discuss": 3799, "corpus target": 11441, "easier learn": 16526, "algorithm automatically": 2263, "images text": 24554, "failure cases": 20352, "facilitate future": 20269, "research introduce": 47057, "new benchmark": 37142, "gender bias": 22035, "winograd schema": 62073, "approach combination": 3449, "affecting performance": 2021, "dataset code": 12839, "word attention": 62116, "better sentence": 6964, "firstly propose": 21065, "directional gated": 15277, "bi gru": 7009, "entity centric": 18098, "combination model": 9044, "combines multiple": 9098, "datasets making": 13325, "multiple real": 36269, "framework contains": 21481, "content similarity": 10557, "enable better": 17421, "capability generating": 7608, "producing high": 43039, "document recent": 15824, "coherence model": 8909, "fashion using": 20418, "data empirical": 12317, "efficiently capture": 16912, "qualitative evaluation": 44476, "complementary aspects": 9588, "embeddings propose": 17196, "approach generalizes": 3543, "correlate strongly": 11508, "effective using": 16710, "new evidence": 37196, "mixture experts": 33419, "approaches yield": 3957, "image based": 24529, "answer queries": 3048, "sentence paragraph": 49615, "text addition": 56424, "wikipedia entity": 62048, "address aforementioned": 1741, "aforementioned issues": 2037, "improve models": 24875, "network predicts": 36787, "models adapt": 34689, "leads state": 29329, "art single": 4401, "languages aligned": 28598, "works typically": 62913, "use retrieval": 60000, "inference paper": 25678, "propose unified": 43688, "directly optimizes": 15327, "improvements observed": 25088, "near human": 36505, "performance languages": 40410, "amounts parallel": 2555, "pairs work": 39235, "having access": 23485, "effect language": 16614, "iterative translation": 27128, "english benchmarks": 17778, "benchmarks models": 6532, "supervised supervised": 54053, "important input": 24735, "understand limitations": 59302, "limitations methods": 30550, "lack information": 27894, "prediction label": 41713, "trained maximum": 57787, "tune models": 58858, "high entropy": 23734, "examples fine": 18904, "tuned models": 58880, "models interpretable": 35141, "reduction accuracy": 45717, "accuracy loss": 1000, "maximizing mutual": 31964, "focus training": 21207, "simple architecture": 51136, "parsing recent": 39794, "model structure": 34415, "neural methods": 36970, "methods end": 32838, "implicitly learns": 24670, "information explicitly": 25851, "entities involved": 18059, "baselines significantly": 6300, "processing previous": 42925, "work demonstrated": 62626, "inference data": 25650, "conducting extensive": 10102, "using auxiliary": 60578, "compared single": 9453, "learning addition": 29503, "performance transfer": 40607, "simple greedy": 51176, "improvement comes": 24997, "propose flexible": 43391, "additional computational": 1657, "corpus built": 11291, "using output": 60851, "work problem": 62769, "learning trained": 29917, "range models": 44923, "yields substantial": 63134, "created different": 11727, "different versions": 15124, "achieved near": 1252, "techniques address": 56056, "exhibit poor": 19003, "performance f1": 40341, "score 37": 48791, "research avenues": 46991, "understanding propose": 59386, "processing method": 42889, "method enriching": 32485, "representation vector": 46603, "consists steps": 10330, "word2vec fasttext": 62348, "based target": 6082, "structural complexity": 53075, "metric measure": 33120, "dataset experiments": 12918, "unsupervised semantic": 59727, "context query": 10700, "approaches usually": 3951, "address limitations": 1778, "better zero": 6994, "shot performance": 50635, "performance robust": 40542, "specific training": 52164, "need model": 36584, "multilingual encoder": 36082, "architecture demonstrate": 4039, "learns language": 29962, "shot translation": 50656, "using smaller": 60950, "tasks method": 55745, "paper specifically": 39575, "standard rnn": 52522, "model known": 34033, "including novel": 25286, "work establish": 62647, "establish state": 18346, "transfer sentence": 58419, "models given": 35063, "size dataset": 51380, "uses pre": 60528, "matching model": 31915, "achieves mean": 1345, "accuracy 64": 901, "significant gain": 50866, "describes submitted": 14235, "submitted semeval": 53586, "texts large": 56899, "labeled corpora": 27736, "models feature": 35021, "feature extractors": 20490, "models support": 35565, "embeddings train": 17232, "information raw": 26039, "end way": 17727, "textual modalities": 56973, "complete task": 9603, "language grounding": 28093, "results attention": 47511, "task terms": 55432, "attention approach": 4709, "embeddings learnt": 17166, "context different": 10614, "wikidata knowledge": 62038, "different entity": 14918, "modeling lexical": 34589, "reliable evaluation": 46251, "method models": 32579, "models researchers": 35448, "store information": 52873, "information human": 25906, "work test": 62840, "sentence corpus": 49537, "originally written": 38746, "increased use": 25432, "used benchmarks": 60106, "second introduce": 49007, "performance level": 40416, "multi choice": 35943, "task makes": 55203, "end task": 17713, "important words": 24793, "representations ii": 46684, "position aware": 41260, "improvement prior": 25019, "art pretrained": 4359, "applied successfully": 3298, "learn relation": 29411, "linear classifiers": 30652, "performance methods": 40436, "methods integrating": 32906, "evaluation setups": 18717, "suitable evaluation": 53857, "models primarily": 35355, "approach evaluating": 3521, "evaluating language": 18559, "proposed using": 43923, "models exhibit": 34985, "models discussed": 34928, "aims train": 2217, "shared encoder": 50468, "pairs sentences": 39216, "shared latent": 50477, "issue introduce": 27063, "proposed enhance": 43767, "tasks report": 55853, "task featured": 55080, "set tasks": 50258, "tasks binary": 55527, "description papers": 14247, "consuming task": 10453, "evaluate compare": 18445, "corpus addition": 11270, "results finally": 47633, "learn implicit": 29382, "sota methods": 51727, "generating human": 22378, "sota systems": 51732, "tasks standard": 55908, "new target": 37331, "like deep": 30467, "quite difficult": 44829, "difficult understand": 15192, "model stage": 34406, "aim identify": 2149, "model errors": 33832, "world large": 62946, "com ibm": 9016, "features instead": 20606, "features generate": 20589, "process present": 42817, "present public": 41990, "public dataset": 44313, "peer reviews": 40011, "accept reject": 809, "collection process": 8985, "novel nlp": 37886, "second task": 49026, "use online": 59967, "relational databases": 46005, "vast data": 61438, "type information": 59057, "accuracy 17": 896, "17 absolute": 173, "semantic coverage": 49265, "distance metrics": 15549, "better learn": 6908, "representations approach": 46619, "provides state": 44228, "past year": 39939, "advances sequence": 1926, "modeling machine": 34595, "recent transformer": 45361, "transformer model": 58495, "seq2seq architectures": 49894, "new architectures": 37137, "techniques apply": 56060, "seq2seq architecture": 49893, "relation sentences": 45996, "current open": 11994, "quality sentence": 44579, "global structural": 22843, "applied different": 3267, "effectiveness generality": 16781, "art open": 4321, "directly text": 15337, "models answer": 34715, "types questions": 59113, "question context": 44724, "multiple conditions": 36186, "greatly outperforms": 23236, "supervised systems": 54054, "predict relations": 41652, "neural entity": 36954, "helps explain": 23605, "model contrast": 33715, "contrast conventional": 10874, "spaces paper": 51911, "model sequence": 34360, "tree search": 58756, "lstm used": 31286, "used summarize": 60316, "parameters work": 39729, "outperformed state": 38842, "source semantic": 51794, "data apply": 12141, "model pre": 34217, "method different": 32463, "datasets second": 13413, "entity given": 18107, "way alleviate": 61791, "problems neural": 42716, "sentences usually": 49804, "subword segmentation": 53687, "trains model": 58324, "model multiple": 34113, "training addition": 57926, "experiment multiple": 19243, "improvements especially": 25071, "resource domain": 47222, "settings work": 50404, "assignment problem": 4606, "learning inspired": 29684, "tools support": 57385, "data recent": 12587, "propose cross": 43342, "suffer low": 53774, "shed new": 50527, "participated semeval": 39818, "achieving f1": 1403, "bias problem": 7039, "suffer problems": 53778, "paper employ": 39346, "inverse reinforcement": 26929, "generation specifically": 22549, "generate higher": 22208, "million articles": 33250, "demonstrate high": 13918, "high diversity": 23731, "abstractive extractive": 770, "extraction strategies": 20115, "strategies used": 52919, "methods data": 32809, "techniques yield": 56154, "essays written": 18318, "spanish german": 51943, "constituency parses": 10350, "dependency parses": 14130, "applications dataset": 3195, "relationship extraction": 46069, "semantic drift": 49272, "approach multiple": 3607, "capture inter": 7683, "annotator disagreement": 3013, "truth value": 58840, "building intelligent": 7448, "supervised language": 53992, "ability capturing": 598, "catastrophic forgetting": 7833, "learning novel": 29784, "novel knowledge": 37846, "imitation reinforcement": 24579, "trained approach": 57674, "verified effectiveness": 61529, "crucial understanding": 11917, "related problem": 45928, "methods particular": 32976, "sentiment expressed": 49844, "information general": 25889, "aims extract": 2192, "sentence conditioned": 49531, "lingual information": 30704, "language shared": 28483, "iterative process": 27125, "adversarial neural": 1983, "unsupervised cross": 59689, "lingual embeddings": 30700, "outperform baselines": 38783, "demonstrate improvements": 13923, "models believe": 34764, "non existent": 37652, "performance boosted": 40223, "mechanism called": 32103, "rnn attention": 48182, "combines multi": 9097, "multi head": 35963, "distributed multiple": 15622, "multiple heads": 36222, "sequential information": 50043, "free model": 21644, "art competitive": 4238, "nlp benchmarks": 37469, "vanilla seq2seq": 61216, "models reach": 35398, "scores proposed": 48916, "data split": 12689, "models augmented": 34741, "seq2seq based": 49895, "sequence language": 49942, "fully capture": 21715, "data distribution": 12290, "different lengths": 14976, "model ability": 33485, "structure training": 53144, "training distribution": 58068, "highly sensitive": 23915, "reduce human": 45664, "reduced using": 45686, "word predictions": 62268, "number factors": 38004, "significant effect": 50865, "perform translation": 40157, "size fits": 51384, "parameter efficient": 39668, "efficient adaptation": 16859, "technique requires": 56045, "particular user": 39871, "various studies": 61401, "studies proposed": 53289, "errors lead": 18243, "network generates": 36749, "compositional representations": 9748, "approach low": 3592, "setting languages": 50329, "languages different": 28641, "news outlets": 37409, "training learning": 58155, "results compare": 47544, "terms performance": 56307, "set predefined": 50223, "typically relies": 59151, "train binary": 57570, "annotation different": 2943, "evaluating model": 18562, "methods able": 32725, "predict token": 41658, "task set": 55365, "relations existing": 46029, "lack ability": 27870, "hard time": 23452, "generalizes better": 22154, "continuous space": 10853, "generalizing unseen": 22160, "model helps": 33956, "suffer various": 53785, "sequential nature": 50048, "investigate alternative": 26939, "encoding text": 17579, "text consists": 56508, "information exchange": 25844, "various classification": 61314, "classification sequence": 8545, "benchmarks proposed": 6538, "representation power": 46566, "competitive performances": 9557, "performances compared": 40639, "bilstm models": 7135, "typically focused": 59144, "systems highly": 54520, "features study": 20676, "better transfer": 6984, "transfer languages": 58372, "study ability": 53317, "humans perform": 24284, "approach fine": 3538, "processing model": 42891, "tasks downstream": 55598, "tasks dialogue": 55588, "positive effects": 41280, "high time": 23806, "time reduction": 57204, "experiments use": 19551, "use auxiliary": 59831, "policy makers": 41099, "languages existing": 28661, "approaches assume": 3767, "learning effective": 29608, "effective word": 16716, "available low": 5324, "model corpus": 33723, "information design": 25807, "great challenges": 23201, "model leverage": 34057, "tree model": 58752, "process extensive": 42781, "conducted large": 10087, "embeddings knowledge": 17156, "view contrastive": 61596, "negative examples": 36619, "main model": 31447, "multiple metrics": 36248, "articles provide": 4476, "improve user": 24938, "chinese dataset": 8304, "annotated subset": 2918, "reference based": 45735, "greatly improved": 23231, "correlations human": 11535, "effect adding": 16610, "impact different": 24592, "information learning": 25952, "representational power": 46609, "despite impressive": 14369, "user provided": 60439, "text spans": 56781, "performance framework": 40352, "new topic": 37347, "years thanks": 63080, "documents web": 15929, "conventional text": 11015, "embedding approach": 17011, "necessary information": 36531, "models preserve": 35350, "academic paper": 792, "experiments validate": 19554, "limited information": 30590, "provides natural": 44214, "providing explanations": 44241, "labels furthermore": 27828, "perform inference": 40114, "test ability": 56330, "paper order": 39433, "automatically determine": 5161, "network achieve": 36692, "highlight major": 23866, "major limitations": 31516, "main evaluation": 31435, "evaluation procedures": 18681, "provides analysis": 44181, "intermediate representations": 26677, "sequences propose": 50024, "classification label": 8482, "attention learned": 4765, "learned training": 29486, "labeled samples": 27761, "ability leverage": 621, "combined different": 9078, "significantly lower": 50987, "task received": 55320, "achieve impressive": 1161, "set performance": 50217, "examine robustness": 18868, "data identify": 12412, "challenging models": 8113, "nli models": 37454, "models benefit": 34769, "translation requires": 58668, "text learn": 56646, "address challenging": 1750, "problem based": 42511, "approach specifically": 3700, "including semantic": 25295, "syntactic knowledge": 54305, "effectiveness multi": 16797, "input embeddings": 26272, "respect input": 47346, "investigate extent": 26958, "usually built": 61039, "propagation paper": 43245, "approach encoder": 3509, "distinct existing": 15591, "highly confident": 23887, "tasks knowledge": 55704, "algorithms rely": 2339, "suggest proposed": 53829, "achieve remarkable": 1185, "terms mean": 56298, "mean reciprocal": 31993, "reciprocal rank": 45485, "baselines automatic": 6234, "largely focused": 29055, "individual systems": 25580, "systems benchmark": 54440, "systems time": 54654, "time present": 57197, "carefully chosen": 7759, "use dataset": 59860, "dataset examine": 12913, "sentiment intensity": 49848, "race gender": 44844, "supervised techniques": 54057, "incorporates information": 25376, "accuracy 80": 912, "participated shared": 39819, "tasks trained": 55938, "teams participating": 56010, "task submissions": 55419, "class imbalance": 8402, "sampling technique": 48509, "domain terms": 16203, "terms general": 56292, "analysis involves": 2686, "languages lrls": 28719, "lingual training": 30733, "training high": 58120, "tag sets": 54726, "aims improve": 2199, "information sharing": 26083, "demonstrate superior": 13982, "existing cross": 19050, "lingual approaches": 30692, "method combine": 32419, "called domain": 7544, "domain adapted": 16008, "help achieve": 23549, "various benchmarks": 61311, "learning pre": 29810, "different statistical": 15080, "analysis proposed": 2730, "possible improvements": 41330, "scores used": 48927, "modeling neural": 34605, "used estimate": 60168, "allowing users": 2449, "interpret model": 26710, "confidence model": 10115, "online resources": 38381, "information address": 25757, "work build": 62592, "clarification questions": 8386, "inspired idea": 26407, "create dataset": 11695, "samples dataset": 48469, "intermediate layers": 26675, "models latent": 35170, "major advantage": 31500, "inference learning": 25667, "proposed graph": 43789, "performing par": 40687, "especially rare": 18295, "models jointly": 35150, "methods named": 32951, "entities usually": 18090, "model detect": 33756, "bidirectional gated": 7070, "mechanism designed": 32106, "learn entity": 29369, "introduce benchmark": 26786, "code generation": 8816, "sentences existing": 49713, "share similar": 50461, "order encourage": 38613, "appeared training": 3144, "model chinese": 33655, "new annotation": 37128, "broadly applicable": 7366, "believe dataset": 6410, "task success": 55421, "articles collected": 4464, "process corpus": 42765, "bilingual multilingual": 7112, "english turkish": 17895, "data solve": 12673, "unpaired data": 59625, "review datasets": 48030, "method substantially": 32671, "substantially improves": 53639, "content preservation": 10547, "datasets respectively": 13407, "systems face": 54502, "applying framework": 3363, "setting demonstrate": 50319, "early warning": 16518, "supervised scenarios": 54037, "scenarios paper": 48702, "contribution semeval": 10947, "standard model": 52506, "potentially relevant": 41418, "unbalanced data": 59219, "approaches building": 3780, "theoretical results": 57025, "vectors linear": 61491, "requires fewer": 46929, "fewer examples": 20736, "tasks analyze": 55500, "passages text": 39923, "using notion": 60844, "ignore important": 24491, "important question": 24756, "drop accuracy": 16441, "model accurate": 33500, "developing language": 14654, "language word": 28581, "hierarchical multi": 23680, "scale language": 48586, "lower level": 31214, "prevent catastrophic": 42229, "machine intelligence": 31303, "shows effectiveness": 50775, "specifically investigate": 52211, "use significantly": 60015, "utterances paper": 61150, "neural representation": 37090, "data settings": 12654, "text provides": 56721, "evaluation corpora": 18598, "different user": 15118, "user groups": 60420, "characteristics training": 8245, "including domain": 25252, "setting new": 50334, "sophisticated neural": 51717, "single question": 51330, "question paper": 44743, "stage procedure": 52441, "stage process": 52442, "learning result": 29848, "improved state": 24966, "significantly challenging": 50947, "evaluated results": 18547, "generate correct": 22189, "approach automated": 3424, "important application": 24698, "applications task": 3251, "methods good": 32880, "better encoding": 6882, "methods leverage": 32925, "hybrid method": 24317, "leverage advantages": 30255, "provided different": 44161, "entailment question": 18004, "entities text": 18085, "models built": 34799, "binary relations": 7154, "specific models": 52113, "improvement average": 24985, "translation languages": 58626, "combination machine": 9042, "results consistently": 47560, "mental states": 32293, "challenge introduce": 7986, "tasks suggesting": 55919, "recent attention": 45295, "advances word": 1932, "adaptation target": 1539, "syntactically similar": 54343, "similar source": 51067, "achieves 90": 1295, "domains provide": 16287, "error free": 18220, "absolute increase": 747, "outside training": 39027, "capture global": 7674, "domain study": 16197, "external features": 19936, "model methods": 34100, "ensemble based": 17972, "models guide": 35072, "correlates better": 11512, "intelligence systems": 26539, "goal explore": 22885, "potentially leading": 41415, "text challenging": 56463, "span prediction": 51929, "available community": 5271, "set given": 50161, "vectors pre": 61494, "help mitigate": 23577, "datasets outperforms": 13359, "models achieving": 34685, "systems popular": 54590, "data low": 12470, "methods reduce": 33010, "text explicitly": 56568, "input work": 26360, "extent models": 19924, "classifier performance": 8601, "models applying": 34720, "style transfer": 53504, "demonstrate trade": 13991, "learning provide": 29827, "different views": 15125, "view training": 61603, "data supervision": 12712, "success unsupervised": 53728, "modal alignment": 33452, "fashion proposed": 20417, "training followed": 58108, "refinement procedure": 45768, "comparable supervised": 9314, "languages little": 28716, "audio text": 4931, "models account": 34662, "data tasks": 12725, "tasks improve": 55671, "translation experimental": 58611, "proposed adversarial": 43714, "require labeled": 46865, "pairs proposed": 39211, "proposed generative": 43788, "study shot": 53459, "language domains": 28037, "metric based": 33111, "optimization based": 38545, "based meta": 5844, "domain low": 16106, "inter task": 26589, "realistic setting": 45152, "tasks diverse": 55593, "based algorithms": 5562, "capture complex": 7654, "propose adaptive": 43283, "weighted combination": 61927, "meta training": 32345, "training tasks": 58286, "shot task": 50649, "extensive quantitative": 19909, "art shot": 4400, "corpus different": 11325, "translation direction": 58601, "ability distinguish": 604, "original texts": 38732, "languages annotated": 28599, "corpus publicly": 11414, "available work": 5392, "text identify": 56618, "major obstacle": 31517, "generating large": 22380, "synthetic training": 54385, "techniques make": 56109, "created manually": 11729, "effort needed": 16929, "improved versions": 24973, "analysis previous": 2725, "novel bidirectional": 37780, "dependency structure": 14138, "structure features": 53105, "sentences key": 49743, "dependency syntactic": 14140, "form content": 21316, "text typically": 56828, "networks makes": 36874, "baselines data": 6248, "shows using": 50810, "perplexity metric": 40739, "models combination": 34825, "strategies improve": 52904, "model reduces": 34291, "mean absolute": 31987, "dataset respectively": 13066, "models presence": 35345, "representation learned": 46535, "learned pre": 29474, "integrating context": 26520, "novel strategies": 37929, "propose training": 43678, "powerful technique": 41446, "aware representations": 5468, "introducing extra": 26900, "yielding better": 63109, "tasks paraphrase": 55792, "similarity natural": 51110, "tasks rely": 55850, "level character": 30072, "tasks single": 55893, "media datasets": 32166, "results news": 47742, "identification paper": 24393, "terms data": 56282, "data baseline": 12186, "lingual resources": 30723, "dataset methods": 12994, "unlike english": 59593, "basic semantic": 6333, "semantic unit": 49371, "character information": 8205, "propose low": 43444, "feature maps": 20497, "single models": 51320, "data independent": 12428, "relying external": 46307, "time specific": 57219, "languages cultures": 28628, "cost time": 11595, "corpus new": 11388, "methods investigated": 32909, "nlu task": 37570, "large improvement": 28889, "house data": 24041, "processing approaches": 42854, "task consisting": 54972, "evaluation real": 18692, "evaluation best": 18584, "years lot": 63063, "outstanding results": 39031, "tasks unfortunately": 55948, "solve issues": 51680, "issues introduce": 27092, "model retains": 34323, "reasoning abilities": 45182, "tested model": 56397, "tasks joint": 55699, "present supervised": 42031, "supervision task": 54097, "different content": 14874, "reveal new": 48011, "used simple": 60303, "incorporating additional": 25378, "control degree": 10962, "new methodology": 37255, "quality labeled": 44539, "require substantial": 46890, "solving task": 51706, "equipped attention": 18197, "lack systematic": 27920, "work cross": 62614, "classification aims": 8429, "additional resources": 1697, "best practice": 6802, "transfer english": 58362, "prior distributions": 42398, "using multilingual": 60819, "framework evaluate": 21507, "research important": 47052, "important area": 24700, "models surprisingly": 35570, "comparable superior": 9313, "majority cases": 31527, "propose additional": 43284, "including classification": 25243, "code datasets": 8808, "obtained https": 38211, "describes submissions": 14234, "translation generation": 58616, "translation improve": 58619, "learning high": 29671, "tasks general": 55653, "domain embeddings": 16055, "proposed meta": 43802, "data past": 12540, "results domain": 47597, "process improve": 42792, "methods generate": 32877, "methods demonstrate": 32817, "systems process": 54600, "sentences isolation": 49741, "introduce context": 26789, "experiment english": 19238, "observe model": 38137, "consistent gains": 10275, "languages significantly": 28786, "inputs experiments": 26363, "quality state": 44582, "design paper": 14294, "models small": 35515, "data generation": 12387, "generation procedure": 22528, "identifying potential": 24461, "systems identifying": 54524, "learning exploit": 29638, "experiments zero": 19566, "spanish french": 51942, "setting method": 50330, "data zero": 12780, "shot language": 50622, "directions improve": 15294, "models features": 35022, "paper seek": 39571, "quality questions": 44570, "questions collected": 44777, "dataset enables": 12907, "context demonstrate": 10607, "efficacy model": 16833, "model comparing": 33676, "comparing state": 9486, "baselines human": 6268, "remains elusive": 46332, "outperforms range": 38936, "scores human": 48903, "evaluation large": 18632, "based network": 5899, "dynamic context": 16483, "effect quality": 16618, "intra inter": 26759, "regression based": 45812, "way leverage": 61817, "maximize performance": 31960, "quality task": 44586, "specific sentences": 52145, "sentences context": 49697, "feature level": 20495, "needs large": 36608, "code pre": 8843, "learns generate": 29959, "reasoning capability": 45188, "systems experimental": 54497, "results validate": 47900, "validate effectiveness": 61174, "tasks following": 55648, "knowledge discovery": 27438, "texts recently": 56917, "task gained": 55100, "problems related": 42727, "related social": 45936, "data shared": 12655, "modalities text": 33472, "images audio": 24552, "valuable insights": 61203, "identification using": 24402, "using visual": 61018, "comparison traditional": 9509, "tasks modeled": 55749, "trained different": 57709, "search based": 48965, "achieves improvements": 1342, "methods attention": 32759, "retain original": 47922, "network propose": 36790, "preserving original": 42125, "feature information": 20493, "increasing size": 25464, "task compare": 54959, "agnostic approach": 2087, "order gain": 38621, "efficiency modularity": 16847, "learning frameworks": 29657, "graph allows": 23094, "decoding algorithms": 13626, "present text": 42039, "approach estimating": 3518, "works propose": 62903, "user profiling": 60438, "unified end": 59469, "fuse information": 21846, "knowledge approach": 27397, "learning baselines": 29545, "outperforms approaches": 38866, "approaches significantly": 3920, "users propose": 60475, "approach information": 3572, "combines benefits": 9092, "limited annotations": 30568, "embeddings multi": 17177, "datasets conll": 13190, "model consisting": 33704, "given class": 22724, "performs close": 40701, "interpretable models": 26727, "context training": 10735, "prediction multi": 41722, "context available": 10588, "trained state": 57882, "model bert": 33618, "bert language": 6667, "modeling framework": 34577, "use bert": 59834, "bert embeddings": 6651, "success paper": 53716, "arabic texts": 4006, "relevant textual": 46241, "constructed corpus": 10408, "success variety": 53729, "general models": 22070, "investigate properties": 26978, "data affect": 12126, "input models": 26301, "data furthermore": 12377, "collected using": 8970, "analysis state": 2766, "systems reason": 54612, "surprisingly strong": 54192, "data constructed": 12246, "hierarchical architecture": 23655, "representation transfer": 46596, "order mitigate": 38640, "mitigate data": 33381, "second place": 49016, "score english": 48842, "2018 shared": 275, "training low": 58160, "low precision": 31165, "tuning methods": 58927, "transformer variant": 58516, "number high": 38007, "corpus additional": 11271, "used initialize": 60214, "parameters fine": 39699, "tuned using": 58892, "score 74": 48809, "enhances model": 17945, "mechanism helps": 32121, "informative words": 26178, "furthermore existing": 21819, "greatly benefit": 23227, "perform text": 40154, "using memory": 60797, "architecture better": 4030, "perform data": 40084, "data sampling": 12621, "classes demonstrate": 8416, "performance large": 40411, "large batch": 28849, "training larger": 58151, "yields state": 63131, "available high": 5302, "crucial real": 11908, "world domain": 62938, "leverages domain": 30303, "human participants": 24212, "performance improves": 40387, "performance suffers": 40584, "humans better": 24273, "models utilize": 35663, "architecture design": 4040, "including data": 25246, "open set": 38446, "recognition previous": 45525, "baselines based": 6238, "task determining": 55015, "understanding existing": 59344, "stress tests": 52980, "systems ability": 54418, "models respect": 35450, "challenging linguistic": 8109, "area recently": 4148, "improve learning": 24868, "creating new": 11745, "attention structure": 4832, "quality experiments": 44522, "investigate relationship": 26983, "understand user": 59316, "depends quality": 14163, "domain specificity": 16195, "mitigate effects": 33383, "data availability": 12175, "20 relative": 230, "learn policy": 29409, "score 78": 48813, "twitter posts": 59039, "dataset construction": 12864, "dataset creation": 12873, "closed world": 8700, "world assumption": 62931, "limited human": 30589, "attributes work": 4912, "formalize problem": 21360, "networks specifically": 36911, "context semantics": 10710, "mechanism provide": 32139, "provide interpretable": 44095, "sampling strategy": 48508, "reduce burden": 45651, "features prior": 20648, "annotated samples": 2912, "score 83": 48818, "reference data": 45738, "encode text": 17472, "aggregation mechanism": 2079, "mechanism obtain": 32132, "dynamic routing": 16491, "information need": 25986, "aggregation methods": 2080, "tasks largely": 55715, "analysis effect": 2654, "effect using": 16622, "varying amounts": 61426, "local syntactic": 30951, "task capturing": 54945, "open knowledge": 38436, "formal definition": 21345, "domain applications": 16020, "human perception": 24213, "labeled datasets": 27755, "annotation procedure": 2960, "present annotation": 41845, "observations propose": 38128, "study provide": 53447, "available labeled": 5315, "explore possible": 19726, "different cross": 14882, "scores different": 48900, "use human": 59910, "method identifying": 32527, "gaussian process": 22017, "trained small": 57874, "bayesian approach": 6357, "languages does": 28645, "requires substantial": 46953, "used humans": 60205, "challenge sets": 8016, "set testing": 50263, "structure news": 53124, "sets used": 50310, "build machine": 7410, "capture multiple": 7699, "including english": 25255, "benchmarks measure": 6529, "achieve multi": 1171, "document describes": 15783, "tasks fine": 55643, "trained representations": 57854, "technique improving": 56035, "model particular": 34183, "specific model": 52112, "reduce need": 45673, "quite useful": 44834, "propose embed": 43363, "2003 dataset": 240, "dataset document": 12900, "natural question": 36462, "develop evaluation": 14588, "using translated": 61002, "asked predict": 4522, "conduct study": 10064, "cause performance": 7887, "differences languages": 14824, "label prediction": 27720, "directly capture": 15308, "art overall": 4322, "task involves": 55148, "jointly models": 27208, "cost data": 11578, "need additional": 36544, "number baselines": 37985, "tasks extracting": 55637, "limited amounts": 30564, "text words": 56846, "potential improve": 41393, "indigenous languages": 25553, "highly diverse": 23894, "challenges research": 8077, "distant languages": 15555, "areas like": 4154, "2017 proposed": 266, "hierarchical data": 23665, "data demonstrated": 12273, "technique allows": 56025, "allows learn": 2470, "hyperbolic embeddings": 24329, "hyperbolic space": 24330, "embeddings encode": 17127, "space makes": 51875, "inference question": 25687, "lstm does": 31259, "does help": 15950, "word interaction": 62219, "advances cross": 1908, "adaptation problem": 1535, "perform domain": 40091, "adaptation experiments": 1524, "novel state": 37927, "pairs including": 39196, "domains code": 16238, "challenging problems": 8130, "architecture trained": 4091, "learn structure": 29430, "datasets enable": 13245, "labeling models": 27788, "conduct systematic": 10065, "model comparison": 33677, "comparison analysis": 9489, "generation given": 22471, "user defined": 60408, "model producing": 34240, "model implement": 33973, "studied nlp": 53230, "goal determine": 22880, "dependent word": 14153, "consuming labor": 10447, "models resource": 35449, "collect data": 8940, "following research": 21269, "build single": 7427, "results robust": 47818, "knowledge world": 27651, "relation paths": 45991, "benefits modeling": 6585, "framework models": 21565, "fundamental building": 21776, "novel ensemble": 37816, "student knowledge": 53210, "knowledge gaps": 27489, "online educational": 38365, "achieved highest": 1243, "score evaluation": 48843, "metrics datasets": 33156, "model discuss": 33776, "mainly rely": 31477, "provide sufficient": 44137, "meaning work": 32022, "approaches enable": 3806, "strong generalization": 53030, "work open": 62741, "provide complementary": 44033, "words consequently": 62385, "trained task": 57891, "task agnostic": 54890, "agnostic data": 2088, "evaluated various": 18554, "evaluation benchmarks": 18583, "models benchmarks": 34767, "identify aspects": 24413, "multiple applications": 36167, "study aim": 53320, "aim develop": 2144, "clinical data": 8668, "curve auc": 12050, "auc score": 4921, "dataset obtained": 13016, "respectively model": 47373, "efficiently identify": 16916, "speech word": 52317, "work review": 62812, "approaches domain": 3801, "approaches including": 3847, "different amounts": 14835, "progress recent": 43113, "uses self": 60531, "semantics work": 49422, "path information": 39947, "coming different": 9134, "cross attention": 11807, "performance sequence": 40553, "source python": 51793, "industrial applications": 25616, "mit license": 33378, "license https": 30429, "speech used": 52315, "speech systems": 52297, "used cross": 60133, "apply different": 3326, "transformer models": 58498, "subword vocabulary": 53689, "models low": 35202, "analysis case": 2624, "study recent": 53451, "released code": 46171, "recommend future": 45563, "experiments consider": 19390, "variety datasets": 61266, "aid understanding": 2128, "provide practical": 44113, "make results": 31593, "gpu memory": 22995, "simple pre": 51203, "evaluated automatic": 18521, "linked entities": 30831, "simple sequence": 51210, "model significant": 34378, "document multi": 15814, "neighboring sentences": 36664, "marginal improvements": 31824, "decrease performance": 13669, "present multiple": 41952, "multiple sentence": 36280, "vectors context": 61483, "detailed overview": 14428, "overview various": 39120, "task open": 55248, "approaches time": 3941, "specific issues": 52091, "addition provide": 1640, "commonly applied": 9217, "assessing performance": 4587, "performance open": 40466, "automatically determining": 5162, "aspects like": 4544, "past research": 39934, "called code": 7541, "field text": 20771, "hindi code": 23936, "datasets social": 13435, "texts collected": 56863, "uses various": 60542, "algorithms identify": 2326, "task allows": 54899, "related semantic": 45933, "effects models": 16827, "concepts human": 9934, "usually manually": 61058, "rely external": 46280, "extra parameters": 19964, "linear unit": 30677, "english multi": 17846, "showed using": 50674, "similar word": 51076, "words distributed": 62400, "capture syntactic": 7714, "explicit supervision": 19626, "original test": 38730, "gaussian distribution": 22011, "examples sampled": 18930, "target distribution": 54809, "recognition specifically": 45537, "lower dimensional": 31210, "potential practical": 41403, "generated samples": 22314, "final goal": 20821, "study state": 53462, "layer used": 29211, "based low": 5824, "frequency distribution": 21671, "distribution tokens": 15653, "methods low": 32931, "entire model": 18025, "multilingual nmt": 36107, "learning furthermore": 29658, "shot inference": 50620, "despite increasing": 14370, "motivated work": 35875, "generation achieved": 22410, "current context": 11966, "fed decoder": 20704, "tuning process": 58950, "data performance": 12542, "effective parameter": 16683, "parameter choices": 39665, "performance main": 40429, "training demonstrate": 58059, "problem domain": 42544, "training deployment": 58060, "test effectiveness": 56345, "used regularization": 60289, "knowledge finally": 27483, "papers published": 39609, "results help": 47653, "human labelled": 24189, "step study": 52829, "ai based": 2115, "local contextual": 30933, "information global": 25896, "benchmarks verify": 6550, "deal large": 13517, "algorithms applied": 2320, "based annotation": 5566, "presents extension": 42084, "compared similar": 9451, "relations used": 46061, "relations natural": 46045, "method provide": 32628, "recognition approach": 45492, "effectively using": 16761, "information neural": 25991, "propose inject": 43417, "especially fine": 18276, "automatically learning": 5189, "entities multiple": 18067, "crucial aspect": 11895, "dialogue structure": 14786, "dialogue datasets": 14772, "generate meaningful": 22218, "specifically employ": 52197, "make training": 31605, "datasets human": 13292, "available internet": 5313, "mining tools": 33328, "model represent": 34308, "ability incorporate": 613, "additionally proposed": 1731, "general nlp": 22076, "learns tasks": 29976, "specific modules": 52114, "modules parameters": 35773, "multitask setting": 36326, "shot capabilities": 50602, "pointer generator": 41056, "task setting": 55366, "good data": 22931, "type semantic": 59068, "sequence paper": 49965, "simple variant": 51224, "tasks alleviate": 55497, "alleviate need": 2414, "human labor": 24190, "features methods": 20622, "methods utilize": 33097, "great results": 23215, "methods evaluating": 32846, "performance 96": 40175, "62 f1": 465, "73 accuracy": 495, "tagging pos": 54748, "13 f1": 124, "score ner": 48860, "linguistic unit": 30806, "models actually": 34688, "solution paper": 51657, "augmentation strategies": 4966, "thorough examination": 57060, "conducted evaluate": 10081, "comprehensive performance": 9796, "performance generalization": 40362, "ability proposed": 637, "approach helps": 3553, "baselines various": 6319, "various public": 61380, "addresses challenges": 1809, "data including": 12425, "model accuracy": 33499, "quality annotation": 44489, "annotation cost": 2939, "provide online": 44107, "maintaining high": 31492, "evaluate framework": 18459, "drastically reduces": 16395, "scores respectively": 48919, "representation embedding": 46506, "head attention": 23495, "mean pooling": 31992, "resulting state": 47477, "performances datasets": 40640, "modeling propose": 34615, "possible solution": 41336, "method adapt": 32369, "agent interaction": 2056, "topic classifier": 57396, "decoding strategy": 13649, "visual semantic": 61670, "adversarial attack": 1964, "limitation current": 30535, "model establish": 33834, "textual semantics": 56979, "visual concepts": 61651, "adversarial samples": 1986, "noticeable improvement": 37728, "set downstream": 50142, "answering knowledge": 3077, "increasing use": 25466, "generation large": 22483, "resulting improved": 47466, "settings text": 50400, "medical domains": 32203, "datasets researchers": 13405, "framework develop": 21493, "paper compares": 39292, "vectors compared": 61482, "context automatic": 10587, "systems aim": 54428, "systematic errors": 54395, "errors machine": 18244, "empirical observations": 17335, "domain english": 16057, "tasks additionally": 55493, "providing better": 44238, "performance far": 40344, "languages study": 28796, "conduct qualitative": 10058, "ai nlp": 2120, "systems models": 54563, "trained translate": 57904, "based comparison": 5627, "nlp practitioners": 37511, "developed various": 14641, "various research": 61386, "representation format": 46521, "typologically diverse": 59168, "lingual settings": 30728, "settings finally": 50374, "auxiliary losses": 5234, "results sequence": 47827, "speed performance": 52324, "additional prediction": 1694, "forces model": 21289, "process training": 42835, "aims capture": 2180, "lack human": 27893, "labeled resources": 27760, "information existing": 25845, "terms coverage": 56281, "approach adapts": 3408, "discrete nature": 15425, "suggest approach": 53813, "models deal": 34885, "discrete space": 15428, "space allows": 51849, "performance widely": 40629, "model information": 34003, "learn make": 29395, "training cost": 57963, "sentences generated": 49728, "overfitting problem": 39083, "problem caused": 42516, "seq2seq framework": 49897, "correction model": 11484, "model correct": 33724, "correct sentence": 11475, "annotation dataset": 2942, "set respectively": 50240, "evaluate multiple": 18476, "positive instances": 41283, "instances learn": 26435, "set constructed": 50127, "understand interpret": 59300, "representations explore": 46663, "explore best": 19689, "use pretrained": 59979, "pretrained representations": 42180, "using technique": 60981, "sentiment score": 49857, "score prediction": 48866, "sentiment scores": 49858, "unimodal multimodal": 59499, "models sentiment": 35481, "methods fail": 32861, "kinds information": 27371, "capturing semantics": 7744, "language key": 28124, "dataset train": 13120, "primary secondary": 42372, "recent large": 45316, "analysis different": 2651, "classifiers applied": 8611, "results experiment": 47624, "models fundamental": 35046, "decision trees": 13571, "objective measures": 38094, "forest model": 21302, "model according": 33497, "resource indian": 47229, "types word": 59126, "efforts develop": 16936, "according defined": 856, "systems automatic": 54436, "correlate poorly": 11506, "poorly human": 41150, "model improvements": 33981, "expensive paper": 19214, "evaluation practice": 18676, "accurately reflect": 1100, "content using": 10568, "movie scripts": 35897, "extensive training": 19915, "address present": 1782, "individual concepts": 25564, "time proposed": 57201, "faster train": 20442, "advantages compared": 1949, "designing neural": 14340, "users using": 60487, "conducted real": 10091, "approach effectively": 3501, "digital age": 15208, "form news": 21331, "linking relation": 30836, "digital assistants": 15209, "art research": 4367, "challenges future": 8050, "acoustic information": 1435, "novel generative": 37830, "languages cross": 28626, "efficient simple": 16900, "different problems": 15037, "make effective": 31568, "improvements experimental": 25074, "final evaluation": 20820, "work existing": 62653, "datasets new": 13345, "dataset release": 13060, "information effectively": 25825, "downstream use": 16372, "maximization em": 31955, "improve detection": 24842, "lstms model": 31291, "introduce supervised": 26866, "difficulty levels": 15201, "text fragment": 56585, "generated framework": 22289, "framework better": 21462, "labels hierarchical": 27831, "level input": 30134, "high complexity": 23711, "complex deep": 9621, "certain aspects": 7934, "learned various": 29489, "model argue": 33579, "does correlate": 15940, "classifier predicts": 8603, "tasks inherently": 55689, "achieve superior": 1210, "despite successes": 14397, "simple tasks": 51217, "observed training": 38150, "self attentive": 49191, "tasks contrast": 55559, "standard transformer": 52538, "en dataset": 17413, "novel annotation": 37754, "build classifiers": 7390, "human authored": 24108, "scenario data": 48684, "truth labels": 58839, "visual language": 61659, "highly efficient": 23898, "manually compiled": 31767, "combines state": 9101, "usually employ": 61046, "linear transformations": 30676, "shared space": 50489, "using bilingual": 60596, "unsupervised techniques": 59741, "techniques sentence": 56134, "task languages": 55162, "new intrinsic": 37229, "achieve average": 1111, "new entity": 37189, "use new": 59963, "existing benchmarks": 19042, "using multitask": 60823, "head word": 23500, "train multi": 57613, "task hierarchical": 55116, "learning efficiently": 29613, "datasets containing": 13198, "hierarchy aware": 23703, "text collections": 56496, "40 languages": 396, "domain adversarial": 16014, "avoid overfitting": 5433, "training domain": 58071, "languages case": 28612, "languages monolingual": 28732, "pretrained multilingual": 42172, "layers encoder": 29223, "model empirically": 33811, "using pseudo": 60882, "results trained": 47887, "effectively leveraging": 16749, "core nlp": 11153, "open corpus": 38415, "faced task": 20251, "study participants": 53428, "contextual dependencies": 10761, "contextual model": 10776, "contextual models": 10777, "independent models": 25502, "learning auxiliary": 29528, "deep encoder": 13690, "work observe": 62738, "observe performance": 38139, "ctc model": 11928, "intermediate layer": 26674, "performance lower": 40425, "lower resource": 31222, "improves standard": 25162, "multitask training": 36327, "experiments low": 19458, "training works": 58318, "obtained combining": 38205, "learning pretraining": 29816, "pretraining improves": 42205, "analysis reveal": 2744, "10 improvement": 44, "set messages": 50191, "systems incorporating": 54531, "common data": 9171, "early stage": 16514, "texts automatically": 56860, "corpus collected": 11294, "data representations": 12603, "training stages": 58269, "knowledge unlabeled": 27639, "requiring training": 46966, "corpora achieve": 11171, "domain adaption": 16009, "increases performance": 25438, "score gain": 48847, "metric model": 33121, "task building": 54943, "verb noun": 61509, "does yield": 15984, "score propose": 48868, "time based": 57122, "apply pre": 3345, "information including": 25918, "used help": 60203, "embeddings requires": 17204, "requires fine": 46930, "method word": 32708, "use fine": 59890, "grained typing": 23047, "build datasets": 7394, "datasets large": 13312, "grained classes": 23025, "sentence context": 49534, "draw conclusions": 16400, "cloud based": 8720, "main techniques": 31463, "models minimal": 35226, "performance impact": 40379, "generalization novel": 22124, "requiring models": 46964, "settings model": 50383, "analysis large": 2688, "scale social": 48624, "emotional status": 17300, "analysis textual": 2779, "analysis studied": 2768, "sentiment classifier": 49836, "embeddings attention": 17085, "aware embeddings": 5449, "scanned documents": 48654, "recurrent encoder": 45613, "corresponding text": 11559, "focal loss": 21140, "improvement standard": 25025, "standard cross": 52479, "imbalance problem": 24564, "capacity model": 7637, "joint distribution": 27166, "version original": 61554, "recently used": 45472, "novel document": 37806, "document context": 15778, "title abstract": 57269, "using generative": 60705, "generate document": 22195, "user behavior": 60405, "standard seq2seq": 52526, "scale propose": 48617, "models scale": 35469, "provide comparison": 44031, "input trained": 26351, "produce different": 42978, "different rates": 15043, "factors influence": 20310, "level labeled": 30142, "decoder modules": 13602, "second apply": 48995, "significantly compared": 50948, "high impact": 23741, "especially non": 18292, "english speaking": 17882, "150 000": 156, "complexity lexical": 9680, "studies mainly": 53279, "quantitative metrics": 44622, "automatically evaluate": 5166, "evaluate metrics": 18471, "depth analyses": 14183, "theory mind": 57037, "experiments testing": 19544, "framework provide": 21588, "set test": 50261, "set allows": 50106, "does suffer": 15980, "perform empirical": 40095, "data substantially": 12706, "models fixed": 35036, "solving complex": 51701, "modelling language": 34643, "number common": 37990, "constructed based": 10406, "design model": 14289, "algorithms developed": 2324, "developed years": 14643, "datasets report": 13400, "evaluation study": 18730, "establishing new": 18365, "optimized training": 38569, "making useful": 31674, "computational research": 9857, "annotations word": 3007, "level speech": 30215, "resource tasks": 47282, "experiments named": 19475, "computational techniques": 9866, "techniques identify": 56094, "improve nlp": 24879, "recognition techniques": 45544, "techniques recent": 56129, "types human": 59091, "using quantitative": 60888, "quantitative measures": 44620, "metrics demonstrate": 33157, "data sequence": 12641, "fundamental problems": 21788, "framework improve": 21539, "information additionally": 25756, "survey provides": 54215, "applications computational": 3189, "recommendations future": 45568, "scientific disciplines": 48757, "makes easier": 31620, "science domain": 48746, "research understanding": 47137, "task new": 55239, "forward pass": 21408, "unstructured information": 59669, "important topics": 24787, "specific fine": 52085, "effective architecture": 16631, "addition dataset": 1606, "approach benchmark": 3429, "increases size": 25440, "cause significant": 7888, "quality trade": 44589, "space proposed": 51887, "function encourages": 21753, "generate words": 22264, "best candidates": 6754, "french data": 21659, "sets proposed": 50303, "improvements standard": 25099, "datasets especially": 13250, "dataset freely": 12936, "youtube comments": 63148, "creation process": 11750, "content data": 10517, "evaluate dataset": 18450, "public attitudes": 44304, "work define": 62623, "provide evaluation": 44062, "evaluation benchmark": 18582, "compared number": 9427, "systems able": 54419, "domain ood": 16124, "neural sentence": 37094, "dimensional continuous": 15226, "set unlabeled": 50272, "embedding used": 17068, "sentence detection": 49540, "text pairs": 56686, "task participants": 55268, "use combination": 59843, "weighted f1": 61928, "f1 measures": 20187, "task ranked": 55316, "ranked 2nd": 44953, "biases training": 7060, "bias models": 7035, "models view": 35673, "mitigate bias": 33380, "trained pre": 57840, "research human": 47049, "modeling techniques": 34630, "varies different": 61256, "task demands": 54999, "input feature": 26278, "wide application": 61958, "learning powerful": 29809, "approaches developed": 3797, "hindi telugu": 23943, "speakers languages": 52007, "class distribution": 8400, "distribution different": 15636, "size vector": 51401, "useful building": 60357, "building nlp": 7462, "systems including": 54529, "auto encoding": 5018, "parameters improve": 39702, "accuracy finally": 978, "benchmark suite": 6496, "look like": 31065, "data according": 12109, "different inputs": 14957, "related events": 45906, "information possible": 26011, "dependencies using": 14113, "residual connection": 47186, "way construct": 61798, "capture temporal": 7717, "information shared": 26082, "number layers": 38016, "future information": 21877, "term information": 56239, "corpus compared": 11297, "facebook posts": 20246, "health informatics": 23515, "processing long": 42884, "conversations recent": 11062, "improve recognition": 24916, "model explicitly": 33857, "uses context": 60498, "information end": 25833, "manner evaluate": 31714, "corpus outperforms": 11397, "role specific": 48323, "method recent": 32634, "datasets release": 13398, "domain limited": 16104, "effectively utilize": 16762, "utilize existing": 61091, "source domains": 51768, "adaptation paper": 1532, "procedure model": 42743, "tuned small": 58885, "related domain": 45900, "effective representation": 16689, "text critical": 56518, "important understand": 24788, "multi channel": 35942, "representation experimental": 46512, "dataset low": 12985, "generally improves": 22167, "studied context": 53220, "body research": 7240, "variety topics": 61294, "topics including": 57451, "available use": 5388, "different modeling": 14995, "propose answer": 43294, "answer open": 3041, "information bottleneck": 25770, "like semantic": 30500, "languages complex": 28620, "surpasses state": 54176, "embeddings widely": 17244, "gap propose": 21976, "model adopts": 33546, "results cross": 47563, "novel hierarchical": 37837, "achieves 94": 1297, "dataset furthermore": 12938, "vinyals et": 61612, "alzheimer disease": 2517, "introduce multilingual": 26827, "task conversational": 54979, "transfer methods": 58404, "new multilingual": 37264, "experiments dataset": 19397, "methods practical": 32984, "evaluated benchmark": 18523, "dutch spanish": 16479, "corpus german": 11351, "inputs model": 26364, "model quickly": 34267, "generation address": 22412, "kullback leibler": 27679, "leibler divergence": 30013, "output probabilities": 38993, "comment generation": 9140, "users work": 60488, "work construct": 62610, "generate human": 22211, "baselines study": 6304, "general multi": 22071, "tasks extensive": 55635, "benefit tasks": 6569, "learned jointly": 29461, "test suite": 56384, "limit performance": 30533, "automated metrics": 5054, "test suites": 56385, "effectiveness multilingual": 16798, "multilingual settings": 36120, "semi markov": 49451, "markov conditional": 31843, "examples generated": 18906, "probing tasks": 42494, "shows improved": 50784, "individual tasks": 25582, "processing neural": 42896, "perform remarkably": 40134, "use inter": 59914, "comparably better": 9316, "terms efficiency": 56284, "aware information": 5452, "relationship different": 46067, "precision score": 41619, "work primarily": 62768, "language invariant": 28123, "study cross": 53351, "language adversarial": 27955, "training cross": 57966, "agnostic representations": 2097, "tasks experiment": 55628, "training consistently": 57957, "trained baseline": 57680, "compare multiple": 9350, "baselines addition": 6231, "boosts performance": 7264, "nlp recent": 37517, "represent state": 46481, "structure semantics": 53134, "benchmark automatic": 6426, "evaluation recent": 18694, "consistency generated": 10266, "texts input": 56892, "data guide": 12397, "guide training": 23342, "measure consistency": 32047, "humans reason": 24287, "commonsense inference": 9233, "commonsense reasoning": 9239, "reasoning present": 45215, "annotation artifacts": 2935, "novel procedure": 37899, "models struggle": 35540, "solving various": 51709, "used technique": 60325, "processing speech": 42940, "context nlp": 10681, "nlp specifically": 37526, "word lexicon": 62239, "produce similar": 43009, "methods addition": 32738, "view problem": 61601, "problem weakly": 42686, "novel soft": 37924, "problem experiments": 42559, "model beats": 33610, "set outperforms": 50210, "set present": 50225, "learning graph": 29666, "dense space": 14082, "distance measures": 15547, "information graph": 25898, "results outperforming": 47754, "outperforming strong": 38862, "embedding baselines": 17019, "model computationally": 33689, "yields consistent": 63122, "news story": 37418, "important difficult": 24718, "seed set": 49044, "graphs model": 23189, "task enables": 55044, "content high": 10528, "accuracy multiple": 1011, "time study": 57227, "range text": 44939, "text entity": 56558, "entity graph": 18108, "techniques automatically": 56064, "form graph": 21321, "captured word": 7726, "abstract level": 759, "measuring word": 32090, "model limited": 34065, "investigate importance": 26961, "presented results": 42062, "based test": 6089, "best neural": 6785, "points compared": 41071, "small high": 51475, "competitive traditional": 9569, "data applied": 12140, "challenges existing": 8046, "models hand": 35074, "datasets multiple": 13340, "aware context": 5444, "agnostic models": 2095, "auxiliary classifier": 5229, "approaches finally": 3826, "translation possible": 58658, "com google": 9012, "information task": 26113, "task event": 55059, "dataset english": 12909, "dataset make": 12989, "systems showing": 54630, "human agents": 24091, "poses great": 41247, "building universal": 7476, "framework specifically": 21603, "annotations target": 3002, "dialog data": 14753, "train state": 57638, "knowledge student": 27622, "italian german": 27110, "achieve promising": 1180, "task measuring": 55210, "understanding recent": 59390, "different perspectives": 15025, "aggregation module": 2081, "problem insufficient": 42585, "previous strong": 42285, "dataset date": 12881, "addition observe": 1629, "principal components": 42382, "representations extensive": 46664, "learning adversarial": 29506, "demographic information": 13856, "neural classifiers": 36944, "trained textual": 57895, "accuracy training": 1065, "post hoc": 41348, "substantially higher": 53635, "improve effectiveness": 24847, "training achieve": 57923, "invariant representation": 26920, "widely applicable": 61992, "input graph": 26284, "hierarchical reinforcement": 23686, "different sizes": 15070, "sampling strategies": 48507, "models applied": 34718, "capable recognizing": 7630, "task setup": 55368, "different auxiliary": 14848, "provide dataset": 44045, "identifying speaker": 24468, "mutually exclusive": 36353, "experiments automatic": 19358, "based adaptive": 5558, "used collect": 60116, "information ii": 25909, "copying words": 11139, "baselines finally": 6261, "model yielded": 34550, "distribution based": 15633, "wasserstein distance": 61781, "closed form": 8698, "evaluated paper": 18541, "gather information": 22001, "questions introduce": 44793, "conversational question": 11048, "systems dataset": 54469, "present existing": 41908, "obtains f1": 38248, "score 65": 48801, "ample room": 2566, "growth number": 23308, "information necessary": 25985, "aims automatically": 2175, "online comments": 38353, "dataset quality": 13051, "multi target": 36014, "outperform various": 38832, "various baselines": 61308, "accuracy benchmark": 938, "domain wikipedia": 16227, "web corpus": 61883, "applications knowledge": 3215, "propose semantic": 43617, "designed handle": 14318, "multiple domain": 36203, "context windows": 10745, "features predicting": 20645, "predicting sentence": 41681, "accuracy standard": 1051, "tasks designed": 55583, "generalization paper": 22125, "external resource": 19951, "effect word": 16624, "task downstream": 55035, "model encoder": 33818, "attention different": 4737, "types words": 59128, "words function": 62422, "called self": 7552, "model attend": 33584, "quality work": 44600, "augmentation text": 4970, "based tasks": 6084, "design data": 14271, "existing augmentation": 19035, "extremely simple": 20166, "simple data": 51145, "augmentation strategy": 4967, "sentence target": 49654, "different scales": 15057, "implement method": 24635, "convolutional models": 11106, "text vision": 56840, "rnn encoders": 48193, "gains bleu": 21934, "image generation": 24537, "focus general": 21166, "need generate": 36568, "need consider": 36551, "generation fully": 22466, "training mechanism": 58171, "baselines proposed": 6289, "neural parser": 37084, "generates candidate": 22339, "utterances using": 61153, "achieving results": 1421, "despite current": 14357, "promising performances": 43173, "vulnerable adversarial": 61754, "paper tackles": 39592, "leveraging knowledge": 30328, "aims transfer": 2218, "applied answer": 3263, "used annotate": 60088, "annotate corpus": 2870, "variety data": 61265, "twitter api": 59031, "collection annotation": 8979, "annotation efforts": 2946, "arabic paper": 4004, "large manually": 28902, "various social": 61392, "media sources": 32182, "popular methods": 41171, "learn contextual": 29352, "outperforms popular": 38917, "compared training": 9467, "model coupled": 33727, "graph enhanced": 23134, "tasks state": 55909, "results analyze": 47500, "different sentiment": 15065, "syntactically complex": 54339, "random seeds": 44888, "distillation model": 15575, "using contextualized": 60622, "elmo embeddings": 16996, "yields significantly": 63130, "sentiment labels": 49850, "model optimization": 34147, "proximal policy": 44261, "policy optimization": 41101, "models introduced": 35144, "problems like": 42709, "mainly based": 31467, "known suffer": 27667, "latent distribution": 29122, "learning learning": 29704, "corresponding target": 11558, "unseen data": 59644, "data pairs": 12528, "robustness experiments": 48280, "tasks arabic": 55505, "trained training": 57896, "added training": 1591, "labels predicted": 27844, "classifier training": 8607, "data detect": 12276, "data outperforms": 12526, "dataset german": 12944, "annotated examples": 2895, "indirect supervision": 25555, "emerged promising": 17263, "language represent": 28467, "knowledge relations": 27588, "supervision using": 54100, "using variational": 61014, "rich domain": 48097, "approach propose": 3656, "models enables": 34960, "universal model": 59542, "adaptation approach": 1519, "changes model": 8179, "input generates": 26282, "parameters encoder": 39692, "remains unchanged": 46350, "enables use": 17450, "perform zero": 40162, "enhance understanding": 17927, "used express": 60181, "paper combine": 39290, "capture dependencies": 7661, "attention time": 4835, "generated context": 22279, "embeddings effectively": 17120, "propose apply": 43295, "better cross": 6871, "experiments confirm": 19389, "models monolingual": 35233, "independently trained": 25509, "address shortcoming": 1799, "shortcoming propose": 50577, "approaches experiments": 3818, "addition model": 1626, "recurrent architectures": 45608, "improves ability": 25112, "capturing long": 7740, "semantic feature": 49277, "results self": 47820, "cnns outperform": 8779, "representations derived": 46637, "derived pre": 14202, "trained bidirectional": 57684, "lstm cnn": 31255, "properties representations": 43270, "contextual representations": 10779, "representations outperform": 46731, "network depth": 36730, "perplexity ppl": 40740, "sentences demonstrate": 49703, "method aims": 32375, "data synthetic": 12716, "translating sentences": 58568, "randomly sampled": 44903, "prediction loss": 41716, "different parameter": 15019, "unrelated languages": 59633, "sharing parameters": 50518, "conventional wisdom": 11018, "features novel": 20632, "present ablation": 41840, "representation state": 46585, "disfluency detection": 15506, "features representations": 20657, "model automatic": 33590, "layer capture": 29182, "result task": 47454, "textual structural": 56984, "dataset 50": 12793, "capture various": 7721, "progress neural": 43107, "architectures models": 4118, "lack explicit": 27888, "generation stage": 22550, "results 10": 47480, "points higher": 41075, "approach trained": 3724, "larger dataset": 29073, "par best": 39611, "able reason": 718, "models memory": 35222, "keeping track": 27280, "models accuracy": 34663, "generating relevant": 22391, "introducing additional": 26898, "work needed": 62733, "query terms": 44677, "complex queries": 9650, "performances using": 40650, "structures like": 53188, "constituency dependency": 10347, "structural representation": 53082, "model naturally": 34119, "structures experiments": 53184, "result model": 47440, "binary trees": 7157, "models bleu": 34791, "language generated": 28082, "encode different": 17460, "corpus resource": 11421, "goal generate": 22886, "generate accurate": 22174, "novel lightweight": 37856, "task focuses": 55093, "models german": 35062, "gains strong": 21945, "strong transformer": 53056, "models social": 35517, "tasks effectively": 55601, "effectively integrates": 16745, "tasks spanning": 55901, "generalization new": 22123, "using linguistically": 60773, "gain performance": 21912, "effectiveness approaches": 16768, "recognition languages": 45511, "hierarchical nature": 23682, "vector quantization": 61459, "continuous embeddings": 10846, "method standard": 32666, "tasks allowing": 55498, "achieve substantially": 1209, "perplexity scores": 40741, "learn patterns": 29407, "languages models": 28731, "generation including": 22476, "quite successful": 44833, "multiple references": 36273, "importantly propose": 24797, "greedy search": 23244, "methods address": 32739, "identifying classifying": 24455, "tasks develop": 55587, "develop unified": 14621, "scientific information": 48762, "span representations": 51930, "received considerable": 45256, "propose incorporate": 43415, "alignment framework": 2368, "network embedding": 36735, "results downstream": 47599, "based encoders": 5700, "parsing trees": 39803, "study effectiveness": 53365, "effectiveness different": 16776, "gives better": 22805, "words closer": 62379, "additional experiments": 1666, "design effective": 14275, "crucial information": 11902, "text modality": 56666, "specifically leverage": 52213, "outperforms text": 38954, "model baselines": 33609, "tasks classification": 55538, "framework building": 21465, "building unsupervised": 7477, "unsupervised representations": 59726, "methods code": 32782, "investigate effects": 26956, "introduced task": 26889, "negative transfer": 36639, "considerable improvements": 10232, "shows consistent": 50771, "data novel": 12516, "time provide": 57202, "respect previous": 47350, "based individual": 5781, "information spread": 26103, "problem graph": 42576, "nodes graph": 37592, "graph edges": 23129, "questions requiring": 44807, "challenging introduce": 8103, "graph kg": 23143, "text represented": 56739, "multiple valid": 36308, "problem goal": 42575, "setting different": 50320, "reach goal": 45046, "learning reinforcement": 29836, "new effective": 37178, "drop performance": 16442, "accuracy original": 1018, "label bias": 27694, "model improvement": 33980, "tasks goal": 55659, "generation output": 22512, "time algorithm": 57114, "algorithm significantly": 2302, "success text": 53726, "models largely": 35168, "generation using": 22577, "using hidden": 60726, "decoder learns": 13599, "interpretable controllable": 26719, "achieves strong": 1380, "systems exhibit": 54493, "exhibit significant": 19004, "results non": 47744, "sentence dependencies": 49539, "results wide": 47907, "models integrating": 35138, "knowledge different": 27437, "knowledge driven": 27452, "knowledge explicitly": 27474, "neural module": 36984, "greatly improve": 23229, "generalization abilities": 22113, "annotation methodology": 2955, "multiple forms": 36218, "introduce syntactic": 26867, "improve strong": 24929, "dataset crowd": 12876, "generation perform": 22516, "generation multi": 22501, "generation proposed": 22533, "inner workings": 26245, "negative polarity": 36629, "evaluate extent": 18458, "extent neural": 19925, "model finds": 33890, "emotion classification": 17288, "model additional": 33540, "model analysis": 33563, "addition discuss": 1610, "embeddings target": 17223, "models leads": 35173, "leads faster": 29311, "training better": 57947, "quality given": 44527, "propose structure": 43649, "model generalized": 33926, "allows learning": 2471, "allows effective": 2460, "better leverage": 6910, "leverage prior": 30285, "english finnish": 17804, "method strong": 32669, "baselines trained": 6313, "answers multiple": 3110, "data allow": 12129, "extract keywords": 19981, "outside scope": 39026, "exceeds performance": 18949, "way allows": 61792, "potential biases": 41385, "based transformer": 6106, "focus improving": 21171, "model rnn": 34328, "demonstrates state": 14042, "strong indicator": 53033, "informal texts": 25744, "autoregressive models": 5222, "accuracy drop": 964, "sentence better": 49521, "including transformer": 25313, "estimation qe": 18386, "effectively encode": 16731, "encode local": 17467, "global contextual": 22825, "information target": 26112, "languages second": 28778, "making predictions": 31663, "model submitted": 34421, "results ranking": 47797, "different embeddings": 14913, "understanding key": 59355, "local optima": 30947, "models shared": 35492, "filtering noisy": 20813, "data sentence": 12640, "predicate object": 41631, "detection based": 14461, "apply multiple": 3339, "methods generalize": 32875, "results visual": 47906, "combined approach": 9077, "achieves superior": 1384, "quickly learn": 44823, "document sets": 15832, "entities various": 18091, "application machine": 3166, "sense text": 49489, "language recently": 28463, "main findings": 31440, "findings study": 20916, "vary widely": 61424, "paper time": 39596, "core task": 11156, "input utterance": 26357, "different benchmark": 14850, "conventional method": 11006, "predict future": 41641, "text length": 56648, "content recent": 10551, "works neural": 62899, "conversation history": 11033, "learning proven": 29826, "resource conditions": 47211, "baseline trained": 6218, "targeting different": 54862, "significantly advances": 50934, "modern machine": 35710, "noise robust": 37603, "noisy inputs": 37620, "propose benchmark": 43311, "noisy text": 37625, "types noise": 59108, "methods tailored": 33066, "text mt": 56670, "cs cmu": 11920, "cmu edu": 8755, "learning jointly": 29689, "entropy minimization": 18164, "target data": 54805, "approach better": 3433, "leverage unlabeled": 30295, "domain achieve": 15993, "various experimental": 61339, "experimental settings": 19325, "news datasets": 37399, "simple implement": 51180, "recently growing": 45432, "growing developing": 23295, "human agent": 24090, "work topic": 62844, "training paper": 58201, "method increases": 32542, "sense reasoning": 49488, "hypothesis model": 24345, "allowing direct": 2445, "space recent": 51890, "annotate large": 2873, "representations results": 46749, "embeddings significantly": 17216, "deep nlp": 13741, "structures data": 53182, "using shelf": 60936, "latent structures": 29140, "approach end": 3511, "medical records": 32208, "leveraging existing": 30322, "resulting corpus": 47462, "learning potential": 29808, "training baseline": 57942, "form question": 21333, "derive new": 14198, "dialogue response": 14782, "addition demonstrate": 1607, "dataset multimodal": 13003, "capturing temporal": 7746, "test bed": 56333, "benchmark evaluating": 6465, "systems data": 54468, "idea method": 24370, "experiments various": 19556, "generalizes new": 22156, "dataset audio": 12816, "second experiment": 49005, "labels approach": 27809, "tend generate": 56199, "incorporating information": 25387, "generated responses": 22311, "help generate": 23565, "obtaining high": 38234, "high correlation": 23719, "applicability approach": 3153, "model update": 34505, "different families": 14931, "dataset framework": 12935, "entire sentence": 18027, "prediction framework": 41710, "energy based": 17748, "model adopt": 33545, "approaches generally": 3833, "lattice based": 29164, "task pre": 55279, "layer learns": 29188, "learns high": 29960, "achieve macro": 1169, "world question": 62954, "questions long": 44795, "performance rule": 40544, "rely information": 46289, "based queries": 5968, "propose reinforcement": 43601, "model framework": 33908, "framework able": 21446, "approach recent": 3667, "occurrence graph": 38273, "graph present": 23156, "increase difficulty": 25411, "problem address": 42498, "information especially": 25839, "especially suitable": 18303, "aware attention": 5441, "slot type": 51443, "inter dependencies": 26580, "training step": 58272, "important questions": 24757, "classification objective": 8510, "finally experiments": 20857, "showing effectiveness": 50678, "decoding neural": 13635, "systems requires": 54622, "sequential encoder": 50040, "models method": 35223, "translation addition": 58575, "model sequential": 34362, "generated models": 22300, "english resource": 17864, "integrate multiple": 26507, "multiple pieces": 36261, "correctly answer": 11489, "information encoding": 25832, "information rich": 26071, "graph neural": 23152, "networks graph": 36865, "information leads": 25948, "following recent": 21268, "semantics syntax": 49416, "addition explore": 1616, "unsupervised systems": 59738, "supervised ones": 54030, "ones propose": 38343, "owing lack": 39126, "trained languages": 57763, "languages written": 28826, "currently exists": 12036, "models shows": 35504, "sub words": 53540, "capture patterns": 7700, "embedding layers": 17034, "model convolutional": 33720, "diverse target": 15720, "evidence model": 18815, "useful features": 60364, "stacked lstm": 52421, "analysis understand": 2786, "approach existing": 3524, "information derived": 25806, "inference compared": 25645, "data pre": 12551, "input proposed": 26322, "capture common": 7652, "structure languages": 53113, "languages evaluate": 28659, "model semantics": 34351, "using dense": 60649, "knowledge novel": 27561, "models smaller": 35516, "features additional": 20519, "generation dataset": 22442, "dataset outperforming": 13021, "systems long": 54552, "study focuses": 53381, "demographic groups": 13855, "enable new": 17427, "structural constraints": 53076, "model understanding": 34500, "human loop": 24203, "generation human": 22473, "models aid": 34702, "models adversarial": 34699, "hop reasoning": 24004, "set result": 50241, "models naturally": 35246, "manner propose": 31724, "models effectiveness": 34948, "method mitigating": 32577, "problems training": 42735, "information learned": 25951, "learned model": 29467, "indicates model": 25540, "strategy named": 52944, "basic building": 6327, "middle layers": 33237, "time achieve": 57113, "best option": 6786, "content related": 10552, "provide explicit": 44069, "models having": 35079, "architecture paper": 4074, "instances training": 26437, "help alleviate": 23552, "structure dataset": 53096, "limitations paper": 30553, "adapted task": 1555, "systems open": 54574, "based f1": 5725, "score 58": 48796, "performance unseen": 40613, "theoretical linguistics": 57023, "grows exponentially": 23306, "work instead": 62690, "results depth": 47587, "significantly effective": 50954, "model parsing": 34181, "chinese german": 8308, "technique able": 56023, "competitively state": 9571, "usually trained": 61071, "loss using": 31108, "using teacher": 60980, "teacher forcing": 55990, "level training": 30225, "mitigate problems": 33391, "addition method": 1625, "improved quality": 24962, "present generic": 41922, "directions english": 15289, "performance remains": 40532, "based crf": 5658, "score achieved": 48833, "obtain embeddings": 38171, "reflect semantic": 45776, "approaches achieve": 3752, "remarkable success": 46362, "main reason": 31455, "reason lack": 45168, "external commonsense": 19928, "incorporating commonsense": 25379, "media post": 32179, "incorporating context": 25381, "topic extraction": 57405, "message level": 32321, "content information": 10530, "model outputs": 34169, "representations discourse": 46641, "topics present": 57458, "information dataset": 25800, "dataset comprised": 12854, "used query": 60282, "dataset main": 12988, "attention values": 4847, "dataset source": 13094, "current automatic": 11961, "written sentences": 63009, "written references": 63008, "advantage model": 1943, "model utilize": 34517, "dependency words": 14144, "methods applicable": 32750, "adversarial loss": 1974, "languages tasks": 28801, "method zero": 32712, "techniques deep": 56073, "generation language": 22482, "model baseline": 33608, "generates coherent": 22340, "additional contextual": 1660, "23 languages": 323, "adopted nlp": 1872, "applications existing": 3205, "methods result": 33022, "paper overcome": 39436, "graph convolution": 23120, "framework incorporating": 21545, "make source": 31598, "available encourage": 5286, "encourage reproducible": 17598, "task learns": 55184, "simultaneously specifically": 51276, "specifically develop": 52195, "consists neural": 10325, "information increasing": 25921, "data improving": 12422, "training experimental": 58099, "absolute improvements": 746, "practical scenarios": 41470, "using audio": 60569, "92 f1": 559, "lower performance": 31219, "controllable generation": 10977, "tasks unsupervised": 55951, "shelf language": 50536, "framework text": 21613, "closer look": 8711, "dataset requires": 13064, "fields computer": 20776, "progress machine": 43103, "train multilingual": 57614, "mt nmt": 35922, "performing zero": 40694, "amazon reviews": 2523, "shot classification": 50604, "understand underlying": 59315, "shared vocabulary": 50510, "data type": 12747, "encoder representation": 17536, "push forward": 44424, "present task": 42036, "label model": 27714, "features help": 20594, "ai agents": 2112, "evaluation protocol": 18686, "model asked": 33580, "helpful improving": 23596, "news websites": 37426, "train automatic": 57564, "based retrieval": 5998, "topics topic": 57462, "topic representation": 57427, "obtained neural": 38216, "model news": 34130, "tokens text": 57338, "task distinguishing": 55025, "model capturing": 33649, "idf features": 24477, "work indicates": 62686, "vs non": 61748, "nlp technologies": 37554, "text classifiers": 56492, "identify problems": 24438, "develop deep": 14581, "annotated manually": 2903, "testing set": 56411, "classifiers used": 8627, "learning target": 29903, "based pre": 5941, "score 85": 48820, "relied hand": 46260, "features provide": 20652, "strong inductive": 53034, "structure task": 53139, "language new": 28357, "artificial training": 4500, "sense aware": 49482, "instead focus": 26451, "challenging multi": 8114, "requires model": 46943, "information context": 25792, "context generate": 10645, "requires understanding": 46956, "present strong": 42026, "strong generative": 53031, "multi attention": 35941, "perform multiple": 40123, "performs substantially": 40718, "art span": 4410, "models introduce": 35142, "based scoring": 6007, "reasoning dataset": 45191, "fact checking": 20288, "model evidence": 33846, "presents neural": 42092, "datasets ablation": 13139, "method despite": 32457, "analysis properties": 2727, "learn universal": 29442, "reconstruct input": 45577, "hidden vectors": 23651, "furthermore compared": 21809, "traditional recurrent": 57540, "depth error": 14185, "parameter initialization": 39671, "neural classification": 36942, "applied existing": 3273, "representations pre": 46737, "models elmo": 34951, "results set": 47828, "task participated": 55269, "trained transformer": 57897, "transformer architecture": 58449, "architecture using": 4097, "large quantity": 28950, "generated new": 22304, "incremental training": 25484, "simple combination": 51142, "language leveraging": 28137, "target corpora": 54803, "interpretable representations": 26729, "highlight differences": 23861, "information models": 25977, "generating response": 22392, "fluent responses": 21132, "clause based": 8638, "based type": 6115, "contexts propose": 10754, "modeling context": 34567, "clause level": 8639, "systems consider": 54457, "style evaluation": 53483, "grained evaluation": 23034, "initial step": 26219, "using public": 60883, "public benchmark": 44306, "datasets suggest": 13446, "affect performance": 2017, "training improves": 58127, "benefit training": 6570, "language enables": 28047, "performance limited": 40419, "data alleviate": 12128, "propose exploit": 43379, "present input": 41930, "baseline outperforms": 6198, "extraction existing": 20064, "improve generation": 24861, "generation used": 22575, "generation performance": 22517, "human interactions": 24174, "inference approaches": 25643, "tasks biomedical": 55528, "showed promising": 50669, "scarce resources": 48659, "dataset covers": 12870, "major obstacles": 31518, "results address": 47493, "address lack": 1775, "data entity": 12326, "models reduce": 35418, "reduce false": 45662, "performance leveraging": 40418, "leveraging multiple": 30334, "datasets annotated": 13152, "types given": 59090, "accuracy downstream": 962, "supervised sentence": 54041, "additionally experiment": 1720, "experiment datasets": 19236, "little understood": 30889, "using contextual": 60621, "specific labeled": 52096, "data main": 12474, "cross view": 11874, "encoder using": 17549, "modules model": 35772, "learning evaluate": 29629, "able solve": 726, "showing models": 50683, "representations achieve": 46613, "languages previous": 28756, "dense embeddings": 14075, "approaches obtain": 3886, "sparse representation": 51970, "dense models": 14076, "multilingual societies": 36122, "use code": 59842, "learning make": 29717, "release model": 46157, "loss information": 31097, "image information": 24538, "bases generate": 6323, "media platform": 32175, "covering different": 11655, "baselines average": 6237, "score metric": 48858, "framework design": 21490, "training extremely": 58104, "training algorithms": 57930, "range potential": 44929, "help facilitate": 23563, "memory computational": 32248, "proposed hybrid": 43792, "memory consumption": 32250, "datasets real": 13392, "users interested": 60469, "exhibit different": 19002, "question dataset": 44726, "use crowdsourcing": 59858, "models neglect": 35250, "apply novel": 3344, "pruning strategy": 44269, "existing sequence": 19141, "complementary strengths": 9591, "models combining": 34828, "models nlms": 35262, "computation complexity": 9826, "life paper": 30439, "pruning techniques": 44270, "techniques provide": 56128, "energy consumption": 17749, "relative increase": 46103, "match outperform": 31898, "f1 performance": 20191, "datasets question": 13390, "unanswerable questions": 59209, "extractive model": 20136, "datasets improved": 13296, "similarity models": 51109, "trained dataset": 57704, "datasets https": 13291, "study empirically": 53368, "specifically study": 52227, "choosing right": 8347, "predict human": 41642, "human accuracy": 24088, "sentences propose": 49772, "gap source": 21980, "sentences current": 49701, "experiments 10": 19342, "baselines text": 6312, "task discuss": 55024, "research text": 47129, "output labels": 38979, "pooling mechanism": 41127, "models cases": 34808, "ir models": 27034, "model existing": 33848, "inference performance": 25680, "shows superior": 50808, "competitive approaches": 9539, "amounts labelled": 2553, "models open": 35277, "generate responses": 22237, "models expensive": 34991, "requires extensive": 46928, "issue existing": 27062, "approaches leverage": 3860, "combining pre": 9120, "learning extensive": 29640, "experiments analyses": 19347, "self supervised": 49204, "better downstream": 6879, "conditional generative": 9993, "network gan": 36745, "unbalanced datasets": 59220, "datasets limited": 13318, "limited labelled": 30594, "framework explicitly": 21515, "related datasets": 45896, "schema challenge": 48723, "reasoning task": 45227, "uses knowledge": 60515, "text web": 56843, "generates relevant": 22354, "approach competitive": 3460, "plausible alternatives": 40959, "sequence lengths": 49948, "objective evaluation": 38087, "tasks given": 55657, "task identification": 55120, "legal domain": 30005, "adaptation tasks": 1541, "chinese corpora": 8301, "annotations available": 2984, "learn cross": 29354, "monolingual settings": 35812, "settings cross": 50362, "task translation": 55450, "terms automatic": 56265, "bleu metrics": 7206, "assess models": 4580, "encoder architectures": 17490, "scale multimodal": 48601, "propose multimodal": 43482, "tv series": 58999, "emotion sentiment": 17294, "modalities propose": 33471, "propose strong": 43647, "conversations dataset": 11058, "increase precision": 25421, "alignment mechanism": 2372, "mechanism learns": 32127, "leverages multi": 30307, "leveraging information": 30327, "performance conduct": 40264, "language example": 28057, "framework suitable": 21608, "attention enables": 4740, "query understanding": 44680, "handle task": 23415, "dataset suggest": 13106, "showing potential": 50684, "like model": 30485, "intended meaning": 26550, "highly desirable": 23893, "robustness paper": 48291, "current utterance": 12026, "based incremental": 5780, "10 percentage": 48, "dataset additional": 12804, "related topic": 45947, "bayes model": 6352, "datasets cross": 13203, "corpus high": 11353, "rich resource": 48116, "open sourced": 38462, "context encoder": 10621, "represent document": 46469, "usually available": 61036, "method advantage": 32373, "corpora experiments": 11200, "datasets approach": 13155, "language rich": 28477, "simplified version": 51239, "token sentence": 57305, "monolingual text": 35813, "systems monolingual": 54564, "develop unsupervised": 14622, "language subject": 28513, "results evaluate": 47617, "using test": 60987, "gaining insights": 21927, "content automatically": 10513, "recognition challenging": 45496, "classifiers paper": 8621, "audio signals": 4930, "content model": 10538, "information audio": 25764, "sequences using": 50029, "combines information": 9096, "information data": 25799, "features extensive": 20578, "emotion categories": 17287, "called bert": 7540, "representations transformers": 46776, "unlike recent": 59609, "models bert": 34770, "representations unlabeled": 46778, "right context": 48138, "trained bert": 57681, "bert model": 6683, "models wide": 35679, "simple empirically": 51165, "obtains new": 38252, "score 80": 48815, "squad v2": 52396, "text like": 56650, "improvements obtained": 25089, "understanding experiments": 59345, "strong assumptions": 53000, "application scenarios": 3179, "usually large": 61056, "identification method": 24390, "methods apply": 32753, "text new": 56675, "large paired": 28931, "examples work": 18942, "architecture perform": 4075, "perform ablation": 40064, "reference evaluation": 45739, "conditions paper": 10020, "systematically study": 54414, "models broad": 34794, "empirical insights": 17331, "2018 proposed": 274, "improve original": 24880, "better exploit": 6889, "structure generation": 53107, "unsupervised learned": 59704, "algorithm learn": 2282, "experiments prove": 19498, "art fully": 4263, "using semi": 60921, "language low": 28142, "train baseline": 57565, "model ii": 33972, "manual labeling": 31745, "utterances high": 61148, "level based": 30069, "corpus labeled": 11366, "entire training": 18030, "greatly improves": 23232, "different state": 15078, "fashion experimental": 20412, "score 71": 48806, "history previous": 23969, "current question": 12003, "single turn": 51352, "flow mechanism": 21120, "mechanism incorporate": 32124, "outperforms best": 38878, "greek language": 23246, "augmented models": 4981, "dependencies paper": 14110, "jointly embedding": 27194, "corpora annotated": 11175, "type level": 59060, "level corpus": 30091, "built different": 7484, "large performance": 28936, "need manual": 36581, "usually based": 61037, "linguistic understanding": 30805, "sequence text": 50011, "autoencoder vae": 5028, "hierarchical latent": 23674, "clean noisy": 8645, "noise level": 37600, "automatic understanding": 5135, "understanding domain": 59340, "use non": 59965, "information fixed": 25879, "easy understand": 16567, "accurately classify": 1093, "lack diversity": 27885, "capturing lexical": 7737, "diversity quality": 15740, "models single": 35514, "model mixture": 34102, "seq2seq baseline": 49896, "additional parameters": 1691, "computation cost": 9827, "attention distribution": 4738, "distribute attention": 15620, "learn align": 29344, "intent slot": 26568, "parsing systems": 39798, "sequence approaches": 49908, "approaches dataset": 3793, "models clear": 34817, "question given": 44731, "question asked": 44720, "answer query": 3049, "query given": 44669, "existing efforts": 19061, "uses multiple": 60522, "performance interpretability": 40397, "models big": 34787, "negatively affect": 36642, "affect quality": 2018, "systems identify": 54523, "sentences input": 49738, "seq seq": 49890, "framework present": 21584, "trained generate": 57738, "strategy achieves": 52925, "low latency": 31155, "zh en": 63186, "informal text": 25743, "university students": 59556, "dataset new": 13009, "jointly encode": 27195, "difficulty level": 15200, "outputs model": 39017, "advantages model": 1953, "model previous": 34232, "ones paper": 38341, "affects model": 2025, "knowledge using": 27645, "applications despite": 3199, "despite remarkable": 14386, "remarkable results": 46361, "leverage machine": 30277, "framework tackle": 21611, "language spanish": 28494, "rich annotation": 48093, "annotation data": 2941, "shared multilingual": 50480, "encoder sentence": 17542, "superiority method": 53951, "method state": 32667, "sentence inference": 49569, "knowledge relationships": 27589, "languages showing": 28783, "showing different": 50677, "different structures": 15083, "facto standard": 20294, "impact accuracy": 24588, "negative positive": 36630, "problem applied": 42504, "contribution present": 10944, "experimental study": 19329, "task addition": 54884, "reference future": 45742, "advanced deep": 1886, "methods pre": 32985, "trained 30": 57668, "30 million": 357, "model input": 34005, "performance prediction": 40491, "time speed": 57221, "baselines new": 6283, "experiments available": 19359, "current solutions": 12008, "classified using": 8588, "importantly model": 24796, "embeddings data": 17106, "augmentation techniques": 4969, "model final": 33887, "objective study": 38103, "methods automatically": 32763, "experimented various": 19338, "trained original": 57836, "imbalanced data": 24566, "features achieved": 20516, "contributions include": 10954, "improvements using": 25109, "user level": 60431, "targeted syntactic": 54859, "networks state": 36912, "process texts": 42834, "methods syntactic": 33063, "systems generate": 54513, "shown state": 50752, "performance recent": 40522, "compared lstm": 9418, "models reaching": 35400, "inference dataset": 25651, "fail perform": 20342, "replace original": 46402, "corpus designed": 11323, "results argue": 47509, "argue current": 4162, "inference using": 25703, "large pre": 28939, "models helps": 35084, "datasets similar": 13431, "nli datasets": 37452, "non redundant": 37679, "modeling lm": 34591, "methods far": 32863, "batch size": 6342, "embedding matrix": 17039, "scale number": 48606, "negligible loss": 36654, "advanced neural": 1891, "introduce auxiliary": 26785, "regularization term": 45843, "enhance ability": 17909, "short range": 50563, "unsupervised pretraining": 59723, "improvements nlp": 25086, "place task": 40927, "training final": 58105, "training code": 57953, "models challenging": 34810, "monolingual sentences": 35811, "using augmented": 60570, "score 10": 48783, "accordingly propose": 874, "dataset derived": 12890, "scenarios language": 48699, "incorporate external": 25354, "learn local": 29391, "approach outperformed": 3621, "performances achieved": 40637, "usually costly": 61044, "idf based": 24476, "based cosine": 5654, "present design": 41888, "design implementation": 14287, "general overview": 22078, "generate sequence": 22244, "promising solution": 43184, "art seq2seq": 4398, "models representing": 35440, "evaluate efficacy": 18456, "glove fasttext": 22859, "python package": 44442, "improved robustness": 24964, "model representations": 34310, "model behaviors": 33612, "imbalanced dataset": 24567, "distribution data": 15634, "label text": 27731, "performance classifiers": 40236, "high potential": 23762, "boosting performance": 7261, "mechanism allows": 32097, "ability effectively": 605, "control generation": 10964, "remain largely": 46315, "challenging new": 8119, "access external": 820, "address new": 1781, "challenge learning": 7992, "multimodal dialogue": 36147, "introduce knowledge": 26815, "learning paradigms": 29797, "datasets specifically": 13441, "multiple label": 36233, "approaches evaluate": 3812, "gains achieved": 21932, "insights models": 26392, "tune language": 58855, "improve interpretability": 24865, "downstream application": 16331, "accessed https": 832, "com thunlp": 9027, "tasks gap": 55652, "representations address": 46616, "conduct detailed": 10036, "detailed experiments": 14426, "weight matrices": 61918, "addition subtraction": 1645, "modeling long": 34592, "proposed network": 43867, "yield competitive": 63093, "present practical": 41986, "practical challenges": 41461, "achieve highest": 1156, "recent successes": 45357, "sentences high": 49731, "facilitate learning": 20271, "performance adding": 40182, "knowledge high": 27516, "way new": 61822, "art architectures": 4215, "proposed supervised": 43905, "english news": 17851, "identify appropriate": 24412, "search best": 48966, "memory bilstm": 32245, "crf architecture": 11761, "representation vectors": 46604, "surrounding sentences": 54197, "unsupervised pre": 59720, "tested proposed": 56399, "score respectively": 48870, "overall proposed": 39046, "predictions paper": 41766, "graph representing": 23164, "level predictions": 30181, "systems various": 54667, "appropriate word": 3970, "data offers": 12521, "challenges large": 8057, "studies highlight": 53268, "designed language": 14322, "quality conversational": 44501, "conversational data": 11043, "data chinese": 12208, "attracted increasing": 4881, "enhanced multi": 17934, "multi headed": 35967, "headed attention": 23502, "attend information": 4702, "representation subspaces": 46588, "model interaction": 34012, "interaction multiple": 26608, "multiple attention": 36170, "transformer baseline": 58476, "new parameters": 37279, "question introduce": 44734, "separate encoder": 49874, "better long": 6911, "available state": 5370, "resources used": 47338, "texts generated": 56883, "methods semi": 33031, "minimal human": 33288, "important facts": 24725, "space limited": 51874, "fewer words": 20742, "programming model": 43088, "texts texts": 56935, "information limited": 25955, "confirm proposed": 10133, "perceived quality": 40048, "models fast": 35019, "unsupervised objective": 59719, "sentences method": 49753, "explicitly modeled": 19643, "generate pseudo": 22230, "pseudo data": 44273, "phonetic similarity": 40829, "orthographic information": 38756, "model traditional": 34465, "method superior": 32676, "superior existing": 53933, "errors paper": 18246, "data current": 12265, "build unified": 7432, "achieves significantly": 1370, "empirical theoretical": 17354, "elastic weight": 16958, "weight consolidation": 61916, "experiments current": 19395, "decoder using": 13621, "large portion": 28938, "fully explore": 21728, "framework obtain": 21573, "resource translation": 47286, "text common": 56498, "context help": 10651, "global contexts": 22824, "context encoders": 10622, "dataset newly": 13010, "flexible way": 21111, "capsule networks": 7645, "stimulate research": 52850, "world people": 62952, "people express": 40029, "learning leverage": 29706, "necessary step": 36533, "speaker utterance": 52002, "utterance paper": 61139, "architecture capable": 4032, "multilingual sentence": 36118, "languages train": 28807, "used transfer": 60339, "efficient development": 16868, "agnostic model": 2094, "languages test": 28802, "evaluate transfer": 18513, "transfer performance": 58413, "experiments detailed": 19412, "lingual transferability": 30736, "effective context": 16638, "line work": 30647, "tasks ability": 55485, "implicit knowledge": 24661, "approaches explicitly": 3819, "decision boundary": 13561, "using layer": 60764, "light future": 30449, "future study": 21897, "demonstrates model": 14036, "improve consistency": 24834, "impressive progress": 24814, "set work": 50279, "performance outperforms": 40470, "baseline average": 6156, "training help": 58118, "help train": 23591, "results highly": 47659, "data hungry": 12411, "common types": 9208, "propose bayesian": 43310, "data named": 12502, "art approach": 4213, "better captures": 6860, "analyze errors": 2815, "methods mitigate": 32946, "mitigate issue": 33385, "dataset report": 13062, "projection layer": 43140, "tail distribution": 54763, "capabilities paper": 7606, "inductive learning": 25612, "investigate influence": 26962, "data regimes": 12593, "highlight need": 23868, "queries using": 44658, "based mechanisms": 5842, "effectiveness existing": 16779, "based matching": 5835, "support future": 54119, "generated neural": 22303, "models prone": 35367, "topic aware": 57392, "reddit comments": 45642, "generate diverse": 22194, "effective language": 16664, "glue benchmark": 22864, "training bert": 57945, "bert devlin": 6640, "score 81": 48816, "improvement bert": 24989, "peters et": 40800, "radford et": 44847, "map natural": 31795, "using beam": 60585, "difficult work": 15194, "posterior regularization": 41364, "provide general": 44077, "task transferring": 55447, "task representations": 55337, "representations form": 46672, "resources data": 47298, "learned multiple": 29469, "space recently": 51891, "languages possible": 28752, "paper extends": 39372, "novel formulation": 37825, "leading better": 29288, "consistent improvement": 10277, "text generates": 56592, "attempts extract": 4698, "identifying extracting": 24458, "manual labor": 31747, "manner specifically": 31726, "identify text": 24448, "better trained": 6982, "sentences trained": 49796, "able accurately": 667, "role language": 48314, "results overall": 47756, "fundamental differences": 21779, "demonstrating usefulness": 14056, "information dense": 25804, "generate sensible": 22241, "potentially provide": 41417, "provide benefits": 44018, "train end": 57585, "end automatic": 17617, "data end": 12322, "need expert": 36561, "paired data": 39162, "cycle consistency": 12074, "proposed way": 43925, "unsupervised data": 59691, "loss based": 31083, "instead raw": 26461, "consistency training": 10271, "initial model": 26215, "trained 100": 57667, "audio data": 4926, "data mainly": 12475, "modeling improve": 34583, "labeled text": 27765, "setting recently": 50347, "gap different": 21960, "understanding properties": 59385, "art datasets": 4245, "measure used": 32064, "datasets use": 13467, "discover best": 15406, "developing model": 14656, "model tailored": 34438, "factually correct": 20327, "build language": 7407, "effectively incorporate": 16743, "investigate various": 26995, "transfer improves": 58367, "reduces performance": 45697, "systems speech": 54636, "trained acoustic": 57670, "speech different": 52260, "lower word": 31225, "reasonably good": 45177, "models purpose": 35389, "apply models": 3337, "truly low": 58824, "effective feature": 16650, "specifically construct": 52188, "sentence understanding": 49665, "embedding framework": 17030, "improves baselines": 25116, "classify text": 8632, "method utilizes": 32700, "space experimental": 51860, "words need": 62465, "edit operations": 16593, "capture sequence": 7711, "raises question": 44862, "sub linear": 53521, "training input": 58132, "input learns": 26292, "does generalize": 15948, "understand generate": 59294, "study aimed": 53321, "study design": 53358, "analysis revealed": 2745, "group level": 23272, "changes time": 8181, "task zero": 55475, "require different": 46849, "great challenge": 23200, "data similar": 12664, "propose principled": 43591, "model zero": 34552, "unseen ones": 59652, "method utilizing": 32701, "multiple instances": 36231, "participating teams": 39825, "teams paper": 56007, "interaction paper": 26610, "production perception": 43048, "information focus": 25882, "focus model": 21181, "present techniques": 42038, "techniques train": 56144, "word relations": 62279, "information provide": 26034, "concept hierarchy": 9923, "learning usually": 29933, "automatic construction": 5074, "approaches better": 3778, "set data": 50133, "methods according": 32726, "graph relations": 23160, "syntactic cues": 54298, "model implicit": 33975, "make good": 31574, "good use": 22948, "relevant knowledge": 46221, "able extract": 694, "related attributes": 45888, "attributes entities": 4906, "approach build": 3437, "conversation corpus": 11031, "88 f1": 544, "formally define": 21364, "advanced models": 1890, "novel paradigm": 37890, "relations model": 46042, "evaluated public": 18545, "improving results": 25195, "utilize knowledge": 61096, "setting data": 50318, "novel auxiliary": 37775, "ability work": 651, "text directly": 56538, "multiple relations": 36275, "current works": 12029, "handle multiple": 23413, "considerably better": 10238, "individual sentence": 25578, "margin loss": 31821, "training task": 58285, "history using": 23970, "dialogue level": 14777, "need explicit": 36562, "promote diversity": 43191, "main feature": 31437, "second train": 49027, "auxiliary objective": 5236, "baselines multi": 6280, "attention transformer": 4840, "autoregressive decoding": 5213, "novel non": 37887, "significant speedup": 50925, "model heterogeneous": 33957, "encoders different": 17555, "information demonstrate": 25803, "huge success": 24077, "useful understanding": 60395, "propose improved": 43413, "contains multiple": 10501, "multiple channels": 36179, "translation abstractive": 58572, "modeling experimental": 34573, "expert users": 19585, "user utterance": 60455, "robot interaction": 48232, "promising solutions": 43185, "novel multimodal": 37881, "results relative": 47805, "based end": 5701, "information given": 25895, "local languages": 30944, "use transformer": 60059, "nlp fields": 37489, "text improve": 56625, "trained weights": 57913, "results f1": 47630, "model extracting": 33872, "perform equally": 40097, "method latent": 32560, "token alignment": 57280, "used encoding": 60161, "encoding sequence": 17576, "importance score": 24689, "improvement various": 25038, "parsing paper": 39790, "paper survey": 39587, "mechanism different": 32107, "techniques machine": 56107, "structure discourse": 53099, "discourse features": 15389, "process identifying": 42791, "evaluation performance": 18669, "systems tasks": 54648, "detection specifically": 14530, "evaluated tasks": 18549, "datasets best": 13168, "results systems": 47875, "identification relevant": 24395, "relevant entities": 46213, "described text": 14216, "problem short": 42652, "achieves improved": 1340, "ranked 7th": 44956, "understanding data": 59337, "requires generating": 46931, "generating long": 22381, "despite considerable": 14356, "considerable efforts": 10229, "topic generated": 57408, "paper create": 39310, "knowledge resource": 27597, "understanding use": 59412, "does hold": 15951, "wikipedia news": 62050, "focus specific": 21204, "thorough experimental": 57061, "outperforming baseline": 38845, "models improvement": 35114, "finally release": 20880, "text investigate": 56633, "building knowledge": 7449, "graph text": 23173, "generate set": 22245, "higher recall": 23841, "syntactic relationships": 54317, "neural seq2seq": 37095, "pre post": 41506, "data traditional": 12737, "models global": 35064, "global inference": 22830, "performance lags": 40405, "approach substantially": 3707, "english low": 17838, "challenge neural": 8000, "usually achieve": 61034, "performance trained": 40604, "sets data": 50286, "translated data": 58554, "data create": 12258, "end systems": 17712, "systems difficult": 54480, "languages highly": 28686, "guide models": 23339, "learning perform": 29801, "tasks jointly": 55700, "jointly multiple": 27209, "approaches fail": 3825, "fail model": 20341, "suffer error": 53763, "classifier detect": 8595, "require model": 46880, "model understand": 34499, "models diverse": 34932, "current datasets": 11968, "examples existing": 18899, "models evaluation": 34979, "currently existing": 12035, "work evaluating": 62650, "settings different": 50367, "million scale": 33255, "news headline": 37405, "body text": 7241, "text dataset": 56524, "dataset develop": 12894, "networks hierarchical": 36866, "architectures model": 4117, "input size": 26337, "experiments qualitative": 19507, "contribution method": 10942, "data labeling": 12449, "step building": 52802, "artificial intelligent": 4494, "recognition language": 45510, "models explicitly": 34997, "tasks build": 55531, "domain multi": 16115, "turn conversation": 58987, "cue words": 11932, "paper experiments": 39357, "evaluation natural": 18660, "modeling provides": 34617, "dataset diverse": 12899, "simple flexible": 51172, "conceptual framework": 9949, "kinds questions": 27372, "dataset including": 12964, "require reasoning": 46885, "proposed tackle": 43907, "overview different": 39111, "code results": 8856, "results limitations": 47700, "massive corpus": 31882, "present day": 41884, "component model": 9706, "estimation mle": 18384, "designed specifically": 14331, "outperforms multiple": 38913, "including standard": 25302, "training strategies": 58274, "architectures demonstrate": 4106, "approach recently": 3668, "recently large": 45435, "mechanisms models": 32152, "proposed sequence": 43895, "model graph": 33946, "networks gnns": 36863, "methods specifically": 33049, "propose contextualized": 43336, "text introduce": 56632, "datasets verify": 13482, "better handle": 6896, "model fully": 33911, "fully extract": 21731, "representations furthermore": 46674, "approaches cross": 3789, "paper seeks": 39572, "pairs language": 39199, "corpora exist": 11198, "knowledge captured": 27421, "learning resource": 29846, "scale labeled": 48584, "dataset facilitate": 12926, "research future": 47043, "identify major": 24430, "useful context": 60358, "information sequence": 26081, "generalization power": 22127, "representations non": 46725, "context improves": 10655, "languages experimental": 28664, "utilize information": 61095, "specific pre": 52126, "datasets high": 13289, "language fine": 28072, "level using": 30230, "output existing": 38972, "broad spectrum": 7355, "idea approach": 24367, "nlp domain": 37482, "architecture deep": 4038, "dataset significant": 13086, "significant gap": 50868, "world tasks": 62963, "corpus existing": 11335, "tasks learned": 55718, "message passing": 32322, "effective interpretable": 16662, "present rule": 42001, "work consists": 62609, "different challenges": 14858, "ones using": 38345, "dataset investigate": 12971, "tasks explored": 55634, "method joint": 32554, "complementary knowledge": 9590, "lingual knowledge": 30706, "knowledge attention": 27399, "attention cross": 4731, "fact extraction": 20290, "using evidence": 60682, "aims identifying": 2198, "specific aspect": 52046, "public corpora": 44309, "scarce data": 48657, "data largely": 12456, "aims leverage": 2203, "resource source": 47278, "task easily": 55036, "resource target": 47279, "domain fine": 16071, "multi granularity": 35962, "alignment network": 2377, "task help": 55114, "modeling fine": 34576, "method adopted": 32372, "data challenging": 12205, "challenging highly": 8098, "challenges paper": 8066, "13 million": 127, "model meaning": 34095, "model fact": 33878, "translations language": 58708, "apply methods": 3335, "translation corpora": 58590, "learn diverse": 29361, "selecting relevant": 49127, "proposed trained": 43916, "scale generative": 48575, "learn discriminative": 29359, "supervised fine": 53985, "domain shift": 16159, "studies suggest": 53302, "suggest models": 53825, "systems adopt": 54427, "evaluate competitive": 18446, "models challenge": 34809, "expensive collect": 19205, "learning signals": 29878, "improved significantly": 24965, "feedback data": 20716, "essential information": 18327, "study explore": 53375, "representations predict": 46739, "rapidly increasing": 44996, "modern large": 35709, "new supervised": 37329, "approaches benchmark": 3774, "learn training": 29440, "source training": 51815, "context type": 10736, "task ii": 55123, "systematic generalization": 54399, "capable reasoning": 7629, "end methods": 17684, "methods prior": 32991, "require explicit": 46852, "content detection": 10518, "dataset data": 12878, "supervised datasets": 53977, "datasets showcase": 13422, "providing explicit": 44242, "explicit labels": 19618, "average recall": 5416, "using annotated": 60558, "remains major": 46340, "used generating": 60199, "question natural": 44739, "actively studied": 1481, "complexity model": 9683, "detect cases": 14435, "costly time": 11604, "consuming paper": 10451, "learning pipeline": 29805, "conventional supervised": 11014, "data 50": 12103, "performance fully": 40353, "fully supervised": 21738, "data external": 12352, "external source": 19953, "supervised counterparts": 53974, "model working": 34545, "token time": 57311, "available word": 5391, "python module": 44441, "framework composed": 21474, "demonstrate large": 13928, "scale unsupervised": 48637, "tuning training": 58970, "task create": 54983, "finetuned model": 21044, "studies investigating": 53275, "architectures datasets": 4105, "results real": 47798, "semantic change": 49244, "key terms": 27339, "different research": 15052, "propose metric": 43459, "retrieval process": 47963, "fast text": 20429, "aims developing": 2188, "nlp field": 37488, "low recall": 31171, "recall rate": 45245, "optimal combination": 38527, "features chinese": 20536, "identification experiments": 24387, "selected features": 49118, "precision rate": 41615, "33 f1": 370, "based public": 5963, "study automatic": 53332, "world problems": 62953, "texts containing": 56868, "leads lack": 29319, "agglomerative clustering": 2068, "using cnn": 60607, "method conduct": 32433, "short sentence": 50564, "propose robust": 43610, "learn human": 29381, "question word": 44758, "estimate importance": 18371, "importance words": 24696, "model agnostic": 33551, "agnostic explanations": 2089, "models high": 35086, "changes input": 8177, "model sensitivity": 34352, "accuracy measure": 1004, "understand models": 59305, "robustness model": 48286, "corpus support": 11440, "context evaluation": 10628, "prior literature": 42407, "widely regarded": 62000, "developed model": 14634, "text generative": 56606, "framework trained": 21615, "level chinese": 30074, "method good": 32520, "generate realistic": 22233, "data aspect": 12146, "detection important": 14492, "analysis given": 2671, "aims detect": 2184, "implicitly explicitly": 24667, "depends availability": 14161, "availability labeled": 5251, "costly obtain": 11602, "method address": 32371, "task need": 55235, "baselines substantial": 6305, "substantial margin": 53625, "contextual understanding": 10785, "attention self": 4827, "model empirical": 33809, "translation human": 58617, "learning achieved": 29502, "substantial progress": 53629, "discuss main": 15473, "using encoder": 60673, "conditional text": 10006, "zhang et": 63188, "vision domain": 61636, "approaches mitigate": 3874, "utilizes latent": 61114, "used final": 60190, "generation usually": 22578, "continuous embedding": 10844, "distribution vocabulary": 15658, "capable handling": 7622, "produce meaningful": 42991, "user interactions": 60428, "hand written": 23396, "evaluated approach": 18520, "text extract": 56574, "used supervised": 60317, "addresses challenge": 1808, "shelf pretrained": 50540, "remove spurious": 46376, "target token": 54851, "tokens predicted": 57332, "study non": 53421, "results low": 47704, "propose sentence": 43621, "sentence wise": 49670, "require high": 46860, "nouns adjectives": 37745, "present article": 41850, "qualitative differences": 44475, "models systems": 35575, "bengali language": 6596, "systems respect": 54624, "types sentence": 59116, "researchers use": 47168, "using temporal": 60984, "temporal graph": 56187, "time sensitive": 57208, "time frames": 57158, "weights model": 61940, "neural topic": 37106, "trained fully": 57734, "number datasets": 37993, "datasets demonstrates": 13220, "process use": 42836, "nlp technology": 37555, "train task": 57644, "cross task": 11870, "improve understanding": 24937, "commerce platforms": 9152, "language description": 28021, "root leaf": 48342, "demonstrate machine": 13933, "graph dag": 23127, "essential problem": 18332, "performing downstream": 40675, "tasks successfully": 55917, "domain best": 16027, "approach jointly": 3579, "corpus context": 11309, "context event": 10629, "frequency features": 21672, "entities large": 18062, "python based": 44439, "based open": 5927, "embeddings efficiently": 17121, "various recent": 61385, "task sentences": 55359, "abstract semantic": 762, "building neural": 7460, "data variety": 12769, "variety real": 61288, "challenge using": 8022, "train domain": 57580, "synthetic datasets": 54374, "datasets domain": 13238, "test datasets": 56343, "improving user": 25200, "domain relevant": 16146, "sequential decision": 50037, "time period": 57191, "local feature": 30936, "type feature": 59056, "methods novel": 32963, "accuracy coverage": 952, "results provided": 47790, "texts annotated": 56858, "annotated according": 2876, "obtained text": 38226, "used far": 60185, "identification models": 24391, "task complexity": 54963, "manually selected": 31787, "different pre": 15028, "resource corpus": 47216, "corpus linguistics": 11374, "internet people": 26697, "groups work": 23285, "overview current": 39109, "area present": 4146, "number examples": 38002, "significantly affects": 50937, "representations demonstrate": 46636, "time existing": 57153, "traditional systems": 57550, "evaluate neural": 18477, "novel fine": 37823, "survey paper": 54210, "research trends": 47135, "point potential": 41047, "factors make": 20312, "reduce labor": 45667, "secondly propose": 49032, "applied low": 3280, "transfer related": 58417, "classification present": 8518, "present cross": 41879, "predictions based": 41755, "use joint": 59917, "generalize knowledge": 22143, "lingual monolingual": 30713, "monolingual resources": 35810, "available experiments": 5292, "transfer data": 58356, "languages sentiment": 28780, "issues existing": 27090, "datasets terms": 13455, "inference speedup": 25693, "compared autoregressive": 9381, "source tokens": 51811, "embeddings decoder": 17107, "method largely": 32559, "largely outperforms": 29060, "task previous": 55290, "model distribution": 33780, "information wikipedia": 26158, "languages dataset": 28633, "articles dataset": 4466, "domain topic": 16212, "theoretically empirically": 57029, "method widely": 32706, "indicate method": 25526, "method compared": 32425, "introduce methodology": 26822, "stages model": 52450, "problem time": 42675, "making classification": 31648, "score 77": 48812, "task detect": 55011, "sentence previous": 49624, "specific sentence": 52144, "introduce noise": 26840, "propose constrained": 43331, "attention multi": 4791, "single aspect": 51284, "fails capture": 20350, "transition probability": 58543, "order word": 38662, "texts text": 56934, "bilstm crf": 7130, "latent features": 29127, "classification specifically": 8556, "role entity": 48305, "recognition dataset": 45498, "benefits pre": 6586, "elmo bert": 16995, "bert outperforms": 6696, "fasttext embeddings": 20447, "million parameters": 33253, "parameters make": 39708, "train separate": 57629, "separate models": 49877, "multilingual pre": 36109, "training fine": 58106, "languages final": 28672, "compared fine": 9411, "91 f1": 557, "open world": 38467, "tasks entity": 55615, "works utilize": 62918, "context entity": 10626, "context level": 10669, "generic domain": 22628, "improvement terms": 25032, "method natural": 32585, "constituency tree": 10352, "trees encoding": 58768, "based mechanism": 5841, "models created": 34872, "datasets tasks": 13453, "model showed": 34372, "study introduces": 53396, "learning informative": 29683, "studies models": 53283, "document work": 15846, "propose coarse": 43320, "finds relevant": 20920, "scores candidate": 48894, "attention learn": 4764, "set outperforming": 50209, "various factors": 61341, "making task": 31669, "task document": 55029, "datasets reveal": 13412, "couple years": 11631, "exchange information": 18965, "regional languages": 45805, "numerous methods": 38066, "aim improving": 2152, "approaches particular": 3893, "generated reviews": 22313, "tackles problem": 54718, "different aspect": 14843, "demonstrates approach": 14028, "topic attention": 57391, "identifying words": 24470, "related different": 45897, "topics work": 57465, "provides empirical": 44197, "iterative training": 27127, "applications especially": 3203, "lesser extent": 30045, "large labelled": 28896, "labelled datasets": 27802, "datasets significant": 13427, "important applications": 24699, "applications case": 3187, "data privacy": 12562, "hidden information": 23638, "82 f1": 528, "benefit transfer": 6571, "second model": 49012, "learning binary": 29549, "documents used": 15924, "evaluation models": 18657, "field information": 20756, "report strong": 46449, "results transfer": 47889, "specific dataset": 52066, "matching network": 31918, "capable encoding": 7618, "metrics achieving": 33136, "performance demonstrating": 40285, "embeddings glove": 17143, "glove elmo": 22856, "matching module": 31917, "task aiming": 54893, "interaction information": 26602, "deeper level": 13759, "method multiple": 32583, "multiple perspectives": 36260, "study tackles": 53466, "summarization model": 53892, "model instead": 34008, "focuses generating": 21239, "target style": 54844, "accuracy drops": 965, "drops dramatically": 16449, "meaning space": 32018, "graph reasoning": 23159, "reasoning problems": 45216, "path forward": 39946, "richer representations": 48130, "transformer networks": 58504, "pretrained language": 42157, "preliminary evaluation": 41801, "related questions": 45931, "provide answers": 44009, "questions related": 44803, "dataset transfer": 13123, "accuracy experimental": 971, "experimental setting": 19324, "trained work": 57919, "understanding linguistic": 59360, "general neural": 22074, "corpus multilingual": 11384, "languages similar": 28787, "languages end": 28650, "translated english": 58555, "languages just": 28700, "causal relationships": 7878, "vocabulary mismatch": 61706, "mismatch problem": 33351, "sentence rewriting": 49635, "gain f1": 21907, "glove embeddings": 22858, "bias evaluation": 7026, "existing biases": 19044, "baseline text": 6217, "used communication": 60118, "making tasks": 31670, "proposed text": 43914, "aims generating": 2196, "generator network": 22621, "nli model": 37453, "numerical reasoning": 38061, "reasoning capabilities": 45187, "framework support": 21609, "understanding question": 59387, "answers generated": 3108, "questions humans": 44791, "attempt understand": 4692, "true meaning": 58822, "work human": 62680, "interactive attention": 26627, "models comparison": 34838, "models current": 34878, "generating texts": 22401, "texts given": 56884, "researches focus": 47173, "focus generating": 21167, "model fit": 33899, "experiments based": 19361, "strong language": 53036, "poor generalization": 41134, "embedding network": 17050, "cases neural": 7810, "recent improvements": 45314, "struggle generalize": 53200, "examples shot": 18931, "complex ways": 9671, "solving tasks": 51707, "capabilities language": 7597, "presenting new": 42065, "new topics": 37348, "words particular": 62477, "pairs generated": 39193, "form data": 21318, "user inputs": 60423, "suggest ways": 53833, "latin script": 29162, "language communities": 27996, "multiple possible": 36264, "possible use": 41339, "mle training": 33436, "supervision based": 54078, "based optimal": 5928, "utility proposed": 61083, "translation benchmarks": 58585, "samples text": 48490, "text samples": 56752, "generating semantically": 22393, "standard sequence": 52527, "generate semantically": 22240, "replication study": 46419, "data preprocessing": 12555, "argument identification": 4172, "augmented dataset": 4977, "statistical classifiers": 52739, "finally suggest": 20882, "algorithms neural": 2330, "rich input": 48104, "randomly initialized": 44899, "embeddings empirically": 17125, "empirically observed": 17365, "form model": 21327, "data following": 12369, "early stopping": 16516, "method method": 32575, "training outperforms": 58199, "error accumulation": 18211, "used detect": 60145, "tends generate": 56215, "scale unlabeled": 48634, "challenging traditional": 8161, "representation module": 46558, "value memory": 61208, "capture relevant": 7704, "module proposed": 35768, "strong reasoning": 53045, "overcome issue": 39063, "achieved new": 1253, "rouge f1": 48350, "score 24": 48786, "argue important": 4163, "particularly propose": 39888, "features encode": 20570, "way humans": 61807, "representations core": 46633, "study compared": 53340, "used traditional": 60331, "analysis finally": 2666, "performance tested": 40597, "tested models": 56398, "initial set": 26217, "data computing": 12233, "approach diverse": 3492, "addition providing": 1641, "providing evidence": 44240, "effectiveness transfer": 16819, "present open": 41982, "corpus hindi": 11356, "corpus news": 11389, "verify quality": 61543, "based bert": 5599, "gap model": 21969, "original dataset": 38708, "google com": 22954, "data pretrained": 12558, "google research": 22956, "research language": 47063, "tree master": 58751, "independent model": 25501, "experiments known": 19450, "obtaining best": 38230, "seven different": 50418, "furthermore results": 21838, "models generated": 35059, "facilitate reproducibility": 20274, "framework evaluation": 21510, "corpora containing": 11186, "approach ability": 3386, "models corpus": 34869, "based research": 5996, "benefit use": 6573, "vision models": 61640, "address gap": 1757, "processing use": 42963, "classification single": 8551, "bert achieves": 6606, "perfect accuracy": 40061, "classification code": 8442, "construction process": 10430, "task generate": 55103, "generate complex": 22185, "simple sentence": 51208, "generator model": 22620, "pipeline model": 40904, "important building": 24703, "highly specialized": 23917, "domain ii": 16081, "overview recent": 39114, "recent findings": 45311, "processing various": 42966, "interactions work": 26625, "importance context": 24678, "current time": 12020, "perform competitively": 40079, "simpler efficient": 51230, "scales linearly": 48646, "scale machine": 48593, "utterances corresponding": 61145, "data contain": 12248, "differences different": 14821, "trained automatically": 57677, "corpus evaluated": 11332, "improved model": 24952, "benchmark test": 6499, "introduce span": 26863, "capturing context": 7731, "theoretically prove": 57032, "witnessed dramatic": 62091, "explored previous": 19762, "proposed handle": 43790, "handle problem": 23414, "search decoding": 48969, "decoding phase": 13638, "training memory": 58172, "various experiments": 61340, "method multi": 32581, "relevant target": 46236, "open challenge": 38412, "information predict": 26016, "novel alignment": 37753, "transformer attention": 58451, "framework construct": 21480, "construct dataset": 10384, "dataset covering": 12869, "english web": 17902, "leverages large": 30306, "bidirectional transformer": 7084, "transformer language": 58490, "glue tasks": 22867, "bert representations": 6710, "knowledge document": 27446, "document prior": 15821, "prior document": 42399, "systems largely": 54545, "largely relied": 29063, "gcn based": 22027, "accuracy points": 1023, "algorithms trained": 2343, "trained learn": 57773, "utilizing data": 61120, "language output": 28364, "people social": 40037, "quality natural": 44555, "quality experimental": 44520, "analysis tweets": 2785, "song lyrics": 51712, "indicate models": 25529, "increasing performance": 25458, "learning field": 29643, "models graph": 35069, "effective approaches": 16630, "approaches multi": 3877, "relations multi": 46043, "task leveraging": 55186, "answer multi": 3038, "task aware": 54928, "aware pre": 5465, "bert pre": 6703, "proposed pre": 43879, "requires data": 46921, "method combining": 32422, "representations improve": 46685, "experimentally approach": 19331, "performance commonly": 40244, "based surface": 6074, "features obtained": 20633, "systems additionally": 54425, "additionally study": 1735, "methods utilizing": 33098, "especially beneficial": 18263, "extraction multiple": 20086, "require multiple": 46881, "task multiple": 55230, "trained self": 57857, "prediction layer": 41715, "multiple entity": 36211, "information associated": 25762, "entity aware": 18096, "ace 2005": 1102, "need methods": 36583, "unseen datasets": 59645, "research line": 47065, "identify address": 24411, "best unsupervised": 6834, "conventional neural": 11009, "generation demonstrate": 22444, "input information": 26285, "technical challenges": 56018, "trained low": 57779, "available benchmarks": 5267, "benchmarks work": 6552, "code reproduce": 8855, "reproduce experiments": 46822, "problem making": 42605, "spelling mistakes": 52336, "dramatically improve": 16387, "focus translation": 21208, "noise data": 37597, "super characters": 53921, "characters method": 8254, "asian languages": 4514, "accuracy gain": 981, "gain compared": 21905, "texts compare": 56864, "language results": 28476, "text attention": 56442, "used wide": 60350, "systematic overview": 54401, "representations textual": 46771, "propose taxonomy": 43661, "output present": 38992, "examples prior": 18924, "body literature": 7239, "approaches low": 3865, "model offers": 34143, "extends previous": 19844, "based counterparts": 5657, "generic framework": 22629, "standard self": 52523, "time different": 57145, "original self": 38725, "meeting corpus": 32223, "rely fixed": 46281, "autoregressive generation": 5215, "various settings": 61390, "settings training": 50401, "transformer outperforms": 58506, "original transformer": 38736, "led impressive": 29990, "impressive accuracy": 24808, "improvements low": 25078, "encoding framework": 17565, "latent embedding": 29124, "bert generate": 6664, "sentences diverse": 49706, "slightly worse": 51438, "human reader": 24227, "challenge present": 8006, "segment document": 49073, "distinct domains": 15590, "f1 compared": 20182, "effectiveness modeling": 16795, "representations considering": 46629, "considering contextual": 10257, "information proven": 26033, "query key": 44670, "layers used": 29237, "conducted extensive": 10084, "extensive analyses": 19855, "development accurate": 14666, "popular languages": 41168, "modern methods": 35712, "ensemble methods": 17977, "work develops": 62636, "predicting sentiment": 41682, "investigate potential": 26976, "sentiment datasets": 49840, "datasets domains": 13239, "serve useful": 50084, "relevant datasets": 46208, "fully labeled": 21735, "networks twitter": 36919, "perform substantially": 40147, "practical settings": 41472, "optimal solution": 38532, "global optimal": 22836, "quantitatively analyze": 44630, "text conduct": 56505, "higher score": 23844, "useful machine": 60375, "study usefulness": 53471, "building multilingual": 7457, "languages language": 28704, "models share": 35491, "adaptation new": 1531, "obtain comparable": 38165, "adapting new": 1567, "addition design": 1608, "models robust": 35465, "best trade": 6832, "diversity compared": 15731, "using time": 60990, "pooling strategy": 41129, "dev test": 14568, "learn predict": 29410, "human assessments": 24107, "generic task": 22632, "enhance semantic": 17922, "knowledge introduce": 27533, "classification knowledge": 8481, "text help": 56614, "information unlike": 26138, "decisions based": 13573, "pays attention": 39994, "attention important": 4758, "overcome aforementioned": 39057, "relevant work": 46246, "task known": 55155, "paper hypothesize": 39391, "strategies incorporating": 52907, "incorporating knowledge": 25388, "model dubbed": 33793, "significant consistent": 50858, "experiments involving": 19448, "research automatically": 46990, "showed model": 50667, "overlap metrics": 39088, "function proposed": 21759, "evaluation paper": 18666, "novel representation": 37908, "used image": 60207, "compare approach": 9329, "considerable improvement": 10231, "accuracy 93": 925, "approach code": 3446, "applications different": 3200, "different fields": 14934, "treat problem": 58731, "prone error": 43226, "training sentences": 58247, "annotated based": 2877, "created training": 11733, "data easily": 12307, "data naturally": 12505, "leads accurate": 29304, "multinomial naive": 36161, "bayes mnb": 6351, "forest rf": 21303, "demonstrate certain": 13879, "additionally discuss": 1718, "propose future": 43395, "token token": 57312, "networks able": 36827, "ones work": 38346, "outputs using": 39020, "propose strategies": 43646, "faces challenges": 20253, "chinese natural": 8312, "utilize multi": 61099, "word lattice": 62222, "noisy information": 37618, "document based": 15768, "models strong": 35537, "encode knowledge": 17466, "generalize poorly": 22147, "effectively integrate": 16744, "surpass previous": 54165, "extraction event": 20062, "aims solve": 2215, "information transfer": 26132, "ability automatically": 595, "specific characteristics": 52054, "characteristics task": 8244, "dataset benchmarking": 12826, "learn domain": 29362, "scoring model": 48937, "models considerable": 34848, "attention devoted": 4736, "systems fail": 54503, "domain generalization": 16078, "method multilingual": 32582, "multilingual transfer": 36128, "deep contextual": 13685, "embeddings pretrained": 17193, "end construct": 17623, "embeddings experimental": 17131, "approach zero": 3746, "perform consistently": 40082, "text human": 56616, "text particularly": 56693, "paper leverage": 39417, "enhance robustness": 17921, "preserve meaning": 42114, "quality measured": 44549, "multilingual contextual": 36071, "training single": 58261, "text multiple": 56671, "increasing complexity": 25447, "individual components": 25563, "performs sentence": 40712, "baselines datasets": 6250, "search generate": 48973, "devise novel": 14726, "according relevance": 866, "dialog model": 14757, "tracking data": 57497, "data contains": 12250, "contains information": 10496, "corpora manually": 11221, "entity labels": 18113, "models ner": 35251, "models individual": 35128, "research domain": 47023, "sota model": 51728, "framework achieved": 21448, "compared sota": 9455, "approaches end": 3807, "achieve sota": 1200, "examine performance": 18867, "specifically evaluate": 52198, "evaluate systems": 18510, "metric evaluating": 33115, "incorrect answer": 25398, "drastically improve": 16392, "framework compare": 21472, "existing novel": 19121, "novel sentence": 37917, "especially chinese": 18266, "focus automatic": 21146, "scale high": 48576, "module extract": 35757, "relations multiple": 46044, "networks widely": 36923, "applications deep": 3196, "networks pre": 36893, "approach select": 3680, "different algorithms": 14833, "new metrics": 37258, "nlp classification": 37472, "selection baselines": 49134, "common challenges": 9168, "search result": 48982, "issues work": 27107, "generative neural": 22600, "keyword generation": 27351, "set keywords": 50173, "adapt unseen": 1514, "scenarios recent": 48707, "support set": 54126, "learning way": 29938, "better evaluate": 6885, "dataset chinese": 12838, "shot text": 50651, "architecture new": 4069, "times compared": 57247, "experiments determine": 19413, "determine optimal": 14557, "sentence fusion": 49563, "information required": 26056, "respect various": 47353, "approach addresses": 3411, "models enabling": 34961, "mainly designed": 31469, "fail handle": 20340, "critical problems": 11787, "graph constructed": 23116, "graph propose": 23158, "graph attention": 23095, "learn multi": 29400, "models building": 34798, "static word": 52726, "information performance": 26006, "evaluation suite": 18732, "including tasks": 25308, "semeval 2019": 49434, "domain evaluation": 16059, "score 68": 48803, "facebook twitter": 20247, "demonstrate value": 13999, "users opinions": 60471, "using predicted": 60869, "faster accurate": 20432, "address following": 1756, "high error": 23735, "large label": 28892, "including new": 25283, "networks nodes": 36884, "simultaneously learning": 51272, "scaled dot": 48642, "mutual attention": 36342, "pairs documents": 39181, "parameters use": 39726, "learn meaningful": 29397, "increasingly difficult": 25471, "recommendation tasks": 45566, "tasks works": 55973, "recent natural": 45322, "methods jointly": 32911, "systems finally": 54504, "approaches build": 3779, "leading poor": 29295, "sources propose": 51839, "performance adversarial": 40189, "training analysis": 57932, "contextual representation": 10778, "various downstream": 61331, "layer output": 29197, "reduces time": 45698, "time spent": 57222, "times speedup": 57255, "level retrieval": 30196, "level similarities": 30213, "adaptation performance": 1533, "models relatively": 35423, "despite effectiveness": 14358, "data enhance": 12324, "level open": 30171, "hard understand": 23453, "knowledge facts": 27482, "new interesting": 37228, "number scientific": 38035, "growing exponentially": 23296, "actionable insights": 1460, "techniques natural": 56114, "variety sources": 61291, "pipeline uses": 40908, "embeddings work": 17250, "capture latent": 7689, "latent information": 29128, "module helps": 35761, "embeddings traditional": 17231, "embeddings map": 17171, "knowledge transferred": 27634, "clinical domains": 8670, "comparison popular": 9501, "possible solutions": 41337, "various challenges": 61313, "finally conclude": 20844, "entities method": 18065, "related user": 45952, "remains explored": 46333, "news paper": 37410, "paper verify": 39606, "compatible existing": 9517, "generation considering": 22437, "variational auto": 61241, "auto encoders": 5017, "used retrieve": 60293, "information incorporated": 25920, "methods knowledge": 32912, "requiring external": 46961, "benchmark method": 6476, "remains competitive": 46330, "offers better": 38300, "extracts text": 20144, "transformer decoder": 58480, "users understand": 60485, "existing entity": 19066, "label correlations": 27700, "novel label": 37849, "graph propagation": 23157, "relative f1": 46096, "step natural": 52816, "processing problems": 42927, "problems previous": 42721, "making models": 31661, "work make": 62720, "time method": 57178, "accuracy 92": 924, "model deployment": 33751, "2019 shared": 284, "settings results": 50396, "selecting correct": 49124, "level better": 30071, "better identify": 6901, "grounded dialogue": 23260, "models humans": 35095, "training state": 58270, "models setting": 35488, "independent word": 25507, "translation retrieval": 58671, "missing information": 33363, "information pre": 26013, "perform fine": 40109, "analysis generated": 2670, "multiple approaches": 36168, "study data": 53352, "systematic approach": 54391, "task evaluation": 55058, "approach pre": 3643, "vietnamese word": 61594, "trained embedding": 57715, "task utilize": 55465, "dataset apply": 12811, "users quickly": 60478, "select suitable": 49114, "embeddings addition": 17079, "approach facilitate": 3536, "driven method": 16428, "utterance based": 61133, "based fusion": 5742, "converting natural": 11078, "exploiting data": 19673, "domain pre": 16134, "training auxiliary": 57939, "tuning target": 58964, "learning able": 29500, "domains experiment": 16252, "including document": 25251, "level multi": 30164, "new network": 37268, "explicitly encourages": 19634, "translation document": 58603, "learning lead": 29702, "tasks observe": 55772, "consistent significant": 10285, "data abundant": 12106, "representations sequence": 46754, "sequence tagger": 50001, "level metrics": 30161, "topological data": 57467, "combines strengths": 9103, "information local": 25957, "syntax neural": 54351, "historical context": 23956, "including information": 25263, "corpora typically": 11252, "rich metadata": 48111, "exploit contextual": 19652, "data long": 12469, "corpora new": 11227, "start problem": 52567, "mining approach": 33311, "macro averaged": 31404, "different pretraining": 15032, "pretraining objectives": 42214, "adapt pretrained": 1509, "pretrained weights": 42193, "directly fine": 15316, "tuning pretrained": 58945, "results diverse": 47595, "diverse nlp": 15709, "models relative": 35422, "target tasks": 54848, "role semantic": 48321, "ongoing debate": 38348, "expressions different": 19807, "methodology used": 32721, "used fine": 60191, "highlighting challenges": 23872, "downstream text": 16370, "perturbations input": 40793, "change input": 8170, "largely ignored": 29056, "growing body": 23290, "related literature": 45916, "framework adversarial": 21454, "semantic equivalence": 49275, "additional constraints": 1658, "adversarial perturbations": 1984, "adversarial robustness": 1985, "released https": 46175, "text specifically": 56783, "patient doctor": 39956, "fully investigated": 21734, "transcript text": 58338, "benchmark consisting": 6436, "presents unsupervised": 42110, "concretely propose": 9984, "various online": 61376, "online media": 38374, "automated data": 5039, "driven manner": 16427, "models makes": 35214, "industry research": 25622, "provides comparable": 44186, "information visual": 26155, "constructed new": 10415, "transcribed text": 58335, "87 accuracy": 540, "furthermore study": 21839, "focus domain": 21156, "domains work": 16301, "qa based": 44445, "generation quality": 22534, "model popular": 34211, "using conversational": 60627, "conversational context": 11041, "nlg models": 37445, "suffer high": 53765, "multiple distinct": 36200, "study method": 53410, "expressions present": 19810, "detection multi": 14504, "context example": 10630, "usage scenarios": 59806, "resulting representation": 47474, "task types": 55453, "datasets created": 13201, "datasets finally": 13273, "problem setting": 42651, "despite progress": 14377, "focus relevant": 21195, "attention approaches": 4710, "context document": 10615, "decoder transformer": 13617, "context experiments": 10635, "experiments evaluation": 19431, "increase complexity": 25407, "complexity models": 9685, "models adding": 34691, "multiple pre": 36265, "score results": 48871, "set nlp": 50201, "knowledge capture": 27420, "representations produced": 46742, "recent pretrained": 45335, "transformer layers": 58494, "model pretraining": 34231, "supervised pretraining": 54034, "pretraining tasks": 42219, "pretraining dataset": 42199, "pretraining data": 42198, "2019 task": 286, "identifying categorizing": 24454, "000 english": 5, "english tweets": 17896, "tasks sub": 55915, "popular tasks": 41191, "participate task": 39816, "treat task": 58732, "steps including": 52842, "learning clustering": 29559, "problem multi": 42611, "second step": 49023, "clustering model": 8743, "model verified": 34528, "dataset large": 12977, "despite growing": 14364, "number word": 38055, "lack standardized": 27914, "performance design": 40287, "simple classification": 51141, "address introduce": 1761, "tasks case": 55534, "24 languages": 329, "multilingual setting": 36119, "probing task": 42493, "used explore": 60180, "setting neural": 50333, "dataset lack": 12975, "lack datasets": 27884, "datasets leads": 13316, "leads poor": 29323, "case low": 7791, "text augmentation": 56446, "improvements models": 25082, "augmented data": 4976, "handful languages": 23404, "benchmark based": 6428, "news topic": 37422, "classification textual": 8574, "embeddings context": 17102, "multi tasking": 36032, "languages far": 28671, "information image": 25911, "consistency loss": 10268, "images texts": 24555, "comparative experiments": 9321, "provide concrete": 44041, "learning complex": 29565, "conclude discussing": 9968, "scale empirical": 48567, "empirical data": 17322, "gender race": 22039, "bias sentence": 7043, "including state": 25303, "mixed results": 33408, "conclude proposing": 9971, "federated learning": 20709, "trained generative": 57739, "dataset popular": 13029, "document pairs": 15816, "concise summary": 9964, "abstract information": 758, "incorporating domain": 25384, "scale annotated": 48552, "scientific domain": 48760, "lack high": 27891, "pretraining large": 42208, "datasets variety": 13478, "bert achieve": 6602, "code pretrained": 8845, "com allenai": 9005, "documents context": 15867, "based similar": 6034, "single parameter": 51328, "features architecture": 20524, "implications results": 24657, "tweet classification": 59002, "used instead": 60216, "sequence processing": 49971, "separate tasks": 49878, "heterogeneous sources": 23624, "performance specific": 40570, "improvement current": 25000, "use prior": 59982, "knowledge current": 27431, "benchmarks shows": 6542, "findings demonstrate": 20906, "information semantics": 26077, "graph sequence": 23167, "explicitly capture": 19631, "achieves 24": 1287, "art points": 4356, "paper builds": 39285, "particular define": 39840, "work robust": 62814, "related corpus": 45892, "data observe": 12518, "humans express": 24277, "discourse markers": 15393, "play method": 40976, "highly imbalanced": 23901, "easily integrated": 16545, "using self": 60916, "self labeled": 49199, "data lead": 12457, "data performs": 12543, "performs worse": 40722, "noisy label": 37621, "explicitly modelling": 19645, "performance overall": 40472, "systematic differences": 54394, "neural transformer": 37107, "model includes": 33988, "bert elmo": 6649, "propose distill": 43355, "knowledge bert": 27416, "bert state": 6721, "tasks multiple": 55755, "times inference": 57252, "popular way": 41197, "problem make": 42604, "use pipeline": 59973, "nlp modules": 37503, "detection recent": 14517, "task usually": 55464, "usually involves": 61054, "evaluation use": 18745, "jointly perform": 27214, "attention topic": 4837, "number domains": 37998, "score classification": 48839, "provide results": 44123, "network graph": 36751, "informed decision": 26181, "generation attracted": 22422, "methods mainly": 32936, "mainly use": 31478, "methods performance": 32981, "way generate": 61805, "important factors": 24724, "baseline various": 6223, "effectively experimental": 16732, "baselines recent": 6295, "shot scenario": 50639, "information make": 25962, "score higher": 48850, "model recent": 34280, "based online": 5926, "learn automatically": 29347, "automatically classify": 5147, "model classifying": 33662, "idea model": 24371, "built domain": 7485, "unlabeled domain": 59572, "classification result": 8537, "way explain": 61803, "text humans": 56617, "described natural": 14212, "acquired knowledge": 1444, "context representations": 10706, "assess model": 4579, "published dataset": 44368, "learned attention": 29452, "speed inference": 52322, "reading model": 45088, "agents capable": 2063, "sub sentence": 53530, "comprehensive experimental": 9790, "compared vanilla": 9471, "text study": 56792, "challenge develop": 7977, "tasks bert": 55523, "distillation method": 15573, "applied train": 3305, "developed models": 14635, "language real": 28459, "results newly": 47741, "newly annotated": 37370, "search optimization": 48977, "testing time": 56415, "allows researchers": 2477, "researchers developers": 47152, "training multiple": 58184, "multiple gpus": 36220, "demo video": 13849, "www youtube": 63025, "youtube com": 63147, "scoring functions": 48934, "computationally challenging": 9871, "inference methods": 25669, "accuracy levels": 999, "syntactic representations": 54319, "tasks novel": 55770, "tasks empirically": 55608, "lda model": 29250, "model reduce": 34290, "hard interpret": 23444, "method interpret": 32549, "end train": 17716, "pairs semantically": 39214, "dataset 20": 12792, "20 000": 219, "covers broad": 11663, "better modeling": 6920, "task entails": 55050, "respect given": 47345, "evidence supporting": 18823, "models ranging": 35396, "heuristic rule": 23629, "evaluation available": 18580, "diversity present": 15739, "mainly consider": 31468, "consider textual": 10223, "effectively boost": 16724, "advantages different": 1950, "prediction word": 41752, "help task": 23590, "extra data": 19961, "heavily human": 23531, "human crafted": 24128, "features knowledge": 20608, "effectively capture": 16725, "models studies": 35543, "come different": 9129, "different conclusions": 14872, "languages comparing": 28619, "categories proposed": 7849, "effect training": 16620, "data quantity": 12582, "accurately predicting": 1098, "leveraging multilingual": 30333, "multilingual bert": 36064, "bert self": 6714, "languages fine": 28675, "tuning datasets": 58905, "recurrent language": 45615, "multilingual learning": 36091, "training provides": 58222, "bert trained": 6727, "trained code": 57690, "sentence test": 49656, "highest scoring": 23858, "explores potential": 19774, "large source": 29014, "knowledge exploited": 27475, "based popular": 5940, "popular benchmark": 41157, "analysis explore": 2665, "post training": 41354, "approach popular": 3641, "tuning bert": 58902, "training applied": 57933, "propose modeling": 43467, "model discriminator": 33775, "showing approach": 50676, "training regularization": 58228, "represented word": 46809, "datasets showed": 13423, "sequence encoder": 49920, "encoder architecture": 17489, "case performance": 7794, "improves significantly": 25160, "version task": 61556, "examples model": 18916, "model test": 34452, "neural components": 36945, "datasets codes": 13176, "work publicly": 62797, "trained existing": 57725, "tuning new": 58934, "data challenge": 12203, "datasets lead": 13314, "features shown": 20667, "improvement tasks": 25031, "limitations using": 30558, "current limitations": 11983, "tasks human": 55665, "understanding representation": 59393, "network convolutional": 36727, "task retrieving": 55347, "start end": 52566, "text video": 56839, "simple elegant": 51164, "datasets comparable": 13181, "labels dataset": 27813, "dataset different": 12897, "rely different": 46277, "create single": 11716, "introduce generative": 26809, "outperforms individual": 38905, "impact nlp": 24601, "community recent": 9274, "nlp architectures": 37465, "embeddings like": 17167, "directional language": 15279, "model elmo": 33804, "al proposed": 2247, "model order": 34150, "possible performance": 41334, "task competition": 54961, "participated subtasks": 39821, "encode words": 17474, "official results": 38310, "subtasks respectively": 53671, "sequence problem": 49970, "specifically given": 52207, "text decoder": 56528, "manner present": 31722, "jointly extract": 27197, "information aggregation": 25758, "attention work": 4854, "existing generation": 19071, "generation approaches": 22421, "baselines dataset": 6249, "large room": 28956, "relevant facts": 46217, "generating appropriate": 22366, "promising directions": 43164, "better datasets": 6875, "detection present": 14511, "collection pipeline": 8984, "technique automatically": 56027, "diverse corpora": 15695, "evaluating multiple": 18565, "ml models": 33431, "response quality": 47401, "semantic correctness": 49261, "paper existing": 39355, "nlg systems": 37446, "entity spans": 18149, "iteratively refine": 27133, "score improvement": 48854, "bias bias": 7020, "transformer bert": 58478, "experiments bert": 19366, "bert performs": 6701, "combining bert": 9108, "non bert": 37639, "bert encoder": 6653, "achieved score": 1266, "additional external": 1667, "global structure": 22845, "contain relevant": 10468, "state information": 52699, "models currently": 34879, "achieving promising": 1419, "results unsupervised": 47894, "datasets neural": 13344, "models nlm": 35261, "words challenging": 62376, "embedding matrices": 17038, "keeping parameters": 27279, "16 absolute": 162, "answer extraction": 3034, "choose appropriate": 8343, "languages words": 28821, "compared languages": 9417, "documents introduce": 15888, "order predict": 38647, "trained detect": 57708, "unlabeled training": 59585, "novel loss": 37857, "article introduces": 4452, "corpus creation": 11315, "creation new": 11749, "bilstm network": 7136, "conclusions drawn": 9978, "larger improvements": 29079, "world low": 62947, "data relatively": 12595, "discriminator learn": 15453, "performance use": 40615, "labels work": 27861, "art adversarial": 4210, "training shown": 58256, "shown impressive": 50718, "embeddings shared": 17214, "performance non": 40460, "methods challenging": 32780, "work revisit": 62813, "stable training": 52413, "method includes": 32537, "method robust": 32641, "modern natural": 35713, "evaluate ability": 18434, "perform semantic": 40136, "work create": 62613, "datasets datasets": 13208, "pairs total": 39221, "evaluation new": 18663, "late fusion": 29113, "fusion techniques": 21862, "sequence modelling": 49957, "architecture sequence": 4086, "arbitrarily long": 4011, "long contexts": 31006, "performances state": 40647, "thanks use": 57001, "different distributions": 14901, "model stable": 34405, "model calibration": 33640, "features downstream": 20565, "new embeddings": 37181, "available pretrained": 5347, "relevant training": 46243, "building existing": 7445, "data methods": 12485, "better systems": 6974, "takes word": 54785, "meta embeddings": 32333, "embeddings classify": 17093, "baselines results": 6297, "improves cross": 25121, "al 2019": 2241, "al 2019b": 2243, "lstm transformer": 31284, "openai gpt": 38469, "special characters": 52017, "capability handling": 7609, "work question": 62799, "mapping method": 31803, "comparing models": 9482, "strong empirical": 53027, "sentence levels": 49598, "yield improved": 63096, "minimum description": 33305, "corpus given": 11352, "single representation": 51332, "relative distances": 46092, "sub spaces": 53532, "task contextual": 54977, "nlp downstream": 37484, "sequence previous": 49969, "including pre": 25288, "set provide": 50228, "prediction objective": 41725, "task explore": 55068, "explore unsupervised": 19746, "training pre": 58210, "simple multi": 51197, "layer convolutional": 29183, "phrases sentence": 40852, "language cross": 28012, "lingual setting": 30727, "models submitted": 35546, "carefully selected": 7763, "baselines future": 6264, "used multiple": 60245, "multiple consecutive": 36187, "task 2018": 54868, "information removed": 26052, "output paper": 38990, "coherence relations": 8911, "penn discourse": 40022, "discourse treebank": 15401, "gained great": 21915, "labels distant": 27815, "usually suffer": 61069, "distribution given": 15640, "method dynamically": 32469, "similarity graph": 51098, "labels noisy": 27842, "better classification": 6863, "performance extensive": 40339, "systems significant": 54632, "success large": 53704, "directly applied": 15305, "passage retrieval": 39921, "retrieval using": 47975, "augmentation technique": 4968, "datasets establish": 13251, "demonstrate despite": 13889, "despite trained": 14398, "method designed": 32456, "explicitly learn": 19639, "models order": 35281, "underlying language": 59268, "tests ability": 56417, "ability language": 615, "tuning using": 58973, "dataset adding": 12802, "bert does": 6644, "availability training": 5256, "labels user": 27857, "information instead": 25926, "instead uses": 26467, "easier obtain": 16527, "improves previous": 25151, "consecutive sentences": 10196, "method beats": 32401, "multilabel text": 36055, "relevant query": 46230, "terms exact": 56286, "retrieval performance": 47962, "techniques generate": 56091, "representations elmo": 46647, "architecture results": 4082, "leads competitive": 29309, "resources human": 47304, "human subject": 24245, "datasets derived": 13222, "experiments domain": 19422, "domain transfer": 16217, "despite vast": 14404, "information cross": 25796, "descriptions entities": 14251, "art alternatives": 4212, "half million": 23367, "intra document": 26758, "annotation training": 2979, "datasets varying": 13481, "documents structured": 15915, "sentences specific": 49789, "posts social": 41372, "approach semeval": 3683, "target paper": 54836, "spoken question": 52364, "spoken documents": 52357, "documents recent": 15908, "asr hypotheses": 4557, "adversarial model": 1975, "systematic comparative": 54392, "study focusing": 53382, "crucial components": 11898, "representations allows": 46617, "position embeddings": 41265, "embeddings self": 17207, "attention using": 4846, "using unified": 61007, "outperform ones": 38807, "ones based": 38335, "newly developed": 37376, "contextual embedding": 10763, "architecture achieved": 4021, "2nd place": 352, "model reached": 34272, "appropriate model": 3964, "model syntactic": 34434, "input documents": 26269, "using bert": 60588, "bert able": 6601, "datasets address": 13148, "bert large": 6670, "large small": 29013, "bert base": 6612, "primary contribution": 42370, "baselines provide": 6290, "foundation future": 21416, "resources results": 47332, "represents step": 46819, "controversial topics": 10992, "change word": 8174, "analysis includes": 2679, "includes different": 25230, "different measures": 14985, "paper takes": 39594, "takes step": 54784, "facilitate progress": 20273, "dataset small": 13091, "reasoning text": 45229, "significantly larger": 50985, "new automatic": 37139, "news coverage": 37396, "diverse natural": 15707, "synthesized data": 54363, "train semantic": 57628, "work introduces": 62695, "compression technique": 9814, "method constructs": 32442, "applications users": 3256, "models sub": 35545, "lstm bidirectional": 31252, "bidirectional gru": 7072, "task rely": 55333, "ranked 5th": 44955, "sentence task": 49655, "task asks": 54916, "predict sentence": 41653, "various pre": 61378, "techniques training": 56145, "leveraging user": 30344, "practical significance": 41473, "various reasons": 61384, "model enhanced": 33825, "knowledge integration": 27529, "masking strategy": 31875, "masking strategies": 31874, "level masking": 30157, "inference semantic": 25690, "test machine": 56355, "masked language": 31862, "subset words": 53610, "parallel decoding": 39650, "unclear best": 59235, "best utilize": 6836, "entailment models": 18003, "pairs introduce": 39197, "importance weights": 24694, "important linguistic": 24742, "current unsupervised": 12025, "propose weakly": 43704, "languages better": 28610, "vary greatly": 61422, "domain supervised": 16198, "graph knowledge": 23144, "extraction semantic": 20107, "specific parallel": 52122, "domains end": 16249, "used unsupervised": 60344, "scene graphs": 48715, "impact using": 24607, "widely employed": 61995, "code url": 8865, "com microsoft": 9019, "https youtu": 24064, "nlp datasets": 37479, "aimed providing": 2166, "useful variety": 60396, "tasks applied": 55502, "reduces error": 45689, "small corpora": 51467, "embeddings general": 17141, "work conduct": 62606, "finding propose": 20901, "deep pre": 13742, "tuning models": 58929, "like bert": 30461, "works leverage": 62895, "resolve problem": 47202, "loss model": 31099, "teacher models": 55993, "demonstrates method": 14035, "incorrect answers": 25399, "answer different": 3032, "different related": 15047, "related question": 45930, "challenging existing": 8095, "based pretrained": 5948, "resource transfer": 47285, "samples propose": 48487, "purely data": 44395, "risk overfitting": 48163, "particular data": 39838, "effectively model": 16750, "build strong": 7428, "previous method": 42260, "method ignores": 32528, "level coherence": 30077, "english human": 17823, "accuracy respectively": 1037, "method various": 32703, "network output": 36777, "language empirically": 28045, "methods multiple": 32950, "representations evaluated": 46657, "evaluated based": 18522, "representational similarity": 46610, "selected set": 49120, "linguistic experts": 30768, "identify semantic": 24442, "experiments word": 19564, "languages code": 28615, "speech representations": 52290, "particular consider": 39836, "setting use": 50354, "previously studied": 42351, "propose multitask": 43484, "supervision form": 54081, "higher average": 23813, "evaluation neural": 18662, "textual inputs": 56971, "asr outputs": 4560, "systems test": 54651, "test robustness": 56365, "similar target": 51069, "present overview": 41984, "documents annotated": 15854, "annotated entities": 2893, "type entities": 59054, "document types": 15839, "introduce set": 26859, "tasks created": 55563, "pretraining language": 42206, "pretraining objective": 42213, "trained gold": 57743, "corpus achieving": 11268, "silver data": 51023, "success transformer": 53727, "surge research": 54158, "key research": 27331, "scale learning": 48591, "learning transformer": 29921, "transformer blocks": 58479, "relative positional": 46107, "learning rate": 29832, "text provided": 56720, "applied state": 3295, "task submission": 55418, "submission achieved": 53572, "models leading": 35172, "able perform": 712, "hop question": 24001, "doing multi": 15991, "investigate recently": 26982, "proposed datasets": 43753, "tasks design": 55582, "span based": 51920, "datasets useful": 13472, "previously thought": 42352, "knowledge effort": 27454, "examples propose": 18925, "possible automatically": 41316, "small annotated": 51464, "examples experiments": 18900, "work long": 62712, "better control": 6869, "matching task": 31922, "task binary": 54940, "method employs": 32479, "suffer issues": 53769, "number user": 38052, "responses different": 47404, "approaches goal": 3837, "different complementary": 14870, "recognition paper": 45523, "broadcast news": 7358, "task perform": 55272, "recognition using": 45546, "models performs": 35322, "experiments techniques": 19542, "reach human": 45049, "users query": 60477, "query using": 44681, "architecture models": 4065, "baselines training": 6314, "addresses key": 1813, "model student": 34418, "use textual": 60047, "propose time": 43673, "model constructs": 33709, "considerably improve": 10239, "decision makers": 13562, "severely limited": 50427, "political events": 41109, "topics discourse": 57447, "propose topic": 43676, "models applicable": 34716, "task general": 55101, "detection framework": 14488, "framework identify": 21536, "capture relations": 7701, "relations arguments": 46016, "average scores": 5418, "scores 81": 48889, "underlying reasons": 59275, "small manually": 51481, "set investigate": 50172, "investigate approach": 26941, "outperforming models": 38853, "automatically aligned": 5138, "en es": 17416, "related works": 45956, "information regarding": 26048, "portuguese english": 41225, "potential source": 41409, "demonstrate capability": 13878, "developed corpus": 14626, "models presented": 35348, "better bleu": 6856, "accurate sentence": 1089, "average 82": 5400, "uses bert": 60495, "bert fine": 6660, "novel augmentation": 37773, "increasing training": 25465, "different embedding": 14912, "create training": 11718, "according official": 865, "accuracy obtained": 1017, "gan generate": 21954, "aims discover": 2189, "unseen relations": 59653, "corpus knowledge": 11365, "construct set": 10403, "set constraints": 50126, "art relation": 4365, "transformers bert": 58523, "bert models": 6687, "different bert": 14853, "consistently high": 10293, "model exhibits": 33847, "exhibits strong": 19012, "paper conducts": 39301, "empirical investigation": 17332, "given complexity": 22727, "accuracy metrics": 1007, "based fine": 5734, "tuning approaches": 58900, "learning components": 29566, "particular use": 39870, "siamese neural": 50820, "accuracy propose": 1032, "scale semantic": 48622, "dataset size": 13089, "develop better": 14578, "better robust": 6961, "extracts relevant": 20143, "modelling tasks": 34644, "uses recurrent": 60530, "makes significant": 31636, "ability detect": 601, "sequence sentences": 49974, "able parse": 711, "datasets chinese": 13172, "score improvements": 48855, "improvements datasets": 25068, "datasets compared": 13183, "respectively addition": 47359, "models vary": 35670, "synthetic human": 54376, "shot transfer": 50654, "studies indicate": 53271, "indicate neural": 25530, "based shared": 6031, "scores using": 48928, "using manual": 60787, "considerably improves": 10240, "performance respect": 40537, "new unified": 37353, "shared transformer": 50509, "generation datasets": 22443, "explore deep": 19696, "stack lstm": 52417, "carry experiments": 7776, "deep transformer": 13751, "positional encoding": 41272, "case language": 7790, "modeling information": 34584, "analysis attention": 2618, "outputs paper": 39018, "extensively evaluate": 19917, "performance chinese": 40233, "accurately estimate": 1094, "studies focused": 53267, "studies investigated": 53274, "binary labels": 7150, "correct label": 11469, "demonstrate novel": 13952, "different instances": 14958, "performance predicting": 40490, "explicit reasoning": 19622, "reasoning paths": 45214, "reasoning questions": 45221, "dataset achieving": 12801, "score 34": 48790, "prone overfitting": 43228, "need high": 36570, "better model": 6919, "relevance diversity": 46190, "wide attention": 61961, "model tends": 34449, "handle large": 23410, "student framework": 53209, "words little": 62450, "generating process": 22388, "relations based": 46017, "depth study": 14189, "study specific": 53461, "model variations": 34525, "important challenges": 24705, "challenges address": 8028, "similar content": 51032, "processing researchers": 42934, "novel bert": 37777, "extract entities": 19973, "people communicate": 40027, "examples natural": 18917, "complex human": 9628, "language nlp": 28360, "role human": 48306, "provided natural": 44167, "platform allows": 40949, "use analysis": 59819, "key feature": 27311, "information public": 26037, "multiple machine": 36244, "probability mass": 42480, "propose sparse": 43643, "new family": 37201, "baselines experiments": 6259, "experiments distinct": 19418, "time improve": 57163, "reduce errors": 45661, "information enhance": 25835, "generation apply": 22419, "method dataset": 32450, "extensive human": 19908, "accuracy pre": 1025, "tasks focus": 55646, "reveals model": 48019, "lack context": 27880, "perform human": 40112, "dataset identify": 12956, "address second": 1798, "level introduce": 30138, "baseline new": 6196, "new benchmarks": 37144, "sacrificing performance": 48422, "grained representations": 23042, "corresponding textual": 11560, "image representations": 24545, "applications code": 3188, "nlu natural": 37565, "critical research": 11789, "utterances natural": 61149, "learning providing": 29829, "selection bias": 49135, "framework alleviate": 21456, "benchmarks natural": 6534, "understanding commonsense": 59331, "large unsupervised": 29043, "contextualized representation": 10808, "tasks building": 55532, "level contextual": 30082, "representations recent": 46746, "tasks non": 55768, "work ask": 62574, "methods case": 32778, "features users": 20691, "using modified": 60810, "task generates": 55104, "task construct": 54974, "demonstrate incorporating": 13924, "model adding": 33538, "release pre": 46163, "popular machine": 41169, "environment knowledge": 18171, "knowledge analysis": 27394, "test training": 56390, "external world": 19958, "motivated recent": 35873, "learning entity": 29624, "models behave": 34763, "heavily depend": 23529, "deal complex": 13513, "problems low": 42710, "applied open": 3287, "makes attempt": 31615, "information process": 26023, "node representations": 37587, "domain classifier": 16030, "explored task": 19765, "networks gcn": 36859, "science literature": 48748, "methods attempt": 32758, "propose automatically": 43308, "knowledge hand": 27512, "different extents": 14928, "corpora capture": 11182, "text fine": 56580, "existing pre": 19127, "rarely consider": 45003, "paper utilize": 39605, "textual corpora": 56955, "information simultaneously": 26089, "various knowledge": 61349, "paper obtained": 39430, "language pre": 28382, "comprehension natural": 9771, "approaches train": 3943, "transfer language": 58371, "model monolingual": 34106, "monolingual model": 35803, "time pre": 57195, "creating high": 11741, "creative text": 11753, "data imbalance": 12415, "certain number": 7943, "typically assume": 59136, "viterbi decoding": 61695, "performance introduce": 40398, "table structure": 54688, "datasets end": 13246, "prior best": 42395, "text related": 56731, "comprehension text": 9778, "text cross": 56519, "entire corpus": 18021, "corpus second": 11425, "effective compared": 16637, "proposed language": 43797, "predict text": 41657, "interaction language": 26603, "bidirectional attention": 7065, "studies demonstrate": 53257, "effectively generate": 16736, "using sophisticated": 60954, "cases work": 7817, "deep representations": 13748, "quality annotated": 44488, "specialized domains": 52033, "reduce costs": 45656, "noise paper": 37602, "directly modeling": 15325, "encoder experiments": 17515, "data instances": 12434, "effective strategy": 16699, "improvements model": 25081, "annotation schema": 2967, "evaluate techniques": 18512, "deep understanding": 13753, "understanding large": 59359, "key elements": 27308, "work finally": 62666, "written ones": 63007, "scenario based": 48683, "people tend": 40039, "resource neural": 47255, "strategy improve": 52936, "brings significant": 7343, "bert word": 6736, "annotations automatically": 2983, "multi feature": 35958, "challenging especially": 8092, "features analysis": 20521, "paper adapt": 39248, "conduct analysis": 10027, "pairs wikipedia": 39232, "develop methods": 14597, "entities provide": 18075, "cultural differences": 11936, "longer text": 31055, "facilitate better": 20262, "conduct user": 10068, "approaches chinese": 3782, "aims predicting": 2207, "writing patterns": 62989, "present thorough": 42041, "task social": 55382, "domains achieve": 16232, "newswire corpus": 37433, "weights using": 61943, "prior model": 42409, "obtain reliable": 38186, "model feature": 33883, "task distribution": 55026, "head self": 23498, "linguistically interpretable": 30816, "achieving strong": 1426, "challenging address": 8082, "query aware": 44661, "graph sentences": 23166, "context size": 10722, "integration method": 26529, "achieve consistent": 1130, "increases accuracy": 25434, "usually required": 61065, "required task": 46905, "specifically perform": 52218, "better task": 6975, "times data": 57248, "just language": 27250, "modeling pre": 34612, "data release": 12596, "supervised task": 54055, "train embeddings": 57582, "embeddings new": 17180, "bias detection": 7023, "bias word": 7048, "need addressed": 36545, "models fact": 35014, "combine existing": 9066, "batch sizes": 6343, "metrics quality": 33193, "introduce technique": 26869, "agnostic meta": 2091, "learning maml": 29719, "quickly adapt": 44821, "human evaluated": 24141, "time writing": 57240, "train bert": 57566, "classification remains": 8534, "approaches text": 3940, "boost accuracy": 7251, "tasks limited": 55729, "replace words": 46403, "sentence contextual": 49536, "mixture multiple": 33423, "model vocabulary": 34530, "text infilling": 56627, "conditioned context": 10014, "based gradient": 5757, "evaluations different": 18757, "detection context": 14469, "context awareness": 10593, "cosine distance": 11573, "performance time": 40601, "time constraints": 57126, "models binary": 34790, "infer latent": 25636, "pose challenge": 41238, "dataset highly": 12949, "sourced dataset": 51823, "task release": 55331, "code dataset": 8804, "dataset hope": 12951, "properties human": 43262, "ai applications": 2113, "sources paper": 51838, "web sources": 61899, "novel ways": 37953, "study consider": 53347, "automatically build": 5145, "corpus containing": 11307, "systems assist": 54435, "focus single": 21200, "enhances performance": 17946, "english evaluation": 17802, "effective cross": 16639, "process long": 42802, "categorization task": 7855, "words irrelevant": 62440, "model dynamically": 33795, "classification benchmark": 8439, "performance efficiency": 40314, "respectively neural": 47374, "bert transformer": 6729, "transformer xl": 58517, "context length": 10668, "models transformer": 35629, "models interaction": 35139, "better align": 6847, "bert using": 6732, "model increased": 33994, "outperforms original": 38916, "space discrete": 51854, "empirically method": 17364, "text style": 56793, "measuring degree": 32084, "component modern": 9708, "models understanding": 35646, "challenging work": 8163, "use explicit": 59884, "embeddings fine": 17138, "generative methods": 22596, "dataset automatic": 12817, "metrics task": 33202, "data example": 12334, "combination techniques": 9050, "respectively best": 47362, "representation key": 46532, "task end": 55047, "performed extensive": 40663, "entity relations": 18141, "encoding scheme": 17574, "unlike human": 59597, "far human": 20400, "lead successful": 29279, "controllable text": 10979, "public release": 44326, "ethical issues": 18418, "network sentence": 36801, "approximate nearest": 3977, "triplet loss": 58808, "modeling multiple": 34602, "method bert": 32404, "bert encode": 6652, "bert experiments": 6657, "experiments representative": 19512, "accordingly paper": 873, "text applying": 56437, "applying transfer": 3380, "novel latent": 37853, "datasets empirical": 13242, "powerful pre": 41441, "adapt bert": 1500, "bert pretrained": 6706, "bert based": 6616, "pretrained bert": 42147, "furthermore explore": 21822, "respectively finally": 47371, "data instead": 12435, "instead generating": 26453, "hypothesis using": 24349, "demonstrate combined": 13881, "new english": 37187, "study reveals": 53455, "success rates": 53722, "potential research": 41405, "research cross": 47007, "steps step": 52843, "performance drop": 40309, "poorly understood": 41152, "experiments single": 19529, "challenges including": 8054, "inference experimental": 25654, "important machine": 24744, "direct way": 15262, "way method": 61820, "information embeddings": 25828, "task field": 55081, "pairs single": 39218, "challenges modeling": 8061, "series data": 50062, "multimodal transformer": 36157, "words better": 62373, "mono lingual": 35788, "sentence contexts": 49535, "semantic constraints": 49258, "words visual": 62545, "original domain": 38712, "dialogue paper": 14781, "present stage": 42021, "stage method": 52433, "use bidirectional": 59836, "loss feature": 31089, "deep features": 13691, "inter class": 26578, "intra class": 26757, "final task": 20833, "task sharing": 55370, "sharing information": 50515, "models processing": 35361, "intermediate step": 26679, "dataset able": 12794, "able directly": 688, "directly generate": 15318, "aims transform": 2219, "tasks studied": 55912, "grammatical gender": 23072, "words sequence": 62509, "strong performances": 53042, "explored end": 19758, "word position": 62265, "difficulty learning": 15199, "position embedding": 41264, "learns better": 29954, "demonstrated strong": 14021, "robustness domain": 48277, "domain inputs": 16086, "presents strong": 42106, "base architecture": 5537, "propose adaptation": 43282, "effects performance": 16828, "modeling performance": 34610, "time previous": 57198, "proposed modifications": 43864, "significant computational": 50856, "computational overhead": 9852, "translation widely": 58702, "model adds": 33544, "used computer": 60122, "paper implemented": 39395, "analysis applications": 2613, "edge devices": 16586, "memory compute": 32249, "subjective evaluation": 53562, "extracting set": 20039, "language diversity": 28032, "advance field": 1881, "perform far": 40108, "training zero": 58320, "parameter setting": 39676, "setting performance": 50339, "lags far": 27937, "decoder pre": 13610, "challenging multilingual": 8115, "multilingual datasets": 36078, "online world": 38394, "experiment large": 19241, "twitter reddit": 59040, "score 61": 48797, "sequences text": 50026, "specifically generate": 52206, "documents usually": 15927, "collect release": 8952, "release large": 46154, "popular chinese": 41160, "extensive experiment": 19870, "coherent informative": 8916, "tasks construct": 55557, "like generalization": 30471, "architecture specifically": 4087, "larger degree": 29075, "work contribute": 62611, "explored area": 19756, "strategies evaluate": 52899, "automatic scores": 5120, "certain emotions": 7940, "step approach": 52798, "learning conduct": 29568, "data strong": 12697, "model findings": 33889, "training regimes": 58227, "events propose": 18797, "framework utilizes": 21625, "qualitative experiments": 44477, "implicitly encode": 24666, "far limited": 20403, "embedding learned": 17035, "led significant": 29993, "model facilitates": 33877, "integrate proposed": 26510, "approach state": 3703, "neural lm": 36965, "bert bert": 6628, "shot cross": 50608, "probing experiments": 42491, "pairs problem": 39209, "stark contrast": 52564, "bert multilingual": 6691, "combination bert": 9033, "embeddings low": 17168, "representations hierarchical": 46681, "syntactic trees": 54335, "dialog history": 14756, "context test": 10731, "dialog datasets": 14754, "like recurrent": 30496, "learning zero": 29948, "systems wide": 54669, "tasks need": 55762, "adaptation work": 1548, "work attempts": 62579, "understanding ability": 59321, "ability interpret": 614, "help explain": 23561, "proposed paradigm": 43876, "lattice structure": 29166, "method adapting": 32370, "positional embeddings": 41271, "inference cross": 25649, "signal model": 50829, "model receives": 34279, "baseline training": 6219, "lookup table": 31072, "analysis indicate": 2681, "focus detecting": 21153, "extent knowledge": 19922, "encoded model": 17481, "novel sentences": 37918, "addition domain": 1611, "attention research": 4821, "information recent": 26043, "bert perform": 6698, "studies aim": 53244, "automatically induce": 5183, "learning manner": 29720, "module used": 35770, "shown model": 50727, "structural knowledge": 53080, "task fact": 55078, "subtasks subtask": 53673, "submissions subtask": 53580, "subtask subtask": 53668, "systems improved": 54527, "oriented tasks": 38701, "data limits": 12466, "networks transformer": 36918, "transformer uses": 58514, "3x faster": 393, "training transformer": 58305, "making applicable": 31645, "detailed discussion": 14420, "existing benchmark": 19040, "sentences create": 49700, "important tool": 24785, "modeling work": 34638, "focuses learning": 21241, "aware network": 5463, "module based": 35752, "employ state": 17391, "techniques extract": 56085, "documents use": 15923, "use score": 60002, "potential risks": 41406, "information structured": 26107, "typically consists": 59138, "leibler kl": 30014, "quality sentences": 44580, "major issues": 31513, "levels information": 30242, "literal meaning": 30852, "significantly differ": 50952, "provides insight": 44205, "capture hierarchical": 7675, "euclidean space": 18424, "auto regressive": 5020, "achieve performance": 1178, "improved combining": 24946, "improvements results": 25098, "contextual encoding": 10767, "label based": 27693, "nature tasks": 36489, "prompt design": 43202, "commonly observed": 9219, "observed language": 38145, "based pseudo": 5962, "problem human": 42578, "presence multiple": 41837, "multiple heterogeneous": 36223, "components paper": 9722, "simple strong": 51213, "modeled using": 34556, "tasks strong": 55911, "models inference": 35130, "models textual": 35599, "multi grained": 35960, "hierarchical neural": 23683, "network effectively": 36734, "information manually": 25966, "construct multi": 10392, "methods statistical": 33053, "essential applications": 18321, "information results": 26059, "effectively extract": 16735, "pretraining framework": 42204, "computing resources": 9905, "resources limited": 47312, "model variational": 34524, "domain unlabeled": 16221, "baselines low": 6277, "decent performance": 13548, "recently increasing": 45433, "model discrete": 33772, "dataset achieves": 12800, "art terms": 4427, "base models": 5547, "difficult apply": 15157, "depth knowledge": 14187, "knowledge embedding": 27456, "representations specific": 46759, "emotions play": 17304, "written humans": 63001, "non verbal": 37690, "strongly associated": 53068, "propose global": 43404, "context enhanced": 10624, "global representation": 22840, "achieves 91": 1296, "leveraging bert": 30319, "bert additional": 6607, "improving robustness": 25196, "robustness adversarial": 48272, "adversarial inputs": 1971, "inputs propose": 26367, "higher robustness": 23843, "robustness noisy": 48290, "recently state": 45467, "does work": 15983, "introduce unsupervised": 26876, "selection experiments": 49139, "creates new": 11736, "embeddings identify": 17146, "relations pairs": 46048, "methods determining": 32821, "inherent limitations": 26203, "information helps": 25902, "corpora finally": 11202, "require costly": 46848, "annotations data": 2987, "data creation": 12260, "annotated texts": 2924, "suggestions future": 53843, "tasks cross": 55565, "likelihood objective": 30520, "source corpus": 51759, "english method": 17843, "yields average": 63114, "average absolute": 5401, "direct transfer": 15259, "art discriminative": 4249, "words topic": 62533, "adapt pre": 1507, "adapting pretrained": 1570, "improving language": 25184, "students learn": 53217, "candidate set": 7580, "given paper": 22767, "goal study": 22901, "hindering development": 23929, "scalability large": 48546, "instance level": 26427, "strong competitors": 53021, "data highly": 12405, "domain learning": 16102, "compare use": 9374, "learning query": 29830, "phrases using": 40857, "obtain consistent": 38170, "datasets state": 13442, "target embedding": 54815, "propose shared": 43629, "embeddings tend": 17226, "v2 dataset": 61160, "future models": 21878, "model arbitrary": 33575, "based relation": 5989, "models practical": 35336, "production setting": 43050, "evaluation indicate": 18628, "production quality": 43049, "heterogeneous domains": 23619, "domain aware": 16022, "method particularly": 32609, "metrics assess": 33137, "differences way": 14830, "contextualized features": 10802, "achieving average": 1393, "surpasses human": 54173, "resource cross": 47217, "retrieved documents": 47983, "training label": 58142, "based baselines": 5595, "method transformer": 32690, "impact model": 24599, "generation automatically": 22424, "abstractive summary": 773, "compare baselines": 9332, "systems investigate": 54535, "propose address": 43285, "create initial": 11703, "highlight key": 23864, "understanding challenges": 59329, "baselines multiple": 6281, "multiple subtasks": 36296, "built state": 7490, "nlp language": 37493, "assess ability": 4576, "tasks learn": 55717, "content task": 10563, "model means": 34096, "critical problem": 11786, "training embedding": 58081, "model post": 34213, "form knowledge": 21323, "propose online": 43576, "space pre": 51883, "domains model": 16275, "pearson spearman": 40006, "respectively using": 47385, "alleviate problems": 2418, "augmentation low": 4960, "translation uses": 58700, "framework extensive": 21518, "settings data": 50364, "f1 using": 20232, "sets english": 50290, "level transformer": 30226, "compared different": 9401, "improvement language": 25007, "simple methods": 51192, "model cross": 33731, "setting using": 50355, "provides comprehensive": 44188, "dataset russian": 13072, "component language": 9705, "task address": 54887, "present benchmark": 41856, "causal effect": 7870, "overcome challenge": 39058, "challenge learn": 7991, "edit based": 16591, "perform accurate": 40066, "task difficulty": 55022, "leverages information": 30304, "information hidden": 25904, "spatial information": 51985, "tasks requires": 55856, "furthermore considering": 21812, "learning cl": 29554, "approach real": 3666, "cost model": 11589, "apply bert": 3322, "captured bert": 7724, "approach transfer": 3726, "model response": 34318, "framework simultaneously": 21602, "independent training": 25506, "transfers knowledge": 58438, "training teacher": 58287, "student models": 53213, "approach generally": 3544, "systems research": 54623, "approaches commonly": 3785, "ungrammatical sentences": 59455, "languages spanish": 28793, "level tags": 30220, "languages average": 28605, "create multi": 11709, "content structure": 10561, "model aware": 33596, "fair comparison": 20356, "representations including": 46688, "produce higher": 42987, "improve computational": 24833, "available limited": 5321, "data actually": 12118, "generate context": 22188, "cloze questions": 8723, "questions finally": 44788, "various unsupervised": 61415, "including training": 25312, "64 f1": 469, "critical step": 11794, "diverse dataset": 15698, "require expensive": 46851, "expensive manual": 19212, "models representation": 35439, "allows training": 2480, "simple lstm": 51187, "information representation": 26053, "make better": 31544, "better use": 6989, "data applying": 12142, "select key": 49108, "step understanding": 52832, "approach identifying": 3559, "concepts related": 9941, "great extent": 23204, "language trained": 28534, "using sentences": 60926, "information representations": 26054, "order answer": 38595, "propose interpretable": 43422, "selects relevant": 49170, "reasoning chains": 45190, "human readers": 24228, "jointly optimize": 27210, "multiple reasoning": 36271, "reasoning chain": 45189, "sentence source": 49648, "attention head": 4756, "architecture search": 4083, "previously learned": 42337, "learning explore": 29639, "structure knowledge": 53111, "task empirically": 55041, "tasks glue": 55658, "jointly extracts": 27198, "systems new": 54571, "baseline addition": 6153, "experiments https": 19441, "seq2seq neural": 49903, "solve new": 51683, "pipeline based": 40893, "augmentation based": 4950, "results average": 47517, "encoder neural": 17529, "instead word": 26469, "network capable": 36715, "generator produces": 22622, "challenge nlp": 8001, "thesis presents": 57044, "search recommendation": 48981, "evaluations human": 18759, "high volume": 23809, "features experimental": 20575, "languages apply": 28601, "test languages": 56354, "problems arise": 42696, "addressing problems": 1823, "languages providing": 28763, "research aims": 46980, "learning obtain": 29787, "query paper": 44674, "dense sparse": 14083, "effectively captures": 16726, "optimization strategies": 38556, "analysis make": 2694, "lingual models": 30712, "language combinations": 27993, "employ data": 17376, "language bias": 27980, "widely investigated": 61997, "debiasing methods": 13535, "cost sensitive": 11593, "term training": 56254, "label pairs": 27719, "based instance": 5787, "models leveraging": 35184, "consistently better": 10291, "absolute f1": 740, "additionally use": 1736, "use self": 60003, "model inputs": 34006, "early fusion": 16512, "tasks computational": 55551, "focused data": 21218, "experimental design": 19261, "larger amounts": 29068, "efficient knowledge": 16880, "distillation kd": 15571, "biases models": 7057, "leverages state": 30315, "method applicable": 32382, "risk factors": 48162, "using weakly": 61021, "advantages using": 1955, "easily interpretable": 16547, "internal knowledge": 26687, "set domains": 50141, "using query": 60889, "life cycle": 30438, "label distributions": 27705, "perform systematic": 40150, "representation question": 46574, "performance known": 40403, "strong supervision": 53054, "patterns language": 39970, "effectively train": 16758, "language related": 28465, "furthermore develop": 21815, "based experiments": 5717, "strategies able": 52891, "level present": 30182, "detection aims": 14456, "style given": 53487, "svm random": 54238, "detection paper": 14509, "important parts": 24752, "detection text": 14535, "learns dense": 29956, "graphs paper": 23190, "approach incorporate": 3569, "supporting evidence": 54138, "accuracy datasets": 957, "answering requires": 3094, "long context": 31005, "yang et": 63044, "2018 dataset": 273, "models drops": 34943, "drops significantly": 16450, "compositional reasoning": 9747, "remain unclear": 46317, "framework quantify": 21590, "framework conduct": 21476, "enable model": 17426, "dataset corpus": 12868, "benchmark experiments": 6468, "model additionally": 33541, "additionally compare": 1715, "complete sentences": 9602, "combines word": 9105, "graph extracted": 23137, "realistic task": 45154, "label smoothing": 27728, "modern text": 35722, "trained publicly": 57846, "based decoder": 5668, "aspects model": 4546, "consuming manual": 10450, "new ones": 37273, "information ignoring": 25908, "information dependency": 25805, "question existing": 44728, "strategies selecting": 52917, "guided graph": 23345, "proves effectiveness": 44000, "reasoning systems": 45226, "used improving": 60210, "improving model": 25185, "rnn transformer": 48204, "technique propose": 56043, "propose structural": 43648, "applied transformer": 3307, "people usually": 40043, "share thoughts": 50462, "possible data": 41321, "discussions work": 15495, "evaluate accuracy": 18435, "contain noise": 10467, "data useful": 12763, "bert shown": 6717, "performance pre": 40487, "chinese pre": 8317, "models baselines": 34762, "including bert": 25241, "bert roberta": 6711, "roberta electra": 48221, "performances nlp": 40643, "research open": 47083, "based pretraining": 5950, "autoregressive language": 5216, "pretrain finetune": 42143, "pretraining method": 42209, "outperforms bert": 38877, "explore using": 19750, "document encoding": 15790, "document encoder": 15789, "embeddings derived": 17110, "using document": 60666, "improving ability": 25171, "providing alternative": 44237, "respect source": 47351, "encoders learn": 17556, "multilingual dataset": 36077, "dataset achieved": 12799, "methods shot": 33035, "unique challenge": 59509, "label dependency": 27703, "specially designed": 52038, "shot scenarios": 50640, "makes hard": 31624, "embedding technique": 17066, "outperforms strongest": 38951, "learning baseline": 29544, "information fine": 25877, "grained labels": 23039, "paper submission": 39581, "agnostic sentence": 2098, "corpus obtain": 11394, "filter noisy": 20808, "yields strong": 63133, "additional gains": 1672, "respectively compared": 47364, "practical scenario": 41469, "pairs context": 39174, "negative instances": 36622, "negatively affects": 36643, "contain useful": 10475, "information utilized": 26149, "distinguish positive": 15604, "models goal": 35065, "text despite": 56534, "label consistency": 27699, "built model": 7487, "process new": 42811, "generated machine": 22298, "user interfaces": 60430, "problem exacerbated": 42555, "analyses revealed": 2605, "models generative": 35061, "lstm gated": 31262, "scale long": 48592, "neural ranking": 37088, "efficiently effectively": 16913, "improved efficiency": 24948, "efficiency effectiveness": 16841, "sources including": 51830, "event driven": 18782, "systems understand": 54659, "auto complete": 5012, "novel algorithms": 37752, "algorithms use": 2344, "embedding parameters": 17052, "overall model": 39044, "model discriminative": 33774, "used learning": 60227, "information latent": 25946, "datasets improve": 13295, "generative approaches": 22588, "methods heavily": 32884, "query generation": 44668, "uses adversarial": 60490, "effectively improves": 16741, "improves diversity": 25124, "describes novel": 14228, "given content": 22728, "digital information": 15211, "higher scores": 23845, "use user": 60068, "based insights": 5786, "decoding procedure": 13639, "guidance future": 23327, "context crucial": 10603, "collected datasets": 8960, "dataset discuss": 12898, "challenge building": 7970, "propose research": 43607, "question regarding": 44748, "resources building": 47295, "utilize available": 61086, "novel self": 37912, "verify proposed": 61542, "transformer transformer": 58513, "function layer": 21756, "existing pretrained": 19130, "tasks pretrained": 55808, "reducing training": 45714, "yielding significant": 63110, "explicit knowledge": 19617, "powerful neural": 41438, "help disambiguate": 23557, "need paper": 36587, "western languages": 61949, "models exist": 34986, "code provided": 8848, "newly released": 37381, "models teacher": 35587, "model multitask": 34115, "surprising results": 54184, "counterparts paper": 11624, "use generated": 59898, "model manually": 34090, "does affect": 15934, "model estimate": 33837, "require use": 46896, "using discriminative": 60658, "require careful": 46843, "errors best": 18236, "tasks varying": 55960, "propose contextual": 43335, "utterance dialogue": 61135, "consider contextual": 10209, "domain adaptability": 15994, "2019 challenge": 280, "data utilized": 12768, "investigate questions": 26980, "bert use": 6730, "work showing": 62821, "data helps": 12400, "biased data": 7051, "features layer": 20612, "limits model": 30642, "baseline transformer": 6220, "furthermore evaluate": 21818, "ablation experiments": 656, "effect proposed": 16617, "quantify quality": 44612, "alignments paper": 2392, "quality use": 44595, "improve best": 24828, "end investigate": 17678, "respectively experiments": 47368, "codes used": 8882, "problems specifically": 42730, "problem providing": 42639, "help guide": 23566, "build recent": 7421, "dialogue modeling": 14779, "impact various": 24608, "scarcity problem": 48675, "model finetuning": 33897, "train robust": 57627, "dataset state": 13100, "usually rely": 61062, "transformer encoder": 58482, "spans input": 51957, "use feature": 59889, "attain state": 4668, "documents given": 15883, "systems tested": 54652, "particularly significant": 39889, "corresponding context": 11547, "questions training": 44813, "addition report": 1642, "tagging framework": 54740, "language downstream": 28038, "layer representations": 29207, "sentence token": 49659, "topics document": 57449, "models conducted": 34846, "reddit dataset": 45643, "linear support": 30670, "results accuracy": 47483, "accuracy 82": 914, "performance observed": 40464, "methods second": 33027, "believe work": 6413, "qa challenging": 44447, "understanding evaluation": 59342, "present collection": 41866, "small collection": 51466, "study context": 53348, "containing 10": 10482, "utterances based": 61144, "slot values": 51444, "classification applications": 8434, "problem directly": 42541, "stage approach": 52427, "models result": 35453, "transformer baselines": 58477, "focus fine": 21165, "demonstrated impressive": 14010, "claim generation": 8381, "sampling approach": 48498, "gpt model": 22983, "nlp computer": 37475, "study existing": 53374, "enhance model": 17915, "based various": 6127, "ml techniques": 33434, "previous utterances": 42300, "machines svm": 31399, "dataset topic": 13119, "approaches models": 3876, "datasets ii": 13293, "250 million": 337, "modeling choices": 34564, "work believe": 62586, "believe useful": 6412, "predict corresponding": 41638, "corresponding label": 11554, "models future": 35048, "dominant approaches": 16307, "data intensive": 12438, "models reflect": 35420, "produce semantic": 43006, "previous data": 42253, "analysis leads": 2689, "new directions": 37173, "expert demonstrations": 19577, "vectors encode": 61485, "scale manually": 48594, "building task": 7474, "human conversations": 24127, "existing annotated": 19023, "train universal": 57656, "investigate multiple": 26970, "supervised setup": 54046, "labeled target": 27764, "novel multilingual": 37880, "japanese russian": 27148, "exploit domain": 19653, "model followed": 33905, "scenario paper": 48688, "dataset built": 12834, "used named": 60246, "resources low": 47313, "linear projection": 30666, "domain evaluate": 16058, "survey based": 54203, "comparing results": 9485, "twitter based": 59032, "model scratch": 34340, "github repository": 22719, "datasets lack": 13309, "transfer text": 58426, "types using": 59125, "standard fine": 52493, "grained coarse": 23027, "outperforms domain": 38895, "outperforms zero": 38962, "shot fine": 50616, "systems state": 54639, "par human": 39615, "allows human": 2468, "sentences addition": 49678, "settings demonstrate": 50366, "does guarantee": 15949, "based time": 6096, "unseen entity": 59649, "method discover": 32465, "tasks supervised": 55921, "100 billion": 60, "methods develop": 32822, "task mainly": 55199, "mainly focuses": 31474, "tasks news": 55766, "mechanism significantly": 32142, "previous baseline": 42245, "descriptive statistics": 14256, "relatively higher": 46117, "pairs corresponding": 39176, "aspects human": 4540, "information concerning": 25785, "attract attention": 4872, "political social": 41113, "analysis sentiment": 2752, "suggesting possible": 53839, "based arabic": 5574, "accuracy increased": 993, "introduce pre": 26854, "embed text": 17005, "16 languages": 164, "provide performance": 44111, "generated users": 22331, "opening new": 38475, "complex linguistic": 9632, "knowledge models": 27553, "huge corpus": 24071, "answer retrieval": 3053, "evaluating large": 18561, "establish baselines": 18342, "evaluation code": 18590, "generative language": 22591, "way use": 61835, "use general": 59897, "encoder produce": 17534, "modifying model": 35739, "demo https": 13848, "leading improved": 29291, "translation self": 58673, "demonstrate robustness": 13971, "promising alternative": 43158, "extraction aims": 20045, "aims extracting": 2193, "suffer insufficient": 53767, "massive unlabeled": 31890, "preserve semantic": 42116, "relations entity": 46027, "types existing": 59087, "promising improvement": 43168, "improvement task": 25030, "performance cnn": 40238, "models gained": 35050, "high efficiency": 23733, "using relation": 60905, "upper bounds": 59774, "process instead": 42795, "understanding pipeline": 59382, "model component": 33683, "information title": 26125, "learn informative": 29383, "regarding different": 45793, "mechanism select": 32141, "performance news": 40456, "study address": 53318, "bidirectional rnn": 7083, "efficient using": 16908, "account word": 881, "translated training": 58562, "nlp area": 37466, "usually contain": 61042, "thesis propose": 57045, "model considering": 33700, "tasks keyword": 55703, "extraction used": 20129, "content document": 10522, "theoretic measure": 57014, "used efficiently": 60159, "media like": 32170, "informal language": 25740, "bleu human": 7204, "present solutions": 42020, "result paper": 47445, "demonstrates potential": 14037, "propose retrieval": 43608, "socio political": 51614, "data ensemble": 12325, "learning methodology": 29729, "large textual": 29027, "time improving": 57164, "improving cross": 25173, "linguistic studies": 30798, "seminal work": 49472, "problem current": 42527, "data unseen": 12755, "seen unseen": 49068, "performance seen": 40548, "provides robust": 44225, "word2vec bert": 62345, "bert deep": 6638, "success achieved": 53696, "effective achieving": 16627, "imitate human": 24573, "human based": 24112, "study perform": 53429, "data parameter": 12536, "similar information": 51047, "best way": 6837, "selected according": 49116, "task directly": 55023, "directly predict": 15331, "network overall": 36778, "currently deployed": 12033, "way making": 61819, "language especially": 28052, "promising approaches": 43161, "100 accuracy": 59, "extend idea": 19823, "classifiers predict": 8624, "perform manual": 40120, "correspond different": 11541, "current benchmarks": 11962, "make case": 31546, "data multi": 12497, "using evaluation": 60681, "pretraining task": 42218, "universal representations": 59544, "challenge multi": 7997, "conversations task": 11065, "existing dialogue": 19058, "novel transformer": 37942, "process design": 42770, "biases paper": 7058, "contextualized embedding": 10799, "using bilstm": 60597, "using relatively": 60906, "bilstm model": 7134, "achieves higher": 1334, "training separate": 58248, "separate model": 49876, "using coarse": 60608, "finally train": 20884, "properties model": 43266, "meaningful sentences": 32028, "sentences used": 49802, "topic work": 57437, "long form": 31012, "dataset comprises": 12855, "conventional seq2seq": 11011, "seq2seq language": 49898, "faithfulness generated": 20369, "metric achieves": 33110, "intuitive way": 26911, "attracted significant": 4888, "making informed": 31658, "informed decisions": 26182, "typically generate": 59145, "table data": 54687, "style generated": 53486, "tasks generating": 55655, "generating language": 22379, "huge performance": 24075, "control semantic": 10973, "test different": 56344, "15 points": 150, "key point": 27327, "textual contents": 56953, "text joint": 56634, "relative position": 46105, "method public": 32630, "datasets conduct": 13187, "focused developing": 21219, "paper outline": 39434, "recognition module": 45515, "based event": 5711, "token sequence": 57307, "challenging train": 8162, "training regime": 58226, "textual inference": 56968, "based prior": 5952, "models specialized": 35524, "task quality": 55313, "framework combine": 21470, "bert xlm": 6737, "compare systems": 9371, "considerable margin": 10233, "comments social": 9147, "terms content": 56279, "nlg tasks": 37447, "media based": 32159, "taken consideration": 54774, "highly flexible": 23900, "theoretically motivated": 57031, "better represent": 6952, "masked span": 31868, "span selection": 51931, "challenges opportunities": 8065, "text rewriting": 56749, "information redundancy": 26046, "extractive abstractive": 20134, "model development": 33762, "better unseen": 6988, "language employ": 28046, "simple linguistic": 51186, "detection new": 14507, "propose encoder": 43367, "multiple instance": 36229, "learning mil": 29738, "minimal supervision": 33292, "fully understand": 21746, "bert significantly": 6719, "previously overlooked": 42338, "important develop": 24716, "study compare": 53339, "models predictions": 35344, "impact paper": 24603, "medium low": 32218, "results suggested": 47868, "impact data": 24591, "extract topics": 19999, "codes publicly": 8879, "recently pre": 45448, "current pre": 11999, "usually focus": 61050, "bert xlnet": 6739, "models released": 35425, "automated solutions": 5057, "semantics domain": 49403, "baseline research": 6205, "compute time": 9881, "focus mainly": 21180, "model compatible": 33678, "bert gpt": 6665, "result new": 47442, "code base": 8796, "lingual embedding": 30699, "data followed": 12368, "trained synthetic": 57888, "embedding mapping": 17037, "task developing": 55018, "based general": 5746, "framework incorporates": 21544, "provide feedback": 44073, "method comparison": 32427, "comparison various": 9511, "models lexical": 35185, "identify relations": 24439, "systems lack": 54540, "data far": 12360, "000 labeled": 8, "relations including": 46036, "comparison future": 9496, "art work": 4441, "guidelines future": 23352, "human interaction": 24173, "proposed question": 43883, "automatic natural": 5112, "generation evaluation": 22454, "metrics propose": 33190, "approach employs": 3506, "specific target": 52152, "summarization data": 53879, "study apply": 53326, "approaches effectively": 3804, "simply treat": 51253, "effective graph": 16656, "context graph": 10649, "sequence context": 49917, "present fast": 41915, "model par": 34177, "faster compared": 20434, "achieves remarkable": 1358, "model type": 34495, "entity span": 18148, "trained fine": 57731, "task code": 54953, "language skills": 28489, "fluency semantic": 21127, "offline online": 38317, "baselines real": 6294, "networks effective": 36848, "text deep": 56529, "based interaction": 5789, "speech patterns": 52276, "incorporating contextual": 25382, "representing meaning": 46813, "correct prediction": 11472, "volume information": 61729, "nlp deep": 37480, "highly technical": 23921, "developing general": 14653, "models optimized": 35280, "number downstream": 37999, "different output": 15015, "tuned pre": 58882, "parameters demonstrate": 39690, "specialized language": 52035, "million speakers": 33258, "knowledge lack": 27539, "existing structured": 19149, "related issues": 45912, "effective structured": 16700, "texts help": 56885, "level natural": 30165, "speech models": 52270, "academia industry": 788, "way reduce": 61829, "dropout method": 16445, "tasks simple": 55891, "effectively solve": 16757, "provide model": 44102, "processing generation": 42873, "won place": 62107, "model firstly": 33898, "accuracy chinese": 942, "incorporate domain": 25351, "significant manual": 50897, "contextualized representations": 10809, "bert effectively": 6647, "content results": 10554, "results bert": 47524, "performance biomedical": 40220, "model exposed": 33862, "bert cite": 6634, "task semi": 55356, "utilize self": 61102, "task manually": 55205, "relevant document": 46210, "presents significant": 42103, "extremely important": 20160, "aspect natural": 4532, "text better": 56459, "data insufficiency": 12436, "study presented": 53437, "sequence deep": 49919, "comprehensive comparison": 9784, "based available": 5591, "attempt generate": 4687, "challenging nature": 8118, "accuracy various": 1069, "training specifically": 58265, "specifically pre": 52219, "leverage language": 30272, "adapted different": 1551, "training gives": 58115, "world situations": 62960, "scenarios present": 48705, "crafted linguistic": 11681, "representations bert": 46623, "topological structure": 57469, "remains unknown": 46355, "exhibit strong": 19005, "future efforts": 21872, "fundamental challenge": 21777, "challenge ai": 7966, "challenge current": 7972, "main parts": 31450, "abductive reasoning": 588, "irrelevant redundant": 27042, "redundant information": 45729, "create datasets": 11696, "bert task": 6724, "model efficiency": 33801, "codes available": 8874, "multimodal context": 36144, "text transformer": 56823, "visual commonsense": 61650, "25 relative": 334, "reduction error": 45719, "performance date": 40278, "public leaderboard": 44324, "detailed ablation": 14408, "ablation analysis": 655, "effectiveness new": 16800, "provided https": 44162, "existing automatic": 19036, "implicitly model": 24671, "information gain": 25888, "performance conversational": 40267, "conversational qa": 11047, "generalization different": 22120, "requires additional": 46913, "effectively modeling": 16751, "explicit syntactic": 19627, "syntactic constraints": 54294, "representations self": 46750, "syntax guided": 54348, "pseudo training": 44282, "supervised pre": 54032, "words ii": 62431, "network pre": 36784, "results commonly": 47540, "aims test": 2216, "order study": 38653, "lingual machine": 30710, "task respectively": 55343, "baseline tasks": 6215, "tasks common": 55543, "obtain reasonable": 38185, "challenges ahead": 8030, "method domain": 32468, "degrades significantly": 13811, "realistic scenarios": 45151, "multi type": 36038, "multi span": 36008, "support various": 54130, "span extraction": 51923, "achieves 79": 1291, "hidden test": 23649, "footnote url": 21281, "existing self": 19139, "predict masked": 41645, "source models": 51784, "joint representations": 27189, "art multi": 4303, "capability model": 7610, "conditional generation": 9992, "68 accuracy": 479, "performance related": 40527, "issue model": 27069, "model optimize": 34148, "dataset performs": 13028, "different demographic": 14896, "using metrics": 60803, "bias dataset": 7022, "low memory": 31161, "demonstrated state": 14019, "tuned bert": 58867, "generation long": 22486, "novel step": 37928, "addition investigate": 1623, "challenging benchmark": 8083, "causes effects": 7891, "new situations": 37317, "model combine": 33665, "morphological richness": 35843, "major limitation": 31515, "summarization code": 53878, "despite prevalence": 14376, "tweet level": 59005, "majority voting": 31535, "voting based": 61741, "level f1": 30117, "tasks directly": 55591, "directly applying": 15307, "patterns model": 39971, "prototypical networks": 43976, "based computer": 5632, "pairs previous": 39208, "works usually": 62917, "solve low": 51681, "boosts accuracy": 7263, "accuracy low": 1001, "points terms": 41080, "sensitive input": 49501, "input noise": 26306, "projection method": 43142, "generate generic": 22204, "like responses": 30498, "task shared": 55369, "task increasing": 55132, "model reasoning": 34278, "ability paper": 629, "capture useful": 7719, "standard encoder": 52487, "big model": 7091, "obtains competitive": 38245, "long sentence": 31023, "clinical research": 8673, "lack dataset": 27883, "answering based": 3064, "make dataset": 31560, "effective improve": 16660, "spaces propose": 51912, "languages question": 28764, "syntactically related": 54341, "human linguistic": 24202, "addressed problem": 1805, "novel interactive": 37843, "series questions": 50068, "analysis evaluate": 2659, "compared rule": 9448, "systems furthermore": 54509, "propose advanced": 43286, "attacks using": 4665, "generation focus": 22462, "performance generated": 40364, "proposed baseline": 43743, "scheme proposed": 48730, "using hard": 60723, "embeddings elmo": 17122, "information plays": 26008, "information findings": 25876, "deepen understanding": 13755, "uses features": 60510, "random guessing": 44881, "models provides": 35381, "potential improving": 41396, "improving understanding": 25199, "highlight model": 23867, "input agnostic": 26254, "gpt language": 22981, "optimized using": 38570, "transfer models": 58406, "dataset biases": 12831, "networks gan": 36857, "data generator": 12389, "knowledge crucial": 27430, "corpora existing": 11199, "entities work": 18092, "model commonsense": 33670, "requires careful": 46917, "based graph": 5760, "scope paper": 48779, "13 languages": 126, "evaluation recently": 18695, "contextualized embeddings": 10800, "methods comparison": 32795, "approach incorporates": 3570, "graph information": 23142, "relations context": 46021, "usually need": 61059, "need hand": 36569, "dialogue text": 14791, "generate possible": 22229, "employ multi": 17385, "constructed datasets": 10410, "parameters new": 39712, "techniques experiments": 56084, "differences paper": 14826, "respect different": 47344, "increasing research": 25461, "years existing": 63059, "design enables": 14277, "benefit large": 6564, "common evaluation": 9174, "convert words": 11074, "different pairs": 15017, "high similarity": 23802, "score 62": 48798, "today world": 57278, "help learn": 23575, "long tailed": 31037, "raises concerns": 44858, "data diversity": 12292, "features models": 20624, "classification especially": 8463, "time performance": 57190, "examples results": 18929, "2016 presidential": 257, "focuses task": 21244, "independent content": 25495, "building new": 7461, "bert propose": 6707, "style content": 53482, "scale experiment": 48571, "knowledge common": 27423, "denoising autoencoders": 14066, "encoder bert": 17493, "jointly pre": 27216, "pre trains": 41603, "tuning stage": 58959, "model transformer": 34489, "specific techniques": 52156, "techniques significantly": 56136, "theoretically grounded": 57030, "models systematically": 35574, "popular sequence": 41185, "areas improvement": 4152, "leads models": 29320, "propose extended": 43382, "applying proposed": 3376, "scenarios experiments": 48696, "training helps": 58119, "comparison strong": 9508, "models strongly": 35538, "quality input": 44535, "input dialogue": 26264, "utterance representations": 61140, "using transformer": 60999, "models automated": 34743, "model statistical": 34410, "transition model": 58541, "explicitly leverages": 19640, "parameters significantly": 39722, "representation major": 46547, "speaker role": 52001, "understand users": 59317, "fail account": 20329, "currently state": 12038, "augment model": 4943, "model like": 34063, "discuss ways": 15484, "contrast recent": 10888, "proposed end": 43764, "end approaches": 17613, "pipeline end": 40898, "text approaches": 56439, "analysis suggest": 2771, "intermediate steps": 26680, "various attributes": 61304, "view model": 61599, "lot time": 31120, "fusion layer": 21854, "new strategy": 37326, "informative prior": 26175, "experiment proposed": 19245, "new sota": 37319, "anomaly detection": 3024, "analyses model": 2599, "common patterns": 9190, "methods train": 33079, "different generation": 14942, "hinders application": 23931, "scale model": 48596, "distillation approach": 15567, "model teacher": 34447, "previous knowledge": 42257, "distillation methods": 15574, "distillation student": 15578, "partially annotated": 39808, "methods machine": 32933, "specifically trained": 52231, "better fine": 6892, "tuning technique": 58968, "domain work": 16228, "adapt model": 1504, "time test": 57230, "data achieving": 12114, "generic model": 22631, "propose modified": 43471, "usually involve": 61053, "rules paper": 48395, "statistical method": 52752, "clean corpus": 8642, "dataset performed": 13027, "extracted model": 20016, "focused english": 21220, "sizable improvements": 51375, "developments neural": 14715, "findings highlight": 20907, "novel training": 37939, "multiple granularities": 36221, "transfer downstream": 58361, "user using": 60454, "model https": 33963, "effective improving": 16661, "performance real": 40519, "procedure paper": 42744, "language distribution": 28031, "detect correct": 14437, "70 accuracy": 487, "propose syntax": 43655, "represent syntactic": 46483, "variables experiments": 61228, "achieve lower": 1168, "reconstruction loss": 45583, "informed model": 26183, "recognition based": 45494, "focused learning": 21226, "input using": 26356, "simultaneously using": 51278, "different granularity": 14946, "inspired cognitive": 26405, "dependent specific": 14150, "models assessing": 34729, "pose problem": 41240, "main tasks": 31462, "datasets current": 13204, "vanilla transformer": 61217, "long complex": 31004, "commonly adopted": 9216, "focus lexical": 21175, "identify sentences": 24443, "methods comparing": 32794, "art summarization": 4415, "data https": 12408, "performance used": 40616, "input does": 26270, "knowledge chinese": 27422, "applications downstream": 3201, "levels experimental": 30238, "recent line": 45317, "gains state": 21943, "aware semantic": 5471, "relations long": 46041, "use discourse": 59868, "make progress": 31589, "progress goal": 43099, "classification analyze": 8432, "focuses improving": 21240, "domain perform": 16132, "automatically constructing": 5153, "domain large": 16101, "gate mechanism": 21992, "mechanism control": 32105, "reduce bias": 45650, "core module": 11150, "modeling multi": 34601, "remedy problem": 46367, "multi token": 36033, "identification multi": 24392, "computationally cheap": 9872, "baselines analysis": 6232, "proposed objective": 43872, "encoder vae": 17550, "present meta": 41942, "similarity experiments": 51095, "moderate sized": 35696, "tasks focused": 55647, "need identify": 36571, "observation present": 38122, "trained auxiliary": 57678, "66 f1": 474, "model rank": 34268, "shot semantic": 50641, "opinion based": 38500, "development social": 14703, "works rely": 62906, "fine coarse": 20922, "provide competitive": 44032, "task level": 55185, "popular transformer": 41196, "required training": 46907, "effectiveness learning": 16786, "sample training": 48458, "finally highlight": 20862, "modelling techniques": 34645, "tasks finally": 55640, "sampling techniques": 48510, "reliable way": 46254, "deep multi": 13729, "motivated intuition": 35869, "performance dialogue": 40289, "robust handle": 48249, "predicting multiple": 41679, "enhanced graph": 17932, "designed based": 14309, "capturing sequential": 7745, "series state": 50069, "models measure": 35220, "video transcripts": 61587, "topics related": 57461, "detection essential": 14480, "aspect information": 4529, "spaces different": 51909, "heterogeneous graph": 23620, "information heterogeneous": 25903, "shows different": 50774, "developing new": 14661, "level control": 30088, "sentences paragraph": 49764, "model discourse": 33770, "codes data": 8876, "data help": 12399, "propose practical": 43586, "model especially": 33833, "explicitly trained": 19649, "languages neural": 28738, "accuracy natural": 1012, "nli tasks": 37456, "propose make": 43447, "attention fusion": 4752, "information state": 26104, "augmentation language": 4959, "datasets confirm": 13189, "labels model": 27838, "covering multiple": 11658, "involving entities": 27024, "quality entity": 44515, "diverse tasks": 15721, "prediction entity": 41700, "addition develop": 1609, "better entity": 6883, "gpt bert": 22973, "work multilingual": 62729, "version bert": 61551, "shot zero": 50657, "finetune model": 21041, "improve multilingual": 24876, "trained limited": 57775, "uncertainty based": 59228, "based confidence": 5637, "given dataset": 22734, "labels test": 27851, "challenging realistic": 8134, "realistic evaluation": 45149, "effective unsupervised": 16708, "lingual scenarios": 30724, "information obtained": 25994, "lingual pre": 30717, "training signals": 58258, "new pre": 37285, "models taking": 35582, "models encoder": 34963, "hinders improvement": 23932, "great successes": 23220, "contextualized information": 10803, "chinese tasks": 8322, "standard accuracy": 52461, "achieved strong": 1275, "regularize model": 45847, "model simply": 34388, "extraction complex": 20053, "nodes edges": 37591, "relations better": 46018, "types nodes": 59107, "level graph": 30128, "enables learn": 17441, "diverse collection": 15693, "baseline performances": 6201, "gap machine": 21968, "dependencies different": 14105, "aim capture": 2142, "model dependencies": 33748, "task defined": 54997, "dataset analysis": 12807, "advantages existing": 1952, "language encoders": 28048, "previously unknown": 42354, "furthermore analysis": 21802, "developed automatically": 14624, "trained combination": 57691, "stance prediction": 52456, "dataset combined": 12847, "setting human": 50326, "employ hierarchical": 17383, "accuracy baseline": 937, "models extensive": 35004, "analyses confirm": 2591, "texts short": 56924, "various information": 61347, "huge challenge": 24070, "models tackle": 35577, "task conduct": 54966, "conduct comprehensive": 10031, "experiments case": 19369, "tasks covering": 55562, "compare strong": 9369, "bert mbert": 6681, "number labeled": 38011, "annotation experiment": 2949, "tasks just": 55701, "static embedding": 52724, "years studies": 63078, "useful source": 60388, "problem instead": 42584, "structure entity": 53103, "model employing": 33813, "introduce self": 26856, "related given": 45909, "used shelf": 60301, "improve downstream": 24845, "deal issues": 13516, "model develop": 33760, "generated results": 22312, "existing medical": 19088, "novel automatic": 37774, "structure embedding": 53102, "features describing": 20557, "size diversity": 51381, "outcomes using": 38767, "based version": 6129, "tasks help": 55663, "tuning multiple": 58932, "15 languages": 149, "word substitutions": 62318, "models robustness": 35466, "test examples": 56348, "possible leverage": 41332, "accuracy gains": 982, "leading improvements": 29292, "automatically produce": 5196, "samples model": 48482, "key advantage": 27293, "studies verify": 53311, "present depth": 41887, "present fine": 41917, "models fine": 35028, "knowledge fine": 27484, "systems code": 54451, "training additional": 57927, "achieves absolute": 1300, "techniques shown": 56135, "fully trained": 21745, "trained gpt": 57745, "gpt models": 22984, "number data": 37992, "applying pre": 3374, "16 times": 166, "data fully": 12375, "achieved improvement": 1247, "model gpt": 33944, "tagging approach": 54736, "task target": 55427, "target texts": 54850, "evaluated english": 18531, "systems produce": 54601, "produce plausible": 42997, "round trip": 48359, "sentence approach": 49517, "approach successfully": 3710, "development sets": 14702, "following questions": 21267, "resource nlp": 47257, "experimental setups": 19327, "limitation existing": 30536, "training means": 58170, "ways propose": 61844, "training second": 58241, "challenging previous": 8125, "read write": 45068, "available source": 5367, "context provides": 10699, "work recent": 62801, "modeling hierarchical": 34581, "benchmark machine": 6475, "text rich": 56750, "networks text": 36916, "benefits downstream": 6581, "leverage textual": 30293, "terms work": 56325, "multi granular": 35961, "signals text": 50836, "based high": 5771, "unsupervised technique": 59740, "generating diverse": 22372, "target sequences": 54841, "method explicitly": 32498, "using general": 60702, "diverse content": 15694, "content source": 10560, "diversity training": 15741, "scores 90": 48890, "solely text": 51645, "grounding model": 23265, "metric scores": 33124, "results single": 47850, "novel entity": 37818, "informative coherent": 26169, "approach entity": 3514, "salient sentences": 48442, "generate final": 22201, "generally trained": 22172, "data spanning": 12679, "distribution propose": 15649, "overlap training": 39089, "reviews news": 48049, "accuracy statistical": 1054, "models remain": 35433, "train transformer": 57651, "produce accurate": 42974, "leverage multi": 30278, "better alignment": 6849, "costly process": 11603, "stream tasks": 52960, "tasks key": 55702, "detection benchmarks": 14463, "benchmarks empirically": 6519, "applied corpus": 3265, "effectively utilizes": 16763, "sub graph": 53519, "knowledge aware": 27403, "aware graph": 5450, "graph network": 23150, "models relation": 35421, "propose annotate": 43292, "automatically labeled": 5186, "5x speedup": 455, "video games": 61584, "approach demonstrate": 3480, "semantics pre": 49411, "trained semantic": 57859, "improved language": 24950, "aware bert": 5443, "compared bert": 9389, "comprehension language": 9766, "promising method": 43170, "according word": 871, "building robust": 7468, "simple highly": 51179, "scale pretrained": 48614, "techniques achieve": 56053, "recently transformer": 45470, "syntax structure": 54354, "module trained": 35769, "objective model": 38096, "generate better": 22181, "systematically analyze": 54408, "based propose": 5958, "help knowledge": 23573, "pretraining models": 42212, "range downstream": 44917, "modeling objectives": 34607, "objectives work": 38114, "standard bert": 52474, "bert masked": 6679, "word relation": 62278, "vanilla bert": 61215, "knowledge triples": 27636, "art using": 4436, "generate token": 22257, "generated tokens": 22327, "generate tokens": 22258, "autoregressive sequence": 5225, "knowledge grammar": 27495, "grammatical knowledge": 23074, "generated dataset": 22283, "methods conclude": 32797, "human writers": 24257, "developments natural": 14713, "interactions self": 26621, "encoders like": 17557, "like gpt": 30473, "test standard": 56381, "trained transformers": 57903, "stronger results": 53064, "second assess": 48997, "networks capture": 36836, "context clues": 10597, "form complex": 21315, "articles different": 4467, "media outlets": 32173, "questions different": 44784, "knowledge generate": 27491, "knowledge enriched": 27465, "text relevant": 56734, "study public": 53449, "problem dialogue": 42538, "shows great": 50780, "evaluate state": 18507, "bert led": 6674, "focal point": 21141, "dutch language": 16477, "order overcome": 38644, "method applies": 32384, "reasoning skills": 45223, "work largely": 62705, "student teacher": 53215, "produce reliable": 43003, "single deep": 51295, "mechanism utilizes": 32149, "alternative propose": 2508, "sentences results": 49779, "information compared": 25783, "contextualised word": 10795, "consider linguistic": 10212, "question neural": 44741, "information automatically": 25765, "incorporate syntactic": 25365, "knowledge transformer": 27635, "novel parameter": 37891, "textual sources": 56983, "results automatic": 47513, "established baselines": 18352, "baselines significant": 6299, "achieves macro": 1343, "challenging testbed": 8159, "words rare": 62492, "languages achieves": 28592, "non overlapping": 37673, "properties like": 43265, "does mean": 15959, "accuracy second": 1043, "semantic reasoning": 49323, "essential natural": 18330, "prediction nsp": 41724, "bert learns": 6673, "learns contextual": 29955, "great significance": 23216, "remedy propose": 46368, "task includes": 55130, "encourages model": 17603, "broader range": 7364, "training recent": 58223, "research hotspot": 47048, "comprehensive review": 9797, "discuss open": 15476, "open issues": 38435, "type entity": 59055, "depending context": 14155, "context recently": 10703, "embeddings bert": 17087, "synthetic sentences": 54382, "reveal models": 48010, "data created": 12259, "dataset tweets": 13124, "author text": 5001, "models compute": 34842, "texts demonstrate": 56870, "correlation score": 11528, "metrics text": 33203, "language collected": 27992, "called neural": 7551, "module generate": 35759, "bert bidirectional": 6630, "related pre": 45926, "tasks masked": 55741, "base bert": 5538, "best pre": 6804, "approach particularly": 3637, "help make": 23576, "types pre": 59111, "neural lms": 36966, "domain classification": 16029, "multiple knowledge": 36232, "scale models": 48597, "representations structured": 46762, "curated knowledge": 11951, "enhanced bert": 17930, "extraction entity": 20060, "problems existing": 42700, "propose meta": 43450, "detection shared": 14524, "tune pre": 58861, "data augment": 12148, "data conditions": 12235, "models generalization": 35052, "cross dataset": 11811, "alleviate negative": 2415, "speech datasets": 52258, "introduce fully": 26807, "transfer results": 58418, "important characteristic": 24709, "different elements": 14911, "propose fine": 43388, "known priori": 27663, "truth label": 58838, "models contrast": 34861, "original task": 38729, "trained biomedical": 57685, "discourse structures": 15400, "detection work": 14541, "models promising": 35365, "training existing": 58096, "pretrained cross": 42150, "models underperform": 35644, "train fine": 57590, "language addition": 27953, "datasets outperform": 13357, "data compute": 12232, "model hand": 33950, "model prediction": 34223, "fusion mechanism": 21855, "mechanism improve": 32122, "evaluation scores": 18710, "analysis focuses": 2668, "focuses models": 21242, "information stored": 26106, "traditional pipeline": 57539, "information token": 26126, "prediction errors": 41702, "layers language": 29226, "numerous works": 38071, "models aim": 34703, "approaches latent": 3857, "systems explicitly": 54500, "global attention": 22821, "types based": 59078, "overwhelming majority": 39124, "increasingly large": 25474, "learning bert": 29546, "method applying": 32386, "identify set": 24444, "quality demonstrate": 44508, "increases model": 25435, "detection low": 14497, "resource text": 47283, "prototypical network": 43975, "methods zero": 33106, "control style": 10974, "directly related": 15335, "related textual": 45946, "quality evaluation": 44518, "task f1": 55075, "generated sentence": 22315, "generate structured": 22249, "process called": 42763, "learn optimal": 29406, "dialog generation": 14755, "introduced dataset": 26883, "suffer limited": 53772, "annotators label": 3015, "especially dealing": 18270, "helps generate": 23606, "al 2019a": 2242, "supervised data": 53975, "data goal": 12392, "goal learn": 22890, "learn neural": 29402, "model comparable": 33671, "corpora human": 11206, "corresponding labels": 11555, "framework combines": 21471, "specifically apply": 52180, "educational applications": 16606, "model checkpoints": 33654, "model iteratively": 34022, "score test": 48878, "set low": 50187, "cost effectively": 11580, "unlabeled dataset": 59568, "dataset domain": 12902, "number real": 38030, "analysis multi": 2700, "2017 2018": 264, "language reasoning": 28460, "single hop": 51307, "interpretability model": 26715, "sub questions": 53529, "designed human": 14320, "supporting facts": 54139, "modern deep": 35704, "processing present": 42924, "diagnostic tests": 14740, "distribution learning": 15644, "learning heuristic": 29669, "similar human": 51045, "suggests new": 53850, "strategies building": 52894, "understanding study": 59405, "instead training": 26464, "training augmented": 57937, "introduce multiple": 26828, "methods approach": 32754, "22 respectively": 319, "question use": 44756, "dbpedia wikidata": 13508, "scene graph": 48714, "attributes relations": 4910, "attention graph": 4755, "achieve score": 1190, "translation st": 58682, "task including": 55131, "knowledge important": 27520, "language sources": 28492, "direction propose": 15274, "need taken": 36591, "experiment use": 19254, "general world": 22098, "lingual multi": 30714, "quality low": 44546, "new low": 37243, "aware text": 5473, "rewriting text": 48081, "generation pre": 22519, "scale raw": 48619, "texts fine": 56881, "desired task": 14352, "structures better": 53181, "need research": 36589, "systems directly": 54481, "improve final": 24855, "recently nlp": 45446, "nlp domains": 37483, "billion parameters": 7119, "benefits model": 6584, "model parallel": 34178, "experiments transformer": 19548, "accuracy code": 946, "lingual representations": 30722, "based publicly": 5964, "static embeddings": 52725, "embeddings compare": 17096, "trained massive": 57785, "remain challenge": 46312, "german german": 22671, "previously released": 42345, "performance long": 40421, "embedding feature": 17028, "tasks public": 55829, "datasets comparing": 13184, "mrc task": 35907, "performance difficult": 40296, "pressing need": 42135, "carlo dropout": 7766, "shared semantic": 50487, "direction present": 15273, "present multilingual": 41951, "transformer self": 58509, "data multilingual": 12499, "understanding important": 59352, "bert capture": 6633, "capture general": 7673, "corpora lack": 11211, "lack domain": 27886, "called knowledge": 7548, "models equipped": 34972, "training self": 58242, "parameters pre": 39716, "especially domain": 18273, "automatically provide": 5197, "statistics dataset": 52778, "model explain": 33854, "discuss current": 15463, "amounts high": 2548, "face challenges": 20241, "challenges related": 8075, "providing insights": 44248, "surprisingly little": 54190, "based explanations": 5719, "text way": 56842, "methods bert": 32770, "detection document": 14475, "systems leveraging": 54548, "uses contextual": 60499, "datasets text": 13458, "datasets diverse": 13234, "perform worse": 40161, "solving downstream": 51702, "experiments variety": 19555, "texts social": 56927, "datasets illustrate": 13294, "multiple downstream": 36205, "introduce embedding": 26803, "relation recognition": 45993, "multiple monolingual": 36252, "create language": 11704, "switched language": 54257, "difficult lack": 15171, "generate artificial": 22180, "require external": 46854, "constituency parsers": 10349, "using copy": 60630, "data leveraging": 12462, "based experimental": 5715, "acceptability judgments": 811, "surpasses previous": 54174, "contains valuable": 10507, "model comprises": 33688, "retrieve similar": 47980, "model suggesting": 34426, "finding relevant": 20902, "features various": 20694, "model fusion": 33915, "faster decoding": 20436, "emerging topics": 17273, "findings provide": 20912, "integrated representation": 26517, "problems need": 42715, "novel context": 37788, "units model": 59534, "use supervision": 60035, "set propose": 50226, "centric approach": 7927, "study assess": 53328, "feasibility proposed": 20469, "quality requirements": 44573, "language queries": 28455, "scenarios different": 48694, "domains experiments": 16255, "information domain": 25821, "specific document": 52071, "method evaluating": 32491, "datasets single": 13432, "significance testing": 50843, "methods strong": 33054, "learning important": 29676, "techniques transfer": 56146, "languages learning": 28709, "embeddings contextual": 17103, "learning contextual": 29570, "languages facilitate": 28668, "facilitate cross": 20263, "evaluate language": 18465, "information shown": 26084, "compositional manner": 9743, "structure trees": 53145, "model tuning": 34494, "features demonstrate": 20555, "output length": 38984, "model stronger": 34413, "accuracy generated": 984, "sentence corresponding": 49538, "connected neural": 10177, "network task": 36812, "task combined": 54955, "margin achieving": 31818, "great impact": 23205, "impact downstream": 24594, "various sentence": 61389, "challenging high": 8097, "improvement benchmark": 24988, "research computational": 47004, "argument spans": 4173, "used encode": 60160, "users twitter": 60484, "explore hypothesis": 19708, "task produce": 55298, "modeling semantic": 34621, "use translation": 60061, "better f1": 6890, "authors propose": 5008, "specific problems": 52131, "capture language": 7688, "summary various": 53916, "improve future": 24857, "addition performance": 1632, "model improving": 33986, "embeddings jointly": 17154, "layer training": 29209, "make robust": 31594, "expert evaluations": 19581, "produces high": 43028, "rouge bleu": 48349, "bias data": 7021, "did provide": 14810, "li et": 30419, "code used": 8867, "entities natural": 18068, "domain named": 16116, "remains significant": 46349, "provide context": 44043, "final classification": 20818, "advances language": 1912, "employ bert": 17375, "transfer capabilities": 58354, "express emotions": 19792, "knowledge dynamically": 27453, "datasets f1": 13270, "suggests models": 53847, "diverse text": 15722, "future progress": 21882, "com miulab": 9020, "training knowledge": 58140, "containing text": 10486, "difficult expensive": 15166, "signals using": 50838, "level human": 30130, "unsupervised feature": 59697, "representation documents": 46505, "carbon footprint": 7749, "additional cost": 1663, "modifications model": 35730, "times training": 57256, "predicting human": 41676, "ignored previous": 24496, "investigated work": 27001, "entities using": 18089, "extract key": 19980, "conducted datasets": 10079, "capture multi": 7698, "generated state": 22321, "highest f1": 23852, "classification critical": 8447, "areas machine": 4155, "applications conversational": 3191, "performance depends": 40286, "specifically build": 52183, "data prohibitively": 12568, "scale pretraining": 48616, "datasets indicate": 13301, "set achieved": 50102, "keyword search": 27352, "explores possibility": 19773, "adding extra": 1595, "information mi": 25972, "10 fold": 42, "low data": 31139, "resource ones": 47258, "largely depends": 29053, "type task": 59072, "contextual language": 10772, "minimal additional": 33283, "architecture encode": 4046, "single linear": 51314, "linear layer": 30658, "outperforms random": 38935, "spurious patterns": 52390, "methods resources": 33021, "models sensitive": 35479, "given documents": 22738, "labels task": 27850, "task humans": 55119, "data fail": 12359, "datasets perform": 13368, "spurious features": 52389, "available recently": 5358, "training monolingual": 58180, "models independently": 35127, "hundreds languages": 24296, "understanding essential": 59341, "paper inspired": 39397, "trained multilingual": 57816, "using mixture": 60805, "annotations corpus": 2986, "multilingual code": 36068, "cs data": 11922, "perform annotation": 40069, "various perspectives": 61377, "different cultures": 14884, "day life": 13503, "parts world": 39912, "task extremely": 55074, "label distribution": 27704, "aim improve": 2150, "framework generates": 21529, "generates pseudo": 22352, "framework improves": 21540, "performance shot": 40558, "related research": 45932, "features statistical": 20675, "demonstrate cross": 13885, "controlled generation": 10985, "multiple diverse": 36201, "generating sentence": 22394, "attributes including": 4907, "match target": 31901, "measured automatic": 32067, "contain large": 10464, "novel encoder": 37812, "successfully generates": 53745, "achieving highest": 1411, "graph aware": 23100, "prediction test": 41745, "understanding multilingual": 59368, "diverse multi": 15706, "space evaluate": 51858, "developing better": 14649, "levels lexical": 30243, "information resulting": 26058, "variational information": 61249, "sentences recent": 49777, "model general": 33919, "input existing": 26276, "controlled text": 10986, "generation training": 22569, "knowledge pretrained": 27576, "offer potential": 38295, "generative text": 22612, "marginal likelihood": 31825, "designed specific": 14330, "including news": 25284, "previously considered": 42331, "make challenging": 31547, "written summaries": 63011, "particular explore": 39845, "ways integrating": 61843, "relation arguments": 45964, "understand impact": 59297, "documents available": 15858, "model prior": 34235, "efficient use": 16907, "based tools": 6100, "image models": 24540, "training bidirectional": 57948, "better domain": 6878, "uncertainty estimates": 59229, "success downstream": 53699, "multi stream": 36013, "corpus best": 11288, "results simple": 47849, "classification layer": 8485, "layer bert": 29180, "validation dataset": 61194, "based benchmark": 5597, "performance higher": 40373, "group people": 23273, "word token": 62322, "large pretrained": 28941, "annotation work": 2981, "used label": 60221, "task human": 55118, "raises new": 44861, "trained general": 57735, "achieve improved": 1163, "correctness generated": 11497, "limitations current": 30544, "current approach": 11959, "build word": 7433, "extract word": 20003, "models assume": 34732, "defined set": 13785, "explicitly models": 19646, "quantitatively evaluate": 44631, "models aiming": 34705, "develop models": 14599, "use similarity": 60016, "target different": 54808, "commonly available": 9218, "explored literature": 19759, "expensive annotation": 19203, "complex structures": 9665, "samples labeled": 48479, "improving data": 25176, "representations limited": 46710, "long sequences": 31026, "techniques improving": 56099, "set english": 50143, "supplementary materials": 54107, "requires models": 46945, "parts text": 39911, "process propose": 42819, "consists series": 10327, "performance particular": 40481, "compositional generalization": 9742, "making hard": 31655, "attention maps": 4767, "task hope": 55117, "develop robust": 14611, "model french": 33910, "leveraging visual": 30345, "simple short": 51211, "evaluate novel": 18480, "datasets growing": 13287, "features multi": 20625, "architectures evaluate": 4109, "task helps": 55115, "user experiences": 60412, "annotation methods": 2956, "results higher": 47655, "noise input": 37599, "form domain": 21319, "form speech": 21336, "predictions work": 41772, "prone generating": 43227, "framework state": 21604, "framework shows": 21598, "standard way": 52540, "expert based": 19574, "pairs dataset": 39177, "applied training": 3306, "provide additional": 44005, "embeddings existing": 17130, "enables better": 17436, "models spoken": 35530, "language token": 28531, "desired target": 14351, "nature human": 36479, "original language": 38718, "able increase": 702, "propose algorithms": 43289, "training performed": 58206, "reasons propose": 45237, "word time": 62321, "performance generative": 40366, "tasks corresponding": 55561, "single topic": 51349, "scientific documents": 48759, "represent text": 46484, "represent documents": 46470, "bert widely": 6734, "including self": 25294, "art generation": 4264, "accomplish tasks": 847, "techniques evaluate": 56083, "corpus nlp": 11391, "explore model": 19715, "task prediction": 55284, "used measuring": 60234, "benchmarks evaluating": 6521, "provide complete": 44035, "corpus named": 11386, "proposed corpus": 43747, "baseline classifiers": 6161, "data good": 12394, "data cases": 12200, "data build": 12194, "models susceptible": 35571, "general linguistic": 22067, "model biases": 33629, "sentiment model": 49852, "performance largely": 40414, "knowledge context": 27427, "entities existing": 18049, "entities semantic": 18081, "fusion module": 21860, "target class": 54802, "unlabeled documents": 59571, "heuristic methods": 23628, "pseudo labeling": 44277, "models linear": 35190, "build multilingual": 7415, "language explore": 28063, "strategies training": 52918, "use multilingual": 59955, "impacts performance": 24614, "network text": 36813, "applied token": 3304, "use adversarial": 59816, "generate adversarial": 22176, "performance complex": 40262, "complex nlp": 9644, "training available": 57940, "trained unlabeled": 57906, "capable zero": 7632, "extractive question": 20137, "provides results": 44223, "languages directly": 28642, "size models": 51391, "downstream fine": 16338, "conversational models": 11046, "models leveraged": 35183, "context conversation": 10600, "conversation based": 11029, "datasets improvement": 13297, "performance robustness": 40543, "task subtasks": 55420, "following issues": 21265, "affect downstream": 2012, "tackle issues": 54706, "detection model": 14501, "entities proposed": 18074, "investigate language": 26964, "learned self": 29480, "help humans": 23568, "interactive tool": 26634, "languages pre": 28753, "systems demonstrate": 54473, "representations given": 46678, "parsing problem": 39792, "text sql": 56787, "approach treats": 3728, "historical corpora": 23957, "recognition methods": 45512, "resources including": 47306, "f1 macro": 20185, "macro score": 31410, "modal information": 33458, "surprisingly high": 54189, "remain competitive": 46314, "extract contextual": 19971, "text shown": 56768, "tasks leveraging": 55721, "provided pre": 44171, "trained masked": 57783, "models semi": 35477, "model run": 34332, "deployment models": 14176, "production environments": 43047, "recent trend": 45363, "roberta based": 48216, "transformer layer": 58493, "answer accuracy": 3030, "learnable parameters": 29449, "architectures training": 4127, "success recently": 53724, "tuned large": 58878, "models run": 35467, "machine human": 31302, "75 accuracy": 502, "accuracy higher": 986, "method extracts": 32505, "model wide": 34534, "single training": 51350, "training run": 58235, "course training": 11637, "lot memory": 31116, "applying deep": 3360, "researchers using": 47169, "directly comparable": 15309, "labelled dataset": 27801, "set benchmarks": 50115, "mechanism experimental": 32116, "external tools": 19956, "efficient solution": 16901, "task baseline": 54930, "trained additional": 57671, "used techniques": 60326, "techniques domain": 56079, "dataset baselines": 12824, "dl models": 15754, "achieving human": 1412, "bert classifier": 6635, "architecture integrating": 4054, "large synthetic": 29021, "architectures bert": 4104, "learn nuances": 29405, "knowledge obtained": 27562, "test items": 56352, "contains 000": 10489, "automatic way": 5136, "capture document": 7664, "information transformer": 26133, "baselines achieved": 6227, "context proposed": 10697, "encoder encoder": 17511, "classification baselines": 8438, "embeddings introduce": 17153, "accuracy bert": 939, "speaker identification": 51997, "proposed different": 43758, "different contributions": 14877, "comparison baselines": 9492, "apply multi": 3338, "language obtained": 28362, "models target": 35583, "written author": 62995, "approaches built": 3781, "techniques context": 56071, "relational graph": 46007, "learning open": 29789, "specific queries": 52134, "adversarial domain": 1967, "small gold": 51474, "performance par": 40480, "provide example": 44064, "supervised objective": 54028, "methods transfer": 33082, "previous model": 42263, "teacher knowledge": 55991, "effectively reduces": 16755, "identify word": 24451, "explore compare": 19692, "compare ways": 9377, "control output": 10970, "require information": 46863, "local graph": 30941, "encode graph": 17463, "generative tasks": 22611, "input long": 26295, "used explain": 60179, "generate similar": 22247, "predicted label": 41666, "learning key": 29690, "semantics context": 49400, "parts document": 39904, "highly non": 23907, "document content": 15777, "characteristics human": 8237, "focused exclusively": 21222, "gold annotations": 22911, "multimodal dataset": 36146, "dataset developed": 12895, "use acoustic": 59814, "dataset research": 13065, "help advance": 23551, "trained encoders": 57719, "deal issue": 13515, "trained encoder": 57717, "specific layers": 52103, "labeled instances": 27759, "space large": 51871, "using label": 60748, "newly constructed": 37373, "important low": 24743, "use internet": 59916, "expert curated": 19576, "learning tl": 29913, "91 accuracy": 556, "shot baselines": 50601, "baselines furthermore": 6263, "transfer techniques": 58425, "performance achieving": 40181, "model lastly": 34044, "utilized training": 61110, "drawn different": 16409, "having different": 23486, "level work": 30234, "datasets creating": 13202, "architecture generate": 4050, "experiments discuss": 19417, "based scores": 6006, "task relation": 55329, "respectively recent": 47381, "including various": 25319, "internal dataset": 26685, "recent breakthroughs": 45298, "learning demonstrate": 29585, "application pre": 3175, "encoder pre": 17532, "datasets notably": 13349, "applications pre": 3232, "methods comparable": 32791, "task state": 55410, "fixed number": 21078, "easy hard": 16561, "output predictions": 38991, "transformer using": 58515, "decoder layers": 13598, "approach estimate": 3517, "regression models": 45815, "generated automatic": 22269, "experts propose": 19592, "noisy input": 37619, "relation graph": 45983, "higher f1": 23823, "pretraining methods": 42210, "insertion deletion": 26379, "based estimated": 5707, "validates effectiveness": 61189, "furthermore extend": 21823, "applications computer": 3190, "investigated paper": 26999, "systems particular": 54585, "construct benchmark": 10382, "variety applications": 61261, "automatically summarize": 5202, "specific keywords": 52092, "steps taken": 52844, "subword representations": 53686, "representations context": 46632, "enriched word": 17964, "problems use": 42736, "method handle": 32521, "efficiency problem": 16851, "transformers recently": 58530, "tuning procedure": 58949, "method conceptually": 32431, "layer transformer": 29210, "classification decision": 8454, "texts natural": 56906, "generation translation": 22570, "systems represent": 54618, "represent knowledge": 46475, "nlu research": 37568, "independent representations": 25503, "algorithms learning": 2328, "traditional evaluation": 57517, "models utility": 35662, "experiments limited": 19457, "learning simple": 29880, "attention matrices": 4768, "recently generative": 45429, "common issue": 9180, "reward signal": 48071, "does appear": 15935, "choice language": 8332, "language prior": 28389, "metrics demonstrating": 33158, "importance language": 24684, "new chinese": 37149, "instructions based": 26484, "language zero": 28587, "empirical findings": 17330, "studies limited": 53278, "study differences": 53360, "differences speech": 14829, "available text": 5377, "determinantal point": 14550, "rise deep": 48152, "lack thereof": 27921, "contributing factor": 10939, "develop transformer": 14620, "small model": 51484, "larger models": 29082, "train small": 57636, "ner based": 36676, "shown benefit": 50697, "languages mainly": 28722, "far explored": 20398, "study methods": 53411, "corpus augmented": 11281, "learn syntactic": 29432, "resulting better": 47461, "annotated sentence": 2914, "applying methods": 3367, "datasets evaluating": 13254, "examine different": 18861, "unsupervised multilingual": 59715, "giving rise": 22814, "based masked": 5833, "new embedding": 37180, "languages release": 28768, "fail effectively": 20335, "understanding multiple": 59369, "information online": 25995, "news classification": 37391, "accuracy existing": 970, "scenarios code": 48692, "increasing rapidly": 25460, "users generate": 60466, "performance changes": 40232, "input contains": 26259, "multiple sub": 36295, "apply deep": 3324, "provides competitive": 44187, "setting source": 50349, "code experiments": 8812, "results available": 47515, "extraction natural": 20087, "search optimal": 48976, "models number": 35269, "generated different": 22285, "errors present": 18247, "sequence graph": 49931, "87 f1": 541, "respectively second": 47383, "exhibits state": 19010, "tailed distribution": 54766, "lead high": 29258, "according experiments": 861, "greater accuracy": 23223, "learned data": 29454, "labelled examples": 27803, "intended use": 26551, "language variations": 28574, "aim predict": 2156, "reduction using": 45723, "adaptation framework": 1525, "time aware": 57121, "corpora fine": 11203, "lottery ticket": 31127, "ticket hypothesis": 57101, "better test": 6978, "question work": 44759, "domain addition": 16012, "particular domains": 39844, "processing approach": 42853, "finally make": 20867, "19 english": 186, "multiple tokens": 36303, "prevent model": 42231, "produce multiple": 42992, "based modeling": 5864, "models follow": 35040, "explore large": 19712, "task order": 55249, "models bart": 34751, "model reconstruct": 34285, "effective fine": 16651, "performance roberta": 40541, "training resources": 58230, "glue squad": 22866, "end use": 17723, "significant advantages": 50849, "encoding module": 17573, "augmented transformer": 4984, "transformer paper": 58507, "single end": 51300, "best end": 6759, "score 72": 48807, "deal domain": 13514, "models extremely": 35010, "model unseen": 34503, "settings zero": 50405, "data 13": 12102, "model potentially": 34215, "difficulty training": 15203, "systems context": 54459, "addition improving": 1620, "systems better": 54442, "models vulnerable": 35676, "retraining model": 47936, "model integrated": 34010, "building natural": 7458, "language perform": 28378, "user intended": 60424, "accuracy 78": 911, "behavior using": 6398, "task bert": 54934, "provides large": 44210, "data bert": 12187, "power large": 41425, "bert sentence": 6715, "mining machine": 33316, "information work": 26164, "related studies": 45940, "studies recently": 53294, "data remains": 12600, "context open": 10682, "leverage data": 30262, "tasks transfer": 55940, "tuning smaller": 58956, "human model": 24208, "model loop": 34073, "dataset leads": 12982, "shortcomings current": 50579, "pretrained masked": 42166, "models mlms": 35227, "tasks instead": 55693, "gains domain": 21937, "linguistic acceptability": 30745, "greatly improving": 23233, "quite common": 44827, "number errors": 38001, "analysis common": 2631, "result training": 47455, "resource domains": 47223, "various benchmark": 61309, "languages ii": 28689, "effectively handle": 16737, "architecture capture": 4033, "previous sota": 42279, "monolingual datasets": 35799, "datasets common": 13179, "generative pre": 22602, "transformer trained": 58512, "hugging face": 24083, "generate relevant": 22235, "systems pre": 54592, "training pipeline": 58209, "personal experiences": 40756, "set proposed": 50227, "tasks identify": 55666, "similarity candidate": 51088, "candidate reference": 7577, "gram overlap": 23059, "corpus natural": 11387, "traditional metrics": 57530, "level pre": 30178, "easily adapt": 16533, "training result": 58231, "scenarios limited": 48700, "limitations evaluation": 30547, "thorough error": 57057, "guide future": 23331, "future direction": 21870, "evaluated new": 18540, "new examples": 37197, "complex documents": 9624, "gap training": 21982, "optimization algorithm": 38543, "number pre": 38029, "adding noise": 1597, "detailed annotations": 14416, "limited single": 30615, "evidence extraction": 18810, "aid future": 2127, "corpus construction": 11306, "experiments corpus": 19392, "corpus number": 11393, "defined data": 13784, "models providing": 35382, "retrieve information": 47978, "suffers lack": 53791, "test instance": 56350, "hungry models": 24301, "models heavily": 35080, "rely labeled": 46290, "scenarios data": 48693, "knowledge generating": 27492, "limited generalization": 30588, "labeling data": 27782, "extraction sentiment": 20108, "study factors": 53378, "designed assess": 14308, "fail exploit": 20336, "works attempt": 62876, "terms corresponding": 56280, "texts task": 56932, "performs tasks": 40721, "framework employs": 21501, "representations terms": 46769, "contextualized language": 10804, "tuning downstream": 58908, "power pre": 41429, "output final": 38973, "benchmark introduce": 6473, "require extra": 46855, "ensembles models": 17985, "effective knowledge": 16663, "released source": 46183, "bias training": 7045, "language coverage": 28011, "cover small": 11647, "corresponding answers": 11546, "bert downstream": 6646, "neural question": 37086, "questions additionally": 44767, "knowledge including": 27522, "label aware": 27692, "construct knowledge": 10389, "domains overcome": 16280, "limitation propose": 30538, "using individual": 60735, "domains based": 16236, "achieve effective": 1133, "knowledge sharing": 27609, "capture fine": 7671, "semantic rules": 49341, "need fine": 36566, "method chinese": 32413, "furthermore method": 21827, "main points": 31451, "datasets effectiveness": 13241, "pipeline approaches": 40892, "specifically devise": 52196, "imbalance issue": 24562, "entropy ce": 18159, "training instance": 58134, "entropy objective": 18166, "data imbalanced": 12416, "examples training": 18939, "range data": 44910, "tasks notably": 55769, "inference instead": 25662, "optimization approach": 38544, "additional loss": 1687, "loss terms": 31105, "paper contribute": 39307, "empirically validate": 17370, "tasks showing": 55885, "performance prior": 40500, "serve strong": 50082, "results seven": 47829, "multiple times": 36302, "bert multi": 6690, "does entail": 15944, "model contextualized": 33713, "overcome shortcomings": 39074, "image pairs": 24541, "texts images": 56888, "effective attention": 16632, "tasks manually": 55740, "results verify": 47905, "consumption training": 10457, "datasets various": 13479, "model testing": 34455, "model roberta": 34329, "questions generated": 44789, "models masked": 35217, "enhanced performance": 17935, "relations addition": 46015, "mlm based": 33438, "represent entities": 46472, "edges represent": 16589, "recently graph": 45430, "experiments qa": 19506, "performance computational": 40263, "space finally": 51865, "using annotations": 60560, "text low": 56651, "easily combined": 16539, "learning improve": 29677, "stopping criterion": 52868, "models 12": 34649, "training convergence": 57960, "answer driven": 3033, "research evaluation": 47030, "learns semantic": 29974, "demonstrate need": 13949, "lingual tasks": 30731, "using zero": 61030, "presents interesting": 42087, "build better": 7388, "pairs addition": 39167, "limitations models": 30552, "new probing": 37288, "knowledge stored": 27620, "models plms": 35325, "tackling problem": 54721, "evaluate machine": 18468, "conversational settings": 11052, "experiment new": 19244, "metrics measure": 33179, "measure different": 32049, "fail generate": 20339, "annotating large": 2932, "available benchmark": 5265, "making easy": 31653, "progress pre": 43110, "works investigated": 62894, "models unclear": 35643, "decoding strategies": 13648, "nucleus sampling": 37978, "method proved": 32627, "effectiveness various": 16822, "virtual adversarial": 61623, "aims correct": 2183, "hierarchical graph": 23671, "despite significant": 14388, "investigate issue": 26963, "generating adversarial": 22364, "examples new": 18919, "examples present": 18923, "performance order": 40467, "evaluate bert": 18444, "bert baselines": 6627, "original bert": 38704, "respectively proposed": 47379, "sub modules": 53523, "encoder output": 17531, "vocabulary space": 61714, "near sota": 36513, "sota performance": 51730, "using seq2seq": 60929, "sentences inference": 49737, "step framework": 52809, "learns map": 29965, "techniques aim": 56057, "multimodal features": 36148, "data original": 12524, "techniques especially": 56082, "tend rely": 56207, "rely spurious": 46301, "distribution generalization": 15639, "efficiency compared": 16839, "methods self": 33029, "shown tremendous": 50757, "tremendous progress": 58774, "number classes": 37988, "shows better": 50765, "generalization tasks": 22131, "examples label": 18914, "relative gain": 46098, "knowledge guided": 27511, "different knowledge": 14960, "text train": 56816, "having higher": 23488, "performance reduces": 40525, "knowledge helps": 27514, "problem formulation": 42571, "allowing models": 2447, "learn relations": 29412, "paper contains": 39306, "example given": 18878, "inference corpus": 25646, "80 f1": 522, "models ubiquitous": 35641, "ubiquitous natural": 59176, "english limited": 17836, "web crawled": 61884, "results good": 47651, "using larger": 60761, "semantic sentence": 49343, "latent vectors": 29148, "data generating": 12386, "sequence task": 50009, "gain insight": 21909, "focus attention": 21145, "performance indicating": 40391, "fixed sized": 21083, "generated according": 22266, "effectiveness training": 16818, "gaps current": 21987, "bert sequence": 6716, "cls token": 8728, "specific classification": 52055, "tuning phase": 58941, "improvements text": 25108, "variety settings": 61290, "introduce effective": 26801, "methods control": 32803, "method experimental": 32495, "best prior": 6808, "identifying correct": 24456, "particular context": 39837, "context address": 10581, "methods suggest": 33060, "outperformed existing": 38838, "showed performance": 50668, "involving multiple": 27027, "relations challenging": 46019, "decomposing complex": 13657, "questions existing": 44787, "combined existing": 9079, "majority existing": 31530, "utterance information": 61136, "learning capture": 29553, "design multi": 14290, "transfer pre": 58414, "general task": 22093, "tuning large": 58922, "approach establishes": 3516, "establishes state": 18362, "94 respectively": 565, "models reducing": 35419, "noisy datasets": 37616, "datasets fine": 13276, "tuning finally": 58914, "industrial setting": 25618, "set hand": 50163, "idea proposed": 24373, "world information": 62942, "just small": 27253, "improving precision": 25192, "systems machine": 54555, "review previous": 48035, "previous literature": 42258, "bert popular": 6702, "model generalizability": 33920, "datasets trained": 13460, "approaches benefit": 3776, "lead sub": 29274, "solutions work": 51673, "domain twitter": 16219, "dataset gathered": 12940, "user question": 60442, "reduce time": 45681, "type question": 59066, "speaker identity": 51998, "training learn": 58154, "varying data": 61428, "style question": 53494, "model did": 33764, "relative contributions": 46090, "bring new": 7332, "requires commonsense": 46918, "work pretrained": 62766, "corpus provide": 11412, "embeddings set": 17213, "models handling": 35077, "data especially": 12327, "widely utilized": 62024, "utilized various": 61111, "showing promising": 50685, "visualization results": 61679, "instance based": 26424, "positive transfer": 41299, "tasks tested": 55930, "better integrate": 6903, "knowledge plms": 27571, "objectives experimental": 38112, "benchmark state": 6493, "research large": 47064, "com thu": 9025, "thu keg": 57097, "capability models": 7611, "investigate unsupervised": 26992, "generate reliable": 22236, "pseudo labeled": 44275, "domains extensive": 16256, "multiple large": 36239, "scale benchmark": 48555, "generation novel": 22510, "existing nlp": 19120, "retrieval module": 47957, "gaining increasing": 21926, "effectively exploit": 16734, "architecture pre": 4077, "knowledge research": 27595, "recent embedding": 45307, "certain scenarios": 7945, "scenarios finally": 48697, "relatively smaller": 46133, "explain model": 19594, "identify non": 24433, "prior efforts": 42401, "quantify importance": 44610, "human metrics": 24207, "project page": 43135, "enhancing performance": 17949, "bag sentences": 5501, "ignoring potential": 24501, "novel dynamic": 37809, "dynamically generates": 16498, "shift problem": 50543, "study demonstrate": 53355, "generation classification": 22434, "unable capture": 59199, "documents document": 15871, "source documents": 51764, "cases study": 7814, "keywords extracted": 27354, "accuracy comparison": 950, "use sub": 60033, "domains addition": 16233, "ml based": 33429, "problem given": 42574, "connected graph": 10174, "problem addition": 42497, "according experimental": 859, "process creating": 42766, "fundamental aspect": 21775, "corpus multiple": 11385, "highly reliable": 23911, "quantitative analyses": 44615, "representation transformer": 46597, "efficient representation": 16894, "documents important": 15885, "simple weighted": 51226, "weighted averaging": 61926, "shortcomings propose": 50582, "space complexity": 51852, "critical component": 11780, "output generated": 38974, "models gpt": 35067, "knowledge embedded": 27455, "large models": 28911, "work methods": 62724, "methods presented": 32989, "model goal": 33942, "labeling effort": 27784, "classification head": 8478, "class class": 8394, "expert evaluation": 19580, "aims produce": 2209, "better match": 6916, "dataset semeval": 13078, "importance using": 24693, "model embedding": 33806, "applied real": 3289, "layout information": 29243, "based pipeline": 5937, "documents proposed": 15906, "methods performed": 32982, "sample datasets": 48449, "key findings": 27314, "slight improvement": 51433, "systems support": 54645, "shot adaptation": 50597, "instead manually": 26456, "scores computed": 48897, "improvements cross": 25064, "understanding intent": 59354, "compared current": 9399, "approaches utilize": 3952, "classification experimental": 8467, "terms score": 56313, "tasks obtained": 55774, "effective achieves": 16626, "tasks self": 55868, "single token": 51348, "use local": 59936, "propose parallel": 43580, "terms different": 56283, "attention learning": 4766, "research practice": 47094, "task estimating": 55054, "annotated large": 2902, "language findings": 28071, "consistent different": 10273, "economic news": 16578, "short period": 50561, "large news": 28920, "learning discrete": 29593, "choice training": 8338, "performing submission": 40688, "finally model": 20869, "layer word": 29215, "used proxy": 60279, "use task": 60040, "steps using": 52845, "models established": 34975, "single input": 51308, "answering document": 3071, "query context": 44664, "efficient data": 16866, "trained features": 57730, "visualization attention": 61678, "significant importance": 50872, "domain used": 16224, "perform entity": 40096, "scale public": 48618, "systems increasingly": 54532, "recognition framework": 45507, "level sequential": 30212, "output given": 38975, "network conduct": 36725, "framework results": 21592, "studies reveal": 53297, "helps better": 23604, "individual neurons": 25575, "intuition propose": 26907, "rich feature": 48099, "communication propose": 9252, "time evaluate": 57150, "extremely challenging": 20154, "real scenario": 45109, "term dependency": 56234, "including task": 25307, "serve effective": 50077, "recent results": 45345, "existing publicly": 19132, "embeddings larger": 17161, "introduced novel": 26888, "neuro symbolic": 37116, "sentences certain": 49687, "models exploited": 34999, "context topic": 10734, "analysis combine": 2629, "text ranking": 56726, "ranking approach": 44967, "selects best": 49168, "attempt learn": 4689, "learning improves": 29679, "efficiency inference": 16844, "lms text": 30923, "scale general": 48574, "tuning strategy": 58963, "studies different": 53259, "way classification": 61796, "task designed": 55009, "creation datasets": 11748, "classification corpus": 8446, "driven analysis": 16418, "85 f1": 535, "attention potential": 4810, "uses sequence": 60534, "entities experiments": 18050, "score code": 48840, "similar examples": 51041, "examples target": 18934, "models noise": 35265, "transformer experiments": 58486, "model good": 33943, "encoder transformer": 17546, "approach leveraging": 3590, "events related": 18798, "introduce meta": 26820, "rich cross": 48096, "modal representation": 33464, "incorporates external": 25375, "internal external": 26686, "seen rapid": 49061, "explicitly defined": 19633, "loss term": 31104, "chinese datasets": 8305, "models enhanced": 34969, "used small": 60305, "guided learning": 23346, "vietnamese text": 61593, "users input": 60467, "domains text": 16296, "physical world": 40861, "accuracy large": 997, "biomedical publications": 7176, "having similar": 23491, "sharing similar": 50519, "accuracy latency": 998, "manner proposed": 31725, "method introduces": 32551, "special token": 52021, "output token": 39005, "improve reasoning": 24914, "reasoning ability": 45183, "design evaluation": 14281, "class label": 8405, "pretrained transformer": 42186, "better baselines": 6853, "bert style": 6723, "style models": 53491, "automatically discovering": 5164, "quality diverse": 44510, "answers different": 3107, "com jzbjyb": 9017, "tree representation": 58755, "evaluation toolkit": 18742, "bert achieved": 6603, "knowledge pre": 27572, "finally fine": 20860, "results fully": 47641, "extra supervision": 19965, "trained weak": 57911, "examples improve": 18910, "approach aims": 3414, "lexical normalization": 30373, "text external": 56573, "accuracy classification": 943, "positional encodings": 41273, "german text": 22677, "corpus time": 11445, "training requires": 58229, "truth word": 58841, "meaning source": 32017, "multimodal language": 36151, "framework pre": 21583, "verify efficacy": 61539, "brings improvement": 7341, "target response": 54837, "low entropy": 31150, "improve diversity": 24843, "improves semantic": 25159, "quite effective": 44830, "10 datasets": 39, "data formats": 12372, "propose explore": 43380, "tackling challenge": 54720, "systems address": 54426, "different subtasks": 15088, "help build": 23555, "emotion labels": 17291, "community study": 9276, "models self": 35474, "sentence evaluate": 49553, "explored topic": 19766, "focused generating": 21223, "various granularities": 61346, "supervision available": 54077, "different emotions": 14914, "manually creating": 31772, "methods systematically": 33064, "best use": 6835, "challenge automated": 7968, "semantics important": 49405, "using web": 61023, "kinds features": 27370, "features applied": 20522, "traditional way": 57555, "works relied": 62905, "models induced": 35129, "potential large": 41397, "experiments pre": 19489, "based non": 5917, "mentions propose": 32309, "propose span": 43642, "span level": 51925, "programming algorithm": 43083, "domain medical": 16109, "annotations code": 2985, "data downstream": 12296, "different monolingual": 14999, "prior language": 42404, "process proposed": 42820, "baselines fine": 6262, "build dataset": 7393, "work obtained": 62739, "identifying semantic": 24465, "sentential contexts": 49812, "aware approach": 5440, "approach relation": 3672, "sentences better": 49686, "information entire": 25836, "experiments semeval": 19518, "generating complex": 22368, "study leverage": 53406, "scenarios using": 48711, "augmentation approaches": 4949, "baseline terms": 6216, "automatic creation": 5075, "sets evaluate": 50291, "representations high": 46682, "collect high": 8942, "embeddings test": 17227, "analyzing text": 2846, "fully exploited": 21726, "model enhance": 33824, "measure model": 32057, "model reaches": 34273, "annotate new": 2874, "differ terms": 14817, "annotations provide": 2999, "rivals state": 48170, "art recent": 4362, "furthermore best": 21805, "paper end": 39347, "benchmark set": 6492, "cnn layers": 8769, "lstm bilstm": 31253, "feature map": 20496, "datasets shown": 13425, "cause models": 7886, "datasets fail": 13271, "extra training": 19966, "perform similarly": 40143, "analysis comparison": 2633, "performance provide": 40509, "people speak": 40038, "greatly reduces": 23238, "future nlp": 21879, "present various": 42052, "rely text": 46304, "specifically extract": 52201, "systems evaluated": 54490, "ai community": 2116, "spanish catalan": 51938, "words cbow": 62375, "methods compared": 32793, "multiple steps": 36291, "executable programs": 18980, "models non": 35267, "range reasoning": 44930, "help extract": 23562, "supervision provides": 54091, "challenges make": 8059, "particular large": 39851, "translation main": 58627, "data specifically": 12687, "tuning multilingual": 58931, "baselines achieving": 6230, "68 f1": 480, "tags dependency": 54757, "critical sequence": 11792, "sequence training": 50013, "general principles": 22082, "based processing": 5955, "focus exclusively": 21162, "fast learning": 20427, "multiple encoders": 36209, "training iterations": 58138, "better encode": 6881, "usually ignore": 61052, "attributes paper": 4909, "examine role": 18869, "graph context": 23119, "fed neural": 20706, "create release": 11715, "articles neural": 4471, "past works": 39938, "approach advantage": 3413, "growing need": 23298, "task typically": 55454, "incorporates context": 25373, "high inter": 23742, "lastly present": 29109, "approaches analysis": 3762, "mandarin english": 31698, "advancing state": 1935, "models focused": 35038, "understanding capabilities": 59327, "capabilities models": 7603, "lower resourced": 31224, "trained scratch": 57856, "scratch new": 48945, "model largely": 34043, "study open": 53425, "dependencies sentences": 14111, "sentences instead": 49739, "datasets demonstrating": 13221, "10 minutes": 47, "supervised state": 54051, "grained annotations": 23023, "dataset generate": 12942, "using popular": 60863, "representations hand": 46680, "representation crucial": 46501, "conversation humans": 11034, "understand given": 59295, "novel powerful": 37892, "process humans": 42789, "model mitigate": 34101, "evaluation strategies": 18727, "written human": 63000, "coherent fluent": 8915, "text extracting": 56576, "model discriminate": 33773, "aware representation": 5467, "additional inputs": 1677, "experiments framework": 19439, "analysis pre": 2720, "effectiveness self": 16812, "incorporate knowledge": 25357, "knowledge real": 27584, "applied downstream": 3271, "f1 improvements": 20184, "experiments benchmarks": 19365, "structures learn": 53187, "especially complex": 18267, "reveal interesting": 48009, "information unstructured": 26139, "source unsupervised": 51818, "disease detection": 15497, "detection f1": 14485, "task studied": 55414, "case multiple": 7792, "scarcity low": 48672, "state ofthe": 52705, "ofthe art": 38323, "transfer settings": 58420, "better knowledge": 6905, "semantics text": 49417, "hierarchical semantic": 23691, "model guide": 33948, "chain monte": 7959, "models learns": 35180, "correct semantically": 11474, "combination deep": 9039, "understood paper": 59423, "understanding underlying": 59411, "google assistant": 22952, "understanding user": 59413, "effectively identify": 16738, "current techniques": 12018, "generalization work": 22134, "framework detect": 21492, "box models": 7291, "analysis predictions": 2722, "importance research": 24687, "research tasks": 47128, "computational biology": 9836, "based discovery": 5682, "leverage state": 30290, "thoroughly evaluated": 57069, "main source": 31459, "neighbor knn": 36657, "lack publicly": 27908, "sentence alignments": 49516, "use resulting": 59998, "tuning based": 58901, "40 000": 395, "performance demonstrate": 40284, "paper suggests": 39584, "word positions": 62266, "general solution": 22091, "computer based": 9888, "medical entity": 32205, "crucial process": 11907, "model map": 34091, "codes models": 8878, "63 accuracy": 467, "research approach": 46982, "create highly": 11700, "training paradigms": 58204, "suitable datasets": 53856, "datasets testing": 13457, "testing models": 56408, "multi paragraph": 35995, "tasks believe": 55518, "unique structure": 59517, "annotations task": 3003, "attention experiments": 4747, "success pre": 53717, "shot evaluation": 50612, "correct predictions": 11473, "evaluation automatically": 18579, "encoder self": 17541, "using pairwise": 60852, "powerful language": 41435, "models implemented": 35103, "bidirectional transformers": 7085, "embedding representation": 17057, "better robustness": 6962, "knowledge addition": 27390, "contains important": 10495, "convolutional attention": 11102, "pairs sentence": 39215, "features attention": 20526, "mechanism employed": 32110, "important roles": 24769, "lot traction": 31121, "open new": 38438, "lack comprehensive": 27878, "exploration paper": 19679, "issues challenges": 27086, "researchers engineers": 47154, "findings propose": 20911, "dataset sizes": 13090, "human capabilities": 24120, "task domains": 55034, "steps building": 52841, "data building": 12195, "data process": 12564, "experiments given": 19440, "method source": 32664, "resources propose": 47328, "method reaches": 32632, "learning various": 29935, "various model": 61365, "component systems": 9714, "systems deep": 54472, "single dataset": 51293, "native non": 36404, "research conducted": 47005, "correction models": 11485, "consider real": 10219, "focuses detecting": 21236, "tweets dataset": 59013, "used experiment": 60177, "data generalize": 12381, "representation finally": 46519, "set identify": 50166, "guidelines human": 23353, "size makes": 51389, "small models": 51485, "bert different": 6642, "different downstream": 14909, "tasks motivated": 55751, "task adaptive": 54882, "tasks incorporate": 55684, "distillation loss": 15572, "demonstrate task": 13990, "terms parameter": 56305, "mainstream methods": 31480, "representations encoding": 46650, "information method": 25970, "interactive information": 26628, "form natural": 21329, "sub field": 53517, "progress domain": 43096, "train work": 57661, "need train": 36595, "models statistical": 35535, "employ novel": 17387, "classification low": 8488, "language performance": 28379, "spread social": 52379, "bias mitigation": 7033, "methods leveraging": 32927, "experiments existing": 19433, "semantic correspondence": 49264, "english multilingual": 17847, "bert used": 6731, "measured performance": 32069, "smaller datasets": 51517, "support downstream": 54118, "content text": 10564, "study information": 53392, "based maximum": 5837, "user interaction": 60427, "baseline accuracy": 6150, "online platforms": 38379, "new style": 37328, "approach obtain": 3615, "rl training": 48177, "encouraging model": 17605, "online users": 38393, "leverage transformer": 30294, "cross modality": 11866, "dataset cross": 12874, "pre computed": 41498, "location information": 30967, "task type": 55452, "numerical vectors": 38062, "approaches like": 3862, "produce single": 43010, "meanings paper": 32034, "embeddings present": 17192, "tasks input": 55690, "especially text": 18305, "original version": 38738, "analyzing model": 2844, "datasets approaches": 13158, "learning code": 29560, "mentions paper": 32308, "detection especially": 14479, "variety machine": 61279, "denoising auto": 14063, "using bart": 60582, "complete model": 9599, "enables new": 17446, "effective pre": 16685, "tasks low": 55732, "settings large": 50380, "leveraging large": 30329, "corpora respectively": 11238, "propose simplified": 43638, "influence model": 25727, "performance 67": 40171, "samples train": 48491, "scale transformer": 48633, "tuning gpt": 58917, "gpt using": 22991, "technique data": 56032, "sets using": 50311, "automated human": 5046, "able reduce": 722, "semi autoregressive": 49450, "conditional masked": 9998, "twitter social": 59041, "comprehensive evaluations": 9789, "achieved tremendous": 1279, "tremendous success": 58775, "performance bert": 40216, "layers bert": 29218, "tuning multi": 58930, "capture relationships": 7703, "inspired human": 26406, "reach new": 45050, "challenge requires": 8013, "according given": 862, "significance tests": 50844, "analysis conducted": 2636, "south asian": 51845, "related target": 45941, "set user": 50274, "set related": 50236, "extraction main": 20079, "model conversational": 33718, "post level": 41349, "variants model": 61236, "learns sentence": 29975, "level self": 30203, "knowledge evaluate": 27469, "trained parameters": 57838, "multimodal approach": 36142, "does account": 15933, "aware sentence": 5472, "verifies effectiveness": 61532, "yields comparable": 63120, "model averaging": 33595, "queries contain": 44650, "suffer issue": 53768, "answer work": 3059, "finetuning pretrained": 21051, "approaches various": 3953, "bert bilstm": 6632, "bilstm based": 7127, "domain existing": 16062, "models involve": 35148, "domain improve": 16083, "improve domain": 24844, "sentiment target": 49861, "mutual learning": 36351, "multiple public": 36267, "learning recent": 29834, "fundamental step": 21791, "step developing": 52804, "dataset deep": 12882, "errors work": 18254, "modeling complex": 34566, "relationship documents": 46068, "pretrain large": 42144, "model serve": 34364, "documents including": 15887, "including use": 25316, "dense representations": 14079, "extensive automatic": 19857, "iterative approach": 27123, "attempt explain": 4685, "trained lms": 57778, "approach previous": 3651, "approaches method": 3872, "issue present": 27073, "framework incorporate": 21543, "investigate types": 26991, "support task": 54128, "study best": 53336, "data distributed": 12289, "efficiently generate": 16915, "text perturbations": 56699, "scoring method": 48936, "setting results": 50348, "popular real": 41180, "used guide": 60202, "documents utilize": 15928, "number general": 38006, "efficiency experiments": 16842, "artificial agents": 4488, "methods target": 33067, "agent human": 2055, "topics keywords": 57452, "highlights effectiveness": 23875, "provided new": 44169, "highlight challenges": 23860, "describes task": 14237, "results submitted": 47862, "represent complex": 46467, "neural agent": 36928, "usually lack": 61055, "formal analysis": 21344, "interdisciplinary research": 26645, "text capture": 56461, "searching large": 48992, "time language": 57171, "computational memory": 9847, "metrics model": 33181, "input audio": 26256, "method creates": 32445, "processing based": 42856, "make new": 31584, "enhance accuracy": 17910, "knowledge injected": 27525, "model fixed": 33902, "backbone model": 5485, "wikipedia wikidata": 62056, "classification entity": 8460, "approach consider": 3463, "models discriminative": 34926, "trained joint": 57752, "outperform unsupervised": 38830, "just single": 27252, "based technique": 6085, "overcome limitations": 39068, "provide real": 44118, "transformer framework": 58488, "model publicly": 34261, "information generally": 25890, "embedded representation": 17007, "led remarkable": 29992, "indic languages": 25522, "novel setup": 37921, "auxiliary data": 5230, "scores state": 48922, "particular obtain": 39857, "data scientists": 12630, "answer qa": 3046, "hierarchical bilstm": 23661, "dataset compare": 12849, "representation transformers": 46598, "text popular": 56700, "matrix based": 31940, "information utilizing": 26150, "data differ": 12280, "data synthesis": 12715, "tuned model": 58879, "achieved higher": 1242, "trained just": 57754, "synthesize new": 54361, "combining data": 9109, "generate long": 22217, "models class": 34814, "embedding alignment": 17010, "transfer using": 58430, "models solely": 35518, "reference results": 45744, "tweets collected": 59010, "information candidate": 25773, "approach efficiently": 3504, "video recordings": 61585, "video data": 61583, "nlp based": 37467, "short form": 50555, "roc auc": 48298, "aims use": 2221, "images using": 24556, "key factor": 27309, "demonstrates advantages": 14027, "problem domains": 42546, "data embeddings": 12315, "provide meaningful": 44101, "important technique": 24781, "voice assistant": 61722, "generated language": 22294, "joint pre": 27183, "pairs fine": 39192, "better using": 6990, "systems current": 54466, "context result": 10707, "masked word": 31871, "benchmarks glue": 6526, "efficiency different": 16840, "different pretrained": 15031, "models differ": 34913, "better efficiency": 6880, "lms trained": 30924, "datasets evaluated": 13253, "features respectively": 20658, "number experiments": 38003, "corpora performance": 11232, "performance embeddings": 40315, "substantially different": 53633, "different results": 15053, "quantify performance": 44611, "initialization training": 26224, "observe fine": 38132, "training validation": 58312, "training dynamics": 58076, "partially observed": 39810, "achieving 90": 1390, "text consider": 56506, "mapping task": 31806, "extensive ablations": 19854, "code large": 8821, "efficient attention": 16863, "tasks bleu": 55529, "model scaling": 34337, "scaling approach": 48648, "propose bert": 43312, "pretraining using": 42221, "text transfer": 56821, "pretraining transformer": 42220, "based image": 5778, "loss prediction": 31102, "particular design": 39842, "number available": 37984, "heavy reliance": 23539, "effectiveness downstream": 16777, "transfer nlp": 58411, "computational budget": 9837, "shot tasks": 50650, "process essential": 42775, "create corpus": 11693, "attention academia": 4707, "models recognize": 35414, "effect model": 16615, "model predicted": 34221, "relationships sentence": 46082, "modeling proposed": 34616, "future researches": 21895, "specifically design": 52191, "able deal": 686, "including single": 25299, "outperforms transformer": 38956, "provide analyses": 44006, "datasets does": 13237, "encoder text": 17543, "matrix representation": 31943, "words additionally": 62362, "behavior different": 6391, "neural modules": 36985, "model displays": 33777, "task extract": 55071, "crucial nlp": 11905, "latent knowledge": 29129, "train knowledge": 57596, "using masked": 60789, "documents demonstrate": 15869, "retrieval augmented": 47940, "augmented language": 4980, "aim bridge": 2139, "resource low": 47250, "allowing better": 2444, "gained momentum": 21920, "driven applications": 16419, "alignment different": 2367, "challenging paper": 8123, "methods embedding": 32834, "text extracted": 56575, "introduce language": 26816, "work given": 62675, "systems real": 54610, "time work": 57238, "tasks outperform": 55778, "instance specific": 26430, "decisions using": 13576, "contextual bandit": 10759, "relations target": 46058, "benchmarks method": 6530, "approaches aim": 3759, "systems capable": 54446, "algorithm unsupervised": 2308, "makes prediction": 31631, "divide conquer": 15744, "hop questions": 24003, "like humans": 30476, "adopt neural": 1865, "questions corresponding": 44780, "content quality": 10550, "systems mainly": 54557, "simple fine": 51170, "models little": 35193, "models brought": 34796, "propose replace": 43604, "encoder layer": 17521, "scientific field": 48761, "self distillation": 49195, "distillation experiments": 15569, "investigate question": 26979, "sparse matrix": 51969, "hard negative": 23446, "examples using": 18941, "required answer": 46899, "second existing": 49004, "required generate": 46901, "modalities including": 33469, "text visual": 56841, "visual audio": 61649, "text non": 56677, "non text": 37686, "generation capability": 22430, "recently multi": 45440, "task great": 55110, "introduce bert": 26787, "generation able": 22408, "generation data": 22441, "obtains substantial": 38260, "empirically compare": 17357, "task diverse": 55027, "covering languages": 11657, "size large": 51388, "model required": 34312, "kgs based": 27363, "perform joint": 40116, "model bi": 33627, "large transformer": 29033, "models inspired": 35134, "randomly generated": 44898, "fully trainable": 21744, "empirical methods": 17333, "methods transformer": 33084, "review current": 48027, "state knowledge": 52700, "main advantage": 31422, "worse performance": 62973, "tasks studies": 55913, "data considering": 12240, "small perturbations": 51493, "substantially improved": 53638, "propose pre": 43587, "train unified": 57655, "masked tokens": 31870, "tokens context": 57324, "provide global": 44079, "model bidirectional": 33630, "tasks widely": 55964, "affect final": 2014, "arabic natural": 4002, "transformers based": 58521, "specific bert": 52049, "corpus models": 11383, "research applications": 46981, "model brings": 33634, "learning efficiency": 29611, "models far": 35018, "adaptive multi": 1577, "large public": 28946, "public large": 44322, "model xlm": 34548, "al 2020": 2244, "tasks applications": 55501, "extraction challenging": 20051, "task important": 55124, "model self": 34348, "direct application": 15252, "tuning small": 58955, "manually tagged": 31788, "various deep": 61322, "based automated": 5587, "learning hybrid": 29675, "presented dataset": 42059, "corpus conduct": 11301, "corpus achieve": 11266, "used chinese": 60113, "meta embedding": 32331, "modal pre": 33462, "method shot": 32649, "based gpt": 5756, "dataset robust": 13071, "investigate applicability": 26940, "conventional gram": 11003, "translation bt": 58587, "study languages": 53403, "online https": 38369, "encoder language": 17520, "data trained": 12740, "tasks topic": 55936, "setting training": 50353, "supervised zero": 54072, "decrease number": 13668, "mbert model": 31979, "results zero": 47914, "domains tasks": 16295, "tasks aim": 55495, "greatly advanced": 23226, "success methods": 53709, "available human": 5311, "effectiveness cross": 16773, "academic writing": 795, "achieves score": 1362, "describes work": 14238, "work developing": 62635, "texts domain": 56875, "social groups": 51563, "studied large": 53227, "research create": 47006, "high context": 23718, "annotated expert": 2896, "industrial research": 25617, "depending type": 14159, "training documents": 58069, "years brought": 63052, "trained millions": 57789, "leads higher": 29314, "structured text": 53178, "challenges automatic": 8034, "construction methods": 10428, "translation sentiment": 58675, "according results": 867, "topic relevance": 57425, "large sized": 29012, "sets available": 50282, "best choice": 6756, "new technique": 37338, "accurate semantic": 1088, "relations input": 46037, "graph using": 23178, "present baseline": 41853, "systems hope": 54521, "world environment": 62939, "explore relative": 19732, "common languages": 9183, "incorporate multi": 25359, "performance simpler": 40564, "end sentence": 17706, "search paper": 48978, "representation spaces": 46583, "differ substantially": 14816, "approach combining": 3453, "datasets extensive": 13267, "lingual representation": 30720, "fasttext bert": 20446, "unsupervised weakly": 59746, "community effort": 9264, "data unfortunately": 12752, "unclear models": 59237, "words make": 62452, "curated data": 11948, "different use": 15116, "present baselines": 41855, "novel contribution": 37792, "conversational ai": 11040, "share code": 50455, "models design": 34902, "regularization loss": 45838, "labels addition": 27808, "addition original": 1630, "information furthermore": 25886, "feature word": 20512, "level interpretability": 30137, "better recall": 6950, "rules neural": 48394, "process document": 42772, "higher human": 23827, "given contexts": 22730, "takes raw": 54783, "issue especially": 27061, "propose original": 43578, "addition corpus": 1605, "fairly compare": 20361, "methods dataset": 32811, "help address": 23550, "tackling task": 54722, "problem explore": 42560, "applications traditional": 3254, "networks improve": 36867, "strategy used": 52954, "improve representation": 24917, "representation texts": 46593, "significantly advanced": 50933, "limited low": 30598, "parsing tree": 39802, "major types": 31524, "view features": 61597, "method enhance": 32484, "enhance state": 17923, "networks potential": 36892, "consists subtasks": 10333, "data costly": 12255, "expressed different": 19797, "development dataset": 14673, "sheer scale": 50534, "cohen kappa": 8902, "existing widely": 19168, "analysis including": 2680, "code documentation": 8811, "global word": 22848, "ground breaking": 23250, "breaking performance": 7313, "survey review": 54219, "common ground": 9178, "word count": 62135, "assessment data": 4591, "dataset future": 12939, "despite high": 14366, "challenging domain": 8091, "temperature scaling": 56172, "effective reducing": 16688, "lingual alignment": 30691, "alignment human": 2369, "fluency relevance": 21126, "semantic changes": 49245, "techniques finally": 56088, "general natural": 22072, "languages indian": 28696, "utilized improve": 61109, "sufficient labeled": 53804, "different platforms": 15026, "boost training": 7257, "type annotation": 59048, "datasets validate": 13476, "focus arabic": 21144, "explanation generation": 19604, "language train": 28533, "propose generating": 43399, "performance advantage": 40188, "events news": 18795, "work automatically": 62582, "novel datasets": 37800, "approach datasets": 3478, "speech natural": 52273, "texts web": 56944, "modern society": 35719, "nigerian pidgin": 37441, "datasets open": 13354, "english despite": 17796, "evaluation multilingual": 18658, "multilingual encoders": 36083, "lingual generalization": 30703, "capabilities multilingual": 7605, "release benchmark": 46142, "performance gaps": 40360, "leverage domain": 30266, "general applied": 22044, "automatically annotate": 5140, "hybrid attention": 24312, "sentences mainly": 49751, "complex domain": 9625, "core natural": 11151, "approach tasks": 3718, "small portion": 51494, "multiple factors": 36215, "sufficient diversity": 53802, "important open": 24751, "training paradigm": 58203, "significant margins": 50899, "platforms provide": 40955, "model proved": 34254, "labels existing": 27820, "tend ignore": 56200, "assign different": 4598, "token classification": 57282, "methods construct": 32801, "domain finally": 16070, "finally construct": 20848, "models modern": 35231, "studies natural": 53285, "manual qualitative": 31750, "research use": 47138, "process multiple": 42807, "training nlp": 58193, "burden manual": 7497, "tweets used": 59025, "despite widespread": 14405, "predict labels": 41644, "synthetic code": 54368, "understand sentiment": 59313, "making important": 31656, "aggregating multiple": 2077, "inference information": 25661, "abundant information": 781, "information finally": 25875, "representations substantially": 46763, "tasks benchmark": 55519, "bert performed": 6700, "commonly occurring": 9220, "significant degradation": 50861, "degradation performance": 13802, "bert performance": 6699, "shortcomings existing": 50580, "performance findings": 40348, "presence noise": 41838, "bert solve": 6720, "available cc": 5269, "information flows": 25881, "single stream": 51340, "interaction module": 26607, "single modal": 51315, "corpus fine": 11343, "gained traction": 21924, "accuracy 69": 903, "alignment results": 2382, "dialogue based": 14767, "additional layer": 1683, "solutions paper": 51668, "assist researchers": 4611, "use methods": 59947, "ai including": 2117, "attribute aware": 4900, "need learn": 36578, "lead improvement": 29262, "shown performance": 50733, "pairs low": 39201, "languages mt": 28733, "benchmark evaluate": 6464, "work benchmark": 62587, "evaluate current": 18448, "multilingual modeling": 36097, "demonstrate generalization": 13916, "resourced settings": 47290, "languages included": 28693, "fasttext word": 20448, "roman script": 48329, "task standard": 55409, "processing understanding": 42962, "online text": 38390, "based gram": 5758, "tools natural": 57382, "result better": 47435, "proposes deep": 43931, "translation benchmark": 58584, "shelf models": 50539, "data ii": 12414, "words representing": 62499, "paper addition": 39249, "datasets goal": 13286, "datasets representative": 13401, "study influence": 53391, "context results": 10708, "bidirectional models": 7080, "plays fundamental": 40997, "fundamental role": 21790, "results offer": 47750, "human automatic": 24110, "scale cross": 48561, "task provides": 55309, "extend recent": 19829, "tasks evaluated": 55620, "improved models": 24953, "information makes": 25963, "approach non": 3613, "factors model": 20313, "shifts word": 50547, "monotonic alignment": 35821, "tokens model": 57329, "model unlabeled": 34501, "text main": 56653, "original document": 38710, "objectives improve": 38113, "training achieves": 57925, "new categories": 37145, "propose introduce": 43423, "relation network": 45989, "metrics experiments": 33164, "synonym replacement": 54285, "addition generating": 1618, "shown success": 50755, "model maintaining": 34082, "content method": 10537, "research studying": 47125, "generation non": 22509, "typically small": 59157, "evaluate data": 18449, "key content": 27303, "variety baselines": 61263, "quality synthetic": 44585, "learning dynamics": 29607, "entire dataset": 18022, "brute force": 7378, "discrete tokens": 15430, "decoding steps": 13647, "allows direct": 2458, "method hierarchical": 32524, "according type": 870, "class distributions": 8401, "broad applications": 7349, "early prediction": 16513, "making better": 31646, "supervised signals": 54049, "set candidates": 50118, "benchmarks based": 6512, "attention past": 4806, "challenge problem": 8008, "paper measure": 39424, "metrics open": 33184, "extremely imbalanced": 20159, "score model": 48859, "improvement 13": 24979, "systems despite": 54476, "binary classifiers": 7149, "proposed self": 43890, "000 unique": 14, "results manual": 47713, "issues associated": 27085, "generated pseudo": 22309, "corpus annotation": 11275, "associated specific": 4623, "specific entities": 52079, "additional pre": 1692, "online services": 38382, "knowledge enhance": 27462, "baseline bert": 6158, "specific question": 52135, "work describes": 62629, "structure form": 53106, "transformer architectures": 58450, "task network": 55236, "identify linguistic": 24428, "space high": 51869, "recently bert": 45411, "studies showing": 53300, "model smaller": 34394, "distillation framework": 15570, "tasks consistent": 55556, "distilled model": 15583, "benchmark approach": 6425, "outperforms task": 38953, "spread multiple": 52378, "datasets knowledge": 13307, "introduce textit": 26872, "words original": 62473, "multilingual unsupervised": 36133, "require changes": 46844, "component human": 9704, "utilize data": 61089, "challenges facing": 8048, "speed compared": 52321, "efficient terms": 16902, "data stream": 12695, "setting evaluate": 50321, "models ptlms": 35383, "covid 19": 11668, "19 pandemic": 187, "comparing existing": 9480, "usually requires": 61066, "models crucial": 34877, "f1 respectively": 20193, "train transformers": 57653, "applied fine": 3274, "respectively propose": 47378, "ability use": 650, "models finetuned": 35033, "model finetuned": 33896, "learning rich": 29850, "success pretrained": 53720, "segment text": 49076, "analyze differences": 2810, "method matches": 32572, "pretrained lms": 42165, "role recent": 48320, "field present": 20766, "plms achieved": 41015, "extraction experiments": 20067, "shown method": 50726, "approaches explore": 3821, "improvement proposed": 25020, "trained nlp": 57831, "dataset challenge": 12836, "finetuned bert": 21043, "approach combine": 3450, "order dependency": 38607, "tasks transformer": 55942, "explicitly consider": 19632, "performance empirical": 40316, "input target": 26344, "dataset substantially": 13105, "model benefit": 33616, "reduce gap": 45663, "unified multilingual": 59475, "models approaches": 34722, "world task": 62962, "millions parameters": 33263, "performances paper": 40645, "sub networks": 53525, "roberta base": 48215, "recently research": 45463, "content moderation": 10539, "limitations present": 30554, "strategies neural": 52911, "offers advantages": 38299, "followed text": 21259, "propose metrics": 43460, "finetuned models": 21045, "dataset reddit": 13057, "generate short": 22246, "results 20": 47481, "goal experiments": 22884, "limited flexibility": 30587, "comparable size": 9310, "math word": 31930, "data development": 12279, "performance case": 40228, "learning achieve": 29501, "novel pre": 37893, "yields superior": 63137, "30 000": 355, "language construct": 28005, "set word": 50277, "evaluate recent": 18498, "models domain": 34936, "domain high": 16080, "understanding different": 59338, "tasks number": 55771, "metrics provide": 33192, "data hindi": 12406, "crowd source": 11880, "mbert based": 31978, "corpus including": 11361, "current strong": 12013, "67 f1": 476, "weights training": 61941, "end perform": 17694, "datasets nlp": 13347, "provide theoretical": 44143, "empirical observation": 17334, "dataset curated": 12877, "global pandemic": 22838, "xlm roberta": 63030, "additional unlabeled": 1708, "trained contextualized": 57694, "self trained": 49221, "text requires": 56741, "trained annotated": 57672, "corpus social": 11432, "detailed annotation": 14415, "annotation using": 2980, "task active": 54879, "framework successfully": 21607, "93 f1": 562, "000 annotated": 3, "annotated news": 2909, "resulting dataset": 47464, "general topic": 22095, "errors model": 18245, "models correlated": 34870, "perform surprisingly": 40149, "difficult explain": 15167, "make correct": 31556, "generator generates": 22618, "generates textual": 22360, "constructed graph": 10411, "multiple rounds": 36277, "scores large": 48907, "scale pre": 48611, "push state": 44426, "strategies propose": 52914, "inference latency": 25665, "terms parameters": 56306, "based predefined": 5944, "make sentence": 31596, "baseline 17": 6148, "models direct": 34921, "context understanding": 10737, "aims recognize": 2212, "extracting important": 20031, "turn conversational": 58988, "nlu benchmarks": 37563, "range research": 44932, "community driven": 9263, "trained chinese": 57686, "datasets additional": 13146, "addition previous": 1635, "benchmarks experimental": 6523, "models overfit": 35297, "generalization models": 22122, "robust adversarial": 48238, "training lead": 58152, "pretrained transformers": 42191, "distribution ood": 15646, "distribution shifts": 15651, "models necessarily": 35247, "studied task": 53236, "variant bert": 61233, "integrated model": 26516, "previous study": 42293, "constructing high": 10419, "annotations experimental": 2990, "improve target": 24931, "set latent": 50183, "tend focus": 56198, "generate candidate": 22182, "based external": 5722, "transfer ability": 58350, "text baselines": 56457, "statistical features": 52742, "later layers": 29150, "results hold": 47660, "generalizing new": 22159, "stage generation": 52432, "tuned language": 58875, "results automated": 47512, "text significantly": 56769, "outbreak covid": 38762, "study automatically": 53333, "models variants": 35667, "finally consider": 20847, "useful practice": 60380, "modeling better": 34562, "propose contrastive": 43337, "contrastive objective": 10914, "recognition dialogue": 45500, "prediction response": 41736, "forms including": 21375, "fast development": 20421, "systematic studies": 54403, "analysis investigate": 2685, "learning combined": 29562, "technique natural": 56039, "text extensive": 56571, "generation benchmarks": 22427, "benchmarks covering": 6513, "information based": 25767, "knowledge distributed": 27445, "lead severe": 29269, "information lack": 25938, "benchmark study": 6495, "models larger": 35169, "propose modification": 43469, "hard instances": 23443, "calibrated confidence": 7531, "test proposed": 56362, "resources time": 47334, "parameters compared": 39688, "users control": 60458, "dialog models": 14759, "domain requires": 16147, "human explanations": 24165, "using 20": 60547, "data label": 12447, "based label": 5799, "tasks easy": 55599, "unlabeled instances": 59574, "data popular": 12547, "models surpass": 35568, "baseline f1": 6166, "tasks providing": 55828, "document different": 15784, "based bilstm": 5611, "number vocabulary": 38053, "input example": 26274, "methods understand": 33090, "additionally analyze": 1712, "benchmark includes": 6471, "impact word": 24609, "applying bert": 3358, "bert tasks": 6725, "cross platform": 11868, "complexity measures": 9682, "state machine": 52702, "rnn variants": 48205, "data relying": 12599, "relying parallel": 46309, "task wide": 55469, "motivated propose": 35872, "process achieve": 42754, "features latent": 20611, "heavily relied": 23533, "human input": 24171, "propose suite": 43653, "amortized variational": 2541, "vae model": 61166, "sharing decoder": 50514, "optimization strategy": 38557, "dataset recently": 13056, "non contextualized": 37645, "learning meta": 29725, "modeling mlm": 34599, "mlm pre": 33440, "margin achieves": 31817, "roberta model": 48226, "sub graphs": 53520, "framework inspired": 21546, "ml model": 33430, "generates highly": 22344, "people read": 40035, "use future": 59896, "context perform": 10687, "report significant": 46446, "understanding benchmarks": 59326, "outperforms fine": 38902, "given specific": 22788, "used incorporate": 60211, "embedding context": 17021, "representation graph": 46526, "generation extensive": 22460, "understanding research": 59395, "effectiveness language": 16785, "regressive language": 45826, "models display": 34929, "models reliably": 35428, "real text": 45113, "tasks detecting": 55586, "specific sub": 52149, "introduced bert": 26880, "sentences identified": 49734, "fluency coherence": 21125, "tasked generating": 55481, "metrics evaluate": 33161, "task poses": 55278, "survey different": 54205, "trained lm": 57777, "language fluency": 28074, "tokens sequence": 57336, "like knowledge": 30479, "learning automatically": 29527, "resolve ambiguity": 47200, "suggesting new": 53838, "data support": 12713, "effort data": 16925, "focused supervised": 21230, "transfer experiments": 58363, "approaches general": 3832, "recent trends": 45364, "pretraining fine": 42202, "information specific": 26100, "framework dynamically": 21498, "using massive": 60791, "evaluation setting": 18714, "thorough empirical": 57056, "domain bert": 16026, "brought significant": 7369, "legal medical": 30006, "robustness models": 48287, "examples evaluate": 18898, "models increased": 35123, "learning weak": 29939, "present user": 42051, "explore variety": 19752, "learners english": 29496, "data avoid": 12182, "pretraining stage": 42216, "obtained experiments": 38208, "tasks typically": 55945, "performed task": 40666, "tuning performance": 58940, "tuned roberta": 58884, "ranks 1st": 44981, "semeval 2020": 49435, "2020 shared": 291, "leverage pretrained": 30284, "modeling datasets": 34568, "incorporating word": 25395, "models hard": 35078, "flat structure": 21097, "position encoding": 41266, "event centric": 18779, "major source": 31522, "dataset 100": 12789, "models settings": 35489, "improves zero": 25168, "performance 10": 40165, "increasing demand": 25450, "tune pretrained": 58863, "improves downstream": 25126, "qa performance": 44457, "evaluations english": 18758, "pretrained neural": 42175, "conventional models": 11008, "embedding proposed": 17055, "art encoder": 4253, "achieved comparable": 1224, "model relationships": 34301, "entity relationship": 18142, "score 88": 48823, "longer term": 31054, "research develop": 47016, "nlp existing": 37487, "addition incorporate": 1621, "information final": 25874, "turkish language": 58985, "framework exploits": 21517, "unlabeled sentences": 59578, "framework exploit": 21516, "extensive ablation": 19852, "bias work": 7049, "datasets currently": 13205, "specific biases": 52051, "emergence large": 17265, "unexplored work": 59442, "scenarios results": 48708, "results gpt": 47652, "aligned human": 2357, "policy network": 41100, "methods main": 32935, "work required": 62808, "input passage": 26313, "graph level": 23147, "performance questions": 40517, "performance code": 40239, "make task": 31603, "years increasing": 63062, "potential nlp": 41402, "gap exists": 21962, "extreme multi": 20152, "people rely": 40036, "long sequence": 31025, "dataset result": 13067, "words attention": 62369, "outperforms leading": 38906, "absolute gains": 743, "school students": 48741, "linguistic expertise": 30767, "covering wide": 11660, "benchmark available": 6427, "paradigm nlp": 39626, "paper bridge": 39280, "specifically utilize": 52233, "baselines extensive": 6260, "data making": 12480, "english sub": 17884, "domains biomedical": 16237, "conventional sequence": 11012, "low coverage": 31138, "detection dataset": 14471, "alleviates data": 2422, "empirical analyses": 17318, "comprehensive knowledge": 9793, "simulate human": 51256, "dataset scale": 13073, "novel tasks": 37934, "samples experimental": 48472, "demonstrate challenges": 13880, "large human": 28886, "process address": 42756, "natural utterances": 36469, "practical framework": 41463, "diverse applications": 15692, "information kgs": 25935, "model respect": 34316, "processing automatic": 42855, "texts introduce": 56893, "web science": 61893, "word ranking": 62275, "verify approach": 61534, "transfer finally": 58364, "translation effective": 58604, "english malayalam": 17842, "languages translation": 28811, "results survey": 47873, "difficult define": 15163, "proposed years": 43928, "setting specifically": 50350, "problem particularly": 42623, "highly ambiguous": 23879, "related natural": 45919, "recently focus": 45426, "medical entities": 32204, "requiring human": 46962, "advances artificial": 1906, "score recent": 48869, "task creating": 54985, "creating datasets": 11740, "datasets study": 13445, "tasks challenging": 55536, "imbalance issues": 24563, "method augment": 32390, "gpt generate": 22977, "score points": 48865, "strong base": 53001, "expertise required": 19588, "japanese language": 27147, "second challenge": 49000, "processing technique": 42955, "apply real": 3350, "aware knowledge": 5453, "organized hierarchical": 38689, "novel iterative": 37844, "class names": 8409, "candidate entity": 7571, "entity based": 18097, "based selected": 6009, "task span": 55386, "syntactic morphological": 54308, "class text": 8412, "task primarily": 55293, "changing data": 8185, "requiring access": 46959, "original human": 38715, "shot model": 50633, "documents average": 15859, "models choose": 34813, "utilizing external": 61122, "enhance representation": 17920, "encoded pre": 17482, "achieves great": 1330, "great improvements": 23208, "progress area": 43092, "sota models": 51729, "measure progress": 32059, "based findings": 5732, "create additional": 11691, "method fine": 32509, "approaches fine": 3827, "performances recent": 40646, "aim investigate": 2153, "13 different": 123, "present adversarial": 41841, "manually built": 31766, "techniques order": 56116, "effective performance": 16684, "performance enhancement": 40320, "generating additional": 22363, "aware data": 5447, "predictions used": 41769, "performance alleviate": 40191, "ability pre": 631, "better preserve": 6944, "models structured": 35539, "present self": 42003, "masking scheme": 31873, "knowledge learning": 27548, "individual languages": 25571, "preserving meaning": 42124, "source content": 51756, "surpasses existing": 54172, "years work": 63083, "motivated fact": 35865, "novel idea": 37840, "goal use": 22905, "significant step": 50926, "natural disasters": 36410, "scale paper": 48609, "task guided": 55111, "guided pre": 23348, "data tested": 12728, "lack coverage": 27881, "provide annotations": 44008, "task adaptation": 54880, "adapter modules": 1559, "pretraining model": 42211, "results representations": 47808, "learning increasingly": 29680, "recent explosion": 45310, "viability approach": 61568, "diversity language": 15737, "model reliably": 34304, "specifically augment": 52182, "trained distinguish": 57711, "irrelevant words": 27044, "fact verification": 20292, "allows fine": 2464, "lower levels": 31216, "accuracy fine": 979, "instead fine": 26449, "improvement model": 25008, "poorly calibrated": 41149, "training directly": 58067, "performance 13": 40167, "experiments strong": 19534, "pretrained encoders": 42156, "using integer": 60738, "mbert xlm": 31980, "settings pre": 50389, "generation candidate": 22428, "published result": 44371, "multiple reference": 36272, "distribution possible": 15648, "core challenges": 11144, "prohibitively large": 43129, "space potential": 51882, "questions use": 44814, "learning optimize": 29790, "specific questions": 52136, "questions demonstrate": 44782, "generated baseline": 22271, "metrics humans": 33173, "components work": 9729, "work specifically": 62826, "challenging natural": 8116, "leverage text": 30292, "experiments domains": 19423, "required information": 46902, "information critical": 25795, "propose integrate": 43420, "results adversarial": 47494, "training effectively": 58078, "generation automatic": 22423, "probabilistic approach": 42454, "candidates using": 7588, "generated natural": 22301, "score generated": 48848, "experiments twitter": 19549, "samples generated": 48476, "factual accuracy": 20318, "retrieval generation": 47945, "learn spurious": 29428, "position bias": 41262, "thoroughly examine": 57070, "self attentions": 49190, "generalize distribution": 22139, "allows explore": 2463, "seek understand": 49051, "aspects input": 4542, "lead model": 29264, "model choose": 33656, "essential role": 18333, "remains key": 46335, "systems prior": 54599, "model split": 34404, "model computes": 33692, "local models": 30946, "given texts": 22795, "nlu nlg": 37567, "set additional": 50105, "bert method": 6682, "framework introduces": 21549, "alignment input": 2371, "pair set": 39159, "knowledge various": 27646, "structure able": 53088, "able represent": 723, "original graph": 38714, "performance resource": 40536, "rich domains": 48098, "employ domain": 17378, "given pre": 22771, "domain source": 16164, "interpretation method": 26734, "method induces": 32543, "source natural": 51786, "way detect": 61799, "conduct ablation": 10024, "ablation tests": 662, "pretrained nlp": 42177, "converge faster": 11022, "mt task": 35926, "ranking candidates": 44969, "languages expensive": 28662, "ranking models": 44974, "signals used": 50837, "paper offer": 39432, "large english": 28875, "bert layers": 6671, "pretrained parameters": 42179, "benefits data": 6580, "highlighting limitations": 23873, "single data": 51292, "individual model": 25573, "control generated": 10963, "baseline classification": 6160, "high results": 23796, "attribution methods": 4916, "challenges introduce": 8055, "makes approach": 31614, "approach efficient": 3503, "linguistics based": 30819, "information bert": 25768, "performance zero": 40634, "shot accuracy": 50596, "reproducible results": 46830, "accuracy zero": 1072, "different fine": 14935, "shot results": 50637, "semantic overlap": 49304, "earth mover": 16520, "identification classification": 24384, "previous tasks": 42296, "generating training": 22403, "target models": 54833, "systems high": 54519, "modality learning": 33476, "strong existing": 53029, "data suggesting": 12708, "trained corpora": 57698, "multiple evaluation": 36212, "language observed": 28361, "training supervision": 58280, "end sequence": 17707, "falls short": 20378, "significantly boosts": 50945, "healthcare domain": 23522, "english models": 17845, "comparable human": 9297, "models suggesting": 35560, "entity alignment": 18094, "improve entity": 24850, "learning processes": 29822, "metrics fail": 33165, "metrics code": 33148, "roberta large": 48225, "match score": 31900, "approaches ignore": 3841, "datasets analyze": 13151, "labels different": 27814, "combine deep": 9064, "document levels": 15810, "scenarios including": 48698, "silver labels": 51024, "task predictions": 55285, "input prompts": 26320, "relative effectiveness": 46093, "proves effective": 43999, "cases language": 7807, "pertinent information": 40787, "italian spanish": 27112, "direction building": 15270, "multilingual pretrained": 36112, "able transfer": 729, "transfer unseen": 58429, "commercial applications": 9154, "effective zero": 16717, "properties languages": 43264, "text correct": 56517, "demonstrate pre": 13960, "performance make": 40431, "datasets introduced": 13304, "box model": 7290, "quantitative experiments": 44619, "model following": 33906, "evaluations models": 18762, "used investigate": 60219, "investigate multi": 26969, "half number": 23368, "current end": 11973, "models inherently": 35132, "framework decomposes": 21487, "bias text": 7044, "collect novel": 8950, "language terms": 28525, "models attracted": 34738, "context plays": 10688, "shown large": 50724, "improved training": 24968, "intermediate task": 26681, "intermediate tasks": 26682, "inference reasoning": 25688, "performance strongly": 40580, "strongly correlated": 53069, "methods benchmarks": 32769, "benchmarks available": 6511, "recognition question": 45528, "additionally release": 1733, "trained specifically": 57880, "standard baselines": 52469, "multilingual transformer": 36129, "datasets existing": 13260, "fail cover": 20332, "simulate real": 51257, "generalize test": 22148, "multiple random": 36268, "source entity": 51770, "typically consider": 59137, "prediction unseen": 41749, "datasets requiring": 13403, "corpus demonstrates": 11321, "data linguistic": 12467, "linguistically diverse": 30814, "prior datasets": 42396, "wise contrastive": 62080, "performance plms": 40485, "works pre": 62901, "predicting masked": 41677, "roberta perform": 48228, "confident predictions": 10121, "english knowledge": 17828, "memory intensive": 32256, "similar original": 51057, "distillation based": 15568, "evidence use": 18824, "latest advances": 29154, "datasets spanning": 13439, "spanning diverse": 51953, "simply fine": 51251, "style paper": 53492, "methods adversarial": 32743, "existing tasks": 19155, "interpretable structured": 26731, "mechanisms use": 32154, "embeddings play": 17189, "datasets require": 13402, "composing multiple": 9737, "provide faithful": 44072, "model behaviour": 33613, "auxiliary supervision": 5239, "propose recursive": 43598, "use nearest": 59959, "improve recall": 24915, "contributions follows": 10953, "bert training": 6728, "analysis received": 2739, "domain gap": 16076, "text automatic": 56449, "results generalize": 47645, "demonstrate neural": 13950, "step nlp": 52818, "method demonstrate": 32453, "traditional model": 57532, "improve zero": 24940, "consists stages": 10329, "understand improve": 59298, "smoothing method": 51539, "analysis recent": 2740, "1st place": 217, "nature task": 36488, "text makes": 56655, "sourced data": 51822, "data capture": 12197, "given speech": 22789, "english annotated": 17774, "addressing task": 1826, "performance suggesting": 40586, "models come": 34829, "information deep": 25802, "models facilitate": 35013, "thorough experiments": 57062, "approach reaches": 3665, "shows strong": 50807, "methods evaluation": 32847, "evaluation cross": 18601, "usually performed": 61060, "reference free": 45740, "directly compare": 15310, "free evaluation": 21639, "art cross": 4242, "embedding distance": 17026, "useful downstream": 60361, "improve situation": 24925, "aspects including": 4541, "consistent predictions": 10284, "absolute performance": 748, "performance perform": 40483, "better non": 6922, "large task": 29022, "novel design": 37805, "better make": 6915, "test instances": 56351, "datasets empirically": 13244, "performance established": 40324, "increase robustness": 25423, "leverage deep": 30264, "information types": 26134, "encoded contextual": 17476, "information tokens": 26127, "specifically task": 52229, "performance modern": 40442, "require hand": 46858, "features low": 20620, "method soft": 32661, "improvement points": 25015, "problem extractive": 42564, "documents answer": 15855, "multi objective": 35993, "best individual": 6767, "bert baseline": 6626, "baseline 10": 6146, "scarcity available": 48662, "special characteristics": 52016, "scarcity data": 48663, "data includes": 12424, "approach dynamically": 3498, "study online": 53424, "crucial importance": 11901, "target groups": 54818, "platforms like": 40952, "difficult researchers": 15185, "dataset codes": 12844, "majority vote": 31534, "experiments study": 19535, "problem shot": 42654, "accuracy 81": 913, "https aka": 24052, "aka ms": 2225, "understanding various": 59416, "years previous": 63071, "information fail": 25872, "classified different": 8587, "people public": 40034, "work making": 62721, "reported state": 46455, "social political": 51600, "described single": 14215, "automated systems": 5060, "memory module": 32271, "relative previous": 46109, "expensive training": 19223, "model classifier": 33660, "analyzed results": 2835, "works study": 62909, "bias results": 7042, "representations contain": 46630, "informative diverse": 26171, "understanding based": 59323, "comprehensive understanding": 9803, "local coherence": 30930, "video clips": 61582, "improvements come": 25057, "especially training": 18306, "computed based": 9883, "metrics reflect": 33196, "provide diverse": 44055, "corpus covers": 11312, "predominant approach": 41783, "learn effective": 29364, "modality specific": 33478, "predictions experiments": 41760, "ranking algorithms": 44966, "achieves relative": 1357, "model gains": 33918, "proposed mitigate": 43841, "focus unsupervised": 21211, "knowledge furthermore": 27487, "achieves 70": 1290, "accuracy predicting": 1028, "create human": 11701, "qa multi": 44453, "medical experts": 32206, "question conduct": 44723, "sourced code": 51821, "provide stronger": 44136, "stronger baseline": 53061, "code new": 8838, "addressing specific": 1825, "inputs training": 26368, "diversity paper": 15738, "mechanism capture": 32104, "present structured": 42027, "prediction algorithm": 41693, "approaches successfully": 3929, "purely unsupervised": 44398, "models way": 35677, "different segment": 15060, "time generating": 57160, "agent reinforcement": 2058, "learning difficult": 29590, "improvement models": 25009, "domains evaluate": 16250, "proposed systems": 43906, "performance outperforming": 40469, "handcrafted feature": 23399, "engineering based": 17767, "domain analysis": 16017, "successfully capture": 53742, "evaluation introduce": 18630, "results larger": 47696, "module generates": 35760, "existing multilingual": 19112, "negative effects": 36618, "help pre": 23584, "process pre": 42816, "enhanced pre": 17936, "representation multiple": 46560, "strong pre": 53043, "better adapt": 6845, "effectiveness component": 16772, "long answer": 31003, "methods benchmark": 32767, "ignoring dependencies": 24499, "utilize graph": 61094, "provided additional": 44158, "extract integrate": 19979, "features common": 20540, "document datasets": 15782, "non informative": 37657, "document previous": 15820, "context generating": 10646, "memory paper": 32278, "extremely noisy": 20165, "systems sensitive": 54629, "pairs web": 39231, "corpus make": 11376, "design stage": 14302, "leveraging recent": 30338, "perform new": 40125, "dataset compared": 12850, "develop automated": 14574, "input sources": 26339, "model challenging": 33651, "parameters experiments": 39697, "used update": 60345, "differences observed": 14825, "2017 dataset": 265, "using intrinsic": 60741, "build effective": 7395, "work report": 62805, "divided categories": 15746, "based explicitly": 5720, "corpora shows": 11242, "transformer proposed": 58508, "sentences combined": 49690, "led state": 29994, "framework addresses": 21452, "method real": 32633, "challenging given": 8096, "network gat": 36746, "2020 task": 293, "approach bert": 3431, "overcome issues": 39065, "elements including": 16978, "uses graph": 60512, "architecture provides": 4081, "domain focused": 16073, "contextual vector": 10786, "insufficient data": 26492, "robust domain": 48245, "contains wealth": 10508, "connections words": 10186, "analyze types": 2830, "public figures": 44320, "task extracts": 55073, "crucial downstream": 11899, "time analysis": 57115, "yielded state": 63106, "formulating task": 21391, "bilstm networks": 7137, "propose transformer": 43681, "superiority model": 53952, "encoder layers": 17522, "training encoder": 58084, "propose layer": 43436, "systematic experiments": 54397, "including low": 25270, "constituent labels": 10355, "used effectively": 60158, "conditioned previously": 10016, "variable number": 61225, "monolingual bert": 35790, "results challenging": 47531, "challenging implement": 8102, "quality prediction": 44563, "classification limited": 8486, "seen ones": 49060, "models architectures": 34725, "models performing": 35321, "benchmark work": 6505, "outperforms similar": 38942, "task research": 55342, "paper adopts": 39259, "dataset demonstrating": 12889, "classification multi": 8500, "classification use": 8577, "performance 95": 40174, "classical chinese": 8422, "addressing limitations": 1821, "labels natural": 27840, "enhance existing": 17912, "automatic labeling": 5099, "learn represent": 29413, "minority class": 33332, "generalization recent": 22129, "news corpora": 37394, "domain recent": 16143, "potential make": 41399, "results baseline": 47519, "models demonstrating": 34897, "languages shown": 28784, "transfer high": 58366, "languages covered": 28625, "random word": 44894, "accuracy absolute": 932, "complex structure": 9664, "approaches generating": 3835, "approach experimental": 3525, "provide human": 44087, "data subset": 12705, "corpus does": 11327, "research low": 47068, "common topic": 9206, "topic discussion": 57401, "embeddings related": 17202, "effectively combine": 16728, "multitask model": 36325, "compare transformer": 9373, "task generally": 55102, "lack appropriate": 27875, "effective combination": 16635, "evaluation present": 18679, "dataset larger": 12979, "previous datasets": 42254, "architecture bert": 4029, "using roberta": 60912, "level nlp": 30168, "performance transformer": 40609, "address key": 1774, "role context": 48302, "used current": 60134, "current dialogue": 11971, "tasks explicitly": 55632, "76 f1": 507, "study pre": 53434, "strategy data": 52930, "end transformer": 17722, "augmentation generate": 4955, "nlp including": 37490, "corpora extracted": 11201, "experiments applying": 19354, "analysis better": 2623, "follow standard": 21255, "information key": 25934, "extraction single": 20112, "work general": 62674, "contain various": 10476, "designed natural": 14326, "texts human": 56887, "textual explanations": 56964, "understudied problem": 59426, "exhibits better": 19009, "modeling specifically": 34624, "comparison previous": 9503, "16 teams": 165, "directly indirectly": 15320, "presents baseline": 42074, "2020 challenge": 290, "proposed transformer": 43918, "textual form": 56966, "distribution information": 15641, "lms bert": 30917, "resources models": 47318, "objectives based": 38111, "adapter based": 1557, "outperform bert": 38784, "sourced https": 51824, "text allowing": 56429, "analysis research": 2742, "survey methods": 54209, "analysis potential": 2719, "solving nlp": 51703, "time money": 57181, "time using": 57236, "experiments demonstrates": 19410, "source library": 51782, "new era": 37190, "massive dataset": 31884, "obtains higher": 38250, "scores datasets": 48898, "outperforming multilingual": 38854, "publicly https": 44357, "58 accuracy": 449, "evaluation novel": 18664, "transformer neural": 58505, "rule induction": 48388, "induction method": 25606, "fact triples": 20291, "form structured": 21337, "continues grow": 10835, "amounts labeled": 2551, "problem multiple": 42612, "reaching state": 45060, "growing complexity": 23292, "safety critical": 48429, "code implementation": 8820, "human social": 24240, "substantial effort": 53618, "gpt shown": 22988, "contrastive self": 10922, "important issues": 24738, "intermediate training": 26683, "data consistently": 12242, "end pipeline": 17696, "models starting": 35532, "work reveals": 62811, "include explicit": 25223, "propose transition": 43683, "approach works": 3742, "video audio": 61578, "increased performance": 25431, "goal produce": 22896, "ill suited": 24511, "models reasoning": 35404, "generation challenging": 22432, "design simple": 14299, "greatly facilitate": 23228, "use https": 59909, "various purposes": 61381, "shown models": 50728, "models possess": 35330, "force model": 21286, "little studied": 30885, "dataset improvements": 12961, "efficient models": 16887, "cost performance": 11591, "input generate": 26281, "components jointly": 9718, "context compare": 10598, "dataset problem": 13036, "tested different": 56395, "words depending": 62395, "context context": 10599, "real conversations": 45100, "detecting various": 14451, "score average": 48834, "results indicated": 47680, "english addition": 17773, "generated given": 22290, "information significant": 26085, "language ability": 27949, "language presented": 28386, "setting explore": 50323, "directly optimizing": 15328, "web sites": 61898, "increasing data": 25449, "data internet": 12439, "representation capture": 46497, "consists sub": 10331, "sub network": 53524, "bbc news": 6363, "questions question": 44801, "question corresponding": 44725, "domains data": 16243, "consuming work": 10455, "models allows": 34709, "created data": 11724, "various transformer": 61411, "learning unlabeled": 29924, "neural unsupervised": 37110, "embeddings provides": 17200, "conversations social": 11063, "mining research": 33321, "largest human": 29096, "context predict": 10689, "words making": 62454, "variety word": 61296, "method focuses": 32512, "highly depends": 23892, "access source": 828, "studies report": 53296, "work utilize": 62857, "model families": 33880, "roberta gpt": 48222, "number speakers": 38037, "based solution": 6046, "distinct datasets": 15589, "roberta albert": 48214, "deeper insights": 13758, "dataset providing": 13046, "average error": 5405, "words specific": 62519, "tuning improves": 58918, "representation pre": 46567, "bert proposed": 6708, "alleviates problem": 2425, "tasks specific": 55902, "better pre": 6940, "used submission": 60315, "task graph": 55109, "resources needed": 47321, "models beneficial": 34768, "propose modular": 43472, "modular architecture": 35743, "problem mainly": 42603, "classification uses": 8579, "documents experimental": 15877, "levels results": 30246, "approaches help": 3839, "rapidly growing": 44995, "advances pre": 1920, "large external": 28878, "recent unsupervised": 45365, "cross model": 11867, "typically focus": 59143, "pseudo labels": 44278, "labels generated": 27829, "quality pseudo": 44568, "corpus 10": 11263, "events including": 18793, "multimodal pre": 36154, "training goal": 58116, "english new": 17850, "trained lexical": 57774, "information corresponding": 25794, "latent code": 29118, "develop annotation": 14571, "difficult cases": 15158, "multiple strategies": 36292, "tool understanding": 57367, "geometric structure": 22656, "computational benefits": 9835, "improves bert": 25117, "roberta models": 48227, "efficiency model": 16846, "half training": 23369, "achieving improvements": 1414, "surpass human": 54164, "carefully design": 7761, "design self": 14298, "architectures large": 4113, "transformers achieve": 58519, "extensive research": 19910, "comprehensive empirical": 9786, "different facets": 14929, "pretrained encoder": 42155, "encoder training": 17545, "paper argues": 39272, "data generative": 12388, "model sample": 34333, "closed book": 8695, "outperform methods": 38802, "token sequences": 57308, "labeling text": 27797, "bert architecture": 6610, "larger model": 29081, "text address": 56425, "universally applicable": 59551, "connections different": 10185, "implementation publicly": 24642, "propose consider": 43330, "generation question": 22535, "generation abstractive": 22409, "dataset verify": 13133, "introduce transformer": 26873, "encoder experimental": 17513, "improving overall": 25188, "performance lack": 40404, "content propose": 10549, "identify potential": 24436, "sentences dataset": 49702, "work measure": 62722, "based virtual": 6130, "alexa google": 2254, "text downstream": 56545, "data subsequently": 12704, "resource situations": 47277, "unseen domain": 59646, "label semantics": 27724, "labels experimental": 27821, "setting pre": 50341, "replaced token": 46405, "token detection": 57285, "task masked": 55207, "understanding benchmark": 59325, "bias present": 7038, "useful future": 60365, "framework utilises": 21623, "aims increase": 2200, "impact factors": 24595, "bert evaluate": 6654, "research interests": 47056, "main research": 31457, "diverse corpus": 15696, "augmentation framework": 4954, "generate multi": 22219, "model align": 33558, "work method": 62723, "improved performances": 24961, "reasoning graph": 45196, "graph contains": 23118, "contains new": 10502, "question information": 44733, "approaches especially": 3811, "teach model": 55988, "effectively perform": 16753, "systems challenging": 54449, "generated utterances": 22333, "potential improvements": 41395, "embedding clustering": 17020, "core components": 11146, "github https": 22715, "metrics computed": 33153, "unlabelled text": 59588, "tuned specific": 58886, "xlnet model": 63033, "bert like": 6675, "like models": 30486, "english evaluate": 17801, "ignore context": 24489, "automatically selecting": 5201, "relevant evidence": 46214, "generate different": 22193, "relevant question": 46231, "superior accuracy": 53930, "domains small": 16292, "annotations obtained": 2995, "certain extent": 7941, "guarantee quality": 23320, "current text": 12019, "boundary information": 7284, "information introduced": 25932, "recognition datasets": 45499, "aforementioned challenges": 2036, "specifically make": 52215, "task shot": 55371, "categories propose": 7848, "model shot": 34371, "similarity reference": 51115, "sentences real": 49776, "classifier used": 8608, "process introduce": 42796, "extraction essential": 20061, "domain address": 16013, "address limitation": 1776, "domains performance": 16283, "average compared": 5404, "kappa score": 27268, "issue study": 27080, "prediction extensive": 41708, "11 accuracy": 84, "key ingredients": 27319, "processing especially": 42869, "translation code": 58588, "improvements code": 25056, "structure new": 53123, "supervised end": 53982, "employ graph": 17382, "different online": 15013, "key factors": 27310, "corpus comprises": 11300, "answers based": 3106, "types machine": 59100, "performance corpus": 40268, "score 84": 48819, "set corpus": 50131, "positive training": 41298, "model memory": 34098, "requires minimal": 46942, "spanish data": 51940, "probabilistic inference": 42462, "model leveraging": 34059, "state model": 52704, "language considered": 28002, "method termed": 32681, "outperforms recently": 38939, "sets recent": 50304, "experiments work": 19565, "new adversarial": 37124, "corpora small": 11243, "imbalanced datasets": 24568, "challenge training": 8021, "datasets application": 13153, "content non": 10542, "graph used": 23177, "granularity levels": 23092, "documents address": 15853, "approach considers": 3464, "maximum accuracy": 31967, "propose clustering": 43319, "shot settings": 50643, "neural modeling": 36972, "scarcity issue": 48667, "information respectively": 26057, "limited target": 30622, "fewer training": 20741, "complexity using": 9692, "achieves 95": 1298, "step various": 52836, "human centric": 24121, "words significantly": 62513, "relationships paper": 46081, "representations transformer": 46775, "training steps": 58273, "mixing phenomenon": 33416, "utterance text": 61142, "text sentiment": 56763, "transport ot": 58723, "representing entities": 46811, "incorporated existing": 25368, "baselines wide": 6320, "propose pipeline": 43582, "pipeline approach": 40891, "unsupervised multi": 59714, "texts languages": 56898, "summarization information": 53886, "strong zero": 53058, "additional task": 1702, "discriminative generative": 15444, "classification generation": 8476, "bert albert": 6608, "com salesforce": 9023, "expensive acquire": 19202, "approaches reduce": 3910, "pseudo label": 44274, "augment labeled": 4942, "underlying neural": 59273, "network leveraging": 36760, "20 30": 220, "samples class": 48466, "accuracy 91": 923, "applied learn": 3279, "use bi": 59835, "method study": 32670, "person pronouns": 40752, "recently advanced": 45405, "advanced nlp": 1892, "seen surge": 49063, "propose textit": 43669, "minimal changes": 33285, "parameters using": 39728, "far superior": 20406, "corresponding question": 11556, "learning stages": 29893, "stage fine": 52429, "effectiveness superiority": 16813, "input transformer": 26353, "factual correctness": 20320, "improving downstream": 25177, "related training": 45949, "use automated": 59828, "models core": 34867, "performance knowledge": 40402, "document better": 15769, "table text": 54689, "employ encoder": 17379, "approach terms": 3719, "transfer domains": 58360, "standard domain": 52486, "language setting": 28482, "domain recently": 16144, "recently led": 45437, "bert demonstrate": 6639, "focuses building": 21235, "model seen": 34342, "approach flexible": 3540, "19 test": 191, "tools resources": 57384, "parameter reduction": 39675, "analysis recently": 2741, "embeddings combining": 17095, "retrieval accuracy": 47938, "performing competitively": 40674, "en zh": 17418, "medical diagnosis": 32201, "gap introduce": 21965, "different sampling": 15056, "tasks verify": 55961, "learning translate": 29922, "task uses": 55462, "closing gap": 8717, "accuracy end": 967, "related covid": 45893, "literature work": 30865, "modeling generate": 34578, "train generative": 57593, "transformer gpt": 58489, "text generator": 56608, "news wikipedia": 37427, "included training": 25228, "dataset pre": 13031, "language experimental": 28059, "push limits": 44425, "effective deep": 16643, "final set": 20831, "functions including": 21772, "information decoding": 25801, "motivate research": 35862, "approaches furthermore": 3831, "content creators": 10516, "malayalam english": 31678, "neighbors knn": 36666, "similar level": 51051, "bert distilbert": 6643, "performing bert": 40671, "traditional classification": 57513, "accuracy 63": 900, "oversampling technique": 39104, "proposed pipeline": 43878, "defined based": 13782, "shapley values": 50453, "networks experimental": 36851, "art t5": 4421, "given product": 22773, "impact user": 24606, "methods methods": 32945, "application text": 3182, "presents study": 42107, "corpus spanish": 11433, "purpose model": 44405, "work possible": 62750, "process dataset": 42768, "terms input": 56296, "leverage multiple": 30280, "tasks boost": 55530, "model main": 34081, "bi level": 7010, "problem semantic": 42646, "lstm bert": 31249, "study systematically": 53465, "parsers used": 39766, "make recommendations": 31592, "despite huge": 14367, "propose textsc": 43670, "order construct": 38603, "t5 model": 54682, "model demonstrating": 33747, "benchmarks using": 6549, "method helps": 32523, "alleviate drawbacks": 2406, "approach bring": 3434, "model linear": 34066, "using metadata": 60800, "end consider": 17622, "text compare": 56499, "retrieval results": 47968, "text able": 56420, "shown pre": 50737, "results key": 47686, "tasks significantly": 55889, "performance challenging": 40231, "challenging examples": 8094, "distribution performance": 15647, "importance data": 24679, "strategies proposed": 52915, "view paper": 61600, "paper analyses": 39264, "words semantics": 62506, "speech named": 52271, "attack method": 4659, "generation requires": 22539, "model global": 33941, "using qualitative": 60887, "qualitative study": 44482, "study multiple": 53415, "content shared": 10556, "unified view": 59482, "2010 2020": 249, "facebook pages": 20245, "estimation task": 18387, "evidence retrieval": 18818, "teams registered": 56011, "registered participate": 45809, "evaluation scripts": 18711, "enable research": 17428, "furthermore design": 21814, "models public": 35385, "embeddings contrast": 17104, "capacity models": 7638, "especially shot": 18299, "datasets applying": 13154, "standard multilingual": 52510, "assess effectiveness": 4577, "representations essential": 46655, "representation final": 46518, "approach increases": 3571, "15 different": 147, "available date": 5280, "describes proposed": 14231, "results transformer": 47891, "effective task": 16701, "vision cv": 61635, "input performs": 26314, "sentences common": 49691, "subtasks paper": 53670, "proposes model": 43934, "various fine": 61344, "tuned transformer": 58891, "tree information": 58745, "utilizing local": 61125, "specifically present": 52221, "models improving": 35116, "information allows": 25759, "terms words": 56324, "leverages pretrained": 30311, "leverage different": 30265, "leverage context": 30259, "dependent context": 14146, "prediction finally": 41709, "outperforms neural": 38914, "study highlights": 53384, "classification setting": 8548, "smart devices": 51529, "voice assistants": 61723, "order accelerate": 38586, "information self": 26075, "entities identified": 18055, "architectures trained": 4126, "automatic transcripts": 5132, "cascaded approach": 7783, "models poorly": 35328, "user text": 60452, "answer natural": 3039, "relation information": 45985, "contains main": 10498, "emphasize importance": 17311, "utilizing pre": 61127, "models arabic": 34723, "platform people": 40950, "varies significantly": 61258, "categories linguistic": 7846, "analyze sentiment": 2828, "dataset according": 12795, "challenge focused": 7982, "match f1": 31896, "preserving input": 42123, "realistic settings": 45153, "development systems": 14704, "19 related": 190, "successfully employed": 53744, "time labor": 57170, "metrics best": 33142, "current metrics": 11986, "corpora make": 11219, "network performance": 36782, "better low": 6912, "personal preferences": 40758, "limited scale": 30611, "propose sentiment": 43623, "end multi": 17687, "powerful text": 41447, "benefits large": 6583, "quantities unlabeled": 44637, "models extended": 35003, "improves average": 25114, "dataset grounded": 12945, "evaluation period": 18671, "baseline code": 6162, "leaderboard https": 29284, "processing module": 42893, "structure proposed": 53131, "better performing": 6939, "generation scheme": 22542, "languages recently": 28766, "given current": 22732, "given particular": 22768, "analysis specific": 2763, "10 20": 31, "20 training": 232, "significant time": 50928, "presents unique": 42109, "evidence existing": 18809, "advancements deep": 1897, "generation research": 22540, "language improve": 28102, "furthermore provide": 21837, "models comparing": 34837, "systems discuss": 54482, "discuss promising": 15480, "available researchers": 5362, "subject areas": 53551, "development domain": 14677, "learning environment": 29625, "task computational": 54964, "domain benchmark": 16024, "shown deep": 50701, "achieving goal": 1405, "release test": 46169, "tasks critical": 55564, "models unseen": 35648, "generated candidates": 22274, "poor quality": 41142, "information fully": 25885, "rely handcrafted": 46285, "candidate ranking": 7576, "proposed effectively": 43760, "approach presents": 3650, "better ranking": 6949, "compared neural": 9425, "patterns observed": 39972, "recent study": 45354, "models critical": 34873, "focus short": 21199, "design considerations": 14268, "task attracted": 54920, "31 teams": 365, "task indicates": 55134, "choice pre": 8333, "useful feature": 60363, "classification existing": 8466, "grammatical semantic": 23077, "20 times": 231, "constituency parser": 10348, "improve parsing": 24884, "performance settings": 40556, "sentences second": 49780, "samples large": 48480, "selection mechanism": 49143, "par performance": 39616, "underlying task": 59279, "performed comparably": 40661, "extremely costly": 20155, "focus enhancing": 21159, "context provide": 10698, "mechanism named": 32130, "named multi": 36376, "randomly mask": 44900, "learning makes": 29718, "text semantics": 56759, "decades research": 13542, "method suitable": 32675, "knowledge previous": 27577, "models accurate": 34664, "query text": 44678, "online test": 38389, "nature social": 36486, "data led": 12461, "proposed applied": 43719, "shows higher": 50782, "labeling based": 27779, "learning widely": 29941, "available line": 5322, "detect errors": 14439, "extraction texts": 20123, "semantics syntactic": 49415, "parsing important": 39782, "step automated": 52799, "average gain": 5409, "user specified": 60449, "fills gap": 20805, "coherence generated": 8908, "particular given": 39847, "incorporating new": 25390, "used everyday": 60174, "aim explore": 2146, "different events": 14923, "learning additionally": 29504, "framework new": 21572, "seamlessly integrate": 48960, "gpt based": 22972, "strategy introduce": 52938, "robustness evaluation": 48278, "representation bert": 46495, "largely attributed": 29051, "attributed ability": 4904, "contained sentence": 10479, "does significantly": 15979, "encoder generate": 17517, "models t5": 35576, "text abstract": 56421, "typically evaluated": 59141, "metrics compare": 33150, "compare generated": 9342, "aim alleviate": 2135, "issues proposing": 27102, "sota language": 51726, "involving large": 27026, "despite extensive": 14360, "models adapted": 34690, "space experiments": 51862, "lack fine": 27889, "retrieved training": 47987, "data hand": 12398, "copy words": 11136, "t5 models": 54683, "quality knowledge": 44538, "reasoning existing": 45193, "features extract": 20581, "information construct": 25789, "domains labeled": 16265, "test efficacy": 56346, "alignment techniques": 2384, "domain adaptive": 16010, "adaptive fine": 1574, "scores 70": 48887, "data tackle": 12718, "problem building": 42514, "news sentences": 37413, "graph networks": 23151, "evidence lower": 18812, "method sentiment": 32648, "domains method": 16273, "obtains comparable": 38243, "stateof art": 52718, "approaches report": 3913, "exploit potential": 19660, "datasets addition": 13145, "posts news": 41371, "underlying structure": 59278, "paper analyzed": 39266, "different deep": 14893, "mainly lack": 31475, "propose chinese": 43317, "lack suitable": 27916, "knowledge instead": 27528, "need understand": 36598, "specific meanings": 52109, "capability proposed": 7613, "sa license": 48418, "understanding fundamental": 59346, "problem information": 42583, "understanding users": 59414, "novel query": 37903, "tasks leverage": 55720, "novel contrastive": 37790, "study sentiment": 53457, "combination approaches": 9032, "adaptation specific": 1538, "results cases": 47529, "encoder use": 17547, "finetuning data": 21049, "improve factual": 24853, "assessed using": 4584, "word set": 62308, "model interpretation": 34016, "remarkable performances": 46359, "conducted benchmark": 10074, "better bert": 6854, "great demand": 23203, "tasks unified": 55949, "overall framework": 39043, "bert variants": 6733, "outperforms multilingual": 38912, "available code": 5270, "tuned downstream": 58873, "pro pose": 42450, "coronavirus disease": 11168, "labeling costs": 27781, "instances specifically": 26436, "superiority effectiveness": 53950, "adverse effects": 1999, "common errors": 9173, "hierarchical transformer": 23696, "different transformer": 15105, "masked entity": 31861, "datasets additionally": 13147, "datasets deep": 13209, "expensive produce": 19216, "settings various": 50403, "classical approaches": 8421, "efficient multi": 16888, "efficient propose": 16893, "improve pre": 24905, "describes developed": 14222, "obtained f1": 38209, "samples data": 48468, "similar given": 51044, "addresses gap": 1810, "unsupervised algorithms": 59680, "potential reasons": 41404, "tuning work": 58974, "demonstrate human": 13919, "art computational": 4240, "recognition research": 45532, "span detection": 51922, "spans training": 51959, "demonstrate evaluation": 13910, "extraction propose": 20098, "problem speech": 42666, "account semantic": 880, "accuracy metric": 1006, "models ensure": 34971, "language existing": 28058, "stage pipeline": 52437, "stage paper": 52436, "gains previous": 21941, "text author": 56447, "10 years": 55, "english native": 17848, "approaches given": 3836, "gpt gpt": 22980, "scores significantly": 48921, "submission ranked": 53573, "bi grams": 7008, "text various": 56838, "employ label": 17384, "layers different": 29222, "people understand": 40040, "span identification": 51924, "ways expressing": 61841, "team semeval": 56004, "analysis specifically": 2764, "performance subtasks": 40583, "introduce adaptive": 26776, "model adaptively": 33535, "tasks review": 55866, "shared publicly": 50484, "tuning techniques": 58969, "teams total": 56013, "benchmark including": 6472, "including translation": 25314, "use benchmark": 59833, "benchmark compare": 6434, "driven systems": 16433, "team achieved": 56002, "particular type": 39868, "causal inference": 7873, "challenges particular": 8067, "work try": 62849, "main topics": 31465, "similarity embeddings": 51093, "group similar": 23274, "context covid": 10601, "results superiority": 47869, "approach baselines": 3428, "transformers pre": 58528, "strategy allows": 52927, "trained seq2seq": 57865, "influence different": 25724, "score accuracy": 48832, "framework extracts": 21522, "information produced": 26026, "using examples": 60683, "stepping stone": 52839, "response given": 47395, "showed significant": 50673, "tasks solved": 55898, "additional annotation": 1653, "setting performing": 50340, "demonstrate low": 13932, "low computational": 31133, "end document": 17631, "using classification": 60605, "problem improve": 42580, "based notion": 5918, "datasets investigate": 13305, "classification labels": 8483, "tasks framework": 55649, "benchmark performances": 6487, "including limited": 25268, "limited text": 30625, "focus real": 21192, "model dependent": 33750, "making model": 31660, "noise model": 37601, "model variant": 34522, "tuning explore": 58913, "jointly predict": 27217, "dataset obtains": 13017, "english labeled": 17830, "speech using": 52316, "higher baseline": 23814, "standard natural": 52511, "remains poorly": 46345, "result suggests": 47453, "improve coherence": 24832, "coherence consistency": 8906, "train gpt": 57594, "conditioned given": 10015, "layer pre": 29202, "realistic text": 45155, "large lms": 28900, "class conditional": 8399, "particular multi": 39855, "utterance speaker": 61141, "models prlms": 35358, "information utterance": 26151, "doi org": 15988, "org 10": 38677, "published articles": 44367, "contains large": 10497, "online offline": 38377, "easier use": 16530, "identify limitations": 24427, "ended text": 17739, "scaling model": 48650, "training furthermore": 58111, "training introduce": 58137, "non pretrained": 37677, "different entities": 14917, "non entity": 37651, "attribute value": 4901, "modeling relationship": 34619, "challenges nlp": 8064, "field training": 20772, "performance explore": 40338, "experiments compare": 19378, "ended language": 17737, "par existing": 39614, "2019 evaluation": 283, "leads improvement": 29317, "data increasing": 12427, "perform supervised": 40148, "rich knowledge": 48105, "principled method": 42388, "execution accuracy": 18986, "augmentation training": 4971, "aware transformer": 5476, "end tasks": 17714, "exploit syntactic": 19666, "architecture fine": 4049, "used adapt": 60080, "unsupervised graph": 59700, "space order": 51878, "multiple benchmark": 36172, "respective tasks": 47356, "problems caused": 42697, "primary challenges": 42369, "ambiguity natural": 2526, "language lack": 28128, "data overcome": 12527, "leverages semantic": 30313, "paper derive": 39318, "meaning context": 32000, "contextual semantic": 10780, "using strategy": 60965, "training schemes": 58239, "supervised contrastive": 53971, "dataset annotation": 12810, "original input": 38716, "positive sample": 41295, "learns attend": 29953, "standard text": 52534, "new manually": 37245, "argument extraction": 4171, "used bidirectional": 60108, "bert experimental": 6655, "multilingual systems": 36124, "bert text": 6726, "sentence contains": 49533, "labeling approaches": 27778, "various embedding": 61335, "present bert": 41857, "robustness generalization": 48281, "ability bert": 596, "segmentation cws": 49081, "bert provide": 6709, "convenient use": 10996, "terms multiple": 56301, "effort devoted": 16926, "traditional state": 57545, "including different": 25250, "improving current": 25175, "useful real": 60382, "attention new": 4800, "new long": 37240, "model validate": 34521, "far satisfactory": 20405, "way large": 61815, "network gnn": 36750, "relations important": 46035, "develop strong": 14614, "graph learning": 23146, "task stage": 55408, "joint framework": 27171, "based manually": 5832, "joint approach": 27165, "works treat": 62912, "classification neural": 8509, "models ignoring": 35100, "common features": 9175, "features tasks": 20681, "form used": 21340, "contextualized models": 10807, "experiments classification": 19373, "resources publicly": 47329, "score previous": 48867, "pairs extensive": 39190, "way future": 61804, "com csebuetnlp": 9007, "gold label": 22913, "experiments 11": 19343, "language treebank": 28541, "size parameter": 51393, "module employed": 35756, "tagging results": 54750, "methods explain": 32853, "mtl framework": 35931, "input single": 26336, "used make": 60232, "make final": 31571, "learning difficulty": 29591, "additional large": 1682, "based t5": 6079, "transformation method": 58444, "research needed": 47079, "data potential": 12549, "provide deeper": 44046, "process identify": 42790, "develop end": 14585, "end knowledge": 17679, "sharing mechanism": 50517, "bert extract": 6658, "examples data": 18894, "common method": 9185, "ood data": 38402, "softmax loss": 51633, "media comments": 32161, "encoders bert": 17553, "roberta language": 48223, "level encoding": 30109, "document information": 15800, "context relation": 10704, "document entity": 15791, "performance public": 40512, "datasets document": 13236, "having multiple": 23490, "compact representation": 9280, "sequence fine": 49922, "model supervised": 34429, "datasets bert": 13167, "types work": 59129, "experiments seven": 19521, "seven benchmark": 50415, "model ablation": 33486, "search method": 48975, "understanding state": 59404, "certain cases": 7936, "analyze factors": 2816, "classification finally": 8471, "largely based": 29052, "propose token": 43674, "increasing focus": 25451, "recent bert": 45297, "metrics models": 33182, "assign high": 4599, "context data": 10605, "metrics introduce": 33175, "metrics perform": 33187, "accuracy performance": 1022, "slu tasks": 51459, "contextualised embeddings": 10792, "models emerged": 34952, "knowledge generative": 27493, "propose generation": 43400, "tune bert": 58854, "task select": 55350, "algorithms provide": 2337, "generating realistic": 22390, "contain semantic": 10472, "use learn": 59930, "inputs paper": 26366, "learn structural": 29429, "techniques improved": 56098, "specifically firstly": 52203, "vae based": 61165, "representations downstream": 46644, "generated contents": 22278, "10 training": 53, "building evaluating": 7444, "issue proposing": 27079, "entities second": 18080, "transfer information": 58368, "directly map": 15323, "efficient end": 16870, "methods higher": 32888, "fail distinguish": 20334, "layers models": 29228, "role various": 48325, "novel adversarial": 37750, "processing currently": 42864, "studies field": 53265, "aims classify": 2181, "bert applied": 6609, "propose ensemble": 43373, "datasets transformer": 13462, "capacity large": 7636, "comprehensively evaluate": 9805, "settings neural": 50385, "study models": 53413, "years task": 63079, "attention previous": 4811, "task instead": 55142, "samples drawn": 48471, "generative framework": 22590, "knowledge tasks": 27627, "framework general": 21526, "resource high": 47228, "generates large": 22346, "generation important": 22474, "human writing": 24258, "semantic transfer": 49367, "baselines pre": 6286, "abstractive summaries": 771, "content generated": 10525, "humans able": 24271, "transformers achieved": 58520, "addition evaluating": 1613, "attempt capture": 4683, "motivated findings": 35867, "module developed": 35755, "generated pre": 22308, "stronger generalization": 53063, "principled manner": 42387, "proposed strategies": 43903, "generation problems": 22527, "par better": 39612, "scores based": 48891, "tightly coupled": 57110, "popular entities": 41164, "dataset previous": 13035, "quadratic complexity": 44464, "efforts devoted": 16937, "heterogeneous knowledge": 23623, "indicating effectiveness": 25542, "performance providing": 40511, "output embeddings": 38970, "using structured": 60967, "ability answer": 593, "better deep": 6876, "provide guidance": 44083, "uzbek language": 61156, "cost work": 11597, "lack adequate": 27871, "strong domain": 53026, "cc nc": 7897, "based distant": 5684, "end collect": 17619, "paper tried": 39598, "focused classifying": 21217, "behavior propose": 6396, "embeddings apply": 17082, "sets words": 50313, "enable users": 17431, "addresses problems": 1815, "achieved macro": 1248, "works explore": 62887, "mlm objective": 33439, "preliminary evidence": 41802, "holds potential": 23986, "evaluation methodologies": 18640, "classification aim": 8428, "metric evaluate": 33114, "art metrics": 4291, "collected chinese": 8955, "require understanding": 46895, "accuracy 53": 898, "attribution method": 4915, "shows existing": 50777, "demonstrated great": 14008, "datasets conducted": 13188, "discuss performance": 15477, "texts traditional": 56936, "method empirically": 32478, "parsing dp": 39778, "detailed evaluation": 14424, "tasks provides": 55827, "knowledge critical": 27429, "research address": 46978, "search approach": 48964, "direct access": 15251, "examples test": 18937, "highly expressive": 23899, "training domains": 58074, "survey presents": 54212, "explainable ai": 19598, "nlp model": 37500, "development maintenance": 14685, "bart model": 5534, "time location": 57174, "temporal spatial": 56193, "capturing meaning": 7741, "generic data": 22627, "domain order": 16126, "domain tasks": 16202, "known methods": 27661, "pretrained contextualized": 42149, "contextualized text": 10810, "randomly masked": 44901, "large entity": 28876, "wikipedia propose": 62053, "propose entity": 43374, "empirical performance": 17336, "resource efficient": 47225, "learning analyze": 29516, "text method": 56658, "outperforms roberta": 38940, "approaches evaluating": 3814, "languages zero": 28827, "modeling natural": 34603, "levels propose": 30245, "order effectively": 38611, "field recent": 20767, "paired image": 39163, "graph encoding": 23133, "sentences target": 49791, "high success": 23804, "graph structural": 23168, "aims predict": 2206, "use given": 59900, "related sentiment": 45935, "construct heterogeneous": 10386, "kg information": 27360, "able enhance": 692, "datasets demonstrated": 13219, "recent model": 45320, "short phrases": 50562, "sentence multi": 49603, "types fine": 59089, "learning effectiveness": 29610, "similar sentence": 51065, "relevance generated": 46191, "training making": 58168, "robust accurate": 48237, "modern language": 35707, "trained mixed": 57790, "mixed dataset": 33402, "repository https": 46464, "creating multiple": 11743, "comprehension given": 9765, "given article": 22723, "sentence generate": 49564, "questions evaluate": 44786, "suffer long": 53773, "input source": 26338, "outperforms sota": 38944, "english benchmark": 17777, "reasoning network": 45211, "understanding document": 59339, "final decision": 20819, "generation code": 22435, "process knowledge": 42798, "extracted different": 20008, "label assigned": 27689, "sentence introduce": 49573, "introduce study": 26865, "corpus input": 11362, "reducing need": 45711, "tuned different": 58871, "varying number": 61434, "finetuning bert": 21048, "algorithm provides": 2296, "aims bring": 2177, "motivation propose": 35884, "based transformers": 6111, "dravidian languages": 16398, "leader board": 29282, "trained augmented": 57676, "feature attribution": 20474, "approach employed": 3505, "phases phase": 40808, "second phase": 49015, "final submission": 20832, "achieved micro": 1250, "structures different": 53183, "embedding dimensions": 17025, "requires human": 46933, "showing high": 50679, "words small": 62516, "learning materials": 29722, "sentences help": 49730, "students learning": 53218, "parametric non": 39732, "array tasks": 4202, "limits application": 30640, "training transfer": 58304, "tasks generate": 55654, "examples achieve": 18886, "faces problem": 20254, "problem sparse": 42663, "slow convergence": 51449, "sparse reward": 51971, "leveraging human": 30326, "annotations propose": 2997, "scores code": 48895, "training make": 58166, "challenge end": 7979, "represent entire": 46471, "entities related": 18077, "previous text": 42297, "approaches achieving": 3754, "input examples": 26275, "training evaluate": 58089, "models resulting": 35455, "corpora makes": 11220, "constraint based": 10370, "various combinations": 61315, "models contain": 34858, "field work": 20774, "work look": 62713, "models remains": 35434, "remains unexplored": 46354, "use metric": 59948, "broad study": 7356, "hampered lack": 23379, "standard classification": 52475, "ability reason": 639, "improvements average": 25049, "rarely studied": 45007, "tokens different": 57325, "results https": 47661, "ability identify": 612, "net model": 36689, "estimation model": 18385, "performance gpt": 40369, "creating dataset": 11739, "gpt generated": 22978, "generated gpt": 22291, "processing works": 42969, "explicit alignment": 19611, "alignment objective": 2378, "dataset single": 13088, "mean standard": 31997, "contribute model": 10931, "examples used": 18940, "consistently significantly": 10310, "analyses reveal": 2604, "documents long": 15894, "need manually": 36582, "supervision target": 54096, "using minimal": 60804, "given limited": 22759, "variety training": 61295, "datasets pre": 13372, "building model": 7454, "predict performance": 41651, "insights model": 26391, "related sentences": 45934, "end setting": 17708, "weighted graph": 61930, "score 63": 48799, "decoder predict": 13612, "predict output": 41650, "based decoders": 5669, "methods ignore": 32893, "ignore fact": 24490, "suboptimal performance": 53595, "semantic signals": 49344, "embeddings previous": 17194, "models empirically": 34954, "integral natural": 26502, "recently different": 45420, "improvement zero": 25040, "work code": 62597, "com swarnahub": 9024, "current sota": 12009, "techniques widely": 56151, "biases model": 7056, "clustering experiments": 8740, "sentences perform": 49767, "supervised losses": 54012, "changes training": 8182, "learning combine": 29561, "extraction use": 20128, "chinese arabic": 8297, "robust representations": 48264, "lexical cues": 30360, "bert shows": 6718, "context includes": 10656, "provided context": 44159, "shows bert": 50763, "challenges data": 8037, "sets evaluation": 50292, "set topics": 50267, "cost efficient": 11581, "task introducing": 55145, "challenging benchmarks": 8084, "dialogues existing": 14798, "approaches current": 3791, "limited model": 30599, "sota approaches": 51724, "benchmarks furthermore": 6525, "multilingual scenarios": 36116, "like mbert": 30484, "languages easily": 28649, "scenarios work": 48712, "baselines supervised": 6306, "despite rapid": 14380, "multiple candidate": 36176, "14 absolute": 136, "strong retrieval": 53047, "challenge future": 7983, "maintain performance": 31483, "based zero": 6143, "critical importance": 11782, "complex nature": 9640, "language growing": 28095, "challenging evaluation": 8093, "knowledge task": 27626, "settings compared": 50361, "learning despite": 29588, "approach including": 3568, "including multilingual": 25276, "employ pre": 17388, "using majority": 60785, "high agreement": 23708, "labels models": 27839, "random guess": 44880, "low levels": 31158, "future data": 21865, "features play": 20642, "play essential": 40969, "case based": 7788, "method code": 32415, "method motivated": 32580, "build robust": 7424, "evaluation protocols": 18687, "multi reference": 36000, "study multi": 53414, "pretrained using": 42192, "tasks zero": 55974, "limitations data": 30545, "shallow heuristics": 50439, "challenge data": 7973, "corpora analysis": 11174, "scale work": 48640, "work improves": 62684, "generalization training": 22132, "generated examples": 22286, "improves generalization": 25131, "pair method": 39154, "make data": 31558, "code freely": 8814, "performance comparing": 40259, "explore training": 19743, "tuning generative": 58916, "loss fine": 31091, "loss experiments": 31088, "queries keys": 44653, "potential enhance": 41388, "output learning": 38983, "share knowledge": 50459, "propose relation": 43603, "improves prior": 25153, "superior capability": 53932, "experts moe": 19590, "word substitution": 62317, "model ensembling": 33828, "annotate dataset": 2872, "effective solutions": 16697, "information chinese": 25779, "arguments propose": 4181, "adversarial test": 1987, "data textual": 12733, "mechanism make": 32128, "longer sentences": 31052, "different source": 15073, "translation module": 58636, "number context": 37991, "train modules": 57611, "academic community": 791, "especially natural": 18289, "interaction human": 26600, "make models": 31583, "models yielded": 35692, "framework comprising": 21475, "parameters task": 39723, "recover original": 45593, "uncertainty aware": 59227, "distribution model": 15645, "specific evaluation": 52081, "datasets cases": 13171, "texts according": 56856, "meaning input": 32003, "outputs propose": 39019, "graph transformer": 23176, "art bert": 4228, "challenges design": 8038, "consistently achieves": 10289, "training conduct": 57955, "children learning": 8294, "combination data": 9037, "low correlation": 31136, "model apply": 33570, "shows models": 50790, "create evaluation": 11697, "seed examples": 49043, "build work": 7434, "generalization robustness": 22130, "fluent language": 21130, "encourage future": 17593, "attention especially": 4744, "wise attention": 62079, "tasks words": 55967, "frequency tokens": 21679, "analysis long": 2691, "generation incorporating": 22477, "consistency model": 10269, "unidirectional language": 59463, "task solved": 55384, "proposed outperforms": 43873, "specific content": 52059, "used bert": 60107, "time evaluation": 57151, "reduced model": 45685, "explore influence": 19711, "decoding results": 13643, "considerably large": 10241, "unseen cases": 59643, "bi modal": 7017, "perform downstream": 40093, "commonsense question": 9237, "studies knowledge": 53276, "leveraging machine": 30332, "24 hours": 328, "set obtain": 50205, "evaluated dataset": 18527, "ability different": 602, "models boost": 34792, "language culture": 28014, "model allow": 33560, "outputs different": 39013, "documents new": 15898, "shot approaches": 50599, "approach extensive": 3532, "classifiers identify": 8616, "development pre": 14698, "verification model": 61527, "detection demonstrate": 14473, "uses transformer": 60541, "perform various": 40159, "winning solution": 62071, "results identifying": 47666, "using transformers": 61001, "interactive systems": 26633, "problem fine": 42568, "particular achieve": 39831, "media use": 32186, "prevents model": 42235, "instead predicting": 26459, "factors affecting": 20306, "results classifying": 47533, "datasets involving": 13306, "goal mind": 22891, "models scratch": 35471, "quantities data": 44635, "including twitter": 25315, "based multimodal": 5891, "propose conditional": 43328, "level extensive": 30116, "task focusing": 55094, "code replicate": 8854, "despite tremendous": 14399, "progress limited": 43102, "translation non": 58649, "performance 12": 40166, "corpus non": 11392, "scarcity parallel": 48674, "furthermore different": 21816, "model sub": 34420, "distillation propose": 15576, "constituent parts": 10356, "generalization accuracy": 22117, "evaluation conduct": 18594, "social commonsense": 51556, "strong state": 53051, "models roberta": 35464, "task validate": 55466, "recent focus": 45312, "downstream datasets": 16337, "models distribution": 34931, "ability discriminate": 603, "framework cross": 21483, "original datasets": 38709, "identify critical": 24419, "work follow": 62672, "effectively work": 16764, "shot methods": 50632, "serves strong": 50091, "split training": 52347, "roberta xlnet": 48230, "interpret human": 26709, "framework applied": 21458, "datasets widely": 13484, "used public": 60280, "experiments ablation": 19344, "model mlm": 34103, "general pre": 22080, "studies validate": 53310, "discuss major": 15474, "improves sota": 25161, "leverage contextual": 30260, "enhance neural": 17917, "leveraging contextual": 30320, "build graph": 7402, "analyses illustrate": 2597, "illustrate effectiveness": 24515, "review process": 48036, "multiple categories": 36178, "previous papers": 42269, "time budget": 57123, "results alternative": 47497, "works surprisingly": 62911, "approach augment": 3423, "success various": 53730, "tasks efficiently": 55604, "simple lightweight": 51184, "flexible efficient": 21109, "able jointly": 703, "forgetting problem": 21309, "agnostic framework": 2090, "algorithm named": 2286, "knowledge retrieval": 27601, "multilingual benchmark": 36063, "methods single": 33044, "token predictions": 57302, "improve ability": 24821, "approach ranked": 3664, "systems predict": 54594, "evaluation settings": 18715, "set perform": 50216, "practical setting": 41471, "better aligned": 6848, "corpus benchmark": 11287, "contrastive pre": 10916, "instance wise": 26431, "effective sentence": 16694, "understanding cross": 59335, "documents typically": 15922, "consider aspects": 10207, "experiments newly": 19482, "results findings": 47634, "findings motivate": 20909, "motivate future": 35859, "achieved superior": 1278, "hold true": 23981, "suggest different": 53815, "learn different": 29358, "drawn attention": 16408, "users examine": 60461, "performance including": 40388, "classification respectively": 8536, "articles contain": 4465, "detection fine": 14487, "models consistent": 34850, "paper question": 39562, "evaluate robustness": 18502, "robustness different": 48276, "issues need": 27095, "time significantly": 57213, "influence functions": 25725, "time efficient": 57147, "language develop": 28026, "techniques build": 56066, "algorithm trained": 2306, "generate corresponding": 22190, "provides high": 44202, "integrate pre": 26508, "experiments comparing": 19380, "work analyzing": 62566, "models observe": 35270, "led development": 29988, "challenging datasets": 8089, "set generated": 50160, "narrative generation": 36382, "growing field": 23297, "generation open": 22511, "dependencies natural": 14109, "primarily focus": 42362, "construct novel": 10398, "datasets popular": 13371, "consider various": 10226, "gated graph": 21995, "domain testing": 16208, "provide basis": 44015, "weight distribution": 61917, "matching loss": 31913, "methods baselines": 32766, "task observe": 55245, "scores previous": 48913, "ordering information": 38666, "provide benchmarks": 44017, "best existing": 6762, "nlp propose": 37516, "need task": 36592, "similar meanings": 51052, "retrieval sentence": 47969, "ai models": 2119, "good starting": 22945, "results hybrid": 47665, "focus building": 21147, "supervision strategy": 54095, "approach simultaneously": 3696, "general corpora": 22048, "box nature": 7292, "measure effectiveness": 32051, "generated explanations": 22288, "explanations results": 19607, "building reliable": 7467, "systems improving": 54528, "sequence transformer": 50015, "evidence suggesting": 18820, "specific supervision": 52151, "requires supervision": 46954, "step use": 52833, "sentence paraphrasing": 49616, "documents news": 15899, "salient words": 48443, "help establish": 23559, "performance distribution": 40297, "distribution test": 15652, "setting analysis": 50316, "contrastive regularization": 10918, "introduced capture": 26882, "better estimate": 6884, "augmentation adversarial": 4947, "settings extensive": 50372, "wide applicability": 61957, "research years": 47145, "availability parallel": 5255, "scores baseline": 48892, "data despite": 12275, "larger previous": 29085, "including dialogue": 25249, "resolution tasks": 47196, "description data": 14242, "annotations experiments": 2992, "benchmark proposed": 6488, "approach yield": 3743, "spoken text": 52365, "highly noisy": 23906, "addition evaluate": 1612, "mixed sentences": 33409, "corresponding human": 11552, "metric evaluation": 33116, "correctly identifying": 11493, "domains address": 16234, "framework open": 21575, "answer related": 3052, "unique advantages": 59508, "decoding processes": 13641, "information attention": 25763, "framework consistently": 21477, "code github": 8817, "synthetic question": 54379, "action sequence": 1456, "weights encoder": 61939, "forcing model": 21291, "learning capacity": 29552, "various architectures": 61301, "dramatically improved": 16388, "provide realistic": 44119, "improvement 25": 24981, "learning generated": 29661, "hot vectors": 24032, "errors existing": 18238, "generation metrics": 22493, "ways combine": 61840, "novel use": 37949, "tail entities": 54764, "dealing long": 13521, "combining neural": 9118, "easy integration": 16563, "inference datasets": 25652, "features bi": 20533, "architectures recent": 4122, "parameters results": 39721, "long studied": 31034, "style sentiment": 53498, "rely solely": 46300, "models quality": 35390, "algorithms text": 2342, "algorithms different": 2325, "addition paper": 1631, "result best": 47434, "achieved 96": 1216, "humans communicate": 24274, "specific neural": 52118, "includes multiple": 25231, "supervised objectives": 54029, "generating target": 22399, "consistent patterns": 10280, "noisy signals": 37624, "result performance": 47446, "model supervision": 34430, "labeling process": 27791, "process sentence": 42829, "dataset existing": 12914, "english centric": 17781, "analysis sentence": 2750, "relationships documents": 46078, "samples classes": 48467, "mapping function": 31801, "showing improved": 50680, "require lot": 46876, "sentence bert": 49520, "suffer severe": 53779, "regularization improve": 45837, "calibration methods": 7536, "tasks commonly": 55544, "tasks area": 55507, "development research": 14700, "unified format": 59471, "model chosen": 33657, "accuracy increase": 992, "producing natural": 43040, "perform analyses": 40067, "multi role": 36002, "training synthetic": 58281, "improvement 10": 24976, "knowledge multi": 27554, "effective robust": 16691, "new decoding": 37169, "trained positive": 57839, "assess extent": 4578, "pretrained sequence": 42183, "recent text": 45359, "transfer transformer": 58427, "transformer t5": 58511, "unified text": 59480, "led increased": 29991, "data tokens": 12736, "humans create": 24275, "new existing": 37199, "output examples": 38971, "range simple": 44934, "training long": 58158, "shot domain": 50611, "performance improve": 40381, "unseen target": 59654, "model synthetic": 34435, "synthetic samples": 54381, "requires labeled": 46935, "evaluations indicate": 18760, "reducing gap": 45708, "shot supervised": 50648, "supervised performance": 54031, "performance smaller": 40567, "generating concise": 22369, "directly using": 15342, "experiments training": 19547, "potentially used": 41419, "tasks context": 55558, "adaptive pre": 1578, "propose feature": 43387, "bert bart": 6611, "models research": 35447, "available cross": 5274, "new shared": 37315, "events cause": 18791, "language built": 27982, "models mainstream": 35210, "align human": 2352, "method align": 32376, "level contrastive": 30086, "generation current": 22440, "model ranking": 34270, "use variational": 60069, "space generating": 51868, "pre learned": 41504, "datasets prior": 13377, "corpus retrieval": 11423, "intensive tasks": 26561, "minimal pairs": 33290, "pairs building": 39172, "features evaluation": 20573, "indian english": 25517, "examples demonstrate": 18895, "requires domain": 46925, "thousands examples": 57079, "supervision method": 54085, "data active": 12116, "alleviate challenges": 2401, "method finally": 32507, "time standard": 57223, "vocabulary generation": 61703, "representations unsupervised": 46780, "knowledge english": 27461, "information information": 25923, "understand information": 59299, "bert best": 6629, "insights future": 26388, "composed modules": 9733, "tree graph": 58744, "dataset build": 12832, "dependent model": 14148, "proposed incorporate": 43795, "model decoder": 33740, "work novel": 62737, "representation contextual": 46500, "costly collect": 11600, "interface web": 26662, "bias reduction": 7041, "using downstream": 60669, "challenges remain": 8076, "lm fine": 30906, "demonstrated significant": 14018, "significant inference": 50893, "structure given": 53108, "input context": 26260, "context extracted": 10636, "reasoning benchmarks": 45186, "outperform supervised": 38827, "properties example": 43261, "text proposed": 56717, "datasets despite": 13224, "additional lexical": 1684, "resources code": 47296, "approaches detect": 3796, "bert embedding": 6650, "using advanced": 60553, "2020 using": 294, "scores word": 48930, "questions contain": 44779, "task description": 55006, "scale domain": 48566, "dialogue tasks": 14790, "model annotated": 33565, "generating textual": 22402, "score 27": 48787, "token input": 57293, "scoring module": 48938, "scratch fine": 48944, "coronavirus pandemic": 11169, "learning hierarchical": 29670, "quality instances": 44536, "method significant": 32652, "method classify": 32414, "explain predictions": 19595, "bidirectional decoder": 7066, "sets human": 50295, "need robust": 36590, "techniques mitigate": 56111, "datasets expensive": 13261, "highly domain": 23895, "better machine": 6914, "thoroughly investigated": 57072, "corpus sentence": 11426, "volumes text": 61732, "ml algorithms": 33428, "labels present": 27845, "augmenting data": 4987, "data discuss": 12288, "bert electra": 6648, "word masked": 62243, "models transformers": 35631, "evaluation open": 18665, "demonstrate automatically": 13873, "result proposed": 47448, "tasks outperforms": 55780, "consuming error": 10443, "recognize unseen": 45551, "classification previous": 8519, "ignored paper": 24495, "recent pre": 45333, "model objectives": 34135, "experiments downstream": 19424, "feature model": 20498, "fair evaluation": 20357, "seeks address": 49056, "topic language": 57412, "additional tasks": 1703, "relevant literature": 46223, "document analysis": 15767, "continued pre": 10831, "baselines generating": 6267, "mitigation methods": 33396, "analysis article": 2617, "main methods": 31446, "method pre": 32617, "novel large": 37851, "work datasets": 62618, "innovative approach": 26250, "correlations different": 11534, "interactions social": 26622, "hierarchical levels": 23675, "tuning strategies": 58962, "encoding used": 17580, "achieving similar": 1423, "conversational setting": 11051, "retain information": 47921, "absolute error": 739, "systems vulnerable": 54668, "indonesian language": 25600, "resources experiments": 47302, "suffer catastrophic": 53759, "domain methods": 16111, "proposed solve": 43899, "occur training": 38270, "documents documents": 15872, "sentence graph": 49567, "graph semantic": 23165, "different frameworks": 14939, "mask based": 31856, "based augmentation": 5585, "studies revealed": 53298, "reasoning path": 45213, "providing comprehensive": 44239, "logical rules": 30989, "reasoning experiments": 45194, "speech inputs": 52267, "change paper": 8172, "human use": 24254, "evidence annotations": 18807, "literature task": 30864, "extraction traditional": 20125, "learning solve": 29886, "consistency regularization": 10270, "regularization based": 45836, "construct word": 10404, "representations novel": 46726, "sets code": 50284, "trained datasets": 57705, "emotion information": 17290, "derived text": 14203, "controlled experiment": 10983, "analysis compare": 2632, "editing approach": 16596, "opens possibility": 38484, "dataset sentence": 13079, "evaluating proposed": 18567, "weighting method": 61934, "errors based": 18235, "models findings": 35026, "resource availability": 47210, "detection english": 14476, "bart based": 5533, "years pre": 63069, "data uncertainty": 12751, "loss finally": 31090, "uncertainty estimation": 59230, "hindi bengali": 23935, "effective strategies": 16698, "community https": 9265, "https huggingface": 24060, "based dual": 5692, "models targeted": 35584, "bert makes": 6678, "dataset generation": 12943, "new setting": 37314, "negative mining": 36626, "auxiliary entity": 5231, "methods achieves": 32734, "describes contribution": 14219, "models potential": 35333, "performance equivalent": 40322, "source embeddings": 51769, "embeddings according": 17076, "accompanying dataset": 844, "capturing complex": 7730, "documents previous": 15903, "using discourse": 60657, "document length": 15802, "propose emph": 43365, "utterance model": 61138, "input use": 26354, "models case": 34807, "single character": 51288, "single head": 51306, "using handcrafted": 60722, "results allow": 47496, "segmenting long": 49094, "neural inference": 36960, "cues present": 11934, "provide generic": 44078, "resources based": 47294, "end study": 17711, "sequential transfer": 50052, "explore number": 19722, "number research": 38033, "algorithms recent": 2338, "work fine": 62667, "characteristics chinese": 8234, "offer insights": 38292, "auxiliary objectives": 5237, "semantically unrelated": 49396, "based filtering": 5729, "style tasks": 53500, "corpora state": 11245, "pretraining process": 42215, "classification evaluation": 8465, "proposed bert": 43745, "entire sequence": 18028, "purpose pretrained": 44409, "tasks pretraining": 55809, "lms learn": 30920, "data adopt": 12125, "encode syntactic": 17471, "interpret results": 26712, "task problem": 55295, "problem order": 42618, "set final": 50157, "major focus": 31509, "data prior": 12561, "create benchmark": 11692, "robust data": 48242, "identify correct": 24418, "increase computational": 25408, "generate single": 22248, "representations latent": 46702, "space generate": 51867, "questions study": 44811, "attention input": 4760, "previously introduced": 42335, "predicted answers": 41665, "achieves sota": 1376, "lingual ner": 30716, "data newly": 12512, "seven teams": 50421, "results highest": 47656, "effect data": 16611, "attention global": 4754, "deletion insertion": 13823, "better label": 6906, "investigate existing": 26957, "simple alternative": 51134, "based decomposition": 5671, "allows flexible": 2465, "level relations": 30190, "domain furthermore": 16075, "lexical choice": 30356, "enabling efficient": 17454, "single instance": 51309, "makes learning": 31627, "single text": 51347, "data labels": 12450, "despite long": 14373, "supervised fashion": 53984, "adopt pre": 1866, "maximum improvement": 31970, "coherent meaningful": 8917, "issues arise": 27084, "discuss shortcomings": 15482, "shortcomings previous": 50581, "explored using": 19768, "data increase": 12426, "self learning": 49200, "sentences various": 49805, "summaries based": 53869, "specific paper": 52121, "spur research": 52383, "negative pairs": 36628, "failing capture": 20348, "text combine": 56497, "utilize textual": 61104, "popular pre": 41178, "resources train": 47336, "performance cost": 40269, "paper ask": 39273, "evaluation strategy": 18728, "suggest improvements": 53821, "contribute research": 10933, "nlp solutions": 37525, "including global": 25258, "financial reports": 20893, "present tool": 42042, "density estimation": 14093, "task formulation": 55096, "leaving room": 29984, "task descriptions": 55007, "score 12": 48784, "thanks availability": 56999, "rl algorithms": 48173, "sentences fed": 49723, "learning scl": 29861, "texts long": 56901, "close supervised": 8693, "texts despite": 56871, "experiments compared": 19379, "difficulty capturing": 15198, "aspects natural": 4547, "new synthetic": 37330, "improvement different": 25002, "approaching human": 3959, "common benchmarks": 9167, "benchmarks multiple": 6533, "clear evidence": 8653, "gain deeper": 21906, "problem pre": 42625, "increasingly studied": 25477, "perform different": 40089, "tasks performed": 55797, "provided test": 44173, "set shows": 50245, "auto generated": 5019, "experiments fine": 19436, "suggest bert": 53814, "content form": 10524, "automatically identifies": 5180, "increasing amounts": 25443, "method methods": 32576, "proposed achieve": 43710, "multiple popular": 36263, "detection best": 14464, "bilstm bert": 7128, "dataset indicate": 12967, "bert learn": 6672, "technique improve": 56033, "model concepts": 33693, "aims develop": 2187, "compare using": 9375, "language typically": 28542, "studies investigate": 53273, "000 articles": 4, "datasets manually": 13326, "information constituency": 25788, "use interface": 59915, "search strategies": 48985, "extraction low": 20078, "limitation paper": 30537, "superiority approach": 53949, "systems key": 54537, "representative text": 46802, "large complex": 28860, "retrain model": 47933, "provide various": 44154, "human life": 24198, "performance greatly": 40370, "current model": 11987, "accuracy 13": 895, "evaluations conducted": 18753, "important high": 24730, "selection lead": 49142, "number authors": 37983, "tasks improving": 55675, "task leads": 55166, "knowledge injection": 27526, "facilitate downstream": 20266, "potential improvement": 41394, "serve good": 50078, "challenge aims": 7967, "developing novel": 14662, "performance information": 40394, "tasks meta": 55744, "trained input": 57751, "individual representations": 25577, "novel dependency": 37804, "linguistic ambiguity": 30746, "matching query": 31920, "knowledge domains": 27449, "learn single": 29424, "method facilitates": 32506, "second contribution": 49001, "ir systems": 27035, "identify types": 24449, "dataset arabic": 12814, "models classification": 34815, "techniques rely": 56131, "estimated using": 18377, "frequently observed": 21685, "space latent": 51872, "use alternative": 59818, "exploiting large": 19674, "modelling framework": 34642, "language bert": 27976, "research explored": 47033, "making suitable": 31668, "applying state": 3377, "shows best": 50764, "best submission": 6826, "pair level": 39153, "quality issues": 44537, "make work": 31611, "evaluating text": 18570, "texts use": 56939, "carried using": 7771, "distribution shift": 15650, "media important": 32167, "using crowd": 60638, "augmentation data": 4952, "methods neglect": 32955, "tokens propose": 57333, "significantly superior": 51014, "training achieved": 57924, "far away": 20397, "concepts present": 9940, "model potential": 34214, "computational studies": 9864, "different mechanisms": 14986, "conventional transformer": 11017, "generated datasets": 22284, "framework robust": 21593, "high f1": 23736, "level specifically": 30214, "study illustrate": 53386, "demonstrated ability": 14002, "models select": 35473, "existing researches": 19137, "knowledge enhancement": 27464, "candidates proposed": 7587, "task providing": 55310, "providing additional": 44236, "applied data": 3266, "framework natural": 21570, "based inter": 5788, "task performed": 55274, "inherent ambiguity": 26200, "annotation strategy": 2971, "unseen labels": 59650, "method investigate": 32552, "propose curriculum": 43344, "aspects generated": 4539, "correct target": 11476, "sequence architecture": 49909, "score used": 48880, "role label": 48310, "witnessed great": 62092, "news generation": 37404, "progress challenges": 43094, "potentially harmful": 41413, "modal language": 33460, "focus english": 21158, "building cross": 7440, "model trains": 34484, "normalization method": 37705, "available model": 5327, "sampling large": 48501, "corpora furthermore": 11205, "release datasets": 46153, "automatically discovers": 5165, "quality samples": 44577, "manner extensive": 31718, "limitations propose": 30555, "tuning transformer": 58971, "result obtain": 47443, "shows high": 50781, "model research": 34314, "work performance": 62749, "tuning limited": 58925, "score 46": 48793, "second issue": 49008, "used facilitate": 60184, "advantage explicit": 1939, "graph generate": 23139, "surpass strong": 54168, "model attempt": 33582, "reduce impact": 45665, "information longer": 25959, "bilstm encoder": 7133, "combines pre": 9100, "mtl approach": 35930, "tasks dataset": 55569, "based mathematical": 5836, "little progress": 30883, "perform topic": 40156, "saliency based": 48437, "limitation using": 30541, "latent spaces": 29137, "world language": 62944, "better precision": 6941, "classifier fine": 8597, "manifold mixup": 31706, "methods easy": 32831, "speech form": 52264, "using autoregressive": 60577, "private test": 42443, "relevant research": 46232, "documents methods": 15895, "methods manual": 32941, "sample size": 48455, "focus capturing": 21148, "view propose": 61602, "bert family": 6659, "roberta distilbert": 48220, "graph automatically": 23099, "complexity text": 9691, "existing computational": 19047, "analysis traditional": 2784, "poor accuracy": 41132, "tasks prior": 55814, "nature word": 36492, "corpora approach": 11177, "specific patterns": 52125, "short document": 50553, "makes comparison": 31617, "problem design": 42533, "problem high": 42577, "directly extracted": 15315, "works demonstrated": 62883, "rely language": 46291, "pairs joint": 39198, "space source": 51898, "information efficiently": 25826, "performances downstream": 40642, "aims map": 2205, "scenarios propose": 48706, "competitive previous": 9558, "global perspective": 22839, "labels better": 27812, "challenge knowledge": 7987, "achieve 93": 1108, "benchmark various": 6504, "dataset benchmarks": 12827, "domain explore": 16069, "directly generates": 15319, "reproducibility code": 46827, "recognition benchmarks": 45495, "shown potential": 50735, "focus incorporating": 21172, "artificial languages": 4496, "depending language": 14156, "rules natural": 48392, "domains investigate": 16263, "relations introducing": 46038, "features instance": 20605, "conducted widely": 10098, "scientific document": 48758, "lastly demonstrate": 29107, "empirical experiment": 17327, "labels high": 27832, "methods greatly": 32882, "tuned dataset": 58870, "decoding propose": 13642, "generation sentences": 22544, "performance input": 40395, "sub domains": 53516, "cnn bilstm": 8763, "training pairs": 58200, "embeddings usually": 17241, "provide code": 44027, "information interaction": 25930, "evaluation work": 18749, "automatic assessment": 5072, "dataset validate": 13131, "models ablation": 34652, "advancements neural": 1903, "aims build": 2178, "improve accessibility": 24822, "models assist": 34730, "applications provide": 3238, "language format": 28078, "outperforms multi": 38911, "driven end": 16422, "essential components": 18324, "domain labeled": 16095, "model generated": 33932, "data allowing": 12130, "automated generation": 5045, "dataset support": 13109, "evaluation automated": 18577, "automatically detect": 5157, "using images": 60734, "develop multimodal": 14601, "implications future": 24656, "challenge organized": 8003, "model separate": 34357, "proposed achieves": 43711, "reduce influence": 45666, "predictions models": 41764, "dataset general": 12941, "traditional knowledge": 57522, "knowledge single": 27611, "current practices": 11998, "additional benefit": 1655, "transformer encoders": 58485, "problem leveraging": 42595, "tuning stages": 58960, "tree text": 58761, "gains compared": 21935, "work offers": 62740, "universal representation": 59543, "incorporate different": 25350, "surpasses baselines": 54171, "approach constructing": 3470, "dataset open": 13018, "distilled data": 15582, "quality measures": 44550, "structure texts": 53142, "multiple decoders": 36195, "finding models": 20899, "studies benchmark": 53250, "dialogue graph": 14775, "global graph": 22829, "role labelling": 48313, "layers transformer": 29236, "layers finally": 29224, "domain long": 16105, "benchmark training": 6502, "specifically define": 52189, "tasks better": 55526, "plms bert": 41016, "wall clock": 61761, "effectiveness generalization": 16782, "language improves": 28103, "important characteristics": 24710, "superficial cues": 53925, "samples work": 48495, "relevant dimensions": 46209, "tasks lexical": 55722, "represented different": 46806, "layers pre": 29232, "trained roberta": 57855, "parsing benchmark": 39774, "methods capable": 32776, "advances large": 1914, "lm pretraining": 30912, "multilingual transformers": 36132, "constraints paper": 10376, "word span": 62313, "observe different": 38131, "outperform multiple": 38805, "multiple existing": 36213, "explanations model": 19606, "incorrect predictions": 25400, "difficult adapt": 15156, "long input": 31015, "models introducing": 35145, "supervised shot": 54047, "based strategy": 6061, "inferior performance": 25708, "design task": 14303, "learning encoder": 29620, "representation approaches": 46493, "data smaller": 12670, "primarily lack": 42365, "largest model": 29098, "articles study": 4480, "conducted human": 10086, "accuracy detecting": 958, "length propose": 30033, "structure large": 53114, "attempt evaluate": 4684, "develop effective": 14583, "com ukplab": 9028, "study set": 53458, "models set": 35487, "performance decreases": 40279, "report introduces": 46438, "token vocabulary": 57313, "understanding role": 59396, "tasks improvement": 55674, "method encourages": 32482, "align representation": 2353, "process generate": 42785, "time consumption": 57139, "models greatly": 35071, "loss landscape": 31098, "relatively understudied": 46134, "augmented fine": 4978, "trained annotators": 57673, "detection provide": 14514, "supervision approach": 54076, "t5 transformer": 54684, "retrieved evidence": 47984, "copy network": 11135, "conducted dataset": 10078, "available project": 5348, "reduces inference": 45691, "compared transformer": 9468, "models adopt": 34696, "assign labels": 4600, "lead models": 29265, "able assign": 676, "knowledge downstream": 27450, "existing newly": 19119, "aspects data": 4537, "reveal challenges": 48008, "personally identifiable": 40767, "identifiable information": 24382, "training named": 58185, "models base": 34752, "recall scores": 45247, "score 98": 48831, "f_1 points": 20236, "context analysis": 10583, "work retrieval": 62810, "datasets sentiment": 13416, "robustness method": 48284, "prediction process": 41730, "multilingual monolingual": 36099, "allows control": 2455, "access model": 827, "data highlight": 12404, "thorough review": 57064, "used introduce": 60218, "input predict": 26316, "based constituency": 5638, "especially helpful": 18278, "model seq2seq": 34359, "novel pretraining": 37896, "based commonsense": 5626, "input approach": 26255, "limitations model": 30551, "query representation": 44676, "setting investigate": 50328, "learning lack": 29693, "learning era": 29627, "generation ability": 22407, "gap work": 21985, "feature fusion": 20491, "tunes pre": 58895, "english foreign": 17805, "focus research": 21196, "dynamic graph": 16485, "relation representation": 45994, "collection diverse": 8982, "way understanding": 61834, "understanding world": 59419, "providing human": 44247, "directions task": 15301, "crowdsourcing workers": 11891, "2021 shared": 298, "proposed ensemble": 43768, "correctly classified": 11490, "present architecture": 41849, "task formulations": 55097, "applications current": 3193, "constrained devices": 10365, "attention specific": 4830, "lower model": 31218, "size existing": 51382, "lightweight model": 30458, "originally designed": 38743, "designed provide": 14329, "ii incorporating": 24504, "approach creating": 3472, "token word": 57315, "model affect": 33548, "provide case": 44023, "graph words": 23179, "standard procedure": 52519, "paper fine": 39375, "proposed fine": 43777, "evaluate range": 18497, "nlp used": 37559, "phenomena paper": 40812, "design based": 14265, "despite use": 14401, "efficient memory": 16883, "toolkit available": 57371, "topic field": 57407, "confuse model": 10157, "fail recognize": 20345, "trained recognize": 57849, "heavily relying": 23536, "research tree": 47134, "type single": 59069, "embedding position": 17053, "input recurrent": 26325, "achieved compared": 1226, "text generate": 56590, "bias improve": 7028, "use prediction": 59977, "encoders perform": 17558, "extraction requires": 20104, "highly context": 23889, "responses work": 47406, "aim address": 2134, "proposed annotation": 43718, "dataset ii": 12957, "promising source": 43186, "required fine": 46900, "models zero": 35693, "improving zero": 25201, "code fine": 8813, "present comparative": 41867, "datasets hope": 13290, "specific parts": 52124, "classifier proposed": 8604, "precision f1": 41613, "features dataset": 20552, "training generating": 58113, "augmentation using": 4972, "approach compares": 3457, "baseline evaluation": 6164, "main finding": 31439, "models don": 34939, "art contextual": 4241, "text prompts": 56714, "extending previous": 19841, "decompose complex": 13653, "strategies combined": 52895, "com gt": 9014, "gt salt": 23317, "problems work": 42738, "applies machine": 3318, "sentiment data": 49838, "context speaker": 10723, "yield effective": 63095, "texts address": 56857, "form texts": 21339, "capture key": 7687, "model match": 34093, "word overlapping": 62259, "built based": 7482, "words selected": 62504, "images video": 24557, "content generation": 10526, "set models": 50195, "context shot": 10718, "context examples": 10631, "based prompt": 5956, "datasets yield": 13489, "methods time": 33074, "need distinguish": 36556, "context form": 10640, "model contextual": 33712, "contextual string": 10784, "pairs text": 39220, "dataset natural": 13006, "tools models": 57381, "timely manner": 57245, "result experiments": 47438, "studies existing": 53262, "pre process": 41507, "critical tasks": 11796, "research far": 47035, "issues data": 27088, "methods normally": 32962, "absence explicit": 734, "lms fine": 30918, "designed improve": 14321, "corpus automatically": 11283, "detailed experimental": 14425, "step ahead": 52797, "bias lead": 7031, "similarity original": 51111, "little additional": 30869, "evaluation compared": 18592, "detect classify": 14436, "course conversation": 11636, "typical approach": 59131, "proposed recent": 43884, "features learn": 20614, "understand semantics": 59312, "baselines publicly": 6292, "components pre": 9723, "based roberta": 6002, "architecture large": 4057, "adaptive approach": 1573, "number candidate": 37986, "large fraction": 28880, "aims promote": 2210, "high levels": 23749, "based constraints": 5639, "focus models": 21183, "classification perform": 8513, "tool designed": 57361, "attention computational": 4728, "linguistics community": 30822, "community previous": 9271, "goal directed": 22882, "humans models": 24282, "model overcomes": 34171, "lack generalization": 27890, "data shortage": 12657, "framework highly": 21534, "decoding experiments": 13630, "explicit linguistic": 19619, "parameters frozen": 39700, "linguistics literature": 30823, "novel adaptation": 37748, "generated human": 22292, "solve sub": 51690, "utilizing multi": 61126, "perform differently": 40090, "utilize different": 61090, "encoded graph": 17478, "considering multiple": 10260, "analyses conducted": 2590, "conducted analyze": 10073, "used entity": 60167, "evaluation text": 18739, "focused text": 21231, "document image": 15799, "focuses developing": 21237, "contains 30": 10491, "000 pairs": 10, "attack models": 4660, "showing training": 50691, "processing particularly": 42921, "data reduce": 12591, "stages development": 52449, "information considering": 25787, "diversity complexity": 15732, "pass model": 39917, "generate texts": 22255, "models reveals": 35461, "tasks instance": 55692, "learning ensemble": 29623, "bias make": 7032, "use conditional": 59850, "learning respectively": 29847, "focus previous": 21190, "performance image": 40378, "political discourse": 41108, "tasks increasingly": 55685, "set multi": 50196, "achieve low": 1167, "models score": 35470, "individually ignoring": 25590, "efficiently incorporate": 16917, "efficient transformer": 16906, "achieve use": 1212, "models determine": 34907, "trained explicit": 57728, "enables evaluation": 17440, "articles news": 4472, "heterogeneous document": 23618, "challenge text": 8020, "work addressed": 62557, "appropriate training": 3967, "finally summarize": 20883, "large spectrum": 29016, "language consisting": 28003, "using widely": 61025, "factors impact": 20308, "contrastive losses": 10913, "yield similar": 63100, "lack language": 27900, "used multilingual": 60244, "base language": 5544, "domain framework": 16074, "transfer source": 58422, "systems handle": 54517, "contains total": 10506, "availability domain": 5247, "robust dataset": 48243, "adapts model": 1584, "performance boosts": 40225, "pairs utterances": 39229, "ability memorize": 624, "methods real": 33005, "framework encodes": 21503, "multiple bert": 36175, "aim understand": 2161, "better best": 6855, "predicting future": 41675, "larger language": 29080, "datasets combined": 13178, "models likely": 35188, "paper explains": 39359, "usually hard": 61051, "feature distribution": 20480, "roberta xlm": 48229, "science linguistics": 48747, "capabilities limitations": 7600, "single architecture": 51283, "labels text": 27852, "single set": 51336, "procedure based": 42742, "benchmark accuracy": 6423, "underrepresented groups": 59285, "attention use": 4843, "examples question": 18927, "applied directly": 3268, "model retraining": 34324, "strong models": 53037, "competitive recent": 9559, "constructing large": 10422, "label quality": 27722, "100 training": 64, "models acquire": 34686, "datasets mainly": 13323, "words definitions": 62394, "future paper": 21881, "easily available": 16538, "specifically conduct": 52186, "using template": 60983, "reasoning datasets": 45192, "propose light": 43441, "bayesian learning": 6359, "self explanatory": 49196, "method contributes": 32443, "confirm method": 10132, "unified multimodal": 59476, "annotation existing": 2948, "information additional": 25755, "label generation": 27709, "method surpasses": 32677, "improvement score": 25022, "improvement significant": 25023, "representative methods": 46798, "life settings": 30440, "focus context": 21149, "allows reduce": 2476, "methods future": 32874, "model scale": 34335, "languages respectively": 28774, "generation focused": 22463, "extraction documents": 20058, "remains relatively": 46347, "present hybrid": 41926, "hybrid framework": 24316, "datasets able": 13140, "translation bilingual": 58586, "aim achieve": 2133, "study zero": 53477, "parallel datasets": 39649, "based contextualized": 5645, "effective bert": 16633, "medium sized": 32220, "verify model": 61541, "generated comments": 22275, "introduce small": 26862, "annotation budget": 2938, "curated datasets": 11950, "problems addressed": 42693, "speed memory": 52323, "retaining high": 47925, "model leading": 34048, "types code": 59079, "work leverages": 62710, "style text": 53501, "using pointer": 60860, "methods assume": 32757, "input given": 26283, "following previous": 21266, "fusion approach": 21851, "researchers focus": 47158, "increased attention": 25429, "adaptation propose": 1536, "auxiliary model": 5235, "base large": 5545, "average points": 5413, "datasets studies": 13444, "cases domain": 7806, "real scenarios": 45110, "existing single": 19143, "analyses provide": 2602, "researchers understand": 47167, "information vital": 26156, "data cross": 12262, "relevant external": 46215, "time detection": 57144, "extensive data": 19860, "learning specific": 29888, "datasets automatic": 13161, "terms fluency": 56291, "simplify process": 51243, "step improve": 52812, "attention transformers": 4841, "learning works": 29947, "scores model": 48908, "tree main": 58750, "models codes": 34823, "rich history": 48101, "instead focusing": 26452, "set different": 50135, "typically need": 59149, "domain typically": 16220, "selecting data": 49125, "comparisons state": 9513, "relatively limited": 46120, "efficient text": 16903, "accuracy 89": 921, "resulted best": 47459, "baseline given": 6174, "augmentation models": 4964, "task pretrained": 55289, "provides dataset": 44191, "000 examples": 6, "global optimum": 22837, "strategies perform": 52912, "problem incorporating": 42582, "bilingual model": 7111, "set social": 50250, "works field": 62889, "sequential decoding": 50039, "outputs experiments": 39014, "extraction work": 20131, "current trend": 12023, "models requiring": 35446, "models construction": 34857, "implemented different": 24647, "fields work": 20785, "work defines": 62624, "online used": 38391, "propose user": 43695, "informal nature": 25741, "standard sentence": 52525, "solutions proposed": 51672, "additional pretraining": 1695, "directly input": 15321, "depend context": 14099, "lead robust": 29268, "tasks field": 55639, "based empirical": 5697, "significantly enhance": 50955, "experimentally evaluated": 19333, "semeval 2021": 49438, "2021 task": 300, "integrated gradients": 26515, "best submissions": 6827, "research english": 47029, "compare pre": 9358, "attention promising": 4813, "studies explored": 53264, "mel spectrograms": 32227, "study indicates": 53390, "research past": 47092, "research typically": 47136, "role pre": 48319, "tuned multilingual": 58881, "consistently best": 10290, "supplementary material": 54106, "input label": 26287, "relations finally": 46033, "model fails": 33879, "easily learn": 16548, "current transformer": 12021, "settings limited": 50381, "average 18": 5399, "text dynamic": 56546, "strategies compare": 52896, "age groups": 2047, "mitigate negative": 33388, "regression tasks": 45822, "knowledge scientific": 27603, "systems detecting": 54477, "strategy yields": 52956, "shot training": 50653, "lead poor": 29267, "context domain": 10617, "words relatively": 62495, "jointly leverages": 27204, "results biomedical": 47527, "knowledge represented": 27593, "hope survey": 24015, "directly extract": 15314, "performance good": 40368, "multi modality": 35992, "high resolution": 23790, "relatively explored": 46115, "use datasets": 59862, "learning classify": 29558, "higher levels": 23833, "suggest human": 53820, "t5 large": 54681, "information achieved": 25752, "intuitive humans": 26910, "reasoning method": 45202, "overall findings": 39042, "donald trump": 16315, "domains healthcare": 16259, "dataset valuable": 13132, "utility dataset": 61081, "https doi": 24056, "10 5281": 33, "5281 zenodo": 437, "knowledge linguistic": 27549, "online platform": 38378, "cc sa": 7899, "attention uses": 4845, "softmax attention": 51628, "recency bias": 45275, "combining representations": 9121, "known text": 27668, "need expensive": 36560, "expensive annotations": 19204, "task metrics": 55213, "systems required": 54621, "dynamic vocabulary": 16493, "building accurate": 7436, "driven learning": 16426, "algorithm effectively": 2271, "outperforms supervised": 38952, "gap using": 21984, "code accessed": 8791, "human decision": 24132, "answering existing": 3072, "mutual interaction": 36350, "way experiments": 61802, "corpora corpus": 11188, "candidate entities": 7570, "contextual meaning": 10775, "role nlp": 48318, "human provided": 24218, "knowledge parameters": 27566, "complex pipelines": 9646, "data studied": 12701, "harder tasks": 23455, "available easy": 5285, "systems general": 54510, "addressing question": 1824, "encoded language": 17480, "procedure using": 42746, "models bias": 34784, "data showing": 12658, "unsatisfactory performance": 59641, "data abstract": 12105, "examples hard": 18908, "develop evaluate": 14587, "fleiss kappa": 21105, "leverage recent": 30286, "achieve goals": 1144, "context instead": 10661, "known benchmark": 27654, "tuning nlp": 58935, "19 patients": 189, "human resources": 24234, "information generating": 25893, "design framework": 14284, "social issues": 51566, "domains low": 16272, "resource multilingual": 47254, "techniques fine": 56089, "human preferences": 24217, "focus types": 21209, "introduce idea": 26813, "powerful new": 41439, "analysis network": 2705, "classification fundamental": 8474, "particularly suitable": 39891, "training limited": 58156, "simple training": 51221, "standard single": 52530, "text does": 56542, "provides reasonable": 44221, "assist humans": 4610, "understanding people": 59380, "unique properties": 59516, "handling problem": 23427, "improvements f1": 25075, "improvement 11": 24977, "absolute points": 749, "dataset multiple": 13004, "dataset baseline": 12823, "studies introduced": 53272, "rnn lm": 48198, "robustness existing": 48279, "model conducted": 33697, "challenging cases": 8085, "factually consistent": 20326, "impressive capabilities": 24809, "including zero": 25322, "slow inference": 51450, "novel machine": 37859, "attention problem": 4812, "given dialogue": 22735, "vital information": 61691, "datasets user": 13473, "great performance": 23209, "model perplexity": 34207, "baseline absolute": 6149, "absolute 10": 737, "models construct": 34855, "code mix": 8822, "handle code": 23406, "using code": 60609, "resource researchers": 47267, "current sequence": 12007, "best tasks": 6831, "t5 gpt": 54680, "traditional fine": 57520, "benchmark best": 6429, "knowledge additional": 27391, "prompt engineering": 43203, "using type": 61005, "efforts make": 16941, "web app": 61876, "task form": 55095, "designed enable": 14312, "confidence based": 10111, "early training": 16517, "limited address": 30563, "effect translation": 16621, "languages combining": 28616, "reflect human": 45773, "missing important": 33362, "generally rely": 22170, "phase pre": 40805, "effectiveness pre": 16801, "continuing pre": 10837, "grained annotation": 23022, "significant work": 50930, "tweets extracted": 59015, "evaluation platform": 18674, "robustness analysis": 48274, "models aspects": 34728, "input time": 26348, "possible model": 41333, "datasets difficult": 13232, "entities sentences": 18083, "size complexity": 51377, "samples given": 48477, "metric language": 33118, "sample level": 48452, "evolving nature": 18842, "provide researchers": 44122, "posts using": 41376, "particular important": 39848, "level ones": 30170, "dense retrieval": 14080, "hard negatives": 23448, "recall 10": 45239, "languages manually": 28727, "evaluate improve": 18464, "rely lexical": 46294, "critical understanding": 11797, "dataset supports": 13110, "annotation guideline": 2952, "tasks average": 55515, "category text": 7863, "diversity fluency": 15734, "11 datasets": 85, "linear complexity": 30654, "aims convert": 2182, "maintaining accuracy": 31486, "transformers work": 58533, "improving inference": 25182, "handle multi": 23412, "necessary condition": 36530, "improve supervised": 24930, "embeddings tokens": 17230, "follow recent": 21254, "languages performing": 28751, "outperform results": 38818, "propose plug": 43583, "development training": 14709, "method especially": 32487, "84 f1": 532, "quality unsupervised": 44594, "task matching": 55209, "leverage self": 30288, "test scores": 56367, "scores paper": 48912, "hinders model": 23933, "improvement paper": 25012, "accelerate model": 801, "trainable neural": 57663, "set small": 50249, "example paper": 18880, "known knowledge": 27659, "model extra": 33870, "achieves 86": 1294, "99 accuracy": 575, "translation dialogue": 58599, "information neighboring": 25989, "finally analyse": 20835, "time particular": 57189, "extract various": 20002, "constantly changing": 10344, "news agencies": 37383, "novel vocabulary": 37951, "examples similar": 18932, "extraction document": 20057, "valuable knowledge": 61204, "provide ablation": 44002, "wider context": 62027, "major issue": 31512, "news portals": 37411, "trained attention": 57675, "results combined": 47537, "embeddings approaches": 17084, "transfer low": 58401, "dependency analysis": 14117, "ethical considerations": 18417, "extremely limited": 20162, "english high": 17819, "multilingual low": 36092, "instead used": 26466, "recently models": 45439, "model capabilities": 33642, "parsers present": 39764, "examples generate": 18905, "stage pre": 52438, "training intermediate": 58136, "guides model": 23355, "understand linguistic": 59303, "representation multi": 46559, "measures different": 32077, "expand corpus": 19184, "capable identifying": 7623, "construction grammar": 10426, "novel span": 37925, "task focused": 55092, "advances text": 1929, "enables models": 17445, "english utterances": 17898, "existing methodologies": 19091, "using ensembles": 60678, "research article": 46985, "information units": 26137, "subject domain": 53552, "techniques utilized": 56150, "hard follow": 23442, "specific metrics": 52111, "problems provide": 42726, "encourage consistency": 17590, "external supervision": 19955, "strong text": 53055, "trained achieve": 57669, "foster future": 21410, "advancements field": 1899, "research gaps": 47045, "capitalization punctuation": 7641, "000 hours": 7, "manually labeling": 31785, "detecting domain": 14447, "gan based": 21953, "works directly": 62885, "growing popularity": 23300, "help models": 23580, "performance boosting": 40224, "relation classifiers": 45968, "label words": 27734, "tuning better": 58903, "setting fine": 50324, "using strong": 60966, "dataset https": 12952, "rnn baselines": 48189, "multihead attention": 36053, "reading natural": 45089, "controllable language": 10978, "control codes": 10961, "automated evaluations": 5043, "tasks benchmarks": 55521, "studies deep": 53256, "annotations work": 3008, "low confidence": 31135, "calibration method": 7535, "conduct preliminary": 10057, "present insights": 41931, "limits applicability": 30639, "based tagging": 6081, "dependencies improve": 14107, "art accuracies": 4208, "individual feature": 25567, "faithful explanations": 20365, "meaning original": 32007, "processing resources": 42935, "lms gpt": 30919, "models collection": 34824, "collection datasets": 8981, "meta dataset": 32330, "quickly new": 44824, "baselines variety": 6318, "likely contain": 30525, "widespread attention": 62032, "performing fine": 40678, "models resulted": 35454, "generate rich": 22238, "tagging based": 54737, "model motivated": 34107, "careful design": 7756, "integrate different": 26505, "recently bidirectional": 45412, "tuning paradigm": 58937, "modeling ability": 34558, "global topic": 22846, "scores best": 48893, "use public": 59985, "world facts": 62941, "style prompts": 53493, "raise important": 44855, "conduct set": 10062, "tasks measure": 55743, "method evaluation": 32492, "art hybrid": 4267, "data analyze": 12134, "draw insights": 16401, "leverage task": 30291, "data self": 12635, "data extensive": 12350, "approach significant": 3691, "data currently": 12266, "evaluation research": 18697, "models sufficiently": 35558, "methods 10": 32723, "task predicts": 55286, "100 200": 58, "scientific abstracts": 48754, "performance trade": 40602, "multiple model": 36250, "voting approach": 61740, "texts english": 56876, "topics propose": 57459, "evaluate baseline": 18442, "used interpret": 60217, "parser achieve": 39758, "challenging state": 8142, "better solve": 6967, "external datasets": 19933, "challenge previous": 8007, "response present": 47398, "layer layers": 29186, "stack transformer": 52419, "model add": 33537, "non target": 37684, "output representations": 38997, "work current": 62616, "evaluations text": 18770, "methods exploiting": 32855, "literature dataset": 30857, "studies large": 53277, "contextualised language": 10793, "highly useful": 23922, "models plm": 35324, "abundant labeled": 782, "perform reasoning": 40133, "challenge design": 7976, "framework extracting": 21521, "interpretable reasoning": 26728, "exist text": 19017, "bart t5": 5535, "perform decently": 40085, "outputs generated": 39015, "generate effective": 22196, "reduce performance": 45679, "significantly enhances": 50956, "modern transformer": 35723, "knowledge essential": 27468, "essential downstream": 18326, "summarization multi": 53894, "evaluations confirm": 18754, "scale user": 48638, "ai assisted": 2114, "requires capturing": 46916, "representations experiment": 46659, "directions recent": 15299, "points better": 41069, "training important": 58124, "lack transparency": 27924, "causal explanations": 7872, "common text": 9205, "model transferred": 34488, "inference test": 25698, "address gaps": 1760, "work highlight": 62678, "highlight limitations": 23865, "code provide": 8847, "length prediction": 30032, "oriented semantic": 38699, "datasets designed": 13223, "extract spans": 19995, "texts recent": 56916, "article focus": 4449, "embeddings studied": 17222, "components using": 9728, "tasks findings": 55642, "changing model": 8186, "tend low": 56202, "questions regarding": 44802, "approach integrates": 3575, "smaller ones": 51525, "summarization document": 53882, "assume access": 4633, "access high": 822, "metric used": 33128, "directly source": 15336, "perturbation based": 40790, "generate entity": 22198, "content planning": 10546, "demonstrate empirically": 13909, "experimental conditions": 19259, "event information": 18785, "crosslingual transfer": 11877, "encoders task": 17559, "recently prompt": 45453, "prompt tuning": 43209, "prompt template": 43207, "representation structured": 46587, "weight word": 61921, "domain evaluations": 16060, "seek improve": 49050, "using cosine": 60633, "dataset news": 13011, "different news": 15008, "51 accuracy": 431, "paper looks": 39419, "classes text": 8418, "previous non": 42267, "contrastive samples": 10921, "studies real": 53292, "targeting specific": 54863, "presents research": 42101, "research paradigm": 47091, "application needs": 3172, "efficient approaches": 16862, "setting models": 50331, "related downstream": 45902, "appropriate pre": 3965, "multiple methods": 36247, "models highest": 35088, "introduce stage": 26864, "learn small": 29425, "models finetuning": 35034, "architecture leverages": 4061, "transfer natural": 58408, "gpt t5": 22989, "suffer performance": 53775, "improvement code": 24996, "lm pre": 30911, "embeddings sensitive": 17210, "factual consistency": 20319, "dataset dialogue": 12896, "meta evaluation": 32334, "makes models": 31629, "models visual": 35675, "significant robustness": 50921, "data instance": 12433, "classification multiple": 8503, "approaches best": 3777, "representation propose": 46571, "apply methodology": 3334, "problem standard": 42667, "context specifically": 10725, "machines ability": 31397, "introduce publicly": 26855, "better supervised": 6973, "trade performance": 57503, "knowledge produce": 27578, "problem systems": 42671, "mechanisms improve": 32151, "tasks conversational": 55560, "retrieval method": 47953, "question understanding": 44755, "semantic connections": 49256, "datasets detailed": 13225, "knowledge shared": 27608, "extract domain": 19972, "domain invariance": 16089, "sentiment features": 49845, "reasoning question": 45219, "complexity real": 9688, "train bilingual": 57569, "combining semantic": 9122, "similarities sentences": 51080, "corpus derived": 11322, "generates accurate": 22337, "contributions present": 10956, "linguistic capabilities": 30751, "remain unsolved": 46318, "systems potential": 54591, "experiments wide": 19560, "setups demonstrate": 50413, "labels extensive": 27824, "domain textual": 16211, "irrelevant sentences": 27043, "model partially": 34182, "biases learned": 7055, "input structure": 26343, "knowledge fully": 27486, "adversarial data": 1966, "use synthetic": 60038, "generation make": 22488, "robust human": 48250, "develop data": 14579, "generation pipeline": 22518, "time average": 57120, "soft prompts": 51625, "frozen language": 21696, "learned approach": 29451, "using t5": 60976, "billions parameters": 7124, "prefix tuning": 41796, "summarization experimental": 53883, "work usually": 62856, "based corresponding": 5653, "fail provide": 20344, "addressing issues": 1820, "label imbalance": 27711, "create multiple": 11710, "retriever generator": 47989, "generator framework": 22617, "datasets construct": 13195, "pairs high": 39194, "progress nlp": 43108, "tasks researchers": 55859, "propose neuro": 43492, "memory stores": 32284, "contain complex": 10460, "settings study": 50398, "evidence provided": 18817, "output pairs": 38989, "seen tasks": 49065, "terms generalization": 56293, "significant room": 50923, "quantitatively measure": 44632, "based criteria": 5659, "created task": 11732, "adaptation uda": 1546, "training results": 58232, "reduces model": 45693, "model overfitting": 34172, "vastly outperforms": 61443, "dataset textit": 13118, "settings achieve": 50358, "improvement release": 25021, "achieves 85": 1293, "prior posterior": 42410, "decoder networks": 13604, "networks considering": 36841, "original documents": 38711, "various modalities": 61364, "challenges like": 8058, "speech english": 52262, "malayalam tamil": 31679, "models commonsense": 34833, "masked token": 31869, "inference latent": 25666, "swedish danish": 54249, "leverage neural": 30281, "models news": 35260, "sources model": 51835, "designed facilitate": 14315, "shallow models": 50441, "scenarios particularly": 48704, "puts forward": 44432, "models common": 34831, "research efficient": 47025, "tuning experiments": 58912, "solution proposed": 51660, "enhanced transformer": 17940, "hierarchical relations": 23688, "model augment": 33587, "instead modeling": 26458, "questions compared": 44778, "dataset novel": 13013, "key design": 27306, "especially unseen": 18309, "concepts training": 9945, "carlo sampling": 7767, "document finally": 15794, "particular best": 39834, "achieves micro": 1346, "gpt transformer": 22990, "performance 59": 40170, "suggesting models": 53837, "questions address": 44768, "possible reduce": 41335, "dependence labeled": 14102, "manual methods": 31748, "languages research": 28772, "poor robustness": 41144, "measuring model": 32086, "detection evaluate": 14481, "reveal significant": 48013, "topic recent": 57422, "features syntactic": 20679, "models increases": 35124, "shot context": 50606, "model scales": 34336, "tasks shot": 55884, "aware dialogue": 5448, "inference costs": 25648, "comprehensive analyses": 9781, "approach mitigate": 3601, "issue learning": 27066, "75 f1": 503, "model filter": 33886, "majority studies": 31533, "propose semantics": 43618, "scale multiple": 48602, "extraction specifically": 20113, "adversarial example": 1969, "matches exceeds": 31904, "use real": 59986, "benchmark multiple": 6480, "approaches multilingual": 3878, "effective ways": 16715, "focus cross": 21150, "specific representation": 52137, "approaches experimental": 3816, "threshold based": 57090, "data requiring": 12608, "scaling number": 48651, "given fixed": 22744, "called graph": 7546, "demonstrate improvement": 13922, "features developed": 20559, "corpus allows": 11272, "words convey": 62389, "learning enables": 29618, "accuracy use": 1067, "popular knowledge": 41165, "benchmark existing": 6467, "layers training": 29235, "like self": 30499, "simple naive": 51198, "extraction relation": 20103, "detect semantic": 14441, "usually apply": 61035, "sparse datasets": 51968, "efficient existing": 16872, "propose tasks": 43660, "datasets baselines": 13166, "relevant natural": 46224, "models driven": 34942, "analysis benchmark": 2622, "set recommendations": 50234, "significant human": 50869, "processing aims": 42848, "consists 10": 10320, "strategy enhance": 52932, "enhance information": 17913, "huge text": 24078, "real nlp": 45108, "lack labelled": 27899, "reasoning step": 45224, "understand reason": 59309, "knowledge build": 27419, "encoding syntactic": 17578, "dependency edges": 14119, "explainable predictions": 19599, "comparison paper": 9500, "evaluated experiments": 18532, "probe models": 42487, "inter dependence": 26579, "augmentation da": 4951, "lead wrong": 29280, "outperforms model": 38909, "explicit content": 19612, "content language": 10535, "retrieval multilingual": 47958, "advances transformer": 1930, "datasets usually": 13475, "research document": 47022, "additional work": 1710, "continuously updated": 10860, "network demonstrate": 36729, "aims select": 2214, "containing information": 10483, "texts tweets": 56938, "paper process": 39488, "tuning neural": 58933, "settings task": 50399, "distinct language": 15592, "feasibility using": 20470, "increase usage": 25426, "language despite": 28024, "study discuss": 53363, "expressions text": 19811, "limited research": 30608, "critical challenge": 11778, "contribute new": 10932, "nlp ml": 37499, "limited capacity": 30573, "continue pre": 10829, "design training": 14304, "biases language": 7054, "experiments commonly": 19376, "language automatically": 27970, "learning existing": 29632, "explored recent": 19764, "established datasets": 18354, "datasets arabic": 13159, "provides potential": 44219, "text small": 56774, "need access": 36543, "synthetic labels": 54377, "sensitive semantic": 49504, "testing sets": 56412, "intermediate results": 26678, "building multi": 7456, "systems conversational": 54460, "detailed descriptions": 14419, "play central": 40961, "google bert": 22953, "outperform human": 38800, "generates meaningful": 22347, "help human": 23567, "properties different": 43260, "leverage bert": 30257, "based labels": 5801, "major topics": 31523, "practitioners researchers": 41492, "tuned target": 58888, "layer using": 29212, "glove embedding": 22857, "built transformer": 7492, "motivate new": 35861, "strong challenge": 53020, "unique opportunity": 59515, "tasks result": 55861, "nlp challenges": 37471, "identify challenges": 24415, "task currently": 54990, "applying model": 3368, "graph theory": 23175, "general approaches": 22046, "efficient paper": 16891, "introduce contrastive": 26790, "maximize mutual": 31958, "interaction learning": 26604, "articles multiple": 4470, "overcome propose": 39073, "evaluation various": 18747, "shot manner": 50631, "methods rarely": 33004, "testing scenarios": 56410, "aware machine": 5457, "guided attention": 23344, "layer outputs": 29198, "significantly degrades": 50951, "specific ones": 52120, "input set": 26334, "lexically diverse": 30401, "lexicon information": 30412, "bert combined": 6637, "bert paper": 6697, "external lexicon": 19947, "trained manually": 57781, "documents extract": 15880, "combining models": 9116, "years online": 63067, "increasing volume": 25467, "reasoning types": 45230, "tasks baselines": 55517, "tunes model": 58894, "generation addition": 22411, "datasets include": 13298, "offer limited": 38293, "datasets long": 13319, "realistic scenario": 45150, "finding work": 20903, "data gold": 12393, "mt5 model": 35928, "model convergence": 33717, "state encoder": 52697, "prediction approaches": 41695, "mixture expert": 33418, "success modeling": 53710, "sequence generated": 49924, "context token": 10733, "topic knowledge": 57411, "relation using": 46000, "capture interaction": 7684, "years datasets": 63053, "involves use": 27022, "aims detecting": 2185, "issue low": 27067, "types introduce": 59095, "source corpora": 51758, "release pretrained": 46165, "pretraining corpus": 42197, "issues current": 27087, "importance nlp": 24686, "labels method": 27837, "existing high": 19075, "provides automatic": 44182, "token label": 57294, "tagging scheme": 54751, "incompleteness knowledge": 25333, "ones previous": 38342, "aims investigate": 2201, "dialogue understanding": 14794, "plms paper": 41019, "core content": 11148, "existing plms": 19126, "witnessed increasing": 62093, "experiments challenging": 19370, "data produce": 12566, "produce synthetic": 43014, "hallucination problem": 23374, "generating fluent": 22375, "models weak": 35678, "experiments self": 19516, "albert roberta": 2251, "using retrieval": 60910, "paid little": 39143, "convolution networks": 11095, "module designed": 35754, "extraction pre": 20094, "different relations": 15049, "tools like": 57380, "tune plms": 58860, "manually designing": 31777, "language prompts": 28446, "propose prompt": 43594, "construct prompts": 10399, "labels fine": 27827, "language news": 28358, "shot models": 50634, "based support": 6072, "art evaluated": 4259, "proper knowledge": 43252, "problems task": 42733, "given situation": 22785, "initial training": 26220, "pretrained domain": 42153, "2020 work": 295, "experiments evaluations": 19432, "analysis new": 2707, "benchmarks evaluation": 6522, "adapt various": 1515, "results shot": 47835, "information current": 25798, "gap current": 21959, "annotation platform": 2959, "current dataset": 11967, "respect number": 47349, "paradigm called": 39622, "multi armed": 35938, "armed bandit": 4191, "relevant texts": 46240, "classification research": 8535, "performance article": 40200, "enable efficient": 17424, "based annotations": 5567, "method increase": 32541, "particularly strong": 39890, "analysis studies": 2769, "propose masked": 43448, "information layers": 25947, "universal feature": 59540, "standard pre": 52517, "collecting high": 8974, "architectures end": 4108, "recent paper": 45330, "level global": 30126, "respectively work": 47386, "accuracy traditional": 1063, "extend prior": 19828, "data accessible": 12108, "recent advancement": 45278, "learning learns": 29705, "finding answer": 20897, "modeling pairwise": 34608, "capture higher": 7678, "model explainability": 33855, "samples method": 48481, "selects subset": 49171, "token free": 57288, "free models": 21645, "framework obtains": 21574, "adaptive pretraining": 1580, "bridge semantic": 7322, "span boundaries": 51921, "english widely": 17903, "entities finally": 18053, "finally benchmark": 20840, "existing shot": 19142, "predictions methods": 41762, "based prototypical": 5961, "growing attention": 23289, "efforts focus": 16938, "settings proposed": 50392, "2021 challenge": 297, "setting outperforms": 50337, "texts structured": 56930, "task challenge": 54947, "network bert": 36710, "performance address": 40186, "precision points": 41614, "handling multiple": 23426, "unified way": 59483, "remains limited": 46338, "methods existing": 32849, "design auxiliary": 14264, "widely reported": 62001, "content lack": 10534, "specifically fine": 52202, "high lexical": 23750, "model adapting": 33534, "sentences study": 49790, "increased complexity": 25430, "source attention": 51741, "generates set": 22357, "resource downstream": 47224, "annotation approach": 2934, "propose annotation": 43293, "based types": 6116, "models annotation": 34714, "support systems": 54127, "answering sentiment": 3095, "make strong": 31601, "dataset confirm": 12858, "output probability": 38994, "history model": 23968, "study confirms": 53346, "computation resources": 9829, "specifically train": 52230, "model encourage": 33820, "hidden space": 23646, "exhibits superior": 19013, "superior generalization": 53934, "architectures techniques": 4125, "knowledge interaction": 27532, "need efficient": 36558, "informative summaries": 26177, "method 10": 32353, "score 57": 48795, "resources pre": 47326, "opportunities challenges": 38511, "production settings": 43051, "model multimodal": 34112, "explore robustness": 19733, "plms typically": 41022, "information exists": 25846, "human rating": 24223, "scoring systems": 48939, "way evaluate": 61800, "task long": 55192, "diverse types": 15725, "subjective objective": 53566, "generation diverse": 22449, "articles using": 4482, "different formats": 14937, "evaluations recent": 18768, "datasets effective": 13240, "qa paper": 44456, "suggest large": 53822, "fashion model": 20415, "sets finally": 50294, "review state": 48040, "approaches new": 3884, "respectively code": 47363, "representation different": 46503, "task labels": 55157, "improves domain": 25125, "based fact": 5726, "benefit joint": 6563, "tasks previously": 55813, "used shot": 60302, "complexity input": 9678, "effective long": 16666, "hierarchical way": 23698, "sentence transformer": 49661, "context finally": 10639, "strategy learn": 52939, "uniform prior": 59486, "driven knowledge": 16424, "different weight": 15129, "enhanced text": 17938, "directed edges": 15266, "studies usually": 53308, "degeneration problem": 13799, "propose local": 43443, "similarity source": 51121, "explore semantic": 19735, "topical coherence": 57439, "constraints language": 10374, "available unlabeled": 5384, "domain speech": 16196, "available target": 5374, "resources particular": 47325, "attention unit": 4842, "studies provide": 53290, "output attention": 38965, "analysis empirical": 2656, "com declare": 9008, "declare lab": 13580, "transformers language": 58525, "information domains": 25822, "method reduces": 32636, "processing bert": 42857, "features complex": 20543, "directions including": 15297, "knowledge question": 27583, "control level": 10967, "spurious correlation": 52387, "train classification": 57572, "methods fully": 32871, "settings code": 50359, "com rucaibox": 9022, "systems generalize": 54511, "strategies affect": 52892, "tweets different": 59014, "health support": 23520, "main obstacle": 31449, "challenge study": 8017, "sentences according": 49676, "largely limited": 29058, "general discourse": 22051, "respectively demonstrate": 47365, "methods struggle": 33055, "models retrieval": 35459, "methods exist": 32848, "perform case": 40073, "etal 2019": 18414, "data essential": 12328, "detection question": 14516, "auxiliary self": 5238, "original pre": 38723, "tasks introduced": 55695, "advance current": 1880, "models biased": 34785, "compared language": 9416, "adopted neural": 1870, "history current": 23967, "employ end": 17380, "sparsity paper": 51981, "brings challenges": 7339, "corpus words": 11460, "different attributes": 14847, "semantically correlated": 49384, "methods major": 32938, "adapting pre": 1568, "asked identify": 4521, "preserving high": 42122, "modules trained": 35775, "reasoning processes": 45218, "huge search": 24076, "generates better": 22338, "sota baselines": 51725, "explore trade": 19742, "semantic cues": 49266, "scarcity datasets": 48664, "settings experimental": 50369, "language contexts": 28007, "modeling question": 34618, "memory language": 32257, "hierarchical variational": 23697, "samples extensive": 48474, "evaluations datasets": 18755, "sentences extracted": 49722, "models utilized": 35664, "utilized generate": 61108, "high costs": 23722, "document processing": 15822, "length limit": 30029, "limited settings": 30614, "relative distance": 46091, "model public": 34260, "set evaluations": 50149, "predominantly focused": 41785, "performance report": 40533, "coherence diversity": 8907, "high probabilities": 23765, "based modules": 5886, "speech domain": 52261, "accuracy domain": 961, "popular dataset": 41161, "score macro": 48857, "stage train": 52444, "model including": 33989, "aims finding": 2194, "humans make": 24281, "copy mechanisms": 11134, "mask mechanism": 31859, "mechanism pre": 32136, "limited annotated": 30566, "efficient outperforms": 16890, "codes datasets": 8877, "based unified": 6118, "exploit pre": 19661, "model bart": 33597, "tremendous improvements": 58773, "consider time": 10224, "model defines": 33743, "representations identify": 46683, "compared humans": 9415, "context helps": 10652, "field artificial": 20750, "plays essential": 40995, "dynamic attention": 16482, "used transformer": 60341, "understand relative": 59310, "noise injection": 37598, "art character": 4231, "methods commonly": 32790, "corpus showed": 11428, "miss important": 33357, "application scenario": 3178, "directly compared": 15311, "information ignored": 25907, "dataset benchmark": 12825, "tables wikipedia": 54692, "finally develop": 20851, "develop baseline": 14576, "text tables": 56802, "text references": 56729, "proposed overcome": 43874, "bias variance": 7047, "survey present": 54211, "new taxonomy": 37337, "challenge open": 8002, "simply changing": 51248, "metrics assessing": 33138, "layer stacked": 29208, "reflect real": 45774, "different vocabulary": 15126, "labels word": 27860, "program synthesis": 43077, "challenging set": 8139, "similar semantics": 51064, "models xlnet": 35690, "context low": 10670, "weighting mechanism": 61933, "prompt based": 43198, "underlying reasoning": 59274, "task facilitate": 55077, "explored problem": 19763, "intellectual property": 26533, "used analyzing": 60087, "probe language": 42486, "build baseline": 7387, "model 12": 33482, "including support": 25305, "paper fills": 39373, "generation challenge": 22431, "uses generative": 60511, "generation unlike": 22573, "quality predictions": 44564, "sets documents": 50289, "success language": 53703, "gradient information": 23008, "response prediction": 47397, "cascaded model": 7784, "attribution scores": 4917, "makes inference": 31626, "processes dpps": 42845, "generation strategies": 22553, "compare algorithms": 9327, "articles specifically": 4479, "modeling code": 34565, "positive pairs": 41292, "single utterance": 51355, "level scores": 30199, "level local": 30152, "embeddings evaluate": 17128, "representation form": 46520, "limited performance": 30604, "leads high": 29313, "strategies present": 52913, "dataset f1": 12924, "creating training": 11746, "bi encoder": 7005, "cross encoder": 11820, "datasets shot": 13421, "data augmentations": 12168, "augmented versions": 4985, "set experimental": 50152, "highly rely": 23912, "rely explicit": 46279, "explicit text": 19628, "models modest": 35232, "sentence discourse": 49542, "wide adoption": 61956, "training jointly": 58139, "researchers proposed": 47165, "capabilities model": 7602, "restricted limited": 47423, "knowledge selection": 27604, "strategies experimental": 52901, "describes development": 14223, "generated social": 22318, "annotated sentiment": 2916, "experiments establish": 19429, "propose embedding": 43364, "performance entity": 40321, "type propose": 59065, "using interpretable": 60740, "using rules": 60915, "transfer datasets": 58357, "language benchmark": 27975, "achieve considerable": 1129, "relation specific": 45997, "overlapping problem": 39092, "conduct exhaustive": 10040, "exhaustive experiments": 18998, "layers order": 29231, "set improve": 50167, "examples based": 18890, "better metrics": 6918, "metrics compared": 33151, "provide automatic": 44011, "representations graph": 46679, "representation fed": 46517, "analysis demonstrating": 2648, "optimization objectives": 38552, "robustness methods": 48285, "encourage models": 17597, "good generalization": 22933, "accuracy explainability": 974, "contrastive training": 10923, "existing labeled": 19080, "design contrastive": 14270, "iterative manner": 27124, "parameters large": 39705, "data centric": 12201, "analysis discourse": 2652, "mainly relies": 31476, "output graph": 38976, "sentence types": 49664, "method develop": 32461, "punctuation restoration": 44389, "experiments joint": 19449, "text transcripts": 56820, "heavily used": 23537, "performance analyze": 40193, "increasing scale": 25463, "critical challenges": 11779, "knowledge mining": 27551, "model selected": 34344, "robust baseline": 48241, "labeled domain": 27757, "models active": 34687, "techniques automated": 56062, "analyze ability": 2805, "systems social": 54635, "generation empirical": 22451, "high fidelity": 23737, "answer using": 3058, "framework existing": 21511, "terms task": 56316, "work bridge": 62590, "terms experiments": 56288, "studied work": 53240, "performance increasing": 40390, "confirm hypothesis": 10131, "satisfying performance": 48530, "study nlp": 53420, "general specific": 22092, "challenges encountered": 8042, "construct robust": 10401, "documents present": 15902, "scores various": 48929, "method performance": 32611, "contains text": 10505, "large standard": 29018, "temporal context": 56183, "set guidelines": 50162, "data setting": 12653, "research landscape": 47062, "counterfactual data": 11617, "rely supervised": 46302, "training supervised": 58279, "studies explore": 53263, "need novel": 36586, "models properly": 35368, "models fewer": 35023, "performances wide": 40653, "task combining": 54956, "enable researchers": 17429, "define multi": 13775, "compare english": 9340, "multiple classification": 36184, "languages diverse": 28644, "inform future": 25738, "using higher": 60729, "propose bi": 43314, "diversity experiments": 15733, "ood performance": 38403, "according context": 855, "making work": 31675, "invariant information": 26919, "aimed improve": 2165, "text discrete": 56539, "processing sentence": 42937, "performance transformers": 40611, "attention score": 4825, "decoding method": 13633, "task unlike": 55456, "employed various": 17397, "cases text": 7815, "addition analysis": 1599, "annotation toolkit": 2977, "information solve": 26093, "approach handle": 3551, "including audio": 25240, "information modalities": 25974, "asian translation": 4515, "previous solutions": 42278, "loss experimental": 31086, "participants required": 39814, "contextualized token": 10811, "graph finally": 23138, "text topic": 56815, "models llms": 35195, "recognize words": 45552, "joint information": 27173, "corresponding entity": 11551, "probabilistic distribution": 42457, "10 30": 32, "studied research": 53235, "decision based": 13559, "robust multi": 48257, "years end": 63057, "improved ability": 24943, "making decisions": 31650, "problem need": 42615, "set obtained": 50206, "resources tools": 47335, "varying sizes": 61435, "data splits": 12690, "data hope": 12407, "survey help": 54208, "language application": 27963, "different ones": 15012, "annotations using": 3006, "task considered": 54970, "samples available": 48464, "propose tackle": 43657, "necessary training": 36535, "achieved performance": 1257, "np hard": 37966, "evolutionary algorithm": 18835, "identification language": 24389, "tokens order": 57330, "systems small": 54634, "various speech": 61394, "based logistic": 5819, "network transformer": 36818, "data examine": 12333, "shown experiments": 50708, "strongly influenced": 53071, "used propose": 60277, "balanced training": 5516, "proved challenging": 43985, "generate consistent": 22187, "content introduce": 10533, "tools allow": 57375, "manner previous": 31723, "specifically learn": 52212, "require specialized": 46888, "set parallel": 50213, "order explore": 38617, "high computation": 23712, "provides highly": 44203, "shot prompting": 50636, "research real": 47109, "models noisy": 35266, "footnote https": 21280, "results make": 47712, "questions present": 44799, "struggle tasks": 53202, "reasoning work": 45233, "examples require": 18928, "model t5": 34436, "shown produce": 50739, "task leaderboard": 55165, "domain benchmarks": 16025, "robustness neural": 48288, "utilizes pre": 61116, "improvement test": 25033, "systems spoken": 54637, "documents improve": 15886, "benchmark chinese": 6431, "typing model": 59162, "strategies based": 52893, "existing graph": 19074, "text paired": 56685, "datasets typically": 13465, "contain small": 10474, "dataset tasks": 13114, "observations motivate": 38127, "problem efficiently": 42548, "preceding context": 41607, "entire process": 18026, "solutions problems": 51670, "new work": 37363, "tasks far": 55638, "gains different": 21936, "switched text": 54258, "different code": 14866, "practitioners use": 41493, "method adversarial": 32374, "propose confidence": 43329, "manual process": 31749, "conducted comprehensive": 10077, "texts make": 56902, "metrics commonly": 33149, "aims translate": 2220, "significantly surpasses": 51015, "shown efficient": 50704, "considered difficult": 10246, "strategies data": 52897, "leads gains": 29312, "cross encoders": 11821, "explanatory power": 19609, "framework fine": 21523, "new classes": 37151, "20 newsgroups": 229, "health crisis": 23514, "topics based": 57444, "significantly increase": 50982, "loss compared": 31084, "compared pre": 9431, "good predictive": 22938, "process various": 42839, "multilingual xlm": 36135, "baselines consider": 6245, "2019 data": 281, "beneficial task": 6556, "local structure": 30950, "text build": 56460, "makes impossible": 31625, "generative method": 22595, "approach exhibits": 3523, "settings experiments": 50371, "information granularity": 25897, "performance quality": 40515, "depending sentence": 14157, "lm training": 30914, "error analyses": 18212, "architecture incorporates": 4052, "proprietary datasets": 43952, "denoising objective": 14067, "tuning propose": 58951, "especially small": 18300, "use twitter": 60063, "model 30": 33484, "datasets help": 13288, "help nlp": 23581, "community develop": 9262, "effectiveness combining": 16770, "comments written": 9149, "performance fact": 40343, "traditional ml": 57531, "time task": 57229, "hard learn": 23445, "inference cost": 25647, "best ensemble": 6761, "study reports": 53453, "efforts improve": 16940, "common human": 9179, "societal impact": 51608, "different random": 15042, "approaches improved": 3844, "modules semantic": 35774, "attributes text": 4911, "level attributes": 30068, "quality code": 44498, "objective used": 38107, "data pretraining": 12559, "require fine": 46856, "trending topic": 58779, "reverse order": 48023, "model vulnerable": 34531, "bottleneck layers": 7277, "adapter layers": 1558, "network combines": 36723, "nlp resources": 37524, "models published": 35388, "lingual datasets": 30697, "task tweet": 55451, "sentiment detection": 49841, "restricted english": 47422, "cost inference": 11586, "paper applies": 39268, "t5 base": 54678, "selecting small": 49130, "employing pre": 17400, "increasing concern": 25448, "settings shows": 50397, "languages hope": 28688, "hope findings": 24008, "efficient sequence": 16899, "dataset average": 12821, "control important": 10965, "framework helps": 21533, "information variety": 26153, "combined text": 9086, "financial documents": 20890, "number ways": 38054, "embeddings output": 17185, "goal provide": 22899, "evaluation techniques": 18737, "fundamental important": 21780, "ranked systems": 44959, "loop approach": 31074, "huggingface datasets": 24085, "models focusing": 35039, "focusing specific": 21247, "time dimension": 57146, "released url": 46184, "reducing model": 45710, "information massive": 25967, "massive labeled": 31886, "work time": 62843, "results pre": 47770, "multiple research": 36276, "task conducted": 54968, "use annotation": 59822, "depth qualitative": 14188, "model generations": 33936, "tasks exhibit": 55624, "provided models": 44166, "model lower": 34078, "inconsistent predictions": 25340, "tasks pertaining": 55798, "classified according": 8586, "bayes support": 6354, "limited attention": 30570, "demonstrate generated": 13917, "task 2021": 54870, "information helpful": 25901, "design encoder": 14278, "models increasing": 35125, "achieve optimal": 1176, "target knowledge": 54820, "tool available": 57358, "available literature": 5323, "uniformly distributed": 59489, "opinions various": 38508, "12 relative": 110, "bert obtain": 6695, "text time": 56813, "idf word2vec": 24479, "ensure model": 17989, "leads comparable": 29308, "previous supervised": 42294, "data generates": 12385, "used pretrained": 60269, "grained manner": 23041, "functions propose": 21773, "annotating data": 2931, "measuring inter": 32085, "detailed statistics": 14429, "analyses results": 2603, "produce sentence": 43007, "attention proposed": 4815, "integration external": 26528, "multiple online": 36258, "media challenging": 32160, "baselines detecting": 6252, "attention years": 4855, "information similar": 26088, "model boost": 33633, "boost classification": 7252, "ranks 2nd": 44982, "fined tuned": 21035, "metrics outperform": 33185, "knowledge researchers": 27596, "studied machine": 53229, "task suffers": 55423, "bert framework": 6663, "source platform": 51789, "networks furthermore": 36856, "used systems": 60321, "different quality": 15040, "furthermore models": 21830, "datasets provides": 13387, "research investigate": 47058, "tuning existing": 58911, "datasets twitter": 13464, "learning social": 29883, "work collect": 62598, "attention entity": 4743, "obtain higher": 38177, "sequence pre": 49966, "models transferred": 35628, "information accurately": 25750, "bert novel": 6694, "kg based": 27358, "problem determining": 42535, "baselines benchmark": 6239, "lead suboptimal": 29276, "purpose introduce": 44402, "labels finally": 27826, "corpus relevant": 11419, "metrics able": 33133, "able match": 706, "embedding using": 17069, "language survey": 28514, "dataset extract": 12922, "results code": 47535, "methods retrieve": 33024, "classification setup": 8549, "methods assign": 32756, "way interaction": 61813, "tasks properly": 55817, "approaches addition": 3756, "results common": 47539, "perturbation methods": 40791, "small changes": 51465, "task inference": 55136, "effort invested": 16927, "simple structure": 51215, "data unavailable": 12750, "benchmark code": 6432, "framework achieve": 21447, "proposed span": 43900, "new emerging": 37182, "alleviate discrepancy": 2405, "task popular": 55277, "matching method": 31914, "intelligence tasks": 26540, "small language": 51479, "better shot": 6965, "solely trained": 51646, "produce pseudo": 43001, "pattern exploiting": 39960, "exploiting training": 19676, "training pet": 58207, "task enable": 55043, "process providing": 42822, "providing valuable": 44255, "model intrinsic": 34017, "time approaches": 57118, "produce promising": 42999, "construct release": 10400, "currently popular": 12037, "systems prone": 54603, "data construction": 12247, "dataset widely": 13136, "competitive non": 9552, "way make": 61818, "dense passage": 14077, "corpus standard": 11437, "learning encode": 29619, "denoising training": 14068, "benchmark using": 6503, "experiments performance": 19486, "languages total": 28806, "batch negatives": 6340, "text relation": 56732, "performed benchmark": 40657, "verbal communication": 61513, "modelling approach": 34641, "critical review": 11790, "exploring different": 19777, "present summary": 42030, "uses machine": 60520, "guide selection": 23341, "leverages power": 30308, "robust nlp": 48258, "tasks employ": 55609, "new fine": 37206, "different label": 14961, "transfer domain": 58359, "industry recent": 25621, "users existing": 60462, "use contrastive": 59854, "knn classifier": 27383, "propose heterogeneous": 43408, "model plm": 34210, "text including": 56626, "knowledge topic": 27631, "outperforms pre": 38918, "benchmarks designed": 6517, "training develop": 58062, "decisions paper": 13575, "space code": 51851, "adaptation text": 1544, "metrics furthermore": 33168, "human quality": 24219, "construction framework": 10425, "based labeled": 5800, "data flow": 12367, "approach publicly": 3660, "processing lack": 42880, "related corpora": 45891, "cases data": 7805, "measure effect": 32050, "mitigate effect": 33382, "alleviate catastrophic": 2399, "resources work": 47341, "classification errors": 8462, "score 30": 48788, "shows data": 50772, "domains specifically": 16293, "problem class": 42519, "tuning framework": 58915, "objective pre": 38099, "examples conduct": 18893, "studies analyze": 53245, "modeling results": 34620, "gpt neo": 22985, "examples specifically": 18933, "achieves 80": 1292, "processing typically": 42961, "largely reduce": 29062, "empirically proposed": 17366, "method naturally": 32587, "implementation work": 24644, "usually depend": 61045, "issues exist": 27089, "based generated": 5747, "data modeling": 12492, "introduce adversarial": 26778, "based sampling": 6005, "robustness study": 48295, "text table": 56801, "additional techniques": 1704, "consider text": 10222, "curate new": 11945, "significant overlap": 50901, "assume gold": 4634, "extraction component": 20054, "researchers propose": 47164, "11 indic": 87, "answering framework": 3074, "corpus specific": 11434, "focuses extracting": 21238, "sources target": 51840, "improvements automatic": 25047, "explicitly handle": 19635, "node embeddings": 37585, "embeddings key": 17155, "texts specifically": 56929, "achieve outstanding": 1177, "attention training": 4839, "improvement single": 25024, "unlikelihood training": 59613, "contrastive representation": 10919, "stage propose": 52443, "data annotations": 12138, "linearly interpolating": 30684, "improve faithfulness": 24854, "pipeline composed": 40896, "components demonstrate": 9717, "training downstream": 58075, "token dependencies": 57284, "models spanish": 35523, "domain pretraining": 16137, "transfer approaches": 58352, "higher performances": 23836, "mechanism enhance": 32113, "involves multiple": 27020, "issues model": 27094, "strategies utilize": 52921, "theoretic perspective": 57016, "framework measure": 21561, "steer generation": 52789, "memory efficiency": 32253, "adequately evaluate": 1835, "receiving attention": 45272, "models seq2seq": 35483, "dialogue work": 14796, "substantially faster": 53634, "underlying assumption": 59262, "robust features": 48248, "problem error": 42551, "unified knowledge": 59473, "overall better": 39036, "codes released": 8881, "concerns paper": 9962, "quality source": 44581, "cases propose": 7812, "missing tokens": 33364, "performance research": 40535, "simply using": 51254, "model ensure": 33829, "14 dataset": 137, "study key": 53401, "benchmark different": 6463, "better competing": 6867, "generative qa": 22607, "existing zero": 19178, "unseen domains": 59647, "detection accuracies": 14454, "critical effective": 11781, "knowledge relevant": 27590, "training expensive": 58097, "data ignoring": 12413, "results leveraging": 47699, "able create": 685, "limited studies": 30619, "reasons effectiveness": 45235, "evaluated datasets": 18528, "models example": 34982, "systems central": 54448, "special emphasis": 52018, "efficient fine": 16873, "task adapters": 54881, "extraction ee": 20059, "dependencies entities": 14106, "exploit label": 19658, "incorporated pre": 25370, "document experiments": 15793, "datasets ranging": 13391, "baselines shot": 6298, "lay foundation": 29176, "propose evaluating": 43376, "annotations enable": 2988, "bias different": 7024, "select data": 49103, "label embedding": 27707, "train multiple": 57615, "capturing information": 7735, "coherence aware": 8904, "challenging long": 8110, "lack effective": 27887, "learning design": 29587, "settings respectively": 50395, "efficiency quality": 16854, "guiding model": 23357, "aware contrastive": 5445, "level use": 30229, "models memorize": 35221, "contextual sentences": 10782, "models ptms": 35384, "specific decoders": 52068, "provide critical": 44044, "deductive reasoning": 13678, "level english": 30110, "paper point": 39441, "tuning plms": 58942, "strategy proposed": 52948, "languages improved": 28692, "readable format": 45072, "span pair": 51928, "results enhanced": 47612, "lack multilingual": 27903, "remains low": 46339, "increasing need": 25456, "phrases context": 40849, "retaining performance": 47927, "baselines cross": 6246, "based measure": 5839, "results analyses": 47498, "available transformer": 5382, "trained noisy": 57833, "documents complex": 15864, "generation study": 22555, "implications findings": 24655, "fusion based": 21853, "additionally build": 1714, "space address": 51848, "propose potential": 43585, "semantic connection": 49255, "art comparable": 4237, "deployment production": 14177, "language multi": 28351, "validate findings": 61178, "propose controllable": 43339, "generation significantly": 22548, "content diversity": 10521, "novel adaptive": 37749, "directly output": 15329, "increasingly focused": 25472, "tuning tasks": 58967, "interpretable method": 26724, "corpus investigate": 11364, "context sensitivity": 10712, "characteristics make": 8239, "t5 bart": 54677, "dramatically reduce": 16389, "semantic errors": 49276, "largely focus": 29054, "focus monolingual": 21184, "lms used": 30925, "prompt templates": 43208, "ability learning": 620, "text argue": 56440, "regression random": 45817, "findings reveal": 20915, "task effectively": 55037, "main factors": 31436, "knowledge apply": 27396, "task transformer": 55448, "nn based": 37579, "longer input": 31051, "outputs approach": 39012, "approach computationally": 3462, "based meaning": 5838, "benchmark compared": 6435, "non semantic": 37681, "encode sentences": 17470, "extremely small": 20167, "lm generate": 30908, "effectively optimize": 16752, "specific tokens": 52160, "study ways": 53475, "smaller faster": 51518, "high stakes": 23803, "based gaussian": 5745, "effectively alleviates": 16723, "practical recommendations": 41468, "based svm": 6075, "annotated evaluation": 2894, "generate multiple": 22220, "models avoid": 34750, "t5 based": 54679, "understanding behavior": 59324, "research benchmark": 46993, "performance verify": 40626, "hurting performance": 24306, "benchmark sentiment": 6491, "key parts": 27324, "models surpassed": 35569, "work shed": 62818, "dense retriever": 14081, "labeling problems": 27790, "explore domain": 19702, "importance domain": 24682, "uses contrastive": 60500, "particular method": 39853, "trip translation": 58801, "results complex": 47555, "shown able": 50693, "serve valuable": 50085, "types sentences": 59117, "inference present": 25682, "way obtaining": 61824, "similar data": 51035, "experiments applied": 19352, "relevant language": 46222, "information called": 25772, "based multitask": 5894, "introduces simple": 26896, "applied widely": 3313, "available textual": 5378, "datasets finding": 13274, "context tackle": 10728, "using shot": 60938, "method datasets": 32451, "time intervals": 57167, "domain social": 16162, "compared commonly": 9393, "char level": 8193, "level bert": 30070, "neighbor retrieval": 36658, "based heuristic": 5767, "arabic chinese": 3994, "augmentation improves": 4957, "problem real": 42641, "lightweight effective": 30457, "key finding": 27313, "exploit structural": 19664, "includes subtasks": 25234, "tuning achieves": 58898, "train accurate": 57563, "processing complex": 42859, "research carried": 46997, "great improvement": 23207, "result showed": 47450, "multilingual fine": 36085, "performance additional": 40184, "potential benefit": 41383, "improving interpretability": 25183, "gap compared": 21958, "attack text": 4661, "improvements prior": 25095, "models sample": 35468, "performance sota": 40568, "examine large": 18865, "causal analysis": 7868, "bias existing": 7027, "contributions different": 10952, "method augmenting": 32391, "contain lot": 10465, "current results": 12005, "results long": 47703, "individual instances": 25569, "set carefully": 50119, "conversational semantic": 11050, "input different": 26265, "light recent": 30453, "strategies increase": 52908, "data design": 12274, "triple level": 58803, "gnn based": 22873, "languages developed": 28639, "lingual experiments": 30702, "problems text": 42734, "proposed simple": 43896, "method entity": 32486, "problem build": 42513, "data significant": 12661, "published work": 44375, "effective handling": 16657, "conduct automatic": 10028, "level results": 30195, "benchmark recently": 6489, "text answers": 56435, "information prediction": 26017, "features leads": 20613, "model resolve": 34315, "input specific": 26341, "tokens source": 57337, "handle diverse": 23408, "work assess": 62575, "drawn increasing": 16410, "text alignments": 56428, "parameter model": 39673, "researchers explored": 47156, "bilstm cnn": 7129, "relationships multiple": 46080, "paper follow": 39382, "means communication": 32040, "task teams": 55431, "baselines best": 6242, "multiple candidates": 36177, "context guided": 10650, "pairwise comparison": 39237, "bi encoders": 7007, "model termed": 34450, "encoder cross": 17494, "model optimized": 34149, "focused single": 21229, "metrics better": 33143, "impact classification": 24590, "questions annotated": 44769, "new protocol": 37290, "accuracy current": 953, "domain open": 16125, "models enabled": 34959, "tasks systematically": 55923, "provide consistent": 44042, "models prompt": 35366, "predict class": 41636, "transferred languages": 58434, "million english": 33252, "accuracy simple": 1048, "text perturbation": 56698, "varying degree": 61429, "words standard": 62521, "models 10": 34648, "score 52": 48794, "train release": 57626, "input user": 26355, "uses existing": 60509, "tweets social": 59022, "set terms": 50260, "vocabulary terms": 61715, "potential models": 41401, "pretraining downstream": 42201, "latest state": 29158, "establish benchmark": 18343, "data inference": 12429, "achieves 97": 1299, "application models": 3169, "optimization finally": 38547, "survey provide": 54213, "systematically categorize": 54409, "scale using": 48639, "text key": 56635, "analyses based": 2589, "inference module": 25672, "reference text": 45748, "setting open": 50336, "poses challenging": 41246, "evaluate baselines": 18443, "reader models": 45074, "linearly combines": 30683, "based frequency": 5741, "distribution divergence": 15637, "mask language": 31857, "training free": 58110, "create synthetic": 11717, "modular design": 35744, "task resulting": 55345, "label associated": 27691, "open book": 38411, "order guarantee": 38623, "parameters maintaining": 39707, "original performance": 38722, "comprehensive literature": 9794, "networks designed": 36843, "learning module": 29766, "propose asr": 43300, "fully understanding": 21747, "solves problem": 51699, "domain train": 16213, "manual data": 31736, "mentions given": 32306, "retrieval reading": 47966, "demonstrate surprising": 13989, "aims enhance": 2190, "explores impact": 19772, "unlabeled texts": 59584, "methods treat": 33085, "following training": 21272, "control number": 10969, "severe performance": 50425, "semantic associations": 49238, "learning pipelines": 29806, "parameters deep": 39689, "increasingly challenging": 25469, "resource regimes": 47265, "datasets distinct": 13233, "indicate training": 25536, "complementary tasks": 9592, "adaptation da": 1521, "report aim": 46425, "sentence spans": 49650, "account contextual": 878, "way approach": 61793, "capture task": 7716, "important end": 24721, "multiple input": 36228, "analyses proposed": 2601, "simultaneously paper": 51274, "paper forward": 39385, "strategy significantly": 52951, "parameters extensive": 39698, "performing task": 40690, "reasoning long": 45200, "yields performance": 63127, "set does": 50139, "learned small": 29481, "explore useful": 19749, "iteratively perform": 27132, "labels self": 27848, "human rationales": 24225, "relatedness datasets": 45958, "grained scores": 23043, "sentences semantically": 49783, "representation various": 46602, "based paper": 5932, "especially recent": 18296, "limitations future": 30549, "earlier approaches": 16506, "ability large": 617, "work aimed": 62563, "measure impact": 32054, "introduce approaches": 26780, "model alignment": 33559, "socio cultural": 51612, "common approaches": 9165, "estimate uncertainty": 18375, "used ai": 60084, "kind text": 27368, "present transformer": 42045, "single transformer": 51351, "dataset construct": 12862, "datasets capture": 13170, "evaluation challenging": 18588, "classifiers make": 8618, "previously predicted": 42339, "domains domain": 16248, "using prompt": 60875, "number domain": 37997, "fully fine": 21732, "decoding stage": 13645, "continuous prompts": 10849, "performance dramatically": 40308, "different varieties": 15122, "relied heavily": 46261, "multimodal fusion": 36149, "techniques achieving": 56055, "reasoning methods": 45203, "entities query": 18076, "lexical matching": 30371, "trained imitate": 57750, "despite small": 14392, "practical value": 41479, "increase available": 25405, "style sentence": 53497, "sentence preserving": 49623, "design adversarial": 14261, "generate complete": 22184, "external corpora": 19930, "embeddings documents": 17115, "causal commonsense": 7869, "lightweight models": 30459, "dialogue summarization": 14787, "performance important": 40380, "hybrid data": 24313, "task structured": 55413, "performance inference": 40393, "technique training": 56048, "development cycle": 14671, "compare recent": 9362, "dataset 10k": 12790, "contain important": 10462, "order use": 38659, "specific contextual": 52062, "work pre": 62751, "dataset features": 12928, "latent document": 29123, "potentially improve": 41414, "techniques generating": 56092, "semantic diversity": 49271, "retrieve generate": 47977, "technique create": 56030, "right reasons": 48141, "improvement present": 25016, "hierarchical knowledge": 23673, "researchers focused": 47159, "annotated spans": 2917, "embedding contextual": 17022, "training train": 58302, "mention context": 32296, "languages unseen": 28813, "methods induce": 32903, "topics user": 57463, "tasks train": 55937, "training mitigate": 58176, "present literature": 41938, "empirical survey": 17353, "leverages recent": 30312, "direct approach": 15253, "generation understanding": 22572, "covers different": 11664, "focusing different": 21246, "keywords used": 27356, "strategies make": 52909, "framework human": 21535, "impact noise": 24602, "outperforms matches": 38907, "consists training": 10335, "suffer spurious": 53783, "poor domain": 41133, "paradigm propose": 39628, "specific loss": 52108, "classification structured": 8557, "propose textbf": 43668, "lm architecture": 30904, "architecture introduce": 4055, "study unsupervised": 53469, "learning solution": 29884, "bypassing need": 7509, "data comprehensive": 12228, "information facilitate": 25870, "generate additional": 22175, "demonstrated superior": 14023, "problems remain": 42728, "performance multimodal": 40449, "engineering efforts": 17768, "supervision data": 54080, "applied zero": 3314, "wrong predictions": 63016, "novel entities": 37817, "documents existing": 15876, "provide weak": 44155, "given models": 22761, "template filling": 56176, "predict model": 41646, "nature models": 36483, "variants bert": 61235, "interpretable machine": 26721, "models outputs": 35294, "bias english": 7025, "available building": 5268, "trained universal": 57905, "present important": 41927, "problem simple": 42656, "boost overall": 7255, "voting ensemble": 61742, "visually rich": 61688, "human ai": 24093, "power deep": 41424, "specific scenarios": 52142, "adopt self": 1867, "effective especially": 16649, "input second": 26328, "autoregressive non": 5223, "favorable performance": 20454, "span sentence": 51932, "various strong": 61399, "faster lighter": 20440, "existing sota": 19145, "methods select": 33028, "lot manual": 31115, "topic article": 57390, "articles approach": 4463, "introduce high": 26812, "analysis shared": 2753, "legal nlp": 30007, "contains examples": 10494, "tune transformer": 58865, "obtaining f1": 38233, "tasks naturally": 55761, "different experiments": 14927, "modeling widely": 34636, "furthermore methods": 21828, "domain scientific": 16151, "recognition synthesis": 45540, "experiments varying": 19558, "baselines quality": 6293, "extraction sequence": 20110, "learning rules": 29855, "impressive improvements": 24810, "interaction text": 26613, "remained unexplored": 46321, "explicitly incorporating": 19638, "especially challenging": 18265, "classify texts": 8633, "complementary techniques": 9593, "improvement 12": 24978, "framework including": 21542, "terms classification": 56275, "learning analysis": 29515, "metropolis hastings": 33211, "generating multiple": 22382, "bert natural": 6692, "experience end": 19226, "needed paper": 36603, "tool help": 57363, "prediction benchmark": 41697, "detect lexical": 14440, "extracted texts": 20023, "prediction aims": 41692, "study investigated": 53398, "annotated human": 2900, "conclusions study": 9979, "area work": 4150, "languages observed": 28741, "pooling layers": 41126, "base classifiers": 5540, "demonstrated human": 14009, "evaluation case": 18587, "proposed enable": 43761, "achieved 1st": 1214, "content trained": 10565, "independent datasets": 25496, "deployment real": 14178, "frameworks proposed": 21632, "comprehensively study": 9806, "extremely high": 20158, "techniques fail": 56087, "structured sparsity": 53176, "policy gradients": 41097, "domain despite": 16046, "embeddings close": 17094, "challenge researchers": 8014, "inherent challenges": 26201, "limited dataset": 30579, "performance applying": 40195, "practical implications": 41464, "additional research": 1696, "aiming extract": 2168, "extract text": 19998, "usually small": 61068, "training english": 58087, "develop natural": 14602, "sentences identify": 49735, "propose combination": 43322, "better address": 6846, "positive rate": 41293, "preprocessing methods": 41827, "accuracy computational": 951, "collected real": 8966, "corpus persian": 11403, "evaluations present": 18765, "improvements data": 25067, "requires lot": 46940, "carefully crafted": 7760, "parsing data": 39775, "score 33": 48789, "humans task": 24288, "corpus code": 11293, "models expected": 34990, "learning enabled": 29617, "performance ability": 40176, "knowledge response": 27599, "context produce": 10694, "models conversational": 34864, "lower human": 31211, "transformers models": 58527, "conduct zero": 10070, "specific settings": 52147, "massive datasets": 31885, "domains furthermore": 16257, "equivalent performance": 18202, "representative data": 46796, "vector use": 61471, "work researchers": 62809, "intrinsic quality": 26773, "pretrain models": 42145, "models subtasks": 35550, "provide platform": 44112, "release novel": 46161, "hope release": 24012, "groups using": 23283, "models try": 35635, "modal contrastive": 33454, "visual knowledge": 61658, "following success": 21270, "strategies evaluated": 52900, "works evaluate": 62886, "thoroughly investigate": 57071, "existing task": 19154, "transformer structure": 58510, "achieves 58": 1289, "effectively leverages": 16748, "improved data": 24947, "bert classifiers": 6636, "truth datasets": 58837, "emphasize need": 17312, "aware learning": 5456, "problems current": 42698, "documents user": 15925, "train paper": 57622, "field lacks": 20757, "lacks systematic": 27931, "techniques effective": 56080, "having high": 23487, "sentence different": 49541, "similarity comparison": 51089, "sentences finally": 49724, "semantic correlations": 49263, "retrieval aims": 47939, "global interactions": 22832, "new graph": 37215, "reward based": 48066, "use applications": 59824, "studied effect": 53223, "newly designed": 37375, "results lack": 47689, "available experimental": 5290, "encoder input": 17518, "positive pair": 41291, "process method": 42804, "runtime performance": 48410, "language l1": 28127, "factors related": 20314, "suitable data": 53855, "resource task": 47281, "investigate strategies": 26988, "knowledge gap": 27488, "great practical": 23211, "specific focus": 52087, "noisy web": 37630, "finally best": 20841, "results exhibit": 47622, "pairs contrastive": 39175, "documents knowledge": 15890, "diverse expressions": 15703, "evaluation pipeline": 18673, "accuracy 30": 897, "providing accurate": 44235, "autoregressive fashion": 5214, "answer propose": 3045, "implementation details": 24640, "research identifying": 47051, "primarily focuses": 42364, "separate training": 49879, "unified encoder": 59468, "models success": 35551, "dataset published": 13050, "dataset accuracy": 12796, "learns transform": 29977, "25 000": 333, "description corpus": 14241, "aryan language": 4508, "develop annotated": 14570, "task generation": 55106, "purpose models": 44406, "designed novel": 14327, "multi dialect": 35950, "score best": 48837, "complex semantics": 9659, "capabilities large": 7599, "human values": 24256, "shot knowledge": 50621, "demonstrate fine": 13912, "challenging limited": 8108, "validate models": 61182, "public available": 44305, "strategy select": 52950, "strategy achieving": 52926, "languages conduct": 28622, "translated parallel": 58556, "text translated": 56824, "speech task": 52306, "task widely": 55470, "semeval 2022": 49440, "2022 task": 305, "binary class": 7143, "develop benchmark": 14577, "achieved 86": 1215, "despite promising": 14378, "benchmark natural": 6481, "statistical analyses": 52734, "including multiple": 25277, "highly consistent": 23888, "cl methods": 8379, "improve end": 24849, "make best": 31542, "analyze robustness": 2827, "available publicly": 5353, "progress various": 43119, "ability understanding": 649, "domains knowledge": 16264, "studies multi": 53284, "datasets comparison": 13185, "specific tools": 52161, "robust perturbations": 48263, "psycholinguistic features": 44288, "pairs unstructured": 39225, "approaches suggest": 3931, "accuracy multilingual": 1010, "useful detecting": 60360, "systems making": 54560, "features datasets": 20553, "improvements 15": 25044, "interaction mechanism": 26605, "maintain high": 31482, "knowledge design": 27436, "arabic bert": 3993, "depth understanding": 14190, "sensitive domains": 49498, "try use": 58846, "knowledge augmentation": 27400, "based relational": 5990, "code work": 8868, "multiple genres": 36219, "generated corpora": 22280, "semantic components": 49250, "inference phase": 25681, "datasets non": 13348, "primarily studied": 42366, "tasks problem": 55815, "evaluate generalization": 18460, "shot baseline": 50600, "generates question": 22353, "new machine": 37244, "additional neural": 1690, "token generated": 57291, "scenarios models": 48701, "set knowledge": 50174, "effectiveness improving": 16783, "style experimental": 53484, "advances pretrained": 1922, "abstractive approach": 769, "training inspired": 58133, "effectively transfer": 16759, "makes training": 31640, "classifiers evaluate": 8615, "suggests neural": 53849, "model driven": 33792, "statistical distribution": 52741, "google speech": 22958, "dataset social": 13092, "presents depth": 42079, "approaches lack": 3852, "results require": 47810, "limits usage": 30644, "prediction question": 41734, "systems providing": 54608, "conditional neural": 10000, "massive scale": 31887, "knowledge facilitate": 27481, "models produces": 35363, "provide user": 44150, "rarely available": 45002, "20 absolute": 221, "representation entire": 46509, "scenario given": 48687, "different hyper": 14951, "parameters experimental": 39695, "domains second": 16291, "existing meta": 19089, "integrate word": 26511, "phrases text": 40855, "competitive neural": 9551, "second demonstrate": 49003, "related domains": 45901, "contrast model": 10879, "languages vary": 28819, "motivates need": 35877, "robustness compared": 48275, "proposed taxonomy": 43910, "training suggesting": 58278, "models importantly": 35108, "causal knowledge": 7874, "significantly underperform": 51016, "proposed pretraining": 43881, "extraction refers": 20102, "computation overhead": 9828, "despite showing": 14387, "gap pre": 21972, "languages domain": 28646, "high uncertainty": 23807, "model pseudo": 34259, "training baselines": 57943, "dataset indian": 12966, "conversations work": 11066, "important limitations": 24741, "traditional data": 57514, "model reinforcement": 34295, "outperforms heuristic": 38904, "logical relation": 30987, "despite efforts": 14359, "paper recent": 39563, "research knowledge": 47060, "known unknown": 27669, "jointly represent": 27219, "languages achieved": 28591, "detection languages": 14496, "adopt supervised": 1868, "effectiveness supervised": 16815, "tasks automated": 55513, "current task": 12017, "low probability": 31166, "13 datasets": 122, "efficiently using": 16921, "techniques achieved": 56054, "relevant papers": 46227, "billion parameter": 7118, "overall word": 39054, "integration multiple": 26530, "directional transformer": 15286, "iterative refinement": 27126, "method information": 32544, "model retain": 34322, "range existing": 44919, "explicit interaction": 19616, "road map": 48211, "ability transformer": 646, "experiments shot": 19522, "dataset public": 13047, "shot finetuning": 50618, "lastly explore": 29108, "examples approach": 18888, "scores input": 48904, "original meaning": 38719, "latest deep": 29155, "influence training": 25729, "model aggregation": 33550, "structure context": 53094, "results representative": 47809, "driven neural": 16432, "studies task": 53303, "key advantages": 27294, "terms used": 56320, "related news": 45920, "allows seamlessly": 2478, "classification domain": 8458, "data input": 12431, "text having": 56613, "obtained dataset": 38206, "addition study": 1644, "work carry": 62595, "dataset potential": 13030, "news videos": 37425, "components text": 9727, "utility incorporating": 61082, "networks combined": 36839, "models revolutionized": 35462, "using predictive": 60870, "effective human": 16658, "datasets developed": 13227, "domains covering": 16241, "samples high": 48478, "large class": 28854, "using conventional": 60626, "light new": 30452, "results establish": 47616, "errors commonly": 18237, "work textual": 62842, "limited end": 30583, "understand effect": 59292, "work motivated": 62727, "important impact": 24731, "extraction unified": 20127, "practical utility": 41478, "specific prompts": 52132, "tasks prediction": 55806, "english release": 17863, "behavior paper": 6395, "task monolingual": 55222, "test state": 56382, "semantic complexity": 49249, "series controlled": 50060, "task determine": 55014, "detection feature": 14486, "critical real": 11788, "using class": 60604, "code training": 8864, "methods conduct": 32798, "lead state": 29272, "open text": 38465, "challenging human": 8099, "extraction achieves": 20044, "unsupervised contrastive": 59687, "task average": 54927, "tasks systems": 55924, "little performance": 30882, "present recent": 41998, "studied recent": 53233, "framework brings": 21463, "introduces multi": 26892, "information iii": 25910, "works demonstrate": 62882, "multiple feature": 36217, "detection experimental": 14482, "mixing languages": 33415, "extracted single": 20020, "performance argue": 40199, "wider variety": 62029, "issue design": 27060, "soft prompt": 51624, "pretraining strategy": 42217, "does increase": 15954, "built dataset": 7483, "token spans": 57309, "document contexts": 15779, "heuristics used": 23634, "plms fine": 41017, "evidence models": 18816, "chinese news": 8314, "environments work": 18177, "results particular": 47762, "analysis highlights": 2672, "attribute words": 4902, "benchmark text": 6500, "discriminative representations": 15448, "desired attributes": 14347, "relevant background": 46200, "using labelled": 60751, "ai technologies": 2124, "focus given": 21168, "news news": 37408, "model gain": 33917, "modeling capabilities": 34563, "information following": 25883, "text suitable": 56795, "dataset high": 12948, "handcrafted rules": 23401, "exploit semantic": 19663, "special treatment": 52023, "objective designed": 38085, "benefits training": 6589, "posts comments": 41370, "order develop": 38610, "shown existing": 50707, "methods gradient": 32881, "representations dynamically": 46645, "accuracy target": 1057, "context short": 10717, "chain thought": 7961, "model just": 34027, "present details": 41892, "superior zero": 53946, "required resources": 46904, "using gpt": 60710, "tasks label": 55706, "high flexibility": 23738, "indicate specific": 25535, "external datastore": 19934, "major bottleneck": 31502, "models practice": 35337, "scarcity large": 48671, "propose progressive": 43593, "essential developing": 18325, "methods increase": 32902, "optimized end": 38566, "linguistic computational": 30756, "10 15": 30, "model hypothesize": 33968, "needed train": 36604, "normalizing flow": 37710, "proposes multi": 43935, "layers propose": 29234, "making unsuitable": 31672, "understanding impact": 59351, "improved paper": 24955, "uses auxiliary": 60494, "sentence sentiment": 49644, "advent deep": 1957, "issue investigate": 27065, "data reducing": 12592, "training targets": 58284, "models receive": 35405, "make generated": 31573, "paper datasets": 39312, "comparing generated": 9481, "natural artificial": 36409, "tasks set": 55881, "build comprehensive": 7391, "algorithm reduces": 2297, "sentence features": 49560, "based genetic": 5753, "expressed multiple": 19798, "content use": 10567, "advanced language": 1888, "major impact": 31510, "trained google": 57744, "acceptable performance": 813, "models overall": 35295, "generation limited": 22485, "datasets associated": 13160, "datasets metrics": 13333, "different questions": 15041, "logical relations": 30988, "times parameters": 57254, "incorporating different": 25383, "sentence scoring": 49636, "combined machine": 9080, "scale diversity": 48565, "train lstm": 57602, "bengali dataset": 6592, "cost terms": 11594, "methods adopted": 32741, "specific general": 52088, "usually represented": 61063, "furthermore conduct": 21810, "introduce lightweight": 26819, "data characteristics": 12207, "problem traditional": 42676, "adequately capture": 1834, "contextual feature": 10768, "variations training": 61252, "demonstrates strong": 14044, "question ask": 44719, "survey focus": 54207, "helps alleviate": 23603, "examples class": 18891, "coherent diverse": 8914, "strategy results": 52949, "introduce various": 26877, "level proposed": 30184, "prediction error": 41701, "techniques capture": 56067, "relevant baselines": 46201, "challenge large": 7990, "decoder transformers": 13619, "work chinese": 62596, "states language": 52720, "types propose": 59112, "original words": 38740, "classification pipeline": 8517, "corpora shown": 11241, "translation supervised": 58684, "advancements natural": 1901, "learn generic": 29375, "model reducing": 34292, "sequential manner": 50045, "data computationally": 12231, "approaches supervised": 3932, "models tuned": 35636, "argue models": 4164, "especially zero": 18312, "discrete prompts": 15426, "applying nlp": 3373, "analyze recent": 2825, "naive approaches": 36362, "particular observe": 39856, "cognitive biases": 8890, "input prompt": 26319, "lexical overlaps": 30375, "effectively adapted": 16721, "real synthetic": 45111, "errors impact": 18241, "extend current": 19820, "work multimodal": 62730, "task contrast": 54978, "articles given": 4468, "word makes": 62242, "consisting multiple": 10317, "better context": 6868, "perform self": 40135, "abundant unlabeled": 783, "variety fields": 61273, "capture correlations": 7658, "prompting methods": 43220, "based span": 6049, "equal number": 18189, "statistical tests": 52766, "prompt generation": 43204, "based early": 5694, "performance fewer": 40345, "texts lack": 56896, "addition content": 1604, "modules used": 35776, "set including": 50169, "cost code": 11577, "forgetting previous": 21307, "behavior use": 6397, "minimal performance": 33291, "outperformed conventional": 38837, "structured meaning": 53164, "tuning shot": 58953, "theorem proving": 57009, "heavily large": 23532, "new mechanism": 37249, "low variance": 31204, "results literature": 47702, "documents text": 15918, "combined pre": 9082, "models examples": 34983, "_1 score": 580, "approaches alleviate": 3760, "propose target": 43658, "metric named": 33122, "baseline proposed": 6203, "tuning significantly": 58954, "inference extensive": 25657, "achieving superior": 1427, "advancements machine": 1900, "nlp pre": 37512, "enhanced training": 17939, "using active": 60549, "existing architectures": 19034, "results substantially": 47864, "level edits": 30105, "framework works": 21628, "tuning different": 58906, "adapter tuning": 1560, "parameters multi": 39711, "instruction based": 26479, "questions set": 44808, "datasets news": 13346, "systems semeval": 54628, "need design": 36553, "attention various": 4849, "techniques performance": 56120, "work main": 62717, "carefully annotated": 7758, "learning emerging": 29616, "novel prompt": 37900, "performance performance": 40484, "previous joint": 42256, "trained linguistic": 57776, "tree encoder": 58742, "creating need": 11744, "model textual": 34462, "data computation": 12229, "results resulting": 47813, "data texts": 12732, "better improve": 6902, "improve bert": 24827, "obtain complete": 38169, "continually learn": 10825, "tuning parameter": 58938, "propose techniques": 43663, "score significantly": 48873, "texts extracted": 56880, "performance precision": 40489, "manually assigned": 31764, "consistent evaluation": 10274, "tasks aims": 55496, "multiple outputs": 36259, "diverse reasoning": 15715, "seq models": 49889, "modifying training": 35740, "tune task": 58864, "training reward": 58233, "proved successful": 43989, "learning does": 29603, "new interactive": 37227, "accuracy prior": 1031, "strategy model": 52942, "techniques label": 56103, "interactions entities": 26617, "augmentation work": 4973, "understanding relationship": 59392, "work direction": 62638, "agnostic representation": 2096, "use prompt": 59983, "based plms": 5938, "signals including": 50834, "queries different": 44651, "spanning languages": 51954, "hope study": 24014, "tokens word": 57344, "size pre": 51394, "pairs approach": 39169, "10 dataset": 38, "datasets general": 13282, "better convergence": 6870, "recently various": 45474, "typically fail": 59142, "pretrained lm": 42164, "prompting approach": 43214, "substantially worse": 53650, "common strategy": 9202, "knowledge extensive": 27476, "way incorporating": 61811, "models vision": 35674, "decoder paper": 13608, "samples different": 48470, "metric work": 33131, "text respectively": 56745, "existing transformer": 19165, "development open": 14696, "higher correlations": 23819, "example generation": 18877, "including gpt": 25259, "datasets findings": 13275, "online posts": 38380, "gender agreement": 22034, "corpus essential": 11330, "language end": 28049, "method variety": 32702, "individual methods": 25572, "evaluated languages": 18534, "generation remains": 22538, "model likely": 34064, "evaluation reveals": 18706, "transformers shown": 58531, "model logical": 34071, "select candidate": 49101, "modular approach": 35742, "recent sota": 45346, "problem distinguishing": 42542, "article proposed": 4457, "rich relations": 48114, "reasoning needed": 45210, "networks good": 36864, "produce poor": 42998, "performance suggest": 40585, "instances available": 26433, "parameter efficiency": 39667, "costly human": 11601, "words improve": 62433, "various large": 61353, "design prompt": 14297, "instead conventional": 26447, "texts word": 56946, "combines best": 9093, "token representation": 57303, "automatically translating": 5204, "specific emotion": 52077, "goal understand": 22904, "work inspire": 62688, "models hallucinate": 35073, "conducting human": 10104, "model selecting": 34345, "challenging low": 8111, "generalized model": 22152, "overfitting issue": 39082, "sets task": 50307, "task aim": 54891, "text set": 56767, "interpretability analysis": 26714, "results underline": 47893, "performance small": 40566, "little understanding": 30888, "language templates": 28524, "temporal knowledge": 56189, "library provides": 30425, "training conducted": 57956, "diverse downstream": 15701, "existing theories": 19161, "express thoughts": 19795, "aims present": 2208, "data selected": 12632, "able reconstruct": 720, "rarely considered": 45004, "requires precise": 46947, "precise understanding": 41610, "dl model": 15753, "aware mechanism": 5460, "combines existing": 9095, "domain ner": 16119, "building text": 7475, "competitive alternative": 9538, "provide support": 44139, "generation generation": 22470, "assessment models": 4592, "2021 workshop": 301, "f1 test": 20231, "systems given": 54514, "model hard": 33953, "objective optimization": 38097, "architecture important": 4051, "systems production": 54602, "form context": 21317, "context account": 10579, "inconsistency problem": 25338, "focus solving": 21203, "text ignoring": 56620, "expansion based": 19189, "novel general": 37829, "2022 shared": 303, "parameters existing": 39694, "github paper": 22718, "rich annotated": 48092, "approaches self": 3917, "multi pass": 35998, "dataset relative": 13059, "alleviate limitations": 2413, "divergence based": 15686, "conducted publicly": 10089, "extracted automatically": 20006, "effectiveness bert": 16769, "contribute meaning": 10930, "processing linguistic": 42883, "guided multi": 23347, "automated speech": 5058, "texts high": 56886, "approximate posterior": 3979, "learn alignment": 29345, "language corresponding": 28010, "decoding mechanism": 13632, "train datasets": 57576, "information underlying": 26135, "study challenges": 53338, "summarization research": 53898, "tasks adapting": 55491, "shot examples": 50614, "substantially larger": 53641, "domain shifts": 16160, "model infers": 34002, "module introduced": 35762, "levels present": 30244, "used automated": 60096, "general english": 22059, "evaluation studies": 18729, "news using": 37424, "expected provide": 19198, "augmented input": 4979, "accurate model": 1080, "transformation rules": 58445, "results surpass": 47872, "dependent target": 14151, "guide decoding": 23329, "learning diverse": 29598, "tokens learn": 57328, "majority research": 31532, "gpt experiments": 22976, "learning leverages": 29707, "data widely": 12774, "synthesis method": 54358, "create noisy": 11713, "corpus create": 11313, "translation test": 58689, "hope facilitate": 24007, "generation different": 22447, "studies work": 53313, "results able": 47482, "based users": 6123, "plm based": 41013, "samples using": 48494, "construct datasets": 10385, "time leads": 57173, "method new": 32589, "results tested": 47882, "identify gaps": 24423, "prediction demonstrate": 41699, "introduce methods": 26823, "word removal": 62281, "enables easy": 17438, "end compare": 17620, "supervisory signals": 54103, "alternative fine": 2501, "detecting semantic": 14450, "model follows": 33907, "results perform": 47763, "detection compare": 14468, "using prompts": 60877, "understand differences": 59290, "high latency": 23744, "propose cluster": 43318, "information fact": 25871, "linguistic skills": 30795, "distinct types": 15596, "addition experiments": 1615, "used target": 60322, "english proposed": 17862, "evaluation low": 18633, "accuracy introduce": 995, "evaluating multi": 18564, "produce low": 42990, "memory cost": 32251, "models control": 34862, "hierarchical encoding": 23669, "hierarchical document": 23666, "based auto": 5586, "inference times": 25702, "used enrich": 60166, "enrich training": 17961, "samples paper": 48485, "models internal": 35140, "dataset lastly": 12981, "corpus performance": 11402, "label semantic": 27723, "baselines release": 6296, "learned previous": 29477, "methods formulate": 32870, "evaluate wide": 18517, "generative sequence": 22610, "training accuracy": 57922, "methods specific": 33048, "amr semantic": 2576, "multiple embedding": 36207, "task assessing": 54917, "personal experience": 40755, "evaluation 12": 18572, "finetuning strategy": 21052, "unique dataset": 59512, "plms like": 41018, "knowledge unstructured": 27640, "attention tokens": 4836, "questions important": 44792, "experiments including": 19445, "underlying information": 59266, "accurate natural": 1082, "setting provide": 50345, "interpretable manner": 26723, "common english": 9172, "relations previous": 46052, "textual signals": 56980, "lm performance": 30910, "types syntactic": 59119, "play critical": 40963, "automatically capture": 5146, "examples adversarial": 18887, "task case": 54946, "similar features": 51042, "set addition": 50104, "independent tasks": 25505, "task shows": 55374, "emotional context": 17297, "train various": 57658, "tuned t5": 58887, "accuracy 11": 894, "setting recent": 50346, "performs inference": 40708, "important real": 24758, "square error": 52399, "noise types": 37606, "performance base": 40206, "work explicitly": 62655, "limited pre": 30605, "easy data": 16559, "augmentation eda": 4953, "dataset making": 12990, "dataset demonstrates": 12888, "studies cross": 53254, "content various": 10569, "datasets suitable": 13447, "task presents": 55288, "annotator agreements": 3012, "kg paper": 27361, "ignore semantic": 24493, "novel structure": 37931, "reference sentences": 45745, "test task": 56386, "14 f1": 139, "potential methods": 41400, "separately different": 49882, "passage level": 39920, "competing systems": 9532, "generation generating": 22469, "strategy make": 52941, "motivating development": 35881, "modeling interactions": 34586, "texts general": 56882, "annotated multi": 2905, "readable form": 45071, "work typically": 62850, "indicate effectiveness": 25525, "supervision pre": 54088, "new rules": 37307, "demonstrated different": 14003, "neural rankers": 37087, "present textbf": 42040, "helps overcome": 23612, "resources machine": 47315, "smaller language": 51519, "different prompt": 15038, "dataset focusing": 12934, "maintaining original": 31495, "examples compared": 18892, "domains computer": 16240, "process release": 42825, "joint reasoning": 27186, "level correlations": 30093, "opinion paper": 38502, "semantic attributes": 49239, "tasks future": 55651, "link https": 30827, "correlations paper": 11538, "mixed languages": 33407, "shot retrieval": 50638, "tasks assess": 55511, "nature dataset": 36478, "systems primarily": 54598, "input format": 26280, "model constraint": 33706, "annotation large": 2954, "documents sentence": 15911, "useful human": 60366, "research code": 47000, "google scholar": 22957, "compared domain": 9402, "method development": 32462, "training stability": 58267, "using 100": 60546, "100 samples": 63, "dataset assessing": 12815, "respectively benchmark": 47361, "updated new": 59767, "training lm": 58157, "demonstrated efficacy": 14007, "knowledge known": 27538, "designed make": 14323, "com alibaba": 9004, "annotations training": 3004, "concrete recommendations": 9982, "leading sub": 29301, "usage propose": 59805, "set framework": 50159, "rich morphological": 48112, "created annotated": 11722, "analysis nlp": 2709, "sentences jointly": 49742, "com amazon": 9006, "inference recent": 25689, "generated adversarial": 22267, "close original": 8689, "original inputs": 38717, "adapt knowledge": 1503, "layer multi": 29191, "10 12": 29, "proposed alleviate": 43717, "work leveraged": 62709, "training mixed": 58177, "model fitting": 33901, "present promising": 41989, "provides reliable": 44222, "unified task": 59479, "encoded input": 17479, "models restricted": 35452, "develop multi": 14600, "created corpus": 11723, "addition test": 1646, "transformer multi": 58502, "level fusion": 30123, "humans acquire": 24272, "users feedback": 60465, "architecture combined": 4034, "position dependent": 41263, "tree generation": 58743, "tight coupling": 57107, "tasks reduce": 55842, "lms pretrained": 30922, "easily used": 16553, "severely limits": 50428, "aims assign": 2174, "structures human": 53185, "potentially noisy": 41416, "probability label": 42477, "model remaining": 34306, "labels experiments": 27823, "features context": 20546, "prompting large": 43216, "model llm": 34068, "text davinci": 56526, "strategy achieve": 52924, "classification goal": 8477, "considerable progress": 10235, "datasets prove": 13385, "dataset level": 12983, "features sentences": 20663, "studies examined": 53261, "finally observe": 20870, "observe proposed": 38140, "models attempt": 34733, "input automatically": 26257, "approaches approach": 3766, "metric training": 33127, "set multiple": 50197, "experiment bert": 19233, "representative datasets": 46797, "models ignore": 35099, "augmentation improve": 4956, "words provide": 62488, "documents generated": 15882, "surpass current": 54162, "art existing": 4260, "parameters achieve": 39687, "supervised finetuning": 53987, "small medium": 51482, "instruction tuning": 26481, "look problem": 31066, "problem practical": 42624, "different numbers": 15011, "prompt learning": 43205, "learning outperform": 29792, "annotation domain": 2944, "absolute score": 750, "level selection": 30202, "advances self": 1925, "question state": 44751, "need extensive": 36563, "gender occupation": 22038, "phenomenon known": 40815, "knowledge speaker": 27615, "reasoning including": 45197, "dataset table": 13111, "entities like": 18063, "challenges document": 8041, "models associated": 34731, "intent classifier": 26566, "shown fine": 50709, "labeled utterances": 27772, "analysis deep": 2644, "achieves 10": 1284, "generation primarily": 22524, "words time": 62532, "processing procedure": 42928, "approach reducing": 3671, "improves best": 25118, "intelligence applications": 26537, "ranking accuracy": 44964, "produced bert": 43017, "demonstrate accuracy": 13860, "obtains performance": 38254, "evaluation remains": 18696, "benchmark called": 6430, "using languages": 60755, "level generative": 30124, "capture user": 7720, "deployed online": 14171, "sentences article": 49682, "largest existing": 29095, "gains standard": 21942, "dual encoders": 16460, "trees propose": 58770, "lot training": 31122, "method combination": 32418, "underexplored paper": 59252, "belief propagation": 6407, "11b parameters": 100, "testing dataset": 56403, "short description": 50552, "models tailored": 35580, "easily understandable": 16551, "using finite": 60698, "given labeled": 22753, "contrastive objectives": 10915, "shot data": 50610, "risk assessment": 48161, "autoregressive manner": 5220, "data growing": 12396, "applications prior": 3236, "words corpora": 62390, "vast knowledge": 61439, "data construct": 12245, "informative knowledge": 26173, "languages remain": 28769, "performance set": 40555, "retaining original": 47926, "level perturbations": 30175, "syntactic parser": 54310, "drastically reduce": 16393, "networks generate": 36861, "15 higher": 148, "higher diversity": 23822, "produced different": 43019, "target oriented": 54834, "study compares": 53341, "algorithms analyze": 2319, "certain attributes": 7935, "samples multiple": 48483, "time high": 57161, "propose template": 43664, "adapting large": 1565, "adapters small": 1562, "outperform fine": 38798, "finetuning models": 21050, "models seven": 35490, "difference performance": 14819, "rapid adaptation": 44986, "higher frequency": 23826, "methods remove": 33014, "adaptive learning": 1576, "level despite": 30099, "theoretically analyze": 57028, "corpus adapt": 11269, "significantly longer": 50986, "metrics automatic": 33139, "baseline 12": 6147, "model gram": 33945, "transfer cross": 58355, "chinese dialogue": 8306, "research build": 46995, "simple easy": 51148, "character features": 8202, "ce loss": 7904, "subset input": 53608, "systematic investigation": 54400, "models augment": 34740, "model retrieves": 34326, "explore challenging": 19691, "western countries": 61948, "require access": 46840, "additional fine": 1670, "interesting aspects": 26648, "considering entire": 10259, "learns mapping": 29966, "improvement 20": 24980, "viable option": 61571, "approaches tend": 3937, "general effective": 22058, "knowledge especially": 27467, "data evidence": 12332, "data parameters": 12537, "model maintains": 34083, "generate non": 22224, "data explicitly": 12347, "designed training": 14335, "language effect": 28040, "leverage cross": 30261, "achieves consistently": 1322, "detection module": 14503, "proposed previous": 43882, "crowdsourced datasets": 11887, "datasets outperforming": 13358, "tokens time": 57339, "requires multiple": 46946, "hierarchical contrastive": 23664, "deal long": 13518, "modeling loss": 34594, "generation time": 22568, "adaptation training": 1545, "important improving": 24733, "demonstrates great": 14034, "language testing": 28526, "dataset represents": 13063, "effect text": 16619, "language lacks": 28129, "useful resources": 60386, "previous zero": 42325, "set achieves": 50103, "information promote": 26027, "especially new": 18291, "algorithm evaluate": 2273, "financial text": 20894, "extract large": 19983, "task self": 55351, "leverages sentence": 30314, "identify biases": 24414, "linguistic concepts": 30757, "learn long": 29392, "fed model": 20705, "framework unifies": 21617, "scientific terms": 48770, "accuracy interpretability": 994, "study carried": 53337, "generating summaries": 22397, "perform post": 40129, "identifying semantically": 24466, "language architecture": 27967, "sentences novel": 49759, "aspects discussed": 4538, "train encoder": 57583, "includes pre": 25233, "memory costs": 32252, "method given": 32518, "sentence sentences": 49643, "sentences higher": 49732, "alternative evaluation": 2500, "information despite": 25808, "lacking explicit": 27929, "tasks generation": 55656, "complex high": 9627, "detection different": 14474, "dynamically adapt": 16496, "19 paper": 188, "components propose": 9725, "intensity prediction": 26557, "resource future": 47227, "optimize performance": 38564, "leads precise": 29324, "training main": 58165, "significant implications": 50871, "languages able": 28589, "need make": 36580, "involve multiple": 27014, "problem models": 42610, "learned pretrained": 29476, "texts target": 56931, "manually create": 31770, "facilitate process": 20272, "descriptions natural": 14253, "various degrees": 61324, "support tasks": 54129, "tasks leading": 55716, "models relevant": 35427, "enables build": 17437, "able advantage": 673, "real use": 45116, "aims achieve": 2171, "reducing annotation": 45703, "studies human": 53269, "phrases extracted": 40851, "features performance": 20641, "studies approach": 53247, "outputs word": 39021, "provide clues": 44026, "consider local": 10213, "provide brief": 44022, "text refers": 56730, "defined terms": 13787, "akin human": 2227, "prompting method": 43219, "benchmarks provide": 6540, "need comprehensive": 36550, "abilities large": 590, "low perplexity": 31164, "stage pretraining": 52440, "emphasizing importance": 17316, "practical importance": 41465, "novel shot": 37922, "sentences paired": 49761, "compression ratio": 9813, "paper submitted": 39582, "benchmarks use": 6547, "task counterparts": 54982, "submitted models": 53583, "classification challenging": 8441, "graph capture": 23113, "capture relationship": 7702, "capable processing": 7627, "reward shaping": 48070, "making robust": 31667, "need look": 36579, "capabilities llms": 7601, "study attempts": 53330, "suggest use": 53831, "human studies": 24243, "points language": 41077, "datasets obtain": 13351, "conducted test": 10096, "noisy samples": 37623, "semantics data": 49401, "categories like": 7845, "furthermore build": 21806, "embedding attention": 17013, "addition fine": 1617, "scores provide": 48917, "avoid problems": 5434, "architecture designs": 4042, "difficult nlp": 15177, "case paper": 7793, "79 respectively": 515, "function language": 21755, "algorithms detect": 2323, "respect linguistic": 47347, "scores training": 48926, "method collect": 32417, "level score": 30198, "methods leveraged": 32926, "contain richer": 10471, "investigate benefits": 26945, "science psychology": 48749, "official submission": 38311, "models impressive": 35109, "strong robustness": 53048, "corpus development": 11324, "state research": 52709, "progress tasks": 43117, "negligible cost": 36653, "keeping competitive": 27278, "lexical processing": 30377, "volume variety": 61730, "vary depending": 61420, "generated best": 22272, "introduce text": 26871, "apply graph": 3330, "information modeling": 25976, "language produced": 28443, "learning integrated": 29685, "experiments result": 19513, "resulting sub": 47478, "corresponding actions": 11545, "extract local": 19985, "strategies reduce": 52916, "functions different": 21771, "makes research": 31634, "discuss existing": 15465, "finally open": 20872, "important form": 24728, "tasks crucial": 55566, "specific adapters": 52042, "evaluate validity": 18515, "allow models": 2438, "consistent word": 10287, "ignore information": 24492, "allows combine": 2453, "tokens paper": 57331, "models defined": 34891, "specific parameters": 52123, "domain zero": 16229, "curate release": 11946, "benchmark open": 6484, "domain end": 16056, "documents generate": 15881, "systems aims": 54429, "knowledge address": 27392, "enhanced generative": 17931, "methods overcome": 32971, "modal transformer": 33466, "methods popular": 32983, "popular widely": 41198, "neglected paper": 36649, "electra model": 16964, "13 improvement": 125, "utilizing deep": 61121, "performance architecture": 40198, "performing method": 40680, "articles results": 4478, "provides accurate": 44179, "problems especially": 42699, "data verify": 12771, "learning demonstrated": 29586, "context article": 10585, "analysis establish": 2658, "models random": 35394, "development artificial": 14668, "combine context": 9063, "resources open": 47322, "help enhance": 23558, "challenges problem": 8070, "proposed deal": 43754, "task combination": 54954, "extracted high": 20011, "learn fine": 29371, "provide domain": 44056, "knowledge grounding": 27509, "models supports": 35567, "modality model": 33477, "accurate estimation": 1078, "leverages context": 30301, "finally examine": 20855, "cloud platform": 8721, "addition pre": 1633, "attention variants": 4848, "consists tasks": 10334, "learning technology": 29909, "span masking": 51926, "book question": 7246, "words unknown": 62538, "feelings opinions": 20728, "hold promise": 23980, "level inter": 30136, "phrase phrase": 40842, "matching dataset": 31911, "availability gold": 5248, "entities complex": 18040, "model arabic": 33574, "time end": 57149, "available zero": 5393, "time prior": 57199, "improvement perplexity": 25014, "explore transfer": 19744, "data objective": 12517, "data coverage": 12257, "public corpus": 44310, "corpus unlabeled": 11452, "selecting representative": 49128, "models showcase": 35494, "evaluation pre": 18678, "techniques solve": 56138, "systems multiple": 54566, "representations content": 46631, "changes neural": 8180, "tasks terms": 55928, "research release": 47115, "challenges include": 8053, "text preserves": 56705, "rapidly developing": 44993, "type paper": 59063, "subjective evaluations": 53563, "traditional transformer": 57554, "languages supported": 28798, "classification large": 8484, "maintaining grammatical": 31491, "use proposed": 59984, "traditional multi": 57534, "pretrained deep": 42152, "task suffer": 55422, "dataset suitable": 13107, "propose shot": 43630, "mechanisms results": 32153, "explore properties": 19729, "embedding state": 17064, "experiment data": 19235, "adaptation pre": 1534, "offers comprehensive": 38301, "mitigate impact": 33384, "work shot": 62820, "optimal model": 38528, "specific way": 52173, "novel research": 37909, "design pre": 14295, "computes attention": 9902, "using similar": 60941, "text complexity": 56502, "wikipedia corpora": 62045, "text remains": 56735, "provides data": 44190, "insights human": 26390, "language annotation": 27961, "robustness achieved": 48271, "sample selection": 48454, "type user": 59074, "detection purpose": 14515, "techniques multi": 56113, "languages scarce": 28777, "related non": 45921, "results generating": 47648, "associated increased": 4622, "different tokens": 15101, "context set": 10716, "datasets resource": 13406, "reasonable time": 45175, "negative sample": 36632, "detecting entities": 14448, "supervised multi": 54023, "generated candidate": 22273, "problems lack": 42705, "surprising findings": 54182, "metrics designed": 33159, "model samples": 34334, "handling unseen": 23429, "context inspired": 10660, "finally leverage": 20866, "determine relevance": 14558, "domains trained": 16297, "paths entities": 39952, "entities far": 18052, "trained static": 57884, "text expressing": 56570, "develop theory": 14619, "models remarkable": 35435, "end requires": 17705, "correlations data": 11533, "topics like": 57453, "lingual setup": 30729, "data emerged": 12316, "significant variation": 50929, "work achieve": 62552, "evaluation purposes": 18690, "models manually": 35216, "gap supervised": 21981, "code based": 8797, "retrieval enhanced": 47943, "structure sequence": 53137, "models attractive": 34739, "slows inference": 51456, "achieve great": 1149, "approaches pre": 3896, "flat sequence": 21096, "finally design": 20850, "chinese lexical": 8311, "information evaluate": 25841, "dependent nature": 14149, "make text": 31604, "proposed various": 43924, "adapt state": 1510, "large noisy": 28921, "achieve 10": 1106, "method user": 32696, "dataset empirical": 12905, "crucial problem": 11906, "data extracting": 12355, "empirically analyze": 17356, "leveraging different": 30321, "inter related": 26585, "dataset set": 13081, "build release": 7422, "access ground": 821, "techniques method": 56110, "strategy fine": 52933, "tasks solve": 55897, "based theoretical": 6095, "plain language": 40937, "sampling model": 48504, "input level": 26294, "use end": 59876, "domains important": 16260, "roberta deberta": 48219, "negative impacts": 36621, "capturing local": 7739, "parameters fully": 39701, "performance source": 40569, "importance carefully": 24677, "source multi": 51785, "use complex": 59847, "finally generate": 20861, "arduous task": 4135, "analysis current": 2640, "control outputs": 10971, "node graph": 37586, "solution improving": 51655, "designed text": 14334, "identify optimal": 24434, "units different": 59530, "properly reflect": 43256, "problems study": 42732, "capabilities multi": 7604, "additionally results": 1734, "benchmark collection": 6433, "enhance diversity": 17911, "novel group": 37836, "allows develop": 2457, "problem extend": 42561, "insights dataset": 26387, "topic time": 57434, "supervision limited": 54084, "models llm": 35194, "work document": 62640, "supervised labels": 53991, "summarization generation": 53885, "struggle generate": 53201, "generation enhance": 22453, "task solving": 55385, "ability context": 600, "corpus 10k": 11264, "languages requires": 28771, "effort involved": 16928, "class imbalanced": 8404, "datasets unified": 13466, "performance generally": 40363, "kb text": 27272, "transformers specifically": 58532, "minimal data": 33286, "use powerful": 59974, "capture correlation": 7657, "demonstrates feasibility": 14033, "feasibility automatic": 20468, "video available": 61579, "analysis collected": 2628, "series methods": 50065, "using historical": 60730, "terms complexity": 56277, "performance sequential": 40554, "settings existing": 50368, "prediction used": 41750, "enable large": 17425, "based prompting": 5957, "translation pipeline": 58657, "task instructions": 55143, "generation word": 22580, "trained classify": 57689, "presents alternative": 42070, "information multimodal": 25979, "evaluation experimental": 18618, "dense vectors": 14086, "approach particular": 3636, "texts detecting": 56872, "semeval 2023": 49442, "2023 task": 308, "dataset particularly": 13024, "raises important": 44859, "focus new": 21187, "contrast performance": 10880, "outperforms commonly": 38883, "token text": 57310, "testing human": 56406, "technique achieve": 56024, "intensive paper": 26560, "models developing": 34910, "developing neural": 14660, "size quality": 51395, "world social": 62961, "setup models": 50410, "annotations specifically": 3001, "gpt codex": 22975, "relevance text": 46196, "producing coherent": 43038, "levels using": 30250, "training hypothesize": 58123, "generation algorithms": 22418, "style specific": 53499, "module capture": 35753, "attention enhanced": 4742, "predicted token": 41671, "effective shot": 16695, "evaluation public": 18689, "temporal aspects": 56179, "strategies fine": 52903, "language pattern": 28376, "sentences pre": 49768, "sentences conduct": 49694, "promising area": 43162, "discourse unit": 15402, "training manner": 58169, "task crucial": 54988, "different common": 14868, "crucial challenging": 11896, "settings generating": 50376, "model prompt": 34242, "framework case": 21469, "techniques identifying": 56095, "majority work": 31536, "stage employ": 52428, "usually suffers": 61070, "models constructed": 34856, "text performance": 56697, "errors human": 18240, "english systems": 17885, "hope new": 24010, "existing lm": 19086, "continued training": 10833, "corpora additionally": 11172, "future opportunities": 21880, "aware manner": 5459, "variants transformer": 61238, "single candidate": 51287, "relevant news": 46226, "methods ablation": 32724, "learning structured": 29898, "framework end": 21504, "set online": 50207, "impact important": 24597, "equally training": 18192, "data utilization": 12767, "mining framework": 33313, "demonstrated superiority": 14024, "languages reduce": 28767, "languages affect": 28596, "training information": 58131, "transformers pretrained": 58529, "translation modern": 58635, "careful consideration": 7755, "space furthermore": 51866, "essential model": 18328, "knowledge experimental": 27471, "data likely": 12464, "rouge meteor": 48351, "addition lexical": 1624, "uses domain": 60507, "forgetting previously": 21308, "agreement iaa": 2107, "human analysis": 24094, "effective alternative": 16628, "scenarios end": 48695, "examples investigate": 18913, "utterances propose": 61151, "future design": 21867, "provide higher": 44086, "task include": 55129, "transformer effectively": 58481, "growing size": 23303, "sample multiple": 48453, "current popular": 11996, "works generally": 62893, "propose guidelines": 43407, "focus solely": 21202, "form short": 21335, "automatic diagnosis": 5079, "models constantly": 34853, "word unit": 62327, "unsolved problem": 59662, "generation use": 22574, "works based": 62877, "methods scale": 33026, "method trains": 32688, "spurious biases": 52386, "remains underexplored": 46353, "generalize diverse": 22140, "specific subsets": 52150, "fairer models": 20359, "structured overview": 53169, "hierarchical manner": 23677, "insufficient information": 26493, "usually consist": 61041, "spoken dialogues": 52356, "time maintaining": 57175, "sentence pieces": 49619, "widely acknowledged": 61990, "analyse impact": 2586, "model prompting": 34243, "unlabeled pu": 59575, "com deeplearnxmu": 9010, "combine predictions": 9071, "methods codes": 32785, "span span": 51933, "detection key": 14495, "systems achieving": 54423, "applications crucial": 3192, "recognized important": 45554, "prompts improve": 43224, "prompting llms": 43218, "prompting strategies": 43221, "heavily affected": 23528, "foundation model": 21417, "challenging number": 8122, "textual dataset": 56957, "000 news": 9, "extensive dataset": 19861, "training generate": 58112, "established method": 18355, "minimally supervised": 33295, "research google": 47047, "set prediction": 50224, "generating final": 22374, "performances variety": 40651, "models hybrid": 35096, "generate labeled": 22214, "experiments illustrate": 19444, "bring performance": 7333, "settings datasets": 50365, "cases zero": 7818, "competitive strong": 9568, "distribution differences": 15635, "weights used": 61942, "suffers severe": 53794, "randomly masking": 44902, "translated target": 58559, "number tokens": 38046, "information level": 25953, "tuning entire": 58910, "demonstrates substantial": 14046, "datasets far": 13272, "palm 540b": 39243, "generation design": 22445, "using news": 60840, "pairs resulting": 39213, "improves effectiveness": 25127, "data condition": 12234, "domain experiments": 16065, "shared network": 50482, "quantities text": 44636, "pre order": 41505, "furthermore using": 21842, "annotated medical": 2904, "results lead": 47697, "solution space": 51661, "uses automatic": 60493, "longformer bigbird": 31059, "models sufficient": 35557, "method performing": 32612, "question recent": 44747, "problem description": 42532, "retrieves relevant": 47992, "realistic challenging": 45148, "scores task": 48924, "languages findings": 28674, "outperform vanilla": 38831, "examples introduce": 18912, "ability llms": 622, "availability annotated": 5245, "create largest": 11707, "points given": 41074, "texts tend": 56933, "observe models": 38138, "efficient search": 16897, "domain difficult": 16049, "require annotated": 46842, "process task": 42832, "relevant works": 46247, "analyze use": 2832, "dataset effective": 12903, "process furthermore": 42784, "models textit": 35598, "input modality": 26299, "growing area": 23288, "research robust": 47118, "leverage commonsense": 30258, "proposed unified": 43919, "syntactic correctness": 54297, "emnlp 2022": 17281, "datasets includes": 13299, "challenge finding": 7981, "brown et": 7372, "development multi": 14688, "quality dimensions": 44509, "cited papers": 8369, "way pre": 61826, "lm parameters": 30909, "approaches tested": 3939, "algorithmic approaches": 2314, "extract sentences": 19993, "produce large": 42989, "similar methods": 51053, "following challenges": 21263, "training entire": 58088, "utterances experiments": 61147, "challenging develop": 8090, "compare effectiveness": 9339, "predictions training": 41768, "debiasing method": 13534, "study help": 53383, "efforts paper": 16942, "document corpora": 15780, "domain key": 16092, "using strategies": 60964, "performance scenario": 40545, "compare previous": 9359, "high task": 23805, "compared naive": 9424, "quality crucial": 44504, "words punctuation": 62489, "important tokens": 24784, "reduces need": 45694, "metrics achieve": 33135, "performance effectiveness": 40313, "important difference": 24717, "tasks challenge": 55535, "embeddings domain": 17116, "gap languages": 21967, "resources scarce": 47333, "llms shown": 30902, "improved understanding": 24970, "context similar": 10719, "agreement based": 2104, "representation furthermore": 46523, "easily adaptable": 16534, "tasks evaluating": 55621, "work zero": 62861, "complexity makes": 9681, "attention current": 4732, "datasets consistent": 13193, "domain small": 16161, "score original": 48862, "annotation standards": 2970, "sentiment social": 49859, "furthermore leverage": 21826, "settings release": 50394, "relevance prediction": 46192, "degradation compared": 13801, "shot evaluations": 50613, "offer insight": 38291, "events like": 18794, "benchmark knowledge": 6474, "bridge language": 7321, "alleviate limitation": 2412, "essential modern": 18329, "works achieved": 62873, "size increases": 51386, "models rapidly": 35397, "significant benefits": 50852, "generation content": 22438, "information output": 25998, "reddit twitter": 45645, "framework self": 21594, "framework evaluated": 21508, "model continuous": 33714, "paper challenge": 39287, "shows dataset": 50773, "models broadly": 34795, "practical way": 41480, "train standard": 57637, "loss used": 31107, "source datasets": 51762, "highly likely": 23904, "emerged effective": 17259, "consist multiple": 10263, "decoding paper": 13636, "requires advanced": 46914, "biased word": 7052, "concepts work": 9947, "end evaluation": 17672, "scenarios automatic": 48691, "investigate reasons": 26981, "graph encoder": 23132, "studies tried": 53304, "generate variety": 22262, "self correction": 49193, "domains previous": 16285, "fusion approaches": 21852, "capturing inter": 7736, "challenging propose": 8131, "variety benchmarks": 61264, "step wise": 52837, "major barrier": 31501, "alternative traditional": 2509, "researchers develop": 47150, "communication based": 9246, "work employ": 62644, "performing end": 40676, "cross utterance": 11871, "achieves 13": 1285, "issues remain": 27104, "work looked": 62714, "answers address": 3105, "text outputs": 56683, "does utilize": 15982, "sentences large": 49747, "simultaneously considers": 51270, "sensitive nature": 49502, "dataset helps": 12947, "limited resource": 30609, "using latest": 60763, "construct multilingual": 10393, "distillation scheme": 15577, "proposes effective": 43932, "attention regularization": 4819, "including end": 25253, "significant data": 50860, "propose natural": 43486, "including recurrent": 25293, "multi component": 35949, "conventional pipeline": 11010, "data semi": 12638, "using adapters": 60551, "test english": 56347, "phrases source": 40854, "trains models": 58325, "propose representation": 43606, "predicting topic": 41684, "accuracy nlp": 1016, "generative ones": 22601, "text sample": 56751, "11 respectively": 91, "train target": 57643, "use encoder": 59874, "recall using": 45248, "scale annotation": 48553, "documents prior": 15904, "summary input": 53915, "findings conclude": 20905, "analysis points": 2717, "demonstrates promising": 14038, "scientific progress": 48767, "available furthermore": 5295, "step used": 52834, "foundation natural": 21418, "prominent role": 43152, "help solve": 23589, "poor model": 41139, "data balancing": 12183, "result able": 47432, "reduce negative": 45674, "regressive decoder": 45825, "recently demonstrated": 45417, "triplet network": 58809, "analysis sentences": 2751, "relatedness scores": 45959, "translation second": 58672, "second uses": 49029, "binary text": 7155, "modern approaches": 35701, "2022 workshop": 306, "ensembling models": 17987, "transfer shot": 58421, "candidate outputs": 7573, "performance sub": 40582, "par strong": 39619, "attention specifically": 4831, "leveraging powerful": 30335, "performs considerably": 40705, "negative sentiments": 36637, "score bert": 48836, "strategy outperforms": 52945, "successfully improves": 53747, "token classifier": 57283, "sentence transformers": 49662, "texts data": 56869, "future word": 21898, "respectively extensive": 47369, "distillation technique": 15579, "paper comprehensively": 39293, "analyze generalization": 2817, "schemes proposed": 48734, "based stage": 6053, "german based": 22662, "handle tasks": 23416, "bipartite matching": 7182, "competitive scores": 9565, "low model": 31162, "maintaining model": 31494, "architectures explored": 4110, "attempt answer": 4679, "corpora important": 11207, "set automatic": 50109, "surface text": 54156, "use resource": 59997, "jointly encoding": 27196, "negative consequences": 36616, "labels target": 27849, "provide fair": 44071, "difficult construct": 15161, "methods contrastive": 32802, "important work": 24794, "design constrained": 14269, "training extensive": 58102, "performance suggests": 40587, "f1 accuracy": 20180, "tasks combined": 55542, "need effective": 36557, "emerged important": 17260, "ability support": 643, "increasingly necessary": 25475, "access text": 829, "prompt models": 43206, "classification multilingual": 8502, "expansion task": 19190, "optimized maximize": 38568, "just fine": 27249, "common scenario": 9195, "crafted templates": 11683, "labels labels": 27836, "pair dataset": 39148, "size work": 51404, "description task": 14248, "based hand": 5764, "train generate": 57592, "questions best": 44776, "benchmark analysis": 6424, "language significant": 28485, "models accomplish": 34660, "leverage available": 30256, "level relevance": 30191, "document sequence": 15830, "approach collect": 3448, "systems capture": 54447, "studied topic": 53238, "drastically reducing": 16396, "models speed": 35529, "16 datasets": 163, "study attention": 53331, "reliable methods": 46252, "unsupervised automatic": 59683, "baseline metrics": 6184, "11 improvement": 86, "process evaluation": 42777, "generally requires": 22171, "exploit recent": 19662, "results previously": 47777, "proposed nlp": 43870, "low source": 31203, "low semantic": 31202, "modal multi": 33461, "self generated": 49197, "multi topic": 36034, "knowledge identify": 27519, "works address": 62874, "domain does": 16050, "construct high": 10387, "quality multi": 44553, "dataset leveraging": 12984, "observed different": 38144, "paradigm paper": 39627, "need evaluate": 36559, "multiple stages": 36288, "datasets generate": 13283, "study dataset": 53353, "improvement training": 25036, "seq2seq paradigm": 49904, "supervised loss": 54011, "generation improve": 22475, "techniques terms": 56142, "area machine": 4141, "ubiquitous human": 59175, "different length": 14975, "points bleu": 41070, "problem converting": 42525, "produces output": 43033, "annotation based": 2937, "context simple": 10720, "form based": 21313, "data pipeline": 12544, "training construct": 57958, "latent state": 29138, "given access": 22722, "consisting 100": 10315, "fail produce": 20343, "upstream tasks": 59779, "demonstrated remarkable": 14017, "gpt 175b": 22971, "translation generate": 58615, "compare supervised": 9370, "testing state": 56413, "consequently models": 10204, "significant loss": 50896, "introducing auxiliary": 26899, "understanding capability": 59328, "decoding efficiency": 13629, "data bilingual": 12191, "resource problem": 47264, "llms perform": 30901, "training subsets": 58277, "framework zero": 21629, "flan t5": 21094, "llms gpt": 30900, "leading robust": 29297, "generalizable models": 22111, "uses single": 60537, "davinci 003": 13499, "set expert": 50155, "like biases": 30464, "guide development": 23330, "understand effectiveness": 59293, "drawbacks existing": 16404, "generative pretraining": 22605, "ranging size": 44946, "new public": 37291, "efforts address": 16934, "restrict attention": 47420, "attention multiple": 4792, "existing supervised": 19152, "set demonstrating": 50134, "models corpora": 34868, "allows creation": 2456, "single target": 51341, "task match": 55208, "high prevalence": 23764, "determine appropriate": 14554, "problem binary": 42512, "important downstream": 24720, "using layers": 60765, "setting text": 50352, "textual genres": 56967, "based kg": 5795, "systems frequently": 54508, "propose decoding": 43350, "additional model": 1688, "dataset comparison": 12851, "poor interpretability": 41136, "complex state": 9662, "research aim": 46979, "interpret paper": 26711, "improve conventional": 24835, "paper cast": 39286, "gpt generates": 22979, "search dataset": 48967, "representation predicting": 46570, "english literature": 17837, "classifiers outperform": 8620, "size experiments": 51383, "information relationship": 26050, "datasets setting": 13419, "translation arabic": 58580, "building real": 7464, "style training": 53503, "efficiently utilize": 16922, "better original": 6923, "graph captures": 23114, "manual intervention": 31744, "express information": 19793, "integrating state": 26524, "effective detection": 16645, "strong non": 53039, "output used": 39007, "like gender": 30470, "methods support": 33062, "similar ones": 51056, "classification considered": 8445, "method extensive": 32502, "attempt use": 4693, "benefit applications": 6558, "like chatgpt": 30466, "work collected": 62599, "dataset structured": 13103, "gain knowledge": 21911, "technique learn": 56038, "multiple teachers": 36300, "evaluation chatgpt": 18589, "expert crafted": 19575, "distance information": 15545, "verification methods": 61526, "performance 93": 40173, "performance observe": 40463, "linear non": 30662, "contents paper": 10576, "depth width": 14191, "series forecasting": 50064, "transformer fine": 58487, "self assessment": 49174, "extraction training": 20126, "topic coverage": 57399, "setup based": 50409, "progress automatic": 43093, "strategy generate": 52934, "adapt target": 1512, "models taken": 35581, "available methods": 5326, "model vietnamese": 34529, "scaling large": 48649, "paper learn": 39416, "new inputs": 37223, "micro macro": 33227, "independently using": 25510, "paper https": 39389, "https bit": 24054, "bert lstm": 6677, "theoretic measures": 57015, "applications high": 3208, "models decisions": 34887, "grand challenge": 23086, "class balanced": 8391, "research better": 46994, "pretrained text": 42185, "generation shown": 22547, "providing natural": 44250, "architecture order": 4072, "score new": 48861, "learning good": 29665, "transfer process": 58415, "glue datasets": 22865, "experiments code": 19374, "systems example": 54492, "steer model": 52790, "provide timely": 44146, "strategies deal": 52898, "gains terms": 21946, "datasets needed": 13343, "combination retrieval": 9049, "design benchmark": 14266, "cost manual": 11588, "selects informative": 49169, "evaluate algorithm": 18436, "contextual dependency": 10762, "unseen topics": 59657, "process apply": 42758, "significant limitations": 50895, "advances area": 1905, "models comes": 34830, "learn tasks": 29436, "distance aware": 15541, "like multi": 30487, "results field": 47632, "models decision": 34886, "pipeline including": 40901, "methods findings": 32866, "metrics finally": 33166, "thousands tokens": 57080, "history based": 23966, "groups based": 23279, "retrieved information": 47985, "shot cases": 50603, "based table": 6080, "noticeable performance": 37729, "conversational contexts": 11042, "bert gpt2": 6666, "prompting gpt": 43215, "performance producing": 40501, "propose ontology": 43577, "leverage dataset": 30263, "word provided": 62273, "predict non": 41649, "survey reviews": 54220, "knowledge datasets": 27434, "available based": 5264, "overview state": 39118, "model embed": 33805, "empirical exploration": 17329, "half century": 23366, "feelings emotions": 20727, "corpus approximately": 11279, "benchmarks propose": 6537, "embeddings far": 17135, "content prior": 10548, "model difficult": 33768, "language automatic": 27969, "gpt chatgpt": 22974, "measure importance": 32055, "investigate differences": 26950, "evaluation showed": 18718, "interaction systems": 26612, "exploit unlabeled": 19667, "performed significantly": 40665, "text lack": 56641, "knowledge alleviate": 27393, "represents important": 46818, "systematic exploration": 54398, "training difficult": 58066, "random selection": 44889, "generation paradigm": 22515, "addition highlight": 1619, "partly lack": 39900, "drive future": 16416, "model evaluations": 33845, "tool open": 57364, "driven way": 16434, "way task": 61831, "reduce domain": 45659, "suggests natural": 53848, "provide little": 44100, "effectiveness incorporating": 16784, "masked sequence": 31867, "identification datasets": 24386, "texts retrieved": 56920, "quality synthesized": 44584, "strategy overcome": 52946, "labeling strategy": 27794, "adapts pre": 1585, "particular social": 39861, "gap existing": 21961, "score 23": 48785, "leading development": 29290, "achieve faster": 1139, "strict evaluation": 52984, "especially pre": 18293, "approaches analyze": 3763, "distilled version": 15584, "algorithms performance": 2334, "make encoder": 31569, "article generation": 4450, "summarize main": 53907, "limitations discuss": 30546, "tasks understood": 55947, "process does": 42773, "algorithm natural": 2287, "problem generate": 42572, "form basis": 21314, "information leakage": 25949, "suggesting dataset": 53836, "datasets visual": 13483, "novel taxonomy": 37935, "time knowledge": 57169, "projection layers": 43141, "60 accuracy": 458, "provide flexible": 44076, "baseline novel": 6197, "features current": 20550, "ner named": 36680, "recently end": 45423, "nlp mainly": 37497, "scenarios tasks": 48710, "costly annotate": 11599, "provide task": 44142, "english vice": 17899, "shot classifiers": 50605, "datasets yields": 13490, "understanding modeling": 59365, "accuracy data": 955, "augmentation proposed": 4965, "issue data": 27059, "solving text": 51708, "improve generalizability": 24858, "scenarios particular": 48703, "training design": 58061, "surrounding text": 54198, "model corresponding": 33726, "gan model": 21955, "relations obtained": 46047, "aspect language": 4530, "affective information": 2023, "work date": 62619, "chatgpt recently": 8265, "based variation": 6125, "complexity tasks": 9690, "contains modules": 10500, "module language": 35764, "500 000": 424, "set 20": 50100, "challenges exist": 8045, "research providing": 47104, "utilizing unlabeled": 61129, "limitations study": 30557, "requires specialized": 46952, "including user": 25317, "models heavy": 35082, "models questions": 35392, "different decoding": 14892, "simple interpretable": 51181, "improve output": 24881, "version used": 61558, "tasks considering": 55555, "ai model": 2118, "relevant phrases": 46229, "information incorporate": 25919, "combining output": 9119, "transform input": 58440, "select similar": 49112, "corpus apply": 11277, "problem aim": 42500, "approach especially": 3515, "apply rule": 3351, "sound complete": 51736, "context dependencies": 10608, "entailment relation": 18005, "generate expressive": 22199, "mechanism performance": 32134, "used performance": 60261, "demonstrating feasibility": 14053, "build hierarchical": 7403, "cost prohibitive": 11592, "techniques successfully": 56139, "obtained similar": 38222, "clue words": 8730, "ability task": 644, "compares different": 9475, "significantly data": 50950, "tools analyzing": 57376, "segment boundaries": 49072, "domain annotated": 16018, "compared evaluated": 9404, "integrated framework": 26514, "proposed stage": 43901, "disambiguation systems": 15360, "methods serve": 33034, "word dictionary": 62137, "robustness performance": 48292, "based robust": 6003, "information certain": 25778, "framework adopt": 21453, "language standard": 28502, "describes data": 14221, "makes text": 31639, "task created": 54984, "combine output": 9070, "markup language": 31853, "demonstrate necessity": 13948, "known method": 27660, "processing tool": 42959, "deep linguistic": 13725, "corpora specific": 11244, "analysis automatically": 2620, "based tf": 6093, "need text": 36594, "work various": 62858, "structure modeling": 53119, "representation structure": 46586, "evaluation context": 18597, "presents model": 42091, "models original": 35282, "order compare": 38602, "based component": 5629, "theoretical models": 57024, "fully explored paper": 21730, "human computer interaction": 24125, "natural language processing": 36440, "language processing nlp": 28418, "possible future research": 41326, "future research directions": 21888, "natural language understanding": 36457, "language understanding reasoning": 28558, "natural language text": 36455, "paper describes experiments": 39323, "methods make use": 32940, "strengths weaknesses different": 52978, "et al 2007": 18396, "problem paper present": 42620, "non native speakers": 37666, "new method based": 37252, "non native english": 37665, "systems make use": 54559, "gram language models": 23055, "language models lm": 28279, "syntactic semantic information": 54325, "latent semantic analysis": 29134, "semantic analysis lsa": 49234, "significant improvements compared": 50885, "short term memory": 50570, "languages like english": 28713, "english french german": 17809, "using domain specific": 60668, "language processing domain": 28405, "paper present model": 39456, "fundamental problem natural": 21787, "problem natural language": 42614, "syntactic semantic properties": 54326, "propose theoretical framework": 43672, "corpus extensive experiments": 11341, "automatic text summarization": 5130, "subject verb object": 53559, "tasks natural language": 55760, "language processing information": 28411, "processing information retrieval": 42877, "information retrieval machine": 26064, "retrieval machine translation": 47951, "achieves human level": 1339, "human level performance": 24197, "corpora different languages": 11194, "world wide web": 62968, "machine translation based": 31350, "gram language model": 23054, "supervised machine learning": 54014, "machine learning algorithm": 31307, "predicate argument structures": 41630, "multi document summarization": 35953, "multiword expressions mwes": 36334, "using large scale": 60759, "large scale collection": 28965, "syntactic structure sentence": 54329, "knowledge plays important": 27570, "plays important role": 41000, "non local features": 37662, "task paper propose": 55267, "using rule based": 60914, "paper provide overview": 39557, "specific language model": 52100, "text audio video": 56445, "artificial neural network": 4498, "real world text": 45142, "large number languages": 28924, "paper tackle problem": 39591, "using natural language": 60829, "natural language expressions": 36424, "wide range natural": 61972, "range natural language": 44925, "language processing applications": 28397, "including question answering": 25291, "question answering summarization": 44710, "advance state art": 1884, "current natural language": 11990, "language processing systems": 28431, "performance state art": 40576, "challenge natural language": 7999, "trained large scale": 57770, "large scale corpora": 28966, "order improve performance": 38627, "domains natural language": 16278, "mean squared error": 31996, "hidden markov models": 23643, "automatic speech recognition": 5125, "systems paper present": 54581, "vector space models": 61468, "open source project": 38455, "important component natural": 24712, "component natural language": 9710, "processing nlp applications": 42899, "elementary discourse units": 16976, "paper present simple": 39461, "present simple approach": 42015, "multi class classification": 35946, "used natural language": 60248, "perform extensive experiments": 40107, "extensive experiments benchmark": 19881, "benchmark data sets": 6442, "various natural language": 61369, "sentences experimental results": 49716, "experimental results obtained": 19299, "results obtained using": 47749, "natural language based": 36414, "large scale real": 28998, "scale real world": 48621, "real world setting": 45137, "english machine translation": 17841, "word sense disambiguation": 62298, "language generation process": 28087, "machine translation mt": 31369, "systems paper presents": 54582, "closely related languages": 8706, "recent work focused": 45369, "open source tool": 38459, "main contributions work": 31433, "machine learning systems": 31332, "proposed semi supervised": 43893, "semi supervised approaches": 49457, "paper consider problem": 39303, "context free grammar": 10642, "paper presents new": 39478, "natural language sentences": 36449, "words phrases sentences": 62480, "natural language interfaces": 36432, "understand natural language": 59307, "given natural language": 22763, "process natural language": 42809, "demonstrated promising results": 14015, "play different roles": 40968, "paper presents method": 39476, "sense disambiguation wsd": 49484, "achieve good results": 1148, "paper presents comprehensive": 39471, "presents comprehensive study": 42078, "previous work using": 42316, "role natural language": 48317, "natural language applications": 36413, "applications information retrieval": 3212, "english german spanish": 17816, "paper presents novel": 39479, "spoken dialogue systems": 52355, "models trained tested": 35622, "applications natural language": 3223, "like machine translation": 30483, "machine translation speech": 31383, "information retrieval question": 26066, "retrieval question answering": 47965, "using conditional random": 60619, "conditional random field": 10004, "random field crf": 44871, "fold cross validation": 21251, "paper presents preliminary": 39481, "paper presents work": 39486, "representation natural language": 46562, "using machine learning": 60783, "machine learning approach": 31310, "processing natural language": 42895, "previous work model": 42306, "significant improvements baseline": 50883, "speech recognition asr": 52283, "like natural language": 30491, "task oriented dialogue": 55255, "pre processing step": 41510, "received little attention": 45261, "resource poor languages": 47262, "paper gives overview": 39388, "answering natural language": 3086, "natural language questions": 36445, "question answering qa": 44707, "answering qa systems": 3090, "information retrieval ir": 26063, "used evaluate performance": 60170, "real world applications": 45123, "low quality paper": 31169, "real world data": 45124, "provide new insights": 44105, "machine learning framework": 31320, "machine translation systems": 31385, "construct large scale": 10391, "semantic similarity words": 49350, "optical character recognition": 38523, "written different languages": 62997, "lexical knowledge base": 30369, "evaluation metric called": 18645, "inter annotator agreement": 26576, "improvement state art": 25027, "state art propose": 52651, "world natural language": 62950, "data paper introduce": 12531, "paper introduce new": 39402, "recent years growing": 45386, "paper present new": 39458, "data sparseness problem": 12682, "domain specific terms": 16188, "paper proposes new": 39550, "context free grammars": 10643, "existing natural language": 19115, "language processing methods": 28415, "languages english french": 28654, "latent dirichlet allocation": 29121, "natural language texts": 36456, "texts paper presents": 56909, "presents novel approach": 42097, "freely available https": 21653, "available https github": 5309, "https github com": 24059, "high dimensional space": 23729, "left right contexts": 30000, "noise contrastive estimation": 37596, "neural language models": 36964, "obtaining state art": 38239, "state art results": 52657, "multi view learning": 36040, "text classification tasks": 56488, "shows proposed method": 50799, "proposed method consistently": 43814, "method consistently outperforms": 32438, "methods domain adaptation": 32830, "domain adaptation methods": 16000, "processing nlp models": 42907, "intrinsic extrinsic evaluations": 26771, "probabilistic generative model": 42460, "fine tuning step": 21022, "various state art": 61397, "state art supervised": 52674, "supervised learning algorithms": 53995, "support vector machine": 54132, "vector machine svm": 61454, "naive bayes classifier": 36364, "principal component analysis": 42381, "present new approach": 41959, "available natural language": 5331, "paper focus problem": 39379, "english french translation": 17811, "conditional random fields": 10005, "random fields crf": 44873, "help better understand": 23554, "state art approaches": 52581, "small scale datasets": 51497, "real world datasets": 45126, "plays central role": 40990, "analysis named entity": 2702, "named entity recognition": 36373, "paper proposes method": 39546, "language pairs english": 28368, "english french english": 17808, "lexical syntactic semantic": 30392, "support vector machines": 54133, "statistical machine translation": 52750, "machine translation smt": 31382, "long term goal": 31041, "rule based approach": 48379, "shows promising results": 50797, "language processing tasks": 28433, "language processing techniques": 28434, "chinese social media": 8320, "deep learning models": 13714, "success natural language": 53713, "language processing deep": 28404, "massive text corpora": 31889, "close state art": 8692, "state art speech": 52669, "speech pos tagger": 52278, "based named entity": 5896, "named entity recognizer": 36374, "problems natural language": 42714, "training data used": 58047, "natural language generation": 36426, "time natural language": 57183, "outperforms existing models": 38900, "work deep learning": 62622, "learning neural networks": 29780, "representations paper propose": 46733, "paper propose alternative": 39494, "propose novel model": 43549, "vector representations words": 61463, "types neural networks": 59106, "learn high quality": 29380, "provide state art": 44134, "state art performance": 52646, "human language acquisition": 24192, "et al 2010": 18397, "open source tools": 38461, "development natural language": 14690, "understanding natural language": 59371, "natural language interface": 36431, "paper describes submission": 39330, "using publicly available": 60885, "translation model using": 58632, "english french spanish": 17810, "machine translation information": 31362, "translation information retrieval": 58621, "information retrieval information": 26062, "retrieval information extraction": 47947, "information extraction text": 25869, "machine translation cross": 31352, "better performance existing": 6931, "different types language": 15112, "field natural language": 20763, "work natural language": 62732, "resources natural language": 47320, "processing tasks machine": 42951, "tasks machine translation": 55735, "using proposed method": 60881, "lexical semantic information": 30384, "work present results": 62758, "human effort required": 24137, "multilingual natural language": 36103, "play crucial role": 40966, "language model adaptation": 28152, "training language model": 58145, "work propose model": 62781, "play important role": 40972, "words experimental results": 62414, "performs better state": 40700, "better state art": 6970, "state art task": 52678, "extract useful information": 20001, "large collections documents": 28858, "supervised unsupervised methods": 54068, "translation natural language": 58643, "modern standard arabic": 35721, "standard arabic msa": 52465, "syntactic semantic features": 54324, "question answering systems": 44711, "continuous vector space": 10855, "machine translation techniques": 31388, "pre processing steps": 41511, "lead significant improvements": 29271, "significant improvements accuracy": 50882, "amazon mechanical turk": 2522, "lexical syntactic features": 30390, "close human performance": 8688, "pointwise mutual information": 41084, "mutual information pmi": 36349, "extensive experiments large": 19891, "publicly available datasets": 44341, "level document level": 30104, "distributed word representations": 15630, "representations word embeddings": 46789, "nlp tasks work": 37552, "train word embeddings": 57660, "word embeddings using": 62195, "near state art": 36515, "state art methods": 52630, "applications machine translation": 3220, "text target language": 56804, "morphologically rich language": 35848, "important research area": 24761, "field machine translation": 20760, "plays significant role": 41006, "quality machine translation": 44548, "machine translation paper": 31376, "evaluation machine translation": 18636, "automatic evaluation metrics": 5087, "sentence document level": 49544, "latent variable model": 29144, "experimental results demonstrate": 19281, "results demonstrate effectiveness": 47575, "demonstrate effectiveness method": 13899, "gained increasing attention": 21917, "current state art": 12012, "state art sequence": 52663, "sentences natural language": 49757, "based machine learning": 5827, "paper describes approach": 39320, "machine translation tasks": 31387, "application natural language": 3171, "large high quality": 28885, "level word level": 30232, "language processing including": 28410, "including machine translation": 25273, "outperforming existing methods": 38852, "machine learning techniques": 31335, "used shared task": 60300, "natural language tasks": 36454, "web based application": 61880, "generates natural language": 22350, "task machine translation": 55197, "natural language input": 36429, "sentiment emotion analysis": 49843, "vector space model": 61467, "distributed representation words": 15624, "non trivial task": 37689, "learn text representations": 29438, "multi label classification": 35976, "parallel training data": 39657, "training data languages": 58006, "entity recognition ner": 18129, "able achieve high": 670, "information real world": 26041, "based character based": 5615, "dataset experimental results": 12917, "experimental results proposed": 19305, "paper describes new": 39325, "new freely available": 37213, "large scale multilingual": 28989, "deep neural network": 13739, "neural network dnn": 37004, "different natural languages": 15003, "addition propose new": 1637, "propose new architecture": 43497, "state art entity": 52612, "speech pos tagging": 52279, "text speech synthesis": 56785, "machine translation research": 31381, "using different machine": 60655, "human evaluation automatic": 24143, "word level features": 62230, "best knowledge work": 6775, "weakly supervised learning": 61864, "resource rich language": 47269, "propose new method": 43504, "english german english": 17813, "labeled data used": 27752, "yields significant improvements": 63129, "significant improvements state": 50889, "improvements state art": 25101, "art supervised methods": 4418, "branch natural language": 7301, "integer linear programming": 26500, "information theoretic framework": 26121, "mutual information maximization": 36347, "available real world": 5356, "semantic similarity measures": 49347, "downstream natural language": 16344, "applications question answering": 3240, "paper propose new": 39526, "propose new task": 43513, "proposed method effective": 43817, "competitive state art": 9567, "mapping natural language": 31805, "natural language formal": 36425, "machine translation evaluation": 31357, "evaluation metrics proposed": 18653, "human evaluation metrics": 24150, "machine translation text": 31390, "layer neural network": 29195, "training data learning": 58010, "end end machine": 17649, "model significantly improves": 34381, "significantly improves performance": 50976, "improves performance state": 25143, "phrase based statistical": 40837, "based statistical machine": 6059, "state art technologies": 52682, "language models shown": 28319, "variety nlp tasks": 61286, "similar better performance": 51031, "performance deep learning": 40281, "word embeddings provide": 62185, "problem machine learning": 42601, "machine learning community": 31316, "useful nlp tasks": 60379, "nlp tasks recent": 37546, "tasks recent work": 55838, "recent work shown": 45373, "applications sentiment analysis": 3249, "word level representations": 62236, "learning distributed representations": 29597, "cross lingual document": 11832, "document classification task": 15771, "outperform previous state": 38812, "previous state art": 42283, "multiple language pairs": 36235, "recursive neural network": 45637, "neural network models": 37018, "natural language nl": 36436, "natural language query": 36443, "approach outperforms state": 3630, "outperforms state art": 38947, "machine translation task": 31386, "task recent work": 55322, "word embeddings different": 62164, "labeled data available": 27740, "learn word representations": 29446, "developing natural language": 14659, "online news articles": 38376, "text mining applications": 56661, "large text corpora": 29025, "corpus paper presents": 11399, "model substantially outperforms": 34423, "languages paper present": 28744, "text classification problem": 56481, "text classification algorithms": 56466, "learning based approach": 29530, "sufficient training data": 53807, "previous work shown": 42312, "multi word expressions": 36043, "compare performance different": 9354, "framework experimental results": 21513, "bring significant improvements": 7336, "state art model": 52632, "propose novel method": 43545, "novel method called": 37865, "fine grained semantic": 20942, "results significant improvements": 47845, "significant improvements previous": 50888, "improvements previous state": 25094, "state art tasks": 52679, "experimental evaluation shows": 19263, "evaluation shows proposed": 18723, "proposed method outperforms": 43824, "paper describes method": 39324, "experiments conducted using": 19387, "different language pairs": 14966, "predicate argument structure": 41629, "plays critical role": 40992, "maintaining competitive performance": 31490, "maximum entropy classifier": 31969, "achieves state art": 1379, "language understanding generation": 28550, "point wise mutual": 41053, "wise mutual information": 62083, "standard maximum likelihood": 52503, "hidden markov model": 23642, "provide open source": 44109, "open source implementation": 38451, "achieve significant improvements": 1193, "improvements compared previous": 25059, "compared previous methods": 9435, "task paper presents": 55266, "training machine learning": 58163, "important problem natural": 24755, "graph based approaches": 23103, "performance benchmark datasets": 40212, "data different languages": 12283, "experimental results shown": 19313, "information significantly improves": 26087, "performance propose novel": 40503, "language independent approach": 28106, "resource poor language": 47261, "recent state art": 45348, "perform extensive evaluation": 40106, "using different datasets": 60654, "performs significantly better": 40714, "machine translation language": 31363, "state art chinese": 52593, "approaches machine translation": 3868, "phrase based smt": 40836, "preliminary results using": 41806, "useful natural language": 60377, "graph based methods": 23106, "propose novel unsupervised": 43572, "empirical evaluation shows": 17324, "use training data": 60056, "training data language": 58005, "work explore use": 62659, "leads significant improvement": 29326, "significant improvement performance": 50878, "best reported results": 6813, "different real world": 15045, "text speech tts": 56786, "machine learning classification": 31314, "achieve comparable performance": 1123, "meaning natural language": 32006, "natural language utterances": 36458, "best performing systems": 6800, "automatically generate large": 5175, "et al 2012": 18399, "term frequency inverse": 56237, "frequency inverse document": 21675, "inverse document frequency": 26928, "document frequency tf": 15796, "frequency tf idf": 21678, "processing tasks paper": 42952, "information retrieval natural": 26065, "retrieval natural language": 47960, "natural language modeling": 36434, "information extraction task": 25867, "label classification problem": 27696, "single label multi": 51311, "label multi label": 27717, "area natural language": 4143, "play key role": 40975, "machine learning approaches": 31311, "plays crucial role": 40994, "languages like hindi": 28714, "perform comparative analysis": 40078, "convolutional neural network": 11113, "long range relations": 31022, "binary multi class": 7152, "achieves excellent performance": 1324, "different data sets": 14886, "plays vital role": 41008, "metrics like bleu": 33178, "automatic evaluation results": 5088, "able answer questions": 675, "long standing goal": 31032, "human labeled data": 24187, "stochastic gradient descent": 52856, "followed fine tuning": 21258, "using weak supervision": 61020, "weakly labeled data": 61858, "learn semantic representations": 29419, "models cross lingual": 34876, "document classification tasks": 15772, "prior state art": 42414, "chinese word segmentation": 8327, "method outperforms existing": 32601, "recently neural network": 45443, "neural network based": 36999, "based language models": 5804, "learning word embeddings": 29944, "neural word embeddings": 37113, "achieve state art": 1204, "achieves f1 score": 1326, "f1 score 90": 20213, "significantly better previous": 50942, "provide empirical evidence": 44061, "english second language": 17871, "published state art": 44374, "large scale text": 29003, "paper present novel": 39459, "present novel approach": 41968, "new state art": 37324, "state art performances": 52647, "social media data": 51573, "present case study": 41863, "sentence level text": 49594, "machine learning algorithms": 31308, "bag words models": 5507, "achieve new state": 1175, "art results text": 4389, "results text classification": 47884, "text classification sentiment": 56484, "classification sentiment analysis": 8544, "sentiment analysis tasks": 49830, "tasks paper presents": 55787, "probabilistic language model": 42464, "language model approach": 28155, "word similarity tasks": 62311, "morphologically rich languages": 35849, "paper propose unsupervised": 39539, "propose unsupervised method": 43693, "text data available": 56523, "obtain better results": 38164, "text classification task": 56487, "large unlabeled corpus": 29039, "corpus experimental results": 11338, "performance machine translation": 40428, "language processing tools": 28436, "texts multiple languages": 56905, "yield better results": 63092, "present novel framework": 41972, "present novel algorithm": 41967, "baselines large margin": 6276, "comparable current state": 9295, "supervised learning algorithm": 53994, "time paper propose": 57188, "paper propose novel": 39528, "propose novel neural": 43552, "novel neural network": 37885, "neural network model": 37017, "rnn encoder decoder": 48192, "recurrent neural networks": 45626, "neural networks rnn": 37070, "model jointly trained": 34026, "log linear model": 30974, "proposed model learns": 43853, "based machine translation": 5828, "focus natural language": 21186, "based information extraction": 5784, "specific use cases": 52169, "state art technique": 52680, "automatically extract information": 5169, "information natural language": 25984, "information retrieval text": 26070, "text classification important": 56474, "performance text classification": 40600, "question answering information": 44699, "text classification paper": 56479, "study paper presents": 53427, "paper presents overview": 39480, "syntactic semantic structures": 54327, "markov model hmm": 31848, "language natural language": 28354, "language processing task": 28432, "various real world": 61383, "state art systems": 52676, "knowledge base population": 27408, "word embedding method": 62148, "results proposed method": 47784, "proposed method achieves": 43807, "better results compared": 6957, "word embeddings languages": 62175, "available public use": 5352, "approach does require": 3495, "natural language process": 36439, "large annotated corpora": 28844, "information extraction systems": 25866, "machine learning models": 31326, "recent studies shown": 45353, "mechanical turk amt": 32093, "generate high quality": 22207, "high quality annotations": 23770, "develop novel model": 14609, "using monte carlo": 60814, "results shed light": 47833, "state art models": 52633, "using real world": 60896, "social media corpus": 51572, "different semantic spaces": 15062, "results achieved using": 47487, "experiments neural machine": 19480, "neural machine translation": 36968, "recently proposed approach": 45455, "machine translation neural": 31372, "single neural network": 51326, "neural network jointly": 37010, "encoder decoder architecture": 17496, "parts source sentence": 39909, "existing state art": 19147, "qualitative analysis reveals": 44473, "systems paper propose": 54583, "machine translation model": 31367, "results significant improvement": 47844, "machine translation models": 31368, "machine translation using": 31392, "translating natural language": 58567, "models significantly outperform": 35510, "complex natural language": 9639, "make use unlabeled": 31610, "use unlabeled data": 60065, "extensive empirical analysis": 19863, "approaches mainly focus": 3870, "supervised learning based": 53998, "learning based methods": 29536, "improve classification performance": 24831, "processing nlp tools": 42916, "data used training": 12762, "terms precision recall": 56310, "play vital role": 40981, "labeled unlabeled data": 27771, "classification natural language": 8507, "labeled data expensive": 27741, "processing tasks like": 42950, "paper explores use": 39370, "use machine learning": 59942, "supervised learning methods": 54001, "remains open question": 46344, "using neural network": 60834, "network based models": 36709, "models ability learn": 34651, "leads better performance": 29307, "work paves way": 62746, "experiment results demonstrate": 19249, "applying natural language": 3371, "speech tagging named": 52302, "tagging named entity": 54745, "entity recognition entity": 18127, "number state art": 38039, "improve state art": 24928, "machine translation nmt": 31373, "shown promising results": 50743, "post processing step": 41352, "improvement bleu points": 24992, "based language model": 5803, "language model based": 28156, "model based methods": 33605, "large amounts training": 28841, "amounts training data": 2561, "training data experiments": 57994, "social media texts": 51587, "tasks named entity": 55758, "based supervised machine": 6070, "mikolov et al": 33241, "attention recent years": 4817, "various nlp tasks": 61375, "word embedding models": 62151, "multiple state art": 36290, "logistic regression classifier": 30995, "question answering using": 44715, "neural network cnn": 37003, "experiments demonstrate effectiveness": 19403, "demonstrate effectiveness approach": 13894, "comparison state art": 9507, "labeled training data": 27767, "training data existing": 57991, "important task natural": 24778, "task natural language": 55234, "language processing used": 28439, "large scale applications": 28962, "applications previous studies": 3235, "real world scenarios": 45136, "based deep neural": 5674, "compare different approaches": 9336, "based neural network": 5907, "neural network outperforms": 37020, "results recent years": 47801, "recent years witnessed": 45399, "based question answering": 5971, "question answering named": 44704, "experimental results performance": 19301, "knowledge base completion": 27405, "word embeddings trained": 62191, "present systematic study": 42034, "dirichlet allocation lda": 15345, "presents new approach": 42094, "distributed representations words": 15626, "performance natural language": 40452, "model paper present": 34174, "bag words bow": 5504, "skip gram model": 51420, "word vector representations": 62336, "sentiment classification task": 49834, "features work propose": 20700, "work propose new": 62783, "semantic role labeling": 49338, "unlike previous approaches": 59599, "prior linguistic knowledge": 42406, "language models learn": 28271, "neural language model": 36962, "language model embeddings": 28162, "source target languages": 51803, "neural translation models": 37109, "based word representations": 6140, "propose simple method": 43635, "zero shot experiments": 63162, "experiments cross lingual": 19394, "method does require": 32467, "training neural networks": 58191, "gives state art": 22811, "low dimensional space": 31146, "time consuming human": 57131, "task specific embeddings": 55393, "applications paper propose": 3230, "propose novel approach": 43521, "applying machine learning": 3365, "machine learning method": 31322, "paper propose method": 39521, "effectiveness proposed method": 16806, "computer vision natural": 9896, "vision natural language": 61642, "language processing paper": 28419, "processing paper present": 42919, "model able generate": 33491, "trained convolutional neural": 57697, "achieves comparable results": 1315, "topic natural language": 57420, "processing nlp task": 42912, "achieve good performance": 1147, "zero shot setup": 63179, "nearest neighbor search": 36521, "word sense induction": 62299, "zero shot learning": 63166, "shot learning approach": 50625, "significantly improve performance": 50966, "information paper present": 26001, "commonly used word": 9229, "word level models": 62234, "experimental results approach": 19270, "translation mt systems": 58638, "widely used approach": 62008, "language model lm": 28174, "error rate reduction": 18227, "structure natural language": 53121, "natural language sentence": 36448, "recurrent neural network": 45625, "neural network language": 37011, "network language models": 36755, "training set size": 58251, "word error rates": 62202, "large scale monolingual": 28987, "alleviate data sparsity": 2404, "data sparsity problem": 12685, "experiments chinese english": 19372, "language model significantly": 28194, "model significantly improve": 34380, "improve translation quality": 24936, "target language training": 54828, "high level semantic": 23747, "uses natural language": 60524, "supervised learning approaches": 53997, "language processing research": 28429, "using recurrent neural": 60901, "neural networks long": 37055, "networks long short": 36871, "long short term": 31028, "term memory lstm": 56249, "capture long term": 7696, "long term memory": 31042, "weakly supervised manner": 61865, "significantly outperform existing": 50991, "outperform existing state": 38797, "proposed model generates": 43851, "proposed method paper": 43825, "extend previous work": 19827, "finite state transducers": 21060, "sentence level information": 49588, "sentence level features": 49587, "based convolutional neural": 5651, "neural network learn": 37012, "feed forward neural": 20714, "forward neural network": 21406, "local global information": 30940, "information large scale": 25944, "large scale experiments": 28976, "memory lstm networks": 32266, "sequence modeling tasks": 49956, "data paper propose": 12534, "paper propose non": 39527, "based neural networks": 5908, "performance experimental results": 40336, "machine translation method": 31365, "paper make attempt": 39422, "latent variable models": 29145, "models neural networks": 35255, "conduct extensive experiments": 10052, "methods experimental results": 32851, "results demonstrate proposed": 47582, "demonstrate proposed methods": 13968, "present novel method": 41974, "using state art": 60961, "state art visual": 52690, "deep convolutional neural": 13689, "recently proposed neural": 45458, "devlin et al": 14731, "et al 2014": 18401, "target language model": 54825, "achieving state art": 1425, "proposed model achieve": 43844, "model achieve significant": 33505, "curriculum learning strategy": 12046, "sentence level context": 49582, "results approach significantly": 47507, "approach significantly outperforms": 3694, "significantly outperforms baseline": 50997, "encoder decoder framework": 17500, "empirical study shows": 17351, "machine translation question": 31380, "translation question answering": 58667, "propose new approach": 43496, "wang et al": 61766, "et al 2013": 18400, "based word embedding": 6136, "capture long range": 7695, "long range dependencies": 31020, "propose convolutional neural": 43341, "jensen shannon divergence": 27154, "end end neural": 17656, "end neural network": 17691, "availability high quality": 5250, "low resource language": 31181, "resource language pair": 47236, "high resource languages": 23793, "based finite state": 5738, "previous work neural": 42307, "neural network predict": 37021, "specifically designed task": 52194, "extensive experiments text": 19902, "unstructured textual data": 59675, "word embeddings shown": 62189, "embedding models trained": 17047, "deep neural networks": 13740, "neural networks dnns": 37043, "significant performance gains": 50908, "substantial performance improvements": 53628, "tasks sentiment analysis": 55874, "sentiment analysis question": 49826, "outperforms previous state": 38925, "propose new metric": 43506, "process experimental results": 42779, "low dimensional vector": 31147, "extensive experiments model": 19893, "experiments model achieves": 19468, "model achieves significant": 33524, "achieves significant improvement": 1367, "power law distribution": 41427, "language model propose": 28189, "model propose simple": 34248, "large amounts data": 28834, "low resource languages": 31182, "high quality datasets": 23775, "state art natural": 52638, "art natural language": 4307, "processing nlp systems": 42911, "unsupervised machine learning": 59707, "machine learning methods": 31323, "used large scale": 60225, "qualitative quantitative analysis": 44480, "word representations learned": 62289, "word embedding methods": 62149, "using word embeddings": 61028, "achieve competitive results": 1128, "different word embedding": 15132, "space word embeddings": 51905, "word embeddings use": 62193, "downstream tasks present": 16362, "present new state": 41965, "knowledge base kb": 27407, "paper presents approach": 39469, "neural network rnn": 37024, "leveraging pre trained": 30337, "pre trained embeddings": 41529, "canonical correlation analysis": 7591, "task transfer learning": 55446, "experiments demonstrate proposed": 19406, "experimental results dataset": 19279, "methods paper presents": 32974, "proposed method uses": 43832, "distributed vector representations": 15628, "representations natural language": 46723, "language models task": 28329, "continuous bag words": 10842, "mildly context sensitive": 33246, "f1 score 86": 20210, "wide range domains": 61966, "rise social media": 48156, "sentiment analysis paper": 49825, "based sentence level": 6021, "experimental results using": 19319, "popular social media": 41188, "systems widely used": 54671, "paper presents results": 39482, "correlate human judgments": 11505, "based dependency parsing": 5677, "annotated natural language": 2908, "natural language descriptions": 36419, "generation previous work": 22523, "rule based methods": 48383, "correlates human judgments": 11515, "overview shared task": 39117, "shared task consists": 50495, "deep learning technologies": 13723, "tackle challenges propose": 54701, "challenges propose novel": 8072, "propose novel framework": 43536, "representations experimental results": 46661, "results shown proposed": 47841, "outperform existing methods": 38795, "uses deep learning": 60504, "conducted series experiments": 10094, "bi directional long": 7001, "directional long short": 15282, "memory lstm neural": 32267, "neural networks use": 37077, "approaches paper propose": 3892, "end end framework": 17647, "demonstrate efficacy proposed": 13907, "outperforming state art": 38861, "used end end": 60163, "neural networks model": 37057, "english chinese english": 17785, "learning approach based": 29521, "neural networks dnn": 37042, "gains natural language": 21939, "lstm long short": 31272, "neural models nlp": 36979, "language understanding tasks": 28563, "tasks paper introduce": 55784, "entity recognition sentiment": 18132, "recognition sentiment analysis": 45536, "embeddings improve performance": 17148, "improve performance tasks": 24902, "tasks speech tagging": 55906, "results highlight importance": 47658, "using amazon mechanical": 60557, "relations empirical results": 46025, "gained lot attention": 21919, "evaluation metrics based": 18647, "results compared state": 47550, "compared state art": 9459, "state art word": 52692, "word embedding techniques": 62154, "existing methods using": 19104, "abstract meaning representation": 761, "meaning representation amr": 32013, "improve previous state": 24910, "state art result": 52656, "end end performance": 17657, "based large scale": 5808, "gated recurrent unit": 21997, "multi task objective": 36025, "online social media": 38385, "cover wide range": 11649, "experimental results models": 19295, "models outperform state": 35287, "outperform state art": 38823, "state art baselines": 52587, "neural sequence sequence": 37099, "sequence sequence model": 49990, "based encoder decoder": 5699, "encoder decoder model": 17501, "term memory recurrent": 56252, "memory recurrent neural": 32280, "neural networks lstm": 37056, "natural language instructions": 36430, "achieves best results": 1306, "best results reported": 6818, "semi supervised learning": 49460, "paper addresses problem": 39257, "task specific knowledge": 55397, "neural networks encode": 37045, "sequence based models": 49913, "neural sequence models": 37098, "large training sets": 29032, "training test sets": 58293, "methods improve performance": 32895, "artificial intelligence ai": 4491, "research paper propose": 47089, "shown good performance": 50712, "used machine translation": 60231, "deep learning approach": 13702, "information extraction tasks": 25868, "traditional rule based": 57542, "higher level abstraction": 23831, "dataset containing million": 12866, "multi turn dialogues": 36037, "based neural language": 5903, "large amounts unlabeled": 28842, "amounts unlabeled data": 2563, "deep reinforcement learning": 13747, "reinforcement learning framework": 45871, "learning framework jointly": 29655, "using bag words": 60581, "average f1 score": 5407, "based models neural": 5875, "models neural models": 35253, "rnn language model": 48195, "achieve results comparable": 1187, "results comparable state": 47543, "comparable state art": 9312, "wide variety languages": 61983, "comprehensive survey existing": 9802, "neural network approaches": 36996, "recently achieved state": 45404, "achieved state art": 1274, "long distance dependencies": 31008, "used generate text": 60198, "source language target": 51779, "language target language": 28519, "new natural language": 37267, "extracting relevant information": 20036, "tasks demonstrate effectiveness": 55577, "demonstrate effectiveness proposed": 13902, "paper focus task": 39380, "propose semi supervised": 43620, "semi supervised method": 49462, "demonstrate proposed method": 13967, "online discussion forums": 38363, "community paper propose": 9269, "propose novel word": 43574, "question answering task": 44712, "tasks experimental results": 55630, "languages english spanish": 28657, "method outperforms previous": 32602, "set paper describes": 50212, "performance model trained": 40439, "proposed unsupervised method": 43921, "like word embeddings": 30513, "text mining tasks": 56664, "experimental results effectiveness": 19285, "model state art": 34409, "text classification performance": 56480, "high dimensional data": 23728, "attracting increasing attention": 4894, "deep learning architectures": 13705, "convolutional neural networks": 11114, "machine learning tasks": 31333, "low dimensional representations": 31145, "supervised representation learning": 54036, "representation learning method": 46541, "low dimensional embedding": 31143, "question answer pairs": 44687, "obtains significant improvements": 38257, "transition based parser": 58539, "lstm recurrent neural": 31280, "neural networks learn": 37054, "neural networks cnn": 37039, "long distance dependency": 31009, "pairs paper propose": 39206, "paper propose simple": 39534, "based recurrent neural": 5983, "experiments different datasets": 19415, "rnn based model": 48187, "neural network learns": 37013, "word error rate": 62201, "error rate wer": 18228, "paper presents findings": 39474, "memory lstm based": 32260, "lstm based models": 31247, "sequence tagging models": 50004, "lstm bi lstm": 31251, "bi lstm crf": 7013, "lstm crf model": 31258, "produce state art": 43012, "online social network": 38386, "paper investigate effect": 39410, "sentiment analysis sa": 49828, "text classification using": 56490, "na ive bayes": 36359, "better performance using": 6937, "word representation models": 62285, "yield state art": 63102, "widely used natural": 62019, "language processing problem": 28423, "gradient based methods": 23004, "based methods paper": 5850, "closely related language": 8705, "maximum likelihood training": 31973, "widely used automatic": 62010, "gram based metrics": 23051, "previous studies shown": 42291, "evaluate proposed approach": 18490, "proposed approach using": 43735, "domain specific corpus": 16169, "results sentence level": 47824, "results state art": 47856, "neural networks nlp": 37061, "word embedding model": 62150, "models language models": 35162, "improve model performance": 24873, "tasks including machine": 55679, "neural networks shown": 37072, "shown improve performance": 50721, "paper address issue": 39252, "models machine translation": 35207, "models natural language": 35245, "produce high quality": 42986, "release source code": 46168, "capturing semantic information": 7743, "word embeddings models": 62180, "provide comprehensive overview": 44039, "large scale data": 28969, "data driven methods": 12304, "methods widely used": 33101, "discrete latent variables": 15424, "representation learning methods": 46542, "translation nmt models": 58647, "problem previous work": 42629, "byte pair encoding": 7515, "propose end end": 43370, "model achieves best": 33512, "incorporate prior knowledge": 25362, "rhetorical structure theory": 48089, "based sentiment analysis": 6023, "document level information": 15807, "respectively experimental results": 47367, "achieves consistent improvements": 1321, "state art method": 52628, "data selection strategies": 12634, "data improve performance": 12420, "additional training data": 1707, "perform detailed analysis": 40088, "semantic vector space": 49374, "introduce new evaluation": 26835, "contrast prior work": 10886, "extensive experimental evaluation": 19873, "state art methodologies": 52629, "quality training data": 44591, "training data limited": 58011, "evaluation metrics used": 18655, "performs slightly better": 40717, "perform significantly better": 40141, "bilingual evaluation understudy": 7109, "evaluation understudy bleu": 18744, "text processing tasks": 56713, "training language models": 58146, "language models powerful": 28299, "entities knowledge base": 18061, "propose generative model": 43402, "different data sources": 14887, "previous research shown": 42272, "yielding state art": 63112, "feed forward networks": 20713, "best previously published": 6807, "previously published results": 42344, "paper introduces novel": 39408, "sentence experimental results": 49556, "experimental results large": 19291, "using pre trained": 60866, "neural network extract": 37006, "model achieves performance": 33521, "learn task specific": 29435, "specific word embeddings": 52175, "supervised training data": 54062, "training data new": 58022, "learning neural network": 29779, "word embeddings proposed": 62184, "word frequency word": 62210, "using parallel corpus": 60855, "real world scenario": 45135, "parallel data available": 39646, "using parallel data": 60856, "model achieves state": 33525, "largest publicly available": 29101, "different language models": 14965, "language models including": 28263, "paper present approach": 39444, "sequence generation models": 49927, "trained multiple languages": 57821, "perform error analysis": 40099, "held test set": 23546, "error analysis shows": 18216, "statistically significant improvement": 52772, "bidirectional long short": 7076, "properties natural language": 43269, "pos tagging task": 41235, "set state art": 50253, "rational speech acts": 45027, "use natural language": 59958, "best performance achieved": 6790, "f1 score compared": 20221, "model f1 score": 33875, "linear programming ilp": 30665, "orders magnitude faster": 38671, "sequence sequence learning": 49989, "memory lstm network": 32265, "terms bleu score": 56273, "achieving competitive performance": 1401, "tasks paper explore": 55783, "paper explore different": 39362, "different neural network": 15007, "neural network architectures": 36998, "train deep neural": 57578, "non negative matrix": 37668, "training data usually": 58049, "new approach called": 37133, "approaches typically use": 3946, "language models capture": 28238, "range state art": 44936, "state art neural": 52639, "art neural language": 4311, "art performance paper": 4341, "performance paper propose": 40478, "deep learning framework": 13709, "social media text": 51586, "character level recurrent": 8216, "level recurrent neural": 30188, "available open source": 5338, "world applications including": 62928, "word level information": 62231, "model target language": 34444, "word vectors word": 62341, "character level model": 8213, "word based models": 62119, "visual question answering": 61664, "question answering vqa": 44716, "high level semantics": 23748, "dataset proposed approach": 13040, "recent advances neural": 45286, "deep latent variable": 13699, "models paper introduce": 35300, "question answer pair": 44686, "question answering benchmarks": 44690, "novel attention based": 37771, "attention based neural": 4723, "neural network architecture": 36997, "little training data": 30887, "multi task learning": 36021, "report state art": 46448, "question answering tasks": 44713, "et al 2015": 18402, "capturing fine grained": 7734, "reduce memory footprint": 45670, "state art embedding": 52608, "using large text": 60760, "received attention lately": 45255, "numerous natural language": 38068, "processing nlp tasks": 42913, "word representation learning": 62284, "outperforms previously proposed": 38929, "previously proposed methods": 42342, "obtained state art": 38225, "parallel data training": 39647, "contrast previous work": 10884, "trained language models": 57761, "information language model": 25940, "results fine tuning": 47638, "fine tuning domain": 20985, "propose general framework": 43397, "set training data": 50270, "training data using": 58048, "word embeddings used": 62194, "network based approach": 36704, "approach natural language": 3610, "neural networks used": 37078, "fundamental natural language": 21782, "consider problem learning": 10218, "distribution training data": 15655, "order better understand": 38599, "natural language data": 36416, "unsupervised representation learning": 59725, "sentiment analysis text": 49831, "neural network lstm": 37015, "evaluate proposed architecture": 18491, "achieve excellent performance": 1135, "unlike previous work": 59603, "previous work focused": 42303, "semi supervised framework": 49459, "demonstrate framework outperforms": 13915, "outperforms competitive baselines": 38888, "spoken language translation": 52360, "language translation slt": 28539, "speech recognition systems": 52288, "lstm based model": 31246, "work address problem": 62556, "problem machine translation": 42602, "method significantly outperforms": 32657, "artificial neural networks": 4499, "translation language modeling": 58625, "present novel neural": 41977, "prediction experimental results": 41706, "achieves significant improvements": 1368, "positive negative neutral": 41286, "paper presents end": 39473, "presents end end": 42083, "generative question answering": 22609, "shows proposed model": 50800, "demonstrates proposed model": 14040, "model trained data": 34468, "context propose novel": 10696, "semantically related words": 49389, "human natural language": 24210, "train language models": 57599, "domain adaptation techniques": 16005, "results indicate approach": 47676, "minimum risk training": 33308, "training end end": 58086, "maximum likelihood estimation": 31972, "experiments approach achieves": 19356, "approach achieves significant": 3402, "art neural machine": 4312, "training neural network": 58190, "language models large": 28270, "based semi supervised": 6019, "supervised learning method": 54000, "improve performance downstream": 24889, "performance downstream tasks": 40307, "tagging dependency parsing": 54739, "task fine tuning": 55090, "fine tuning specific": 21019, "specific linguistic features": 52106, "wide variety tasks": 61986, "directed acyclic graph": 15265, "task artificial intelligence": 54915, "avenues future research": 5397, "generation task generating": 22560, "sequence learning problem": 49946, "neural encoder decoder": 36951, "supervised semi supervised": 54040, "semi supervised settings": 49466, "neural networks rnns": 37071, "generate natural language": 22222, "machine translation summarization": 31384, "summarization question answering": 53897, "systems existing methods": 54496, "existing methods typically": 19102, "problem paper propose": 42621, "methods achieve state": 32731, "art results tasks": 4387, "tasks sentiment classification": 55875, "model able capture": 33489, "model outperforms previous": 34163, "models source code": 35522, "sentence level semantics": 49592, "experimental results model": 19294, "results model outperforms": 47724, "model outperforms existing": 34162, "approaches large margin": 3855, "sequence sequence neural": 49992, "information source target": 26096, "source target sentences": 51805, "german english french": 22667, "noisy training data": 37627, "using different types": 60656, "performance wide range": 40628, "word embeddings learned": 62177, "bengali english hindi": 6594, "english hindi english": 17822, "english language pairs": 17834, "challenge paper propose": 8005, "tasks language modeling": 55709, "state art large": 52624, "art large margin": 4274, "slot filling task": 51442, "state art f1": 52615, "f1 score 95": 20217, "language training data": 28536, "significantly better performance": 50941, "multi source transfer": 36007, "large number people": 28925, "propose novel deep": 43531, "neural network used": 37030, "proposed approach significantly": 43733, "approach significantly improves": 3693, "training semi supervised": 58245, "performance machine learning": 40427, "state art nlp": 52641, "evaluate quality generated": 18496, "machine learning tools": 31337, "training data data": 57984, "paper propose framework": 39511, "arora et al": 4194, "et al 2016": 18403, "gold standard reference": 22920, "improves translation quality": 25167, "effectiveness proposed methods": 16807, "achieved high accuracy": 1240, "open domain question": 38425, "domain question answering": 16142, "probabilistic context free": 42456, "present new method": 41964, "using synthetic data": 60975, "analysis natural language": 2704, "natural language inference": 36428, "social networking sites": 51596, "various machine learning": 61359, "vocabulary oov words": 61709, "propose neural network": 43491, "model large scale": 34042, "shared embedding space": 50467, "new evaluation method": 37193, "downstream tasks text": 16368, "neural networks natural": 37060, "networks natural language": 36880, "achieve similar performance": 1199, "electronic health records": 16968, "knowledge graph completion": 27498, "multiple choice questions": 36182, "information extraction question": 25865, "answer given question": 3037, "outperforms strong baselines": 38950, "fine grained entity": 20934, "grained entity typing": 23033, "existing knowledge bases": 19079, "define new task": 13777, "entity mentions text": 18121, "datasets demonstrate effectiveness": 13212, "demonstrate effectiveness robustness": 13903, "network language model": 36754, "apply natural language": 3341, "text paper present": 56690, "nlp tasks word": 37551, "like question answering": 30495, "results demonstrate superiority": 47585, "small labeled data": 51478, "labeled data unlabeled": 27751, "data unlabeled data": 12754, "experimental results datasets": 19280, "linear discriminant analysis": 30656, "nlp tasks paper": 37543, "reach state art": 45054, "sentiment analysis task": 49829, "challenges natural language": 8063, "languages work investigate": 28823, "tasks information extraction": 55687, "embeddings trained large": 17234, "trained large corpus": 57767, "relative error reduction": 46095, "performance cross domain": 40271, "task named entity": 55232, "languages french german": 28678, "outperforms previously published": 38930, "present novel methods": 41975, "case study use": 7801, "use multi task": 59954, "based proposed method": 5960, "long term dependencies": 31039, "neural models learn": 36977, "models learn representations": 35177, "yield significant improvements": 63099, "languages paper propose": 28746, "paper propose neural": 39525, "using character based": 60602, "based attention based": 5580, "attention based bidirectional": 4717, "bidirectional recurrent neural": 7082, "neural network proposed": 37023, "task work present": 55473, "work present novel": 62757, "publicly available pre": 44350, "available pre trained": 5345, "pre trained word": 41565, "trained word vectors": 57918, "word vector space": 62337, "dialogue state tracking": 14785, "convolution neural network": 11097, "word embeddings sentence": 62188, "consistently outperforms baseline": 10302, "outperforms baseline models": 38871, "sequence tagging tasks": 50007, "classification task using": 8568, "achieves competitive performance": 1318, "methods state art": 33052, "social media posts": 51584, "present novel unsupervised": 41980, "model does require": 33786, "recent research shown": 45342, "existing methods require": 19100, "large labeled data": 28894, "hand crafted features": 23387, "comparable performance state": 9304, "languages english chinese": 28653, "unsupervised domain adaptation": 59695, "use social media": 60021, "social media twitter": 51588, "random forest classifier": 44876, "state art accuracy": 52576, "short text classification": 50572, "art results different": 4376, "results different datasets": 47593, "pretrained word embeddings": 42195, "art performance tasks": 4347, "models present novel": 35347, "correlation human ratings": 11525, "outperforming previous state": 38857, "previous best model": 42249, "information propose novel": 26031, "propose novel end": 43534, "novel end end": 37815, "model attention mechanism": 33586, "words source sentence": 62518, "dataset demonstrate proposed": 12886, "demonstrate proposed model": 13969, "compares favorably state": 9477, "favorably state art": 20457, "achieves results comparable": 1360, "improving state art": 25198, "recognizing textual entailment": 45560, "recurrent attention based": 45610, "based neural models": 5906, "open source software": 38458, "set new state": 50200, "gated recurrent units": 21998, "model multi task": 34110, "task cross lingual": 54987, "art results multiple": 4379, "training improve performance": 58126, "problem sequence sequence": 42649, "sequence sequence seq2seq": 49996, "propose new model": 43508, "new model called": 37260, "encoder decoder structure": 17507, "world data sets": 62933, "feature selection methods": 20503, "state art languages": 52623, "domain natural language": 16118, "advanced state art": 1894, "state art various": 52689, "tasks paper propose": 55788, "neural word embedding": 37112, "present experimental results": 41910, "state art graph": 52620, "graph based approach": 23102, "language generation tasks": 28090, "natural language structured": 36451, "wide range applications": 61965, "machine learning researchers": 31331, "question generation model": 44730, "long standing problem": 31033, "language processing recently": 28427, "promising results using": 43183, "sequence learning neural": 49945, "lack training data": 27923, "demonstrate state art": 13978, "semantic textual similarity": 49366, "textual similarity sts": 56982, "semantic information encoded": 49285, "data freely available": 12374, "different methods proposed": 14990, "opinions social media": 38507, "large collection documents": 28856, "training data scarce": 58033, "using reinforcement learning": 60904, "significantly outperforms traditional": 51009, "logistic regression model": 30996, "experimental results suggest": 19317, "deep neural model": 13737, "model trained using": 34478, "multilayer perceptron mlp": 36060, "features fine tuning": 20588, "fine tuning method": 20998, "benchmark data set": 6441, "domain specific language": 16179, "goal natural language": 22893, "word level sentence": 62237, "word embedding representations": 62152, "limited training data": 30628, "model sets new": 34367, "sets new state": 50299, "trained end end": 57721, "outperforms prior work": 38934, "language models lms": 28280, "neural network nn": 37019, "introduce new task": 26839, "wide range nlp": 61973, "range nlp tasks": 44927, "network based approaches": 36705, "address challenge propose": 1745, "propose new deep": 43501, "leverage pre trained": 30283, "trained word embedding": 57916, "learn high level": 29379, "results demonstrate approach": 47574, "demonstrate approach consistently": 13868, "approach consistently outperforms": 3467, "consistently outperforms existing": 10304, "outperforms existing methods": 38899, "datasets natural language": 13342, "machine translation methods": 31366, "present neural network": 41957, "network attention mechanism": 36702, "noisy user generated": 37629, "user generated text": 60419, "language model method": 28176, "method achieves state": 32367, "unsupervised semi supervised": 59729, "semi supervised methods": 49463, "supervised methods require": 54019, "training data achieve": 57969, "fine grained analysis": 20926, "modern nlp models": 35717, "models rely heavily": 35431, "tasks relation extraction": 55846, "language generation model": 28084, "achieves new state": 1350, "bag words model": 5506, "real world knowledge": 45131, "word embedding vectors": 62155, "data set contains": 12643, "empirical results approach": 17339, "results approach outperforms": 47506, "nlp tasks like": 37538, "character level word": 8218, "low dimensional vectors": 31148, "novel approach automatic": 37758, "language paper present": 28370, "character level neural": 8215, "deep language understanding": 13697, "baselines state art": 6303, "language understanding paper": 28556, "generation natural language": 22504, "trained large text": 57771, "effective low resource": 16668, "resource languages present": 47244, "transfer learning method": 58384, "method significantly improves": 32656, "high resource language": 23792, "using transfer learning": 60998, "resource language pairs": 47237, "performance low resource": 40424, "low resource machine": 31183, "resource machine translation": 47253, "transfer learning model": 58386, "state art low": 52626, "word embeddings improve": 62171, "neural network classifiers": 37002, "based long short": 5822, "knowledge graph construction": 27499, "answering information retrieval": 3076, "experimental results prove": 19306, "analysis topic modeling": 2783, "hindi english code": 23940, "english code mixed": 17787, "best knowledge attempt": 6769, "available research community": 5360, "novel multi task": 37879, "model outperforms state": 34166, "recently deep learning": 45415, "attention based encoder": 4719, "encoder decoder models": 17502, "model sentence level": 34354, "knowledge graph embedding": 27500, "semeval 2016 task": 49429, "neural networks based": 37037, "performance various natural": 40623, "processing tasks especially": 42947, "used deep learning": 60142, "deep learning methods": 13712, "end end learning": 17648, "term memory bi": 56245, "memory bi lstm": 32244, "bi lstm model": 7015, "model obtains state": 34141, "obtains state art": 38259, "training data size": 58038, "performance proposed method": 40507, "semi supervised approach": 49456, "deep learning based": 13706, "results approach achieves": 47503, "knowledge base construction": 27406, "learning vector representations": 29937, "inter rater agreement": 26584, "memory neural network": 32277, "sentence pair classification": 49610, "tasks textual entailment": 55934, "allocation lda topic": 2432, "dataset proposed method": 13041, "method outperforms state": 32605, "art methods terms": 4289, "averaged f1 score": 5422, "memory lstm language": 32262, "lstm language model": 31266, "network end end": 36739, "end end fashion": 17646, "significantly improving performance": 50981, "paper propose algorithm": 39493, "analysis social media": 2762, "important challenging task": 24708, "long range context": 31019, "context experimental results": 10634, "previous work mainly": 42305, "work mainly focused": 62719, "significantly outperforms state": 51007, "based neural machine": 5904, "experiments large scale": 19455, "large scale chinese": 28964, "model experimental results": 33851, "results large scale": 47695, "large scale english": 28974, "method achieves better": 32362, "models recurrent neural": 35417, "models trained predict": 35616, "extrinsic evaluation downstream": 20171, "variety downstream tasks": 61270, "compared previous state": 9437, "feedforward neural network": 20720, "semantics natural language": 49409, "lack large scale": 27902, "paper present large": 39454, "present large scale": 41937, "language model score": 28193, "beam search decoder": 6367, "natural language question": 36444, "strong baselines demonstrate": 53011, "automatic post editing": 5115, "machine translation output": 31375, "submitted shared task": 53588, "data paper present": 12532, "based recent advances": 5980, "recent advances deep": 45282, "including social media": 25301, "social media platforms": 51582, "media platforms twitter": 32178, "class classification problem": 8396, "semantic syntactic features": 49359, "method achieved state": 32359, "f1 score 70": 20202, "present novel model": 41976, "multiple data sets": 36191, "sentence paper propose": 49614, "paper propose deep": 39503, "datasets demonstrate efficacy": 13213, "processing social media": 42939, "human annotated data": 24097, "train machine learning": 57604, "machine learning classifiers": 31315, "word2vec word embeddings": 62353, "experiments proposed method": 19496, "proposed method achieve": 43805, "bleu points improvement": 7210, "forward neural networks": 21407, "develop novel approach": 14608, "extraction experimental results": 20066, "experimental results semeval": 19310, "yields better performance": 63118, "model trained end": 34470, "large scale training": 29004, "english german translation": 17817, "different levels granularity": 14978, "obtain new state": 38182, "standard test set": 52533, "accuracy precision recall": 1027, "precision recall f1": 41617, "standard evaluation metrics": 52491, "neural networks models": 37058, "models trained using": 35624, "machine translation image": 31360, "evaluation metrics bleu": 18648, "metrics bleu meteor": 33145, "experimental results indicate": 19289, "results indicate model": 47677, "paper study problem": 39580, "novel attention mechanism": 37772, "attention mechanism based": 4774, "art results benchmarks": 4372, "cnn daily mail": 8766, "code available https": 8794, "tasks best knowledge": 55525, "best knowledge time": 6774, "stanford natural language": 52557, "language inference snli": 28112, "obtain state art": 38194, "order magnitude fewer": 38634, "knowledge base propose": 27409, "deep recurrent neural": 13745, "outperforms current state": 38892, "low frequency words": 31152, "size training data": 51400, "training data english": 57989, "verify effectiveness proposed": 61538, "play significant role": 40979, "state art wide": 52691, "report new state": 46440, "automatic manual evaluation": 5104, "training test data": 58291, "study different types": 53362, "sequence labeling tasks": 49940, "tasks propose new": 55819, "tasks sequence labeling": 55878, "spoken language understanding": 52361, "penn treebank ptb": 40025, "word embeddings model": 62179, "character level models": 8214, "correlate better human": 11502, "better human judgments": 6900, "domain test set": 16206, "real world deployment": 45127, "recently achieved impressive": 45403, "achieved impressive results": 1246, "attentional encoder decoder": 4858, "deep neural language": 13736, "neural language modeling": 36963, "attention based seq2seq": 4724, "sequence sequence tasks": 49998, "paper propose use": 39540, "reinforcement learning rl": 45877, "context aware word": 10592, "remains challenging task": 46329, "pre trained models": 41545, "f1 score 97": 20218, "term memory network": 56250, "language specific features": 28496, "hand crafted rules": 23389, "neural models task": 36982, "model training data": 34480, "train sequence sequence": 57631, "semantic parsing datasets": 49311, "art performance standard": 4345, "task reinforcement learning": 55327, "bi directional lstm": 7002, "outperforms previous work": 38927, "language modeling task": 28219, "aspect sentiment analysis": 4534, "manually annotated test": 31761, "translation model trained": 58631, "active learning al": 1474, "neural networks cnns": 37040, "propose simple approach": 43632, "empirical results method": 17343, "results method outperforms": 47717, "method outperforms baseline": 32598, "models text classification": 35596, "produces state art": 43036, "especially low resource": 18286, "source target target": 51806, "target target source": 54846, "target language source": 54827, "language source language": 28491, "dataset approach achieves": 12813, "model natural language": 34118, "language generation nlg": 28086, "encoder decoder architectures": 17497, "training data available": 57978, "data available training": 12180, "available training data": 5380, "end end method": 17651, "art large scale": 4275, "deep learning architecture": 13704, "introduce new method": 26837, "user generated content": 60416, "labeled data paper": 27743, "data paper presents": 12533, "trained neural language": 57826, "language models generate": 28257, "model achieves f1": 33516, "features neural network": 20629, "wide range linguistic": 61971, "state art language": 52622, "art language models": 4270, "context natural language": 10679, "present generative model": 41921, "semantic parsing approach": 49309, "generalize new domains": 22145, "word level embeddings": 62229, "multi class multi": 35947, "class multi label": 8408, "work propose use": 62790, "end end speech": 17662, "wall street journal": 61763, "street journal corpus": 52969, "previous work relied": 42311, "recurrent convolutional neural": 45612, "bi directional recurrent": 7004, "question answering model": 44702, "propose novel semi": 43560, "novel semi supervised": 37916, "model trained large": 34473, "learning based models": 29538, "based models proposed": 5879, "despite recent progress": 14384, "large data set": 28866, "model takes input": 34441, "model generalization ability": 33922, "achieving competitive results": 1402, "structures natural language": 53190, "supervised learning unsupervised": 54010, "learning unsupervised learning": 29926, "art results various": 4391, "novel unsupervised framework": 37947, "chen et al": 8285, "et al 2011": 18398, "baseline model trained": 6186, "times fewer parameters": 57251, "small training dataset": 51508, "feedforward neural networks": 20721, "dense vector representations": 14085, "use language models": 59924, "language models model": 28284, "results indicate proposed": 47679, "indicate proposed model": 25534, "outperforming current state": 38850, "large margin paper": 28906, "models state art": 35534, "introduce neural network": 26830, "best results obtained": 6817, "hand crafted feature": 23386, "design end end": 14280, "outperforms previous approaches": 38920, "manual feature engineering": 31742, "evaluate performance proposed": 18484, "performance proposed model": 40508, "network rnn models": 36799, "pre defined categories": 41500, "simple neural network": 51200, "rule based approaches": 48380, "large body work": 28853, "providing fine grained": 44245, "task propose novel": 55302, "novel deep learning": 37803, "deep learning model": 13713, "domain adaptation method": 15999, "supervised domain adaptation": 53981, "performance target domain": 40592, "source domain target": 51767, "domain target domain": 16200, "neural networks trained": 37075, "trained cross entropy": 57701, "cross entropy loss": 11824, "methods paper describes": 32973, "semantically similar words": 49394, "bag words representation": 5508, "paper introduce novel": 39403, "introduce novel approach": 26843, "exceeds state art": 18951, "low resource settings": 31191, "large training data": 29030, "large scale knowledge": 28981, "scale knowledge bases": 48582, "knowledge bases kbs": 27415, "state art code": 52597, "code data available": 8799, "data available https": 12177, "paper propose effective": 39506, "translation nmt model": 58646, "translation nmt systems": 58648, "developed machine learning": 14632, "reading comprehension datasets": 45082, "reading comprehension task": 45086, "training data required": 58031, "enable zero shot": 17433, "words training data": 62536, "social media messages": 51579, "achieving new state": 1416, "tasks word embeddings": 55966, "systems natural language": 54568, "active learning framework": 1475, "results demonstrate significant": 47583, "demonstrate significant improvements": 13974, "recently proposed methods": 45457, "significantly improved performance": 50971, "recently neural networks": 45444, "neural networks proven": 37066, "effective natural language": 16679, "analysis question answering": 2737, "question answering machine": 44701, "answering machine translation": 3080, "attention mechanism applied": 4772, "model achieved state": 33509, "art performance different": 4333, "different nlp tasks": 15010, "state art recurrent": 52653, "recursive neural networks": 45638, "words paper propose": 62476, "new approach based": 37132, "appear training data": 3140, "training data evaluate": 57990, "promising research directions": 43177, "bilingual word embeddings": 7116, "imitation learning framework": 24578, "small number parameters": 51490, "model jointly learns": 34025, "task multi label": 55226, "results natural language": 47737, "results demonstrate efficacy": 47576, "free form text": 21641, "data driven approach": 12300, "publicly available present": 44351, "novel method generating": 37866, "using character level": 60603, "lstm encoder decoder": 31261, "languages paper presents": 28745, "proposed method evaluated": 43819, "data sets results": 12652, "development machine learning": 14684, "achieve human like": 1160, "present preliminary results": 41988, "state art traditional": 52685, "current language models": 11982, "knowledge language model": 27541, "state art cnn": 52596, "sentence classification tasks": 49527, "benchmark datasets experimental": 6452, "datasets experimental results": 13264, "nlp natural language": 37505, "corpus available research": 11285, "recently gained popularity": 45428, "word embeddings directly": 62165, "dependency parse tree": 14126, "approach compared state": 3456, "state art statistical": 52671, "used nlp tasks": 60253, "high computational cost": 23715, "high quality human": 23779, "nlp research community": 37522, "language models paper": 28292, "models paper presents": 35303, "high dimensional spaces": 23730, "source target language": 51802, "target language paper": 54826, "language paper propose": 28371, "paper propose approach": 39495, "based word embeddings": 6137, "word embedding based": 62144, "embedding based approach": 17015, "word mover distance": 62250, "mean average precision": 31990, "real world dataset": 45125, "conduct experiments english": 10046, "experiments english chinese": 19428, "proposed model achieves": 43846, "existing large scale": 19084, "native english speakers": 36401, "gap human performance": 21964, "representation learning models": 46544, "character recognition ocr": 8222, "domain adaptation approaches": 15996, "transfer knowledge learned": 58370, "task paper explore": 55263, "explore multi task": 19718, "learning domain adaptation": 29605, "neural network framework": 37007, "language understanding work": 28564, "word vectors trained": 62340, "trained source language": 57879, "trained target language": 57890, "target language using": 54829, "large scale natural": 28991, "scale natural language": 48604, "language understanding task": 28562, "task publicly available": 55312, "publicly available dataset": 44340, "structured knowledge base": 53160, "end end models": 17654, "models deep neural": 34890, "extraction question answering": 20101, "question answering models": 44703, "best performing model": 6797, "model word level": 34541, "word level sequence": 62238, "state art automatic": 52584, "propose novel strategy": 43564, "language processing analyze": 28396, "fixed length vectors": 21077, "averaging word vectors": 5429, "tasks pre training": 55804, "analysis sheds light": 2756, "human language processing": 24193, "neural network trained": 37029, "paper propose efficient": 39507, "character based models": 8196, "based semantic similarity": 6016, "semantic similarity metrics": 49348, "open information extraction": 38434, "train test models": 57647, "introduce simple effective": 26861, "simple effective framework": 51153, "beam search algorithm": 6366, "reinforcement learning model": 45874, "language processing existing": 28407, "experiment results model": 19251, "difficult time consuming": 15190, "propose attention based": 43302, "attention based multi": 4722, "word level attention": 62226, "sentence level attention": 49580, "language modeling approach": 28206, "nlp models trained": 37502, "svm logistic regression": 54236, "conduct case study": 10030, "capture semantic meaning": 7708, "task automatically generating": 54924, "using sequence sequence": 60932, "correlation human judgments": 11524, "models pre trained": 35339, "competitive performance compared": 9554, "performance compared state": 40255, "significantly outperforms baselines": 50998, "level convolutional neural": 30090, "conneau et al": 10171, "language models based": 28234, "processing tasks word": 42954, "experimental results popular": 19302, "language processing work": 28441, "aspect based sentiment": 4528, "results competitive state": 47554, "state art multilingual": 52636, "multi domain datasets": 35955, "hand engineered features": 23393, "error analysis reveals": 18215, "order address issues": 38592, "address issues paper": 1771, "achieves competitive results": 1319, "supervised text classification": 54059, "text classification methods": 56475, "lack parallel data": 27906, "limited parallel data": 30603, "using attention based": 60567, "neural network encoder": 37005, "encoder decoder attention": 17498, "proposed encoder decoder": 43763, "model able achieve": 33488, "achieve significantly higher": 1197, "train neural network": 57618, "github com facebookresearch": 22700, "reduce training time": 45683, "supervised learning setting": 54005, "new york times": 37365, "based transfer learning": 6105, "work present new": 62756, "systems built using": 54445, "data make use": 12478, "neural end end": 36953, "models paper present": 35302, "encoder decoder neural": 17505, "decoder neural network": 13606, "sentences paper propose": 49763, "sentence extensive experiments": 49559, "extensive experiments demonstrate": 19885, "experiments demonstrate method": 19404, "demonstrate method significantly": 13937, "method significantly improve": 32655, "existing approaches propose": 19029, "fine grained word": 20945, "significantly outperforms prior": 51006, "results demonstrate method": 47578, "demonstrate method outperforms": 13936, "method outperforms baselines": 32599, "propose new evaluation": 43502, "new dataset containing": 37166, "rely hand crafted": 46284, "crafted features domain": 11680, "features domain specific": 20564, "domain specific resources": 16183, "text experimental results": 56565, "extremely low resource": 20164, "low resource scenario": 31188, "model performs significantly": 34206, "utilize pre trained": 61101, "trained word embeddings": 57917, "usage social media": 59808, "task sentiment analysis": 55361, "training development test": 58064, "report baseline results": 46428, "precision recall measure": 41618, "art results natural": 4380, "textual visual information": 56987, "joint representation learning": 27188, "fine tuning parameters": 21007, "model evaluate approach": 33841, "approach outperforms existing": 3626, "existing generative models": 19073, "approach makes use": 3597, "neural networks achieved": 37034, "present empirical study": 41899, "sentence classification task": 49526, "classification task experimental": 8563, "task experimental results": 55066, "achieves best performance": 1305, "end propose novel": 17702, "propose novel attention": 43524, "proposed methods significantly": 43838, "methods significantly improve": 33040, "significantly improve state": 50969, "state art sentence": 52661, "based generative model": 5751, "using parallel corpora": 60854, "models shown great": 35498, "shown great success": 50716, "sequence generation tasks": 49929, "tasks previous work": 55811, "paper propose methods": 39522, "experiments state art": 19533, "model performs better": 34203, "development test sets": 14707, "large unlabeled data": 29040, "proposed method performs": 43826, "method performs better": 32614, "using unlabeled data": 61010, "decision making processes": 13565, "advances natural language": 1917, "challenging task paper": 8151, "use deep neural": 59865, "task propose new": 55301, "classification social media": 8555, "data sets demonstrate": 12647, "word embeddings based": 62158, "task information extraction": 55138, "state art average": 52585, "make use data": 31607, "systems recent years": 54614, "standard machine learning": 52500, "augmenting training data": 4990, "neural network approach": 36995, "feature based models": 20478, "selection experimental results": 49138, "baselines achieves state": 6229, "task automatically identifying": 54925, "language processing models": 28417, "task multi task": 55227, "models significantly better": 35507, "models trained text": 35623, "grounded language learning": 23262, "research natural language": 47078, "glove word embeddings": 22861, "recurrent unit gru": 45628, "network based model": 36708, "using external resources": 60689, "state art parsers": 52645, "models recently shown": 35413, "end end trainable": 17666, "learned end end": 29459, "using open source": 60849, "achieved great success": 1238, "address problem present": 1788, "models publicly available": 35387, "sentiment analysis datasets": 49820, "large number training": 28927, "work present approach": 62753, "open source framework": 38450, "language speech processing": 28500, "gaussian mixture models": 22015, "fewer model parameters": 20738, "using small number": 60948, "number model parameters": 38019, "training data paper": 58023, "memory augmented neural": 32241, "models achieve state": 34673, "open ended questions": 38431, "remains challenging problem": 46328, "linguistics cognitive science": 30821, "significantly improves baseline": 50974, "deep learning approaches": 13703, "lack annotated data": 27873, "data improve model": 12419, "model performance paper": 34192, "performance paper presents": 40477, "english indian languages": 17825, "results proposed models": 47786, "proposed models outperform": 43863, "models outperform existing": 35285, "achieve fine grained": 1141, "make code publicly": 31552, "code publicly available": 8851, "data text generation": 12731, "2016 shared task": 260, "results shared task": 47831, "entity relation extraction": 18140, "poses unique challenges": 41256, "experiments public datasets": 19503, "public datasets demonstrate": 44315, "improvement f1 score": 25006, "develop simple effective": 14613, "simple effective model": 51156, "outperform current state": 38790, "sequence sequence models": 49991, "improves state art": 25164, "log linear models": 30975, "solve problem propose": 51686, "end end approach": 17636, "multiple choice question": 36181, "template based method": 56175, "attains state art": 4676, "character level language": 8212, "outperforms previous best": 38921, "previous best results": 42252, "machine translation english": 31356, "models based recurrent": 34758, "domain specific knowledge": 16178, "released open source": 46178, "using policy gradient": 60862, "policy gradient methods": 41096, "sub word level": 53538, "detection natural language": 14506, "problem propose new": 42632, "fixed size representation": 21081, "paper presents empirical": 39472, "experimental results neural": 19298, "transfer learning settings": 58395, "gold standard dataset": 22918, "nlp tasks including": 37535, "paper present general": 39451, "using convolutional neural": 60629, "loss function based": 31094, "based models outperform": 5876, "tasks work propose": 55972, "work propose novel": 62784, "novel framework called": 37827, "results demonstrate model": 47579, "demonstrate model achieves": 13941, "tasks including sentiment": 55682, "including sentiment analysis": 25298, "supervised learning framework": 53999, "learning framework called": 29654, "make publicly available": 31591, "bi directional lstms": 7003, "sentence paper present": 49613, "achieve competitive accuracy": 1126, "accuracy state art": 1053, "natural language task": 36453, "tasks work present": 55971, "term memory networks": 56251, "sentence level tasks": 49593, "cnn based models": 8761, "approach achieving state": 3405, "previously reported results": 42347, "use recurrent neural": 59991, "decoder experimental results": 13592, "outperforms prior methods": 38932, "state art pre": 52649, "pre training methods": 41583, "yields better results": 63119, "better results using": 6960, "manually annotated data": 31758, "models widely used": 35682, "accuracy paper describes": 1020, "entity linking el": 18116, "achieved f1 score": 1232, "data significantly outperforms": 12663, "gold standard datasets": 22919, "obtain competitive results": 38168, "training data approach": 57975, "approach cross lingual": 3474, "joint learning model": 27177, "results model significantly": 47725, "consistently improve performance": 10295, "neural models achieved": 36974, "work introduce new": 62692, "structured prediction tasks": 53171, "experiments model outperforms": 19469, "use word embeddings": 60075, "models propose novel": 35371, "propose novel architecture": 43523, "using attention mechanism": 60568, "attention mechanism model": 4778, "word character level": 62125, "number trainable parameters": 38048, "propose knowledge enhanced": 43430, "hybrid neural network": 24322, "recurrent units gru": 45630, "neural network generate": 37008, "high level features": 23746, "extensive experiments public": 19898, "public data sets": 44312, "significantly outperform state": 50992, "advances representation learning": 1924, "propose neural architecture": 43489, "lstm attention mechanism": 31242, "model end end": 33822, "end end differentiable": 17643, "dataset proposed model": 13042, "model achieves better": 33513, "question answering paper": 44706, "research social media": 47122, "previous works mainly": 42320, "models paper propose": 35304, "paper propose end": 39508, "error propagation problem": 18225, "utilize contextual information": 61088, "information experimental results": 25848, "results effectiveness proposed": 47604, "effectiveness proposed model": 16808, "language state art": 28504, "achieving best performance": 1395, "rapid development deep": 44989, "development deep learning": 14675, "experiments sentiment analysis": 19520, "existing models usually": 19109, "models achieve excellent": 34670, "fine grained classification": 20930, "news social media": 37415, "tasks existing methods": 55627, "gives best results": 22804, "highly competitive results": 23885, "widely used various": 62023, "address issues present": 1772, "new encoder decoder": 37184, "using long short": 60779, "hierarchical encoder decoder": 23668, "experimental results benchmark": 19272, "results benchmark datasets": 47522, "demonstrate superiority proposed": 13988, "superiority proposed model": 53956, "models outperform baseline": 35284, "received increasing attention": 45259, "increasing attention recent": 25445, "recent years number": 45390, "time consuming expensive": 57130, "utilize unlabeled data": 61106, "binary classification problem": 7146, "introduce attention based": 26783, "capture long distance": 7694, "experiments proposed approach": 19494, "use reinforcement learning": 59993, "performance downstream task": 40306, "previous sequence sequence": 42277, "sequence seq2seq models": 49977, "data conduct extensive": 12237, "connectionist temporal classification": 10183, "achieve high accuracy": 1151, "semantic representation text": 49334, "long range dependency": 31021, "evaluate effectiveness proposed": 18455, "large amounts annotated": 28833, "labeled data training": 27750, "entity recognition models": 18128, "annotated training data": 2926, "capture domain specific": 7667, "use domain specific": 59871, "models outperform previous": 35286, "rule based statistical": 48386, "corpus paper propose": 11400, "propose new neural": 43510, "new neural network": 37270, "tasks previous works": 55812, "semantic parsing dataset": 49310, "compared previous approaches": 9434, "challenging task requires": 8154, "structured knowledge bases": 53161, "proposed model outperforms": 43855, "models improved performance": 35113, "using bidirectional encoder": 60595, "rich languages english": 48108, "representation paper propose": 46565, "novel neural architecture": 37883, "based active learning": 5557, "active learning methods": 1476, "models propose new": 35370, "present training data": 42044, "text classification model": 56476, "methods text classification": 33073, "machine learning language": 31321, "community question answering": 9273, "paper address problem": 39253, "multi turn dialogue": 36036, "classification real world": 8530, "demonstrate method achieves": 13935, "method achieves best": 32361, "specific linguistic phenomena": 52107, "pair encoding bpe": 39150, "compared previous models": 9436, "english social media": 17875, "social media websites": 51592, "plays key role": 41002, "achieve high performance": 1152, "tasks results indicate": 55864, "neural networks attention": 37036, "use external resources": 59888, "set linguistic features": 50186, "web based tool": 61881, "social media users": 51590, "sequence labeling framework": 49934, "proposed model obtains": 43854, "natural language systems": 36452, "large amounts text": 28840, "models successfully applied": 35554, "non parallel data": 37675, "models better capture": 34782, "better capture semantic": 6859, "capture semantic information": 7707, "semantic information word": 49288, "pairs experimental results": 39188, "character based neural": 8197, "language model used": 28202, "multilingual language models": 36090, "language models language": 28268, "generating natural language": 22384, "received lot attention": 45263, "used social media": 60307, "social media content": 51571, "paper propose general": 39512, "multiple nlp tasks": 36257, "neural models paper": 36980, "coarse grained fine": 8785, "grained fine grained": 23036, "f1 score 76": 20207, "demonstrate approach outperforms": 13870, "growth social media": 23311, "new loss function": 37242, "natural language paper": 36437, "investigate state art": 26987, "based sequence sequence": 6028, "approach achieves good": 3399, "achieves good results": 1329, "machine learning based": 31313, "annotated data set": 2886, "task learning mtl": 55177, "propose novel decoding": 43530, "using gradient based": 60712, "left right right": 30001, "right right left": 48143, "produced state art": 43022, "entity recognition speech": 18133, "based skip gram": 6040, "language model using": 28203, "neural network long": 37014, "network long short": 36762, "general language model": 22065, "transfer learning methods": 58385, "model neural network": 34124, "based models different": 5870, "language understanding nlu": 28555, "understanding nlu tasks": 59377, "sequence labeling problem": 49937, "propose alternative approach": 43291, "art performance text": 4349, "large parallel corpus": 28934, "results language pairs": 47692, "paper present comprehensive": 39446, "address problem propose": 1789, "knowledge target language": 27625, "models significantly improve": 35508, "open domain dialogue": 38422, "summarization machine translation": 53889, "hate speech detection": 23481, "fields natural language": 20784, "language processing recent": 28426, "recent years research": 45394, "apply state art": 3354, "state art techniques": 52681, "supervised learning ssl": 54006, "language understanding slu": 28561, "demonstrate proposed approach": 13964, "supervised learning techniques": 54009, "gradient descent sgd": 23007, "machine learning technique": 31334, "social media language": 51577, "popular natural language": 41175, "based conditional random": 5636, "hindi english bengali": 23939, "english bengali english": 17780, "different social media": 15072, "coarse fine grained": 8783, "language pairs domains": 28367, "multilingual cross lingual": 36075, "cross lingual data": 11831, "machine learning ml": 31324, "ml natural language": 33433, "new deep learning": 37171, "networks proven effective": 36900, "end end training": 17668, "linear chain conditional": 30650, "chain conditional random": 7958, "question answering natural": 44705, "language inference models": 28110, "multi modal data": 35989, "state art benchmark": 52588, "multiple data sources": 36192, "paper presents simple": 39483, "annotated corpora available": 2879, "sentence word word": 49672, "german english chinese": 22666, "data driven models": 12305, "reaches state art": 45058, "art nlp tasks": 4318, "representative nlp tasks": 46801, "data set used": 12645, "best overall performance": 6788, "proposed approach yields": 43736, "widespread use social": 62034, "health related information": 23519, "information social media": 26092, "simple rule based": 51207, "end end model": 17653, "based nmt models": 5916, "model significantly outperforms": 34383, "significantly outperforms previous": 51005, "previous best models": 42250, "constructed large scale": 10413, "grained entity types": 23032, "make datasets publicly": 31563, "datasets publicly available": 13389, "models proven effective": 35378, "memory networks lstms": 32275, "end end manner": 17650, "significantly outperform baseline": 50989, "overall f1 score": 39040, "common sense knowledge": 9198, "recent work shows": 45374, "paper investigate possibility": 39412, "method achieve better": 32356, "achieve better accuracy": 1116, "using deep learning": 60647, "different machine learning": 14983, "specific natural language": 52116, "use deep learning": 59864, "higher state art": 23847, "relatively little attention": 46122, "paper propose hybrid": 39516, "machine translation approach": 31347, "experiments language pairs": 19452, "approaches state art": 3926, "languages english german": 28655, "based cross lingual": 5661, "trained semi supervised": 57861, "semi supervised manner": 49461, "low medium high": 31160, "using neural networks": 60835, "rely large scale": 46293, "low resource datasets": 31176, "training data propose": 58028, "propose transfer learning": 43680, "model low resource": 34077, "low resource data": 31175, "data experiment results": 12343, "test set accuracy": 56370, "experiments natural language": 19478, "language inference nli": 28111, "neural attention models": 36937, "models achieved great": 34676, "data sparsity issue": 12684, "methods achieve significant": 32730, "achieve significant improvement": 1192, "propose deep learning": 43352, "model recurrent neural": 34289, "network rnn long": 36798, "rnn long short": 48200, "network based methods": 36707, "based methods proposed": 5851, "semantic relationships words": 49330, "meta learning approach": 32339, "using social media": 60952, "train recurrent neural": 57625, "achieves bleu score": 1311, "temporal classification ctc": 56182, "speech recognition models": 52286, "used feature extraction": 60187, "near real time": 36512, "models perform better": 35311, "generate training data": 22260, "training data task": 58042, "generated training data": 22329, "use hand crafted": 59905, "experiments proposed model": 19497, "micro f1 score": 33225, "extend state art": 19832, "results obtained different": 47748, "languages low resource": 28718, "based word occurrence": 6139, "lead substantial improvements": 29278, "decision making process": 13564, "approaches widely used": 3955, "information paper propose": 26002, "widely used neural": 62020, "used neural network": 60250, "automatic manual evaluations": 5105, "build large scale": 7409, "large scale datasets": 28971, "training data improve": 58002, "learning models using": 29764, "training deep neural": 58058, "fine tuning approach": 20978, "learning experimental results": 29635, "approach yields better": 3745, "machine learning research": 31330, "reading comprehension tasks": 45087, "use pre trained": 59976, "detailed error analysis": 14423, "paper present results": 39460, "automatic text generation": 5129, "text generation process": 56603, "external linguistic knowledge": 19949, "test set using": 56376, "using ground truth": 60719, "contrast state art": 10890, "experiments publicly available": 19505, "rule based machine": 48382, "rule based systems": 48387, "work present large": 62754, "large scale analysis": 28959, "report empirical results": 46432, "task question answering": 55315, "state art end": 52610, "art end end": 4256, "corpus large scale": 11370, "large scale study": 29002, "study propose novel": 53444, "generates high quality": 22343, "demonstrate proposed framework": 13966, "word embeddings semantic": 62187, "models best model": 34780, "best model achieves": 6782, "model achieves accuracy": 33511, "study natural language": 53417, "present natural language": 41954, "sequence sequence attention": 49983, "task learning approach": 55169, "tasks paper describes": 55782, "answering qa datasets": 3089, "work propose simple": 62786, "achieve competitive performance": 1127, "performance compared existing": 40252, "compared existing models": 9409, "results proposed model": 47785, "achieves competitive accuracy": 1317, "art models paper": 4297, "models paper proposes": 35305, "model consistently outperforms": 33703, "state art transformer": 52686, "machine learning model": 31325, "experimental results showed": 19312, "results showed proposed": 47838, "showed proposed method": 50672, "proposed method achieved": 43806, "achieved best performance": 1220, "based social media": 6044, "data propose novel": 12571, "ground truth data": 23252, "knowledge natural language": 27557, "popular research topic": 41184, "supervised learning problem": 54004, "end end architectures": 17639, "reinforcement learning method": 45872, "task oriented dialogues": 55256, "policy gradient algorithm": 41095, "cross lingual semantic": 11850, "dataset best knowledge": 12829, "best knowledge dataset": 6770, "data paper explore": 12530, "paper explore problem": 39364, "used improve performance": 60209, "improvements current state": 25066, "model trained proposed": 34475, "transfer learning strategy": 58396, "minimum bayes risk": 33304, "achieve better performance": 1117, "attention based model": 4720, "significantly outperforms non": 51004, "neural networks propose": 37064, "attention mechanism used": 4782, "like social media": 30505, "topics natural language": 57456, "expensive time consuming": 19221, "additional linguistic features": 1686, "language processing pipeline": 28420, "french german spanish": 21662, "areas natural language": 4157, "code mixed data": 8824, "significantly better results": 50943, "code mixed tweets": 8830, "shown great promise": 50715, "novel cross lingual": 37795, "cross lingual transfer": 11856, "state art monolingual": 52634, "data high resource": 12403, "different language families": 14964, "zero shot shot": 63180, "shot shot learning": 50646, "large annotated corpus": 28845, "recent works shown": 45380, "data automatically generated": 12173, "pseudo parallel data": 44281, "word embeddings evaluated": 62168, "proposed methods outperform": 43837, "large training corpus": 29029, "performance domain specific": 40303, "based bidirectional lstms": 5609, "like sentiment analysis": 30503, "combination convolutional neural": 9036, "training word embeddings": 58315, "embedding models using": 17048, "paper propose multi": 39524, "use neural networks": 59962, "continuous latent variables": 10848, "model outperforms single": 34165, "word embeddings capture": 62160, "social media using": 51591, "et al 2018": 18405, "et al 2017": 18404, "liu et al": 30893, "et al 2021": 18410, "et al 2022": 18411, "new annotated dataset": 37127, "typologically different languages": 59167, "expressed natural language": 19800, "state art attention": 52583, "generated content ugc": 22277, "word embeddings approach": 62157, "sentiment analysis models": 49823, "trained models using": 57811, "end end architecture": 17638, "semeval 2017 task": 49431, "cross lingual language": 11840, "referring expression generation": 45760, "work present simple": 62759, "training data sets": 58036, "evaluation shows method": 18720, "previous work paper": 42308, "close embedding space": 8685, "language processing field": 28408, "need feature engineering": 36565, "proposed model significantly": 43858, "significantly outperforms existing": 51002, "achieved promising results": 1260, "despite recent success": 14385, "present simple effective": 42016, "simple effective approach": 51151, "graph convolutional networks": 23125, "convolutional networks gcns": 11111, "neural networks evaluate": 37046, "experiments text classification": 19546, "classification task demonstrate": 8562, "task fine grained": 55087, "fine grained sentiment": 20944, "release new large": 46160, "new large scale": 37235, "large scale dataset": 28970, "comprehension question answering": 9774, "conduct human evaluation": 10054, "recent years seen": 45396, "large scale evaluation": 28975, "english paper propose": 17854, "cross lingual word": 11858, "lingual word embeddings": 30739, "embeddings machine translation": 17170, "models shown promising": 35500, "task specific features": 55394, "paper propose adversarial": 39492, "adversarial multi task": 1977, "task learning framework": 55172, "different text classification": 15098, "publicly available url": 44356, "available url http": 5386, "variety natural language": 61283, "sparse attention mechanism": 51965, "detection challenging task": 14466, "extensive experiments real": 19900, "collected social media": 8968, "sequence tagging problem": 50005, "task learning setup": 55181, "knowledge base question": 27410, "base question answering": 5551, "paper propose hierarchical": 39515, "language processing computational": 28402, "processing computational linguistics": 42861, "applications paper present": 3229, "multi layer perceptron": 35981, "state art semantic": 52660, "approaches improve performance": 3843, "classification machine translation": 8491, "machine translation automatic": 31349, "method train model": 32685, "sentiment analysis news": 49824, "present qualitative analysis": 41993, "inspired recent success": 26415, "generative adversarial networks": 22586, "adversarial networks gans": 1982, "experimental results english": 19286, "achieve significantly better": 1196, "significant performance improvements": 50911, "baseline state art": 6212, "attracted lot attention": 4885, "extensive empirical studies": 19866, "publicly available http": 44343, "available http www": 5306, "provide fine grained": 44075, "training data demonstrate": 57985, "lexically constrained decoding": 30400, "domain adaptation neural": 16001, "performance domain adaptation": 40302, "played important role": 40984, "language modeling objective": 28212, "consistent performance improvements": 10283, "paper presents attempt": 39470, "paper proposes simple": 39552, "simple machine learning": 51189, "suggest directions future": 53817, "directions future work": 15293, "present multi task": 41950, "task learning model": 55175, "state art standard": 52670, "automatic human evaluations": 5096, "pos tagging dependency": 41234, "problems paper propose": 42719, "outperforms previous methods": 38922, "propose simple effective": 43633, "experiments demonstrate approach": 19402, "demonstrate approach significantly": 13871, "based attention model": 5583, "based models achieve": 5866, "feature extraction techniques": 20488, "art neural network": 4314, "little labeled data": 30881, "additional labeled data": 1680, "cross lingual model": 11842, "corpora low resource": 11218, "propose novel data": 43529, "novel data augmentation": 37797, "data augmentation approach": 12150, "nlp tasks existing": 37534, "statistically significant improvements": 52773, "task paper describes": 55262, "paper describes participation": 39327, "term memory blstm": 56247, "different evaluation metrics": 14921, "recent years automatic": 45382, "english language datasets": 17832, "paper introduces new": 39407, "outperforms baseline model": 38870, "performance paper describes": 40475, "bayes logistic regression": 6350, "label classification task": 27697, "propose simple technique": 43637, "finite state machines": 21059, "attentional sequence sequence": 4860, "standard machine translation": 52501, "fully connected layers": 21718, "uses convolutional neural": 60502, "fully connected layer": 21717, "cross lingual text": 11854, "english source language": 17877, "art methods paper": 4287, "methods paper propose": 32975, "evaluate approach task": 18438, "improve classification accuracy": 24830, "train model using": 57609, "factoid question answering": 20297, "code trained models": 8863, "trained models publicly": 57807, "present method automatically": 41944, "method automatically generate": 32396, "wu et al": 63023, "order magnitude faster": 38633, "deep neural models": 13738, "shown great potential": 50714, "paradigm natural language": 39625, "based seq2seq model": 6025, "models capable learning": 34804, "high quality data": 23773, "language processing computer": 28403, "processing computer vision": 42863, "learning models deep": 29754, "models deep learning": 34889, "noise training data": 37605, "learning based method": 29535, "approach wide range": 3739, "results approach consistently": 47504, "approach consistently improves": 3466, "strong baseline future": 53003, "address issue introducing": 1764, "achieved good performance": 1235, "input output sequences": 26310, "improvement previous state": 25018, "human evaluation shows": 24156, "evaluation shows model": 18721, "produces higher quality": 43031, "vector space embeddings": 61466, "address problem proposing": 1790, "release new dataset": 46159, "significantly outperforms competitive": 51000, "according automatic human": 854, "automatic human evaluation": 5095, "trained models https": 57800, "models https github": 35093, "multi task training": 36030, "limited labeled data": 30593, "improve performance nlp": 24897, "computational social science": 9863, "model using combination": 34515, "models experimental results": 34994, "reduce computational cost": 45654, "data publicly available": 12580, "different natural language": 15002, "proposed model performs": 43856, "et al 2003": 18394, "features pre trained": 20644, "pre trained contextual": 41523, "contextual word embeddings": 10789, "deep natural language": 13732, "models named entity": 35243, "improves model performance": 25136, "achieves better performance": 1308, "better performance compared": 6930, "textual entailment task": 56963, "processing nlp machine": 42906, "nlp machine learning": 37496, "social media user": 51589, "improves overall performance": 25140, "automatic semi automatic": 5122, "discrete latent variable": 15423, "based human evaluation": 5773, "observation propose novel": 38124, "model achieves improvement": 33519, "improvement bleu score": 24993, "score state art": 48875, "state art baseline": 52586, "dialogue systems paper": 14789, "proposed model generate": 43850, "extensive experiments multiple": 19894, "multiple deep learning": 36197, "experiments benchmark dataset": 19363, "tasks paper present": 55786, "sequence sequence architectures": 49982, "performance text based": 40599, "machine translation present": 31377, "tasks task specific": 55926, "despite great success": 14363, "important natural language": 24746, "trained domain specific": 57714, "improve performance word": 24903, "task recent years": 55323, "neural natural language": 36990, "analysis shows model": 2759, "amounts annotated data": 2545, "consistently outperform baseline": 10299, "reduces number parameters": 45696, "child directed speech": 8290, "corpus freely available": 11346, "orders magnitude larger": 38672, "perform large scale": 40119, "freely available online": 21654, "deep learning dl": 13707, "large open domain": 28930, "open domain dataset": 38421, "transfer learning techniques": 58398, "based state art": 6056, "best performing models": 6798, "experiments machine translation": 19461, "model establishes new": 33836, "state art bleu": 52591, "empirical results demonstrate": 17341, "different types knowledge": 15111, "fine grained level": 20939, "propose simple novel": 43636, "advances state art": 1928, "state art text": 52684, "text natural language": 56673, "outperforms previous models": 38923, "tasks propose novel": 55820, "evaluation results model": 18703, "natural language datasets": 36417, "neural networks extract": 37047, "open source toolkit": 38460, "semi supervised training": 49468, "state art generative": 52619, "layer wise relevance": 29214, "wise relevance propagation": 62085, "based bi directional": 5604, "used previous work": 60272, "model solve problem": 34397, "solve problem using": 51687, "statistical language model": 52745, "cross domain knowledge": 11817, "domain specific text": 16189, "effectiveness proposed framework": 16805, "recent works proposed": 45379, "pre trained model": 41544, "large scale open": 28992, "scale open domain": 48608, "open domain qa": 38424, "approach achieve state": 3391, "language generation systems": 28088, "poses new challenges": 41251, "statistical machine learning": 52749, "approaches natural language": 3880, "scarcity high quality": 48666, "current evaluation metrics": 11976, "high quality corpus": 23772, "representation learning model": 46543, "building high quality": 7447, "shared task systems": 50506, "deployed real world": 14173, "real world use": 45143, "generate synthetic data": 22252, "language proposed approach": 28451, "report experimental results": 46435, "based attention mechanism": 5581, "used sequence sequence": 60298, "hierarchical attention networks": 23660, "achieved remarkable performance": 1262, "using multi task": 60818, "sequence sequence s2s": 49995, "superior state art": 53945, "different tasks paper": 15095, "using deep neural": 60648, "art neural models": 4313, "natural language explanations": 36422, "short term memories": 50569, "large real world": 28954, "domain specific vocabulary": 16191, "sentiment analysis experiments": 49821, "representation learning framework": 46539, "word embeddings word2vec": 62198, "generating high quality": 22377, "time consuming process": 57136, "domain cross domain": 16036, "evaluate proposed model": 18494, "sentiment analysis model": 49822, "recent years paper": 45392, "progress natural language": 43106, "data driven approaches": 12301, "attention based sequence": 4725, "sequence neural network": 49963, "large document collections": 28872, "information retrieval systems": 26067, "use state art": 60028, "speech recognition natural": 52287, "recognition natural language": 45517, "experimental results demonstrated": 19282, "establish strong baseline": 18349, "improvement classification accuracy": 24995, "propose novel simple": 43563, "conduct experiments datasets": 10044, "publicly available models": 44346, "model based deep": 33603, "based deep learning": 5673, "results proposed approach": 47782, "trained labeled data": 57757, "data state art": 12693, "time consuming costly": 57128, "real life tasks": 45107, "end end e2e": 17644, "use token level": 60049, "vocabulary oov problem": 61708, "results reveal proposed": 47816, "leverage large amounts": 30274, "word embedding layer": 62146, "task specific attention": 55389, "particularly low resource": 39886, "introduce new approach": 26832, "learning language models": 29696, "language models training": 28335, "learn word embeddings": 29445, "methods large scale": 32919, "establish new state": 18345, "recent advances natural": 45285, "benchmark datasets paper": 6458, "datasets paper presents": 13364, "presents novel method": 42098, "neural networks demonstrate": 37041, "achieves good performance": 1328, "improved performance text": 24959, "character word level": 8227, "novel approach based": 37760, "achieving performance comparable": 1418, "knowledge graphs kgs": 27506, "novel reinforcement learning": 37907, "based knowledge graph": 5798, "knowledge graph embeddings": 27501, "pre training approach": 41567, "embeddings pre trained": 17191, "pre trained large": 41539, "trained large corpora": 57766, "best knowledge study": 6773, "proposed cross lingual": 43749, "conventional machine learning": 11005, "classical machine learning": 8424, "features paper propose": 20638, "downstream nlp applications": 16346, "novel neural model": 37884, "state art unsupervised": 52687, "features state art": 20674, "learning human feedback": 29674, "reinforcement learning algorithm": 45866, "large action space": 28831, "level machine translation": 30156, "character level convolutional": 8207, "paper propose model": 39523, "evaluate proposed method": 18493, "tasks text classification": 55932, "improves performance text": 25146, "classification relation extraction": 8533, "recent neural models": 45326, "models shown significant": 35502, "text generation task": 56604, "large scale corpus": 28967, "2017 shared task": 268, "previous work used": 42315, "state art nli": 52640, "despite recent advances": 14383, "macro f1 score": 31407, "f1 score 87": 20211, "f1 score 93": 20216, "best performing baseline": 6795, "text state art": 56789, "ground truth dataset": 23253, "dataset publicly available": 13049, "language model training": 28200, "model training process": 34482, "comparable better performance": 9291, "network state art": 36807, "evaluation metrics results": 18654, "network cnn model": 36722, "pairs training data": 39224, "experimental results confirm": 19278, "proposed method able": 43804, "especially morphologically rich": 18288, "character level representations": 8217, "obtains significant improvement": 38256, "domain training data": 16216, "common nlp tasks": 9188, "nlp tasks sentiment": 37549, "performance baseline models": 40209, "model outperforms prior": 34164, "learning based framework": 29533, "experiments benchmark datasets": 19364, "benchmark datasets different": 6451, "datasets different domains": 13229, "state art data": 52603, "training data different": 57986, "best results achieved": 6816, "speech pos tags": 52280, "information improve performance": 25914, "domain adaptation using": 16007, "shared task aims": 50493, "language understanding models": 28553, "language inference task": 28113, "model shared task": 34369, "popularity social media": 41206, "mining social media": 33324, "models results suggest": 35458, "available social media": 5366, "crucial natural language": 11904, "language models word": 28348, "reinforcement learning directly": 45869, "automatic metrics human": 5111, "metrics human evaluation": 33170, "datasets propose novel": 13380, "model achieves new": 33520, "text classification datasets": 56471, "deep learning nlp": 13716, "traditional natural language": 57536, "using named entity": 60827, "task make use": 55202, "recurrent neural nets": 45624, "human evaluations model": 24160, "machine translation quality": 31379, "information input text": 25925, "neural network methods": 37016, "embedding based models": 17018, "explore state art": 19738, "single task learning": 51344, "traditional machine learning": 57527, "existing deep learning": 19057, "outperforms deep learning": 38894, "large text corpus": 29026, "news articles wikipedia": 37388, "performing natural language": 40686, "experiments real world": 19509, "tasks paper proposes": 55789, "demonstrates effectiveness proposed": 14031, "effectiveness proposed approach": 16804, "source code available": 51744, "improve performance existing": 24890, "performance existing methods": 40331, "propose new methods": 43505, "short term long": 50568, "term long term": 56241, "proposed method using": 43833, "processing nlp recently": 42909, "training evaluation data": 58092, "text social media": 56777, "using support vector": 60972, "language identification task": 28099, "leveraging unlabeled data": 30343, "leverages pre trained": 30310, "pre trained language": 41538, "trained language model": 57759, "processing tasks including": 42948, "word embeddings input": 62172, "attention natural language": 4794, "language processing community": 28401, "fundamental nlp tasks": 21785, "benchmarks state art": 6544, "previous work suggests": 42313, "language machine translation": 28145, "fine tuning model": 21000, "fine tuning data": 20982, "domain data fine": 16039, "data fine tuning": 12366, "tedious time consuming": 56163, "achieves substantial improvements": 1383, "paper propose different": 39504, "content social media": 10559, "learning fine grained": 29646, "question answering text": 44714, "compare different methods": 9337, "significant improvement baselines": 50875, "fine grained domain": 20933, "recent years researchers": 45395, "requires large scale": 46938, "performance paper present": 40476, "second propose novel": 49020, "based reinforcement learning": 5988, "extensive experimental results": 19874, "significantly improves state": 50977, "state art multiple": 52637, "models nlp tasks": 35264, "semantic syntactic information": 49360, "remains open problem": 46343, "techniques improve performance": 56097, "using labeled data": 60750, "task real world": 55319, "end end solution": 17661, "plays pivotal role": 41004, "sub word units": 53539, "surpass state art": 54167, "network models using": 36769, "nlp tasks models": 37541, "neural models natural": 36978, "apply machine learning": 3332, "including deep learning": 25248, "machine learning nlp": 31328, "languages machine translation": 28721, "document level features": 15805, "despite simplicity approach": 14391, "written natural language": 63005, "language models proposed": 28308, "models proposed method": 35375, "word embeddings training": 62192, "machine translation translate": 31391, "language models trained": 28334, "domain domain data": 16053, "stanford question answering": 52559, "question answering dataset": 44692, "introduce data augmentation": 26795, "data augmentation method": 12158, "method improves performance": 32535, "propose novel way": 43573, "datasets evaluation metrics": 13256, "random fields crfs": 44874, "substantial improvements strong": 53624, "using monolingual data": 60812, "introduce open source": 26853, "tasks information retrieval": 55688, "task empirical results": 55040, "performance models trained": 40441, "wide range tasks": 61974, "multi party conversations": 35997, "tackle challenge propose": 54699, "problem experimental results": 42558, "significantly improves accuracy": 50973, "sequence labeling task": 49939, "word embeddings character": 62161, "learning methods proposed": 29734, "benchmark datasets demonstrate": 6450, "f1 score 91": 20214, "fully data driven": 21721, "outperforms baseline methods": 38869, "human evaluation experiments": 24147, "widely used nlp": 62021, "attention mechanism attention": 4773, "self attention network": 49185, "shows state art": 50806, "natural language generator": 36427, "novel approach automatically": 37759, "external knowledge base": 19939, "achieves performance comparable": 1353, "simple effective solution": 51157, "data social media": 12672, "field computational linguistics": 20753, "based distributional semantics": 5688, "discrepancy training inference": 15418, "models improve performance": 35111, "points f1 score": 41073, "information training data": 26131, "training data insufficient": 58003, "information extensive experiments": 25854, "world datasets demonstrate": 62936, "datasets demonstrate model": 13215, "model achieves comparable": 33514, "success deep learning": 53698, "traditional language models": 57524, "order magnitude smaller": 38636, "state art non": 52642, "incorporating pre trained": 25392, "significantly outperforms strong": 51008, "semi supervised setting": 49465, "paper present method": 39455, "datasets work propose": 13488, "text paper introduce": 56688, "end end asr": 17640, "experimental results reveal": 19309, "training data machine": 58014, "state art retrieval": 52658, "training data finally": 57996, "despite success existing": 14395, "best f1 score": 6764, "increase f1 score": 25415, "character level encoder": 8209, "different people different": 15023, "large scale multi": 28988, "processing nlp techniques": 42914, "state art works": 52694, "previous studies demonstrated": 42287, "paper investigate problem": 39413, "end end deep": 17642, "end deep neural": 17627, "contrast previous approaches": 10882, "task paper investigate": 55264, "sentence level classification": 49581, "tasks language understanding": 55711, "classification slot filling": 8553, "classification named entity": 8505, "datasets paper introduce": 13362, "fine grained information": 20936, "human human dialogue": 24170, "sentence representation learning": 49632, "automatic evaluation method": 5084, "long standing challenge": 31031, "state art english": 52611, "using logistic regression": 60777, "domain specific embeddings": 16174, "improve performance models": 24893, "commonly used methods": 9225, "little attention paid": 30871, "using sentence level": 60925, "existing evaluation metrics": 19068, "provide detailed analysis": 44050, "results models trained": 47729, "models trained smaller": 35619, "perform extensive analysis": 40104, "experiment benchmark datasets": 19232, "metrics shared task": 33200, "perform series experiments": 40139, "convolutional recurrent neural": 11116, "par state art": 39618, "using fine grained": 60696, "freely available research": 21655, "available research purposes": 5361, "work focus task": 62669, "low resource setting": 31190, "fraction model parameters": 21430, "paper introduce simple": 39404, "available non commercial": 5334, "online social networks": 38387, "learning model using": 29749, "achieve f1 score": 1138, "extensive feature engineering": 19907, "present new dataset": 41961, "proposed model based": 43847, "important source information": 24774, "learning using large": 29932, "languages training data": 28809, "baseline large margin": 6179, "word embedding learning": 62147, "experimental results framework": 19287, "framework achieves state": 21450, "paper present work": 39466, "downstream tasks paper": 16361, "results experiments demonstrate": 47628, "neural network classifier": 37001, "significant performance gain": 50907, "state art classification": 52594, "new data set": 37162, "large parallel corpora": 28933, "humans learn language": 24279, "does require parallel": 15975, "level sentence level": 30208, "based natural language": 5898, "domain specific words": 16193, "paper describes systems": 39333, "model best performing": 33623, "micro average f1": 33219, "parsing natural language": 39789, "natural language semantic": 36447, "end end using": 17670, "based multi task": 5889, "best baseline model": 6752, "predict fine grained": 41640, "present deep learning": 41886, "end end trained": 17667, "trained sequence sequence": 57869, "propose novel technique": 43567, "baseline neural network": 6195, "deep learning systems": 13720, "social network data": 51594, "quality automatically generated": 44495, "time consuming laborious": 57133, "10 absolute improvement": 36, "training set training": 58252, "achieved better results": 1223, "work propose unsupervised": 62789, "evaluated proposed method": 18544, "using hand crafted": 60721, "art results compared": 4373, "strong cross lingual": 53025, "builds recent work": 7480, "recent work unsupervised": 45375, "advances deep learning": 1910, "robust machine learning": 48254, "memory lstm models": 32264, "manner experimental results": 31716, "based model learns": 5860, "data experimental results": 12345, "achieved impressive performance": 1245, "availability large scale": 5254, "large scale parallel": 28993, "model widely used": 34537, "widely used datasets": 62014, "rich source information": 48124, "standard training data": 52537, "using cross entropy": 60636, "inspired recent work": 26416, "dataset model achieves": 12997, "exact match accuracy": 18851, "language recent years": 28462, "achieved significant success": 1272, "machine reading comprehension": 31341, "framework significantly improves": 21600, "multiple strong baselines": 36294, "paper investigates use": 39415, "zero shot generalization": 63164, "sentences training data": 49798, "recent neural approaches": 45325, "state art feature": 52616, "scarcity labeled training": 48670, "training data address": 57972, "data address issue": 12124, "address issue using": 1768, "role labeling srl": 48312, "single task model": 51345, "model makes predictions": 34087, "tasks proposed method": 55825, "proposed method improve": 43820, "method improve model": 32531, "language model called": 28159, "language model model": 28178, "language model loss": 28175, "level language model": 30145, "non autoregressive model": 37636, "large scale human": 28980, "language model trained": 28199, "present end end": 41901, "active area research": 1472, "english code switched": 17788, "significant improvements existing": 50886, "proposed method improves": 43821, "language processing text": 28435, "neural networks proposed": 37065, "proposed model compared": 43848, "successful natural language": 53737, "art results number": 4382, "locality sensitive hashing": 30956, "state art classifiers": 52595, "proposed deep learning": 43756, "existing approaches require": 19030, "queries natural language": 44656, "paper conduct extensive": 39299, "discuss advantages disadvantages": 15459, "model performs best": 34202, "processing paper propose": 42920, "reading comprehension mrc": 45084, "art models use": 4301, "multi step reasoning": 36012, "performance current state": 40274, "significant improvements baselines": 50884, "reading comprehension models": 45083, "existing methods usually": 19105, "models achieved state": 34682, "meta learning framework": 32342, "introduce novel method": 26847, "extensive experiments investigate": 19890, "models achieves state": 34684, "available paper propose": 5341, "memory lstm units": 32268, "end propose simple": 17703, "problem data scarcity": 42529, "improves current state": 25123, "future research dataset": 21886, "knowledge bases kb": 27414, "propose new algorithm": 43495, "reinforcement learning approach": 45867, "art results datasets": 4374, "sequence seq2seq model": 49976, "previous methods large": 42262, "methods large margin": 32917, "great success natural": 23218, "existing end end": 19064, "entity recognition relation": 18131, "recognition relation extraction": 45531, "build high quality": 7405, "high quality dataset": 23774, "research dataset available": 47011, "dataset available https": 12820, "github com lancopku": 22705, "evaluation state art": 18726, "fine grained control": 20932, "extensive experiments proposed": 19897, "results method achieves": 47715, "existing neural models": 19117, "new method called": 37253, "resource poor target": 47263, "source target domains": 51801, "model extensive experiments": 33867, "models transfer learning": 35627, "performance nlp tasks": 40459, "machine translation approaches": 31348, "black box neural": 7193, "contribute better understanding": 10928, "develop machine learning": 14595, "natural language model": 36433, "based neural model": 5905, "achieves significant performance": 1369, "language model adapted": 28153, "text mining techniques": 56665, "achieve best results": 1114, "orders magnitude smaller": 38673, "expert domain knowledge": 19579, "word embeddings word": 62197, "propose novel methodology": 43546, "real life applications": 45105, "indo european languages": 25597, "best worst scaling": 6840, "learning model called": 29745, "vaswani et al": 61446, "based self attention": 6012, "neural network called": 37000, "supervised sequence labeling": 54043, "compare state art": 9368, "text classification approaches": 56468, "outperformed previous state": 38840, "state art benchmarks": 52589, "training data source": 58039, "data source code": 12675, "source code released": 51752, "models freely available": 35044, "fully end end": 21724, "additionally propose new": 1730, "work study problem": 62831, "proposed method significantly": 43829, "maintaining comparable performance": 31488, "compared previous work": 9438, "achieves results competitive": 1361, "answering dataset squad": 3068, "reading comprehension dataset": 45081, "work propose method": 62780, "learning rl framework": 29853, "deep learning natural": 13715, "learning natural language": 29775, "propose zero shot": 43708, "accuracy test set": 1060, "blind test set": 7218, "recently released dataset": 45462, "train neural networks": 57619, "language processing propose": 28425, "paper proposes novel": 39551, "proposes novel approach": 43941, "able achieve state": 671, "propose unsupervised approach": 43692, "monolingual word embeddings": 35815, "embeddings proposed method": 17198, "tasks sequence sequence": 55879, "learning state art": 29895, "state art machine": 52627, "leveraging external knowledge": 30324, "external knowledge bases": 19940, "performance comparable state": 40247, "machine learning applications": 31309, "publicly available data": 44339, "language models approach": 28231, "features train model": 20685, "existing methods suffer": 19101, "question answering forums": 44697, "conduct large scale": 10056, "problems propose novel": 42724, "model outperforms current": 34161, "present best knowledge": 41859, "attracted considerable attention": 4877, "words proper nouns": 62485, "semi automatic method": 49448, "method makes use": 32571, "different word embeddings": 15133, "models real world": 35402, "data sets different": 12648, "rely pre trained": 46298, "domain specific information": 16177, "introduce novel neural": 26850, "models proposed model": 35376, "demonstrated effectiveness proposed": 14006, "compared existing state": 9410, "art performance benchmark": 4327, "neural semantic parsing": 37093, "like named entity": 30489, "state art sota": 52667, "art sota results": 4409, "pointer network based": 41059, "sequence sequence approach": 49979, "extensive set experiments": 19913, "language processing machine": 28414, "processing machine translation": 42887, "promising research direction": 43176, "question answering dialogue": 44694, "experimental results chinese": 19275, "tasks proposed approach": 55823, "question answering datasets": 44693, "models convolutional neural": 34866, "entities paper propose": 18071, "conduct experiments widely": 10048, "experiments widely used": 19563, "used real world": 60285, "model make use": 34085, "simple effective mechanism": 51154, "language processing involves": 28412, "model propose novel": 34247, "challenging real world": 8133, "publicly available corpus": 44338, "state art solutions": 52666, "state art algorithms": 52578, "used text classification": 60329, "approach achieves best": 3395, "tasks demonstrate proposed": 55579, "domains experimental results": 16254, "strong baseline systems": 53007, "require task specific": 46893, "language model fine": 28165, "model fine tuning": 33895, "fine tuning language": 20995, "tuning language model": 58920, "art text classification": 4429, "embedding model trained": 17044, "better performance state": 6936, "art recurrent neural": 4364, "introduce new dataset": 26834, "new dataset called": 37164, "features significantly improve": 20669, "significantly improve accuracy": 50963, "trained reinforcement learning": 57851, "tasks paper investigate": 55785, "external knowledge sources": 19945, "neural non neural": 37083, "self attention based": 49176, "language models demonstrate": 28242, "systems low resource": 54554, "dimensional vector space": 15238, "paper propose leverage": 39520, "learning paper proposes": 29795, "outperforms existing state": 38901, "trained natural language": 57823, "natural language corpora": 36415, "traditional deep learning": 57516, "based word level": 6138, "multiple sources information": 36286, "knowledge graphs kg": 27505, "complex real world": 9654, "real world settings": 45138, "multilingual machine translation": 36095, "neural networks recently": 37068, "highly competitive performance": 23884, "computer vision tasks": 9898, "propose novel multi": 43550, "model benchmark datasets": 33615, "recent deep learning": 45301, "existing text generation": 19159, "text generation methods": 56599, "tackle problem propose": 54710, "text generation model": 56600, "generative adversarial network": 22585, "text propose novel": 56716, "dialogue generation tasks": 14774, "generation tasks demonstrate": 22563, "tasks demonstrate model": 55578, "baselines code available": 6244, "models word embeddings": 35684, "text training data": 56819, "build end end": 7397, "training machine translation": 58164, "test set paper": 56375, "cross lingual learning": 11841, "language independent features": 28107, "using word level": 61029, "work tackle problem": 62837, "sentence boundary detection": 49524, "binary classification task": 7147, "deep contextualized word": 13687, "contextualized word representation": 10815, "bidirectional language model": 7074, "transfer learning approach": 58376, "sentence level representations": 49590, "multiple source languages": 36284, "approach able achieve": 3388, "proposed approach achieve": 43722, "dataset fine tuning": 12933, "fine tuning pre": 21011, "tuning pre trained": 58944, "lingual zero shot": 30741, "zero shot setting": 63177, "present novel deep": 41970, "task existing approaches": 55062, "best single model": 6823, "state art scores": 52659, "deep generative model": 13693, "including natural language": 25281, "models end end": 34966, "experiment results proposed": 19252, "used machine learning": 60230, "present neural model": 41956, "seen training time": 49067, "human evaluation model": 24151, "state art zero": 52695, "art zero shot": 4444, "leading state art": 29300, "trained models downstream": 57797, "models downstream tasks": 34941, "data train models": 12739, "evaluate pre trained": 18488, "performance compared previous": 40254, "tasks existing approaches": 55626, "graph convolutional network": 23124, "methods natural language": 32953, "http github com": 24049, "new task called": 37334, "user generated data": 60417, "create new dataset": 11712, "gained significant attention": 21923, "model improve performance": 33978, "word level model": 62233, "model large margin": 34041, "paper present study": 39462, "study end end": 53370, "experiments effectiveness proposed": 19426, "social media sites": 51585, "word embedding space": 62153, "unlike previously proposed": 59606, "supervised deep learning": 53979, "deep learning techniques": 13722, "quantitative qualitative analyses": 44624, "single multi task": 51322, "abstractive text summarization": 775, "research topic natural": 47132, "character based word": 8198, "based word based": 6135, "widely used dataset": 62013, "learning model jointly": 29746, "language best knowledge": 27978, "standard benchmark datasets": 52472, "multi instance learning": 35974, "labeled data experiments": 27742, "datasets achieve state": 13142, "order magnitude larger": 38635, "recent years deep": 45383, "dataset extensive experiments": 12921, "learning models based": 29753, "network cnn long": 36721, "cnn long short": 8771, "github com neulab": 22708, "paper describes semeval": 39329, "semeval 2018 task": 49433, "incorporate commonsense knowledge": 25346, "data code publicly": 12214, "publicly available https": 44344, "provide high quality": 44085, "important role understanding": 24768, "fine grained semantics": 20943, "syntactic semantic analysis": 54323, "end neural model": 17689, "training data model": 58017, "word vector representation": 62335, "attention mechanism transformer": 4781, "results machine translation": 47708, "self attention mechanism": 49180, "aware self attention": 5470, "self attention mechanisms": 49181, "problem field natural": 42566, "end end text": 17665, "question answering cqa": 44691, "use neural network": 59961, "prior work used": 42426, "processing tasks language": 42949, "poses significant challenges": 41254, "supervised learning models": 54003, "learning models used": 29763, "hope work help": 24018, "newly collected dataset": 37372, "gold standard corpus": 22916, "propose method based": 43453, "paper introduce framework": 39401, "data code https": 12212, "code https github": 8819, "feed forward network": 20712, "art performance task": 4346, "tasks question answering": 55832, "paper present neural": 39457, "predictions experimental results": 41759, "substantial improvements baseline": 53623, "shared task semantic": 50505, "skip gram models": 51421, "text classification results": 56483, "semantic parsing models": 49313, "novel graph based": 37834, "art performance datasets": 4332, "neural text generation": 37105, "text generation models": 56601, "source publicly available": 51792, "publicly available parallel": 44349, "available parallel corpora": 5343, "low level features": 31157, "et al 2005": 18395, "important task nlp": 24779, "task nlp applications": 55241, "demonstrates superior performance": 14048, "require large training": 46874, "large training datasets": 29031, "unsupervised state art": 59735, "lingual cross lingual": 30695, "require large number": 46872, "trained single language": 57873, "publicly available research": 44353, "ones experimental results": 38337, "applications machine learning": 3219, "language understanding previous": 28557, "indo aryan languages": 25595, "self attention heads": 49177, "results variety tasks": 47902, "new end end": 37186, "compared strong baselines": 9462, "strong baselines including": 53014, "proposed approach outperforms": 43729, "significantly improve model": 50965, "novel approach called": 37761, "open domain setting": 38426, "word level language": 62232, "level language modeling": 30146, "language modeling problem": 28217, "transfer learning nlp": 58390, "task training data": 55444, "transfer learning pretrained": 58394, "use transfer learning": 60058, "learning transfer learning": 29920, "transfer learning using": 58400, "using sentence embeddings": 60924, "surprisingly good performance": 54188, "pre trained sentence": 41555, "overall f1 scores": 39041, "f1 scores compared": 20229, "scarcity training data": 48677, "accuracy f1 score": 976, "network trained end": 36816, "trained large amounts": 57765, "problem recent work": 42643, "recent work explored": 45368, "learning low resource": 29715, "potential directions future": 41387, "directions future research": 15292, "real time applications": 45115, "code mixed text": 8829, "sentiment positive negative": 49856, "using contrastive learning": 60625, "based text classification": 6091, "model word embeddings": 34540, "text classification proposed": 56482, "sequence neural models": 49962, "released pre trained": 46180, "datasets source code": 13438, "availability large amounts": 5253, "low resourced languages": 31199, "leveraging transfer learning": 30341, "different languages english": 14970, "english german french": 17814, "baseline neural models": 6194, "retrieval based method": 47942, "annotated gold standard": 2899, "challenging problem natural": 8127, "problem social media": 42659, "15 times faster": 153, "downstream nlp tasks": 16347, "graph based method": 23105, "datasets proposed method": 13383, "social media increasingly": 51576, "mining natural language": 33320, "discuss future directions": 15467, "language generation models": 28085, "models able generate": 34655, "models consistently outperform": 34852, "results publicly available": 47794, "consistently outperforms state": 10306, "art methods tasks": 4288, "embedding methods word2vec": 17042, "boost model performance": 7254, "state art deep": 52605, "art deep learning": 4247, "use commonsense knowledge": 59846, "multi modal fusion": 35990, "cross modal attention": 11864, "art results widely": 4393, "results widely used": 47910, "word embedding algorithms": 62143, "word embedding features": 62145, "publicly available non": 44347, "address challenges propose": 1749, "learning method learn": 29728, "self training strategy": 49226, "approach outperforms baseline": 3623, "f1 score 75": 20206, "serve starting point": 50081, "model improves performance": 33984, "present real world": 41997, "real world application": 45122, "building state art": 7472, "semantic parsing semantic": 49315, "high resource settings": 23795, "obtain competitive performance": 38167, "data augmentation methods": 12159, "multi lingual model": 35985, "space paper propose": 51880, "select high quality": 49105, "low resource scenarios": 31189, "obtained pre trained": 38218, "method achieves new": 32365, "including human evaluation": 25262, "based adversarial training": 5560, "training data augmentation": 57976, "monolingual cross lingual": 35797, "cross lingual multilingual": 11845, "monolingual multilingual models": 35807, "performance cross lingual": 40272, "combining machine learning": 9114, "introduce new benchmark": 26833, "dataset code available": 12840, "relation extraction problem": 45979, "directional gated recurrent": 15278, "multiple real world": 36270, "recent years neural": 45389, "empirical results proposed": 17345, "outperforms existing baselines": 38898, "given piece text": 22770, "significant improvement state": 50880, "leads state art": 29330, "state art single": 52665, "art single model": 4402, "inference paper propose": 25679, "paper propose unified": 39538, "achieve near human": 1173, "near human level": 36506, "large amounts parallel": 28839, "resource languages like": 47240, "achieve better results": 1118, "neural models trained": 36983, "trained maximum likelihood": 57788, "fine tune models": 20951, "fine tuned models": 20964, "maximizing mutual information": 31965, "task aims generate": 54897, "approach tackle problem": 3715, "knowledge graph based": 27497, "new benchmark dataset": 37143, "various evaluation metrics": 61337, "language processing previous": 28422, "conducting extensive experiments": 10103, "extensive experiments analysis": 19879, "tasks multi task": 55753, "compared single task": 9454, "contextualized word representations": 10816, "performance transfer learning": 40608, "tasks like machine": 55724, "yields substantial improvements": 63136, "near human performance": 36507, "performance f1 score": 40342, "f1 score 37": 20195, "vector space using": 61469, "model outperforms strong": 34167, "similarity based approaches": 51084, "context paper propose": 10685, "models recently proposed": 35412, "relation extraction task": 45981, "better zero shot": 6995, "zero shot performance": 63171, "task specific training": 55404, "zero shot translation": 63184, "language model generating": 28168, "establish state art": 18347, "results multiple datasets": 47735, "uses pre trained": 60529, "paper describes submitted": 39332, "relation extraction tasks": 45982, "end end way": 17671, "entity mention detection": 18119, "approach outperforms previous": 3627, "choice question answering": 8336, "state art pretrained": 52650, "evaluating language models": 18560, "unsupervised neural machine": 59718, "shared latent space": 50478, "address issue introduce": 1763, "shared task organized": 50502, "time consuming task": 57137, "lead better performance": 29257, "impressive results achieved": 24816, "reinforcement learning methods": 45873, "art sota methods": 4406, "like deep learning": 30468, "real world large": 45132, "learned word embeddings": 29493, "propose solution problem": 43640, "task proposed method": 55305, "test set available": 56371, "github com ibm": 22703, "baseline models task": 6191, "model used generate": 34509, "provides state art": 44229, "modeling machine translation": 34596, "rnn based approaches": 48184, "high quality sentence": 23785, "corpora different domains": 11193, "different domains demonstrate": 14906, "domains demonstrate effectiveness": 16246, "demonstrate effectiveness generality": 13898, "state art open": 52643, "training data introduce": 58004, "reinforcement learning based": 45868, "learning based model": 29537, "markov decision process": 31846, "sequence tagging task": 50006, "experimental results based": 19271, "outperformed state art": 38843, "model pre trained": 34218, "low resource domain": 31177, "benchmark datasets proposed": 6459, "paper propose cross": 39501, "shed new light": 50528, "achieving f1 score": 1404, "f1 score 73": 20204, "exposure bias problem": 19790, "address problems paper": 1792, "inverse reinforcement learning": 26930, "generate higher quality": 22209, "lack labeled data": 27898, "present new framework": 41962, "cross lingual information": 11838, "lingual information retrieval": 30705, "lingual word embedding": 30738, "unsupervised cross lingual": 59690, "cross lingual embeddings": 11834, "attention based models": 4721, "state art competitive": 52599, "based neural architectures": 5902, "sequence sequence language": 49988, "models typically trained": 35640, "approach low resource": 3593, "training deep learning": 58057, "set pre defined": 50222, "propose neural model": 43490, "benchmarks proposed model": 6539, "approach fine tuning": 3539, "word vector spaces": 62338, "models achieve competitive": 34669, "available low resource": 5325, "resource languages paper": 47243, "contrast existing approaches": 10876, "provide valuable information": 44152, "process extensive experiments": 42782, "extensive experiments conducted": 19883, "experiments conducted large": 19384, "positive negative samples": 41288, "reference based metrics": 45736, "information word embeddings": 26161, "models paper investigate": 35301, "impact different types": 24593, "data used train": 12761, "propose novel task": 43566, "build new dataset": 7418, "evaluation results proposed": 18704, "model outperforms baselines": 34158, "outperforms baselines large": 38874, "code available url": 8795, "available url https": 5387, "url https github": 59794, "level fine grained": 30122, "proposed framework outperforms": 43785, "framework outperforms state": 21579, "art methods large": 4286, "requires large amounts": 46937, "language models investigate": 28266, "error propagation paper": 18224, "proposed methods achieve": 43836, "mean reciprocal rank": 31994, "domain specific features": 16175, "domain specific word": 16192, "resource languages lrls": 47242, "cross lingual training": 11855, "method cross lingual": 32448, "proposed model uses": 43861, "sentiment classification tasks": 49835, "learning pre trained": 29811, "new publicly available": 37293, "parsing semantic parsing": 39797, "benchmarks demonstrate proposed": 6516, "art results task": 4386, "language propose novel": 28449, "significantly improves translation": 50978, "attention based bi": 4716, "bidirectional gated recurrent": 7071, "propose novel effective": 43533, "entity type information": 18153, "encourage model generate": 17596, "new open source": 37275, "proposed method substantially": 43830, "method substantially improves": 32672, "paper present end": 39450, "experimental results public": 19307, "results public datasets": 47792, "datasets demonstrate method": 13214, "neural network using": 37031, "training data results": 58032, "data results suggest": 12616, "scale language model": 48587, "prevent catastrophic forgetting": 42230, "model performance different": 34188, "setting new state": 50335, "improved state art": 24967, "present case studies": 41862, "hybrid method combines": 24318, "commonsense knowledge base": 9235, "language specific models": 28497, "use machine translation": 59943, "experimental results significant": 19314, "machine translation languages": 31364, "work presents new": 62763, "deep learning research": 13719, "trained neural network": 57828, "artificial intelligence systems": 4493, "machine generated text": 31301, "vectors pre trained": 61495, "low resourced language": 31198, "shown significant improvements": 50750, "human evaluation demonstrate": 24145, "downstream tasks recent": 16364, "translation experimental results": 58612, "optimization based meta": 38546, "based meta learning": 5845, "natural language domain": 36421, "alleviate problem propose": 2417, "classification datasets demonstrate": 8453, "datasets demonstrate proposed": 13216, "state art shot": 52664, "make code data": 31550, "feature extraction methods": 20487, "corpus publicly available": 11415, "training data major": 58015, "synthetic training data": 54386, "shared task paper": 50503, "art baseline models": 4221, "explore different strategies": 19700, "challenging task especially": 8146, "training method called": 58174, "high low resource": 23752, "tasks paper study": 55790, "language modeling machine": 28210, "social media datasets": 51574, "cross lingual resources": 11849, "model outperforms baseline": 34157, "models existing methods": 34989, "results provide insights": 47789, "evaluation real world": 18693, "specific word representations": 52176, "quality labeled data": 44540, "sequence models attention": 49960, "work cross lingual": 62615, "german french spanish": 22670, "word sentence embeddings": 62302, "tasks including classification": 55678, "obtained https github": 38212, "paper describes submissions": 39331, "machine translation improve": 31361, "learning high quality": 29672, "achieving good performance": 1407, "domain specific applications": 16166, "domain specific tasks": 16187, "tasks neural machine": 55764, "high frequency words": 23740, "training data set": 58035, "method achieves comparable": 32363, "quality state art": 44583, "machine learning architecture": 31312, "high quality training": 23787, "amounts parallel data": 2556, "data target language": 12722, "experiments zero shot": 19567, "data zero shot": 12781, "zero shot language": 63165, "models built using": 34800, "range tasks including": 44938, "create large scale": 11706, "comparing state art": 9487, "address issues introduce": 1770, "competitive results compared": 9562, "code pre trained": 8844, "trained models available": 57795, "systems experimental results": 54498, "experimental results validate": 19320, "validate effectiveness proposed": 61177, "related social media": 45937, "modalities text images": 33473, "models trained different": 35606, "deep learning frameworks": 13710, "outperform strong baselines": 38826, "unlike previous works": 59604, "previous works propose": 42321, "end deep learning": 17626, "unified end end": 59470, "machine learning deep": 31319, "learning deep learning": 29583, "qualitative analysis shows": 44474, "language models perform": 28293, "pre trained state": 41558, "trained state art": 57883, "language model bert": 28157, "embeddings language models": 17158, "propose method automatically": 43452, "analysis state art": 2767, "natural language expression": 36423, "2018 shared task": 276, "number high quality": 38008, "fine tuned using": 20973, "yields state art": 63132, "crucial real world": 11909, "domain specific corpora": 16168, "state art domain": 52607, "achieved remarkable progress": 1263, "experiments multiple datasets": 19474, "models able predict": 34658, "transfer learning based": 58378, "model transfer learning": 34487, "f1 score 69": 20201, "f1 score 78": 20208, "source code publicly": 51751, "closed world assumption": 8701, "propose joint model": 43426, "unlike existing methods": 59596, "experimental results text": 19318, "tasks method outperforms": 55746, "language models results": 28316, "build machine learning": 7411, "art performance benchmarks": 4328, "proposed model evaluated": 43849, "language models using": 28342, "improve prediction accuracy": 24907, "state art overall": 52644, "challenging task involves": 8148, "demonstrate approach achieves": 13867, "approach achieves state": 3403, "art performance present": 4343, "lstm based language": 31245, "network based method": 36706, "data recent work": 12588, "inspired recent advances": 26413, "domain adaptation problem": 16002, "perform domain adaptation": 40092, "source target domain": 51800, "datasets analysis shows": 13150, "memory lstm architecture": 32259, "neural sequence labeling": 37097, "sequence labeling models": 49936, "attention based deep": 4718, "annotated data language": 2883, "time consuming labor": 57132, "consuming labor intensive": 10448, "single model trained": 51319, "nlp tasks require": 37548, "systems propose novel": 54605, "evaluation metrics datasets": 18649, "propose new paradigm": 43511, "provide complementary information": 44034, "results multiple benchmarks": 47734, "rnn based models": 48188, "area curve auc": 4139, "encoder decoder based": 17499, "progress recent years": 43114, "self attention networks": 49186, "uses self attention": 60532, "self attention layers": 49179, "open source python": 38457, "transformer models perform": 58501, "models low resource": 35203, "language models learning": 28272, "language processing algorithms": 28395, "manually annotated corpus": 31757, "methods perform better": 32979, "significant improvements performance": 50887, "english hindi code": 17821, "hindi code mixed": 23937, "datasets social media": 13436, "nlp tasks recently": 37547, "information multiple sources": 25981, "capture syntactic semantic": 7715, "study state art": 53463, "experimental results method": 19292, "results method significantly": 47718, "simple effective way": 51160, "proposed method results": 43827, "local contextual information": 30934, "benchmarks verify effectiveness": 6551, "fundamental task natural": 21793, "relations natural language": 46046, "training data typically": 58045, "tasks deep learning": 55572, "machine translation named": 31371, "zero shot capabilities": 63156, "semantic parsing task": 49316, "character level features": 8210, "novel method using": 37867, "proposed method does": 43816, "rnn based methods": 48186, "learning framework allows": 29652, "maintaining high accuracy": 31493, "multi head attention": 35964, "art performances datasets": 4355, "datasets proposed approach": 13382, "statistical language models": 52746, "question answering knowledge": 44700, "model trained evaluated": 34471, "conduct qualitative analysis": 10059, "typologically diverse languages": 59169, "cross lingual settings": 11852, "performance widely used": 40630, "framework propose novel": 21586, "compare model performance": 9348, "propose novel techniques": 43568, "task learning models": 55176, "novel multi modal": 37878, "random forest model": 44877, "resource indian languages": 47230, "automatic metrics bleu": 5110, "correlate poorly human": 11507, "machine translation data": 31353, "better performance standard": 6935, "conducted real world": 10092, "named entity linking": 36372, "entity linking relation": 18117, "relation extraction knowledge": 45975, "nlp tasks machine": 37540, "state art research": 52655, "challenges future research": 8052, "languages cross lingual": 28627, "shared task multilingual": 50501, "contextualized word embeddings": 10814, "explore different ways": 19701, "systems large margin": 54544, "expectation maximization em": 19195, "deep learning method": 13711, "method solve problem": 32663, "reading comprehension questions": 45085, "art language model": 4269, "complex deep learning": 9622, "model language modeling": 34038, "language modeling paper": 28214, "achieve superior results": 1211, "understanding tasks including": 59409, "ground truth labels": 23256, "combines state art": 9102, "results datasets different": 47571, "datasets different languages": 13230, "results end end": 47608, "human annotated datasets": 24099, "large text collections": 29024, "systems require large": 54620, "require large annotated": 46870, "large annotated data": 28846, "domain invariant features": 16091, "leads significant improvements": 29327, "resource languages using": 47247, "experiments low resource": 19459, "learning based approaches": 29531, "using machine translation": 60784, "understanding nlu models": 59374, "large scale social": 29001, "scale social media": 48625, "memory network lstm": 32273, "model based approaches": 33600, "recurrent encoder decoder": 45614, "encoder decoder network": 17503, "standard cross entropy": 52480, "class imbalance problem": 8403, "based seq2seq models": 6026, "train evaluate models": 57589, "mutual information input": 36346, "issues paper propose": 27097, "used pre trained": 60264, "set test set": 50262, "resource language like": 47234, "establishing new state": 18366, "open source dataset": 38449, "various linguistic features": 61357, "architecture achieves state": 4023, "art results benchmark": 4371, "better previous state": 6946, "recommendations future research": 45569, "future research field": 21890, "data sets proposed": 12651, "dataset freely available": 12937, "using lexical features": 60768, "rnn based language": 48185, "indian languages hindi": 25520, "fixed size vector": 21082, "tasks despite success": 55585, "learning long term": 29713, "data source domain": 12676, "labeled data target": 27748, "data target domain": 12721, "model trained source": 34477, "trained source domain": 57878, "source domain data": 51766, "fine tuned small": 20968, "knowledge source domain": 27613, "representation experimental results": 46513, "model achieves competitive": 33515, "competitive performance state": 9555, "surpasses state art": 54177, "word embeddings widely": 62196, "embeddings widely used": 17245, "bridge gap propose": 7320, "proposed model trained": 43860, "results cross lingual": 47565, "propose novel hierarchical": 43539, "hierarchical attention mechanism": 23657, "vinyals et al": 61613, "dataset demonstrate effectiveness": 12884, "demonstrate effectiveness methods": 13900, "sequence labeling model": 49935, "address issue propose": 1767, "issue propose novel": 27077, "kullback leibler divergence": 27680, "construct new dataset": 10396, "generation propose novel": 22532, "generate human like": 22212, "significantly outperform baselines": 50990, "extensive empirical results": 19865, "manually annotated dataset": 31759, "correlation human judgements": 11522, "semi markov conditional": 49452, "markov conditional random": 31844, "models using large": 35658, "experiment results method": 19250, "challenging task nlp": 8150, "recent neural network": 45327, "represent state art": 46482, "issue propose new": 27076, "propose new training": 43515, "relation extraction model": 45977, "paper introduce task": 39405, "provide comprehensive analysis": 44037, "nlp tasks text": 37550, "answering named entity": 3084, "model computationally efficient": 33690, "propose framework named": 43393, "significant improvement compared": 50876, "conduct experiments real": 10047, "poses significant challenge": 41253, "github com google": 22701, "lexical syntactic information": 30391, "task oriented dialog": 55254, "additional annotated data": 1652, "expensive human annotation": 19210, "achieve promising results": 1182, "language understanding recent": 28559, "paper design novel": 39336, "labeled data propose": 27744, "training data second": 58034, "learning adversarial training": 29507, "hierarchical reinforcement learning": 23687, "results paper present": 47758, "address issues propose": 1773, "issues propose novel": 27101, "demonstrate model significantly": 13943, "model loss function": 34075, "conversational question answering": 11049, "obtains f1 score": 38249, "f1 score 65": 20200, "ample room improvement": 2567, "work introduce novel": 62693, "introduce novel task": 26851, "shared task data": 50497, "improves prediction accuracy": 25150, "superiority proposed method": 53955, "achieve higher accuracy": 1155, "outperform previous methods": 38811, "models based sequence": 34759, "demonstrate model outperforms": 13942, "methods data augmentation": 32810, "data augmentation text": 12164, "simple data augmentation": 51146, "data augmentation strategy": 12161, "machine translation propose": 31378, "problem propose novel": 42633, "proposed method consists": 43815, "graph based neural": 23109, "models existing approaches": 34988, "propose novel approaches": 43522, "data collection annotation": 12219, "tasks state art": 55910, "compare different models": 9338, "sequence generation task": 49928, "proximal policy optimization": 44262, "generation tasks including": 22564, "models using different": 35657, "various text classification": 61406, "self training method": 49225, "previous work focuses": 42304, "detection social media": 14529, "learning paper propose": 29794, "learning deep neural": 29584, "perform zero shot": 40163, "research recent years": 47112, "years deep learning": 63055, "better cross lingual": 6872, "address shortcoming propose": 1800, "substantially outperforms previous": 53647, "translation cross lingual": 58593, "trained cross lingual": 57702, "contextual word representations": 10790, "pre trained bidirectional": 41522, "synthetic data generated": 54371, "work explore different": 62658, "improve performance neural": 24896, "competitive baseline models": 9541, "previous best result": 42251, "propose new dataset": 43500, "dependency parse trees": 14127, "resource rich languages": 47270, "demonstrate effectiveness approaches": 13895, "text generation including": 56598, "improvements strong baselines": 25104, "introduce multi task": 26826, "multi task setup": 36029, "multi task model": 36022, "task model outperforms": 55218, "received considerable attention": 45257, "evaluate proposed framework": 18492, "downstream tasks including": 16357, "results downstream tasks": 47600, "downstream tasks existing": 16356, "gives better results": 22806, "sentiment analysis using": 49832, "knowledge graph kg": 27503, "learning reinforcement learning": 29837, "prior work shown": 42425, "models attention mechanism": 34736, "attention mechanism learn": 4777, "tasks image captioning": 55669, "achieves strong performance": 1381, "recent transformer based": 45362, "art results wide": 4392, "results wide range": 47908, "experimental results different": 19284, "collect new dataset": 8949, "approach improves performance": 3565, "word embeddings target": 62190, "propose structure aware": 43650, "based transformer model": 6109, "rnn language models": 48196, "language models achieved": 28227, "demonstrates state art": 14043, "models able learn": 34656, "quality estimation qe": 44517, "global contextual information": 22826, "translation models trained": 58634, "subject predicate object": 53556, "training data experimental": 57993, "significantly improve results": 50968, "achieves superior performance": 1385, "superior performance compared": 53936, "different benchmark datasets": 14851, "model significantly outperform": 34382, "outperform baseline models": 38782, "problem work propose": 42690, "work propose task": 62787, "low resource conditions": 31173, "datasets paper propose": 13365, "small training set": 51510, "cs cmu edu": 11921, "learned source domain": 29483, "leverage unlabeled data": 30296, "unlabeled data target": 59566, "substantially improves performance": 53640, "improves performance compared": 25142, "common sense reasoning": 9199, "model improves state": 33985, "embeddings sentence embeddings": 17212, "approach end end": 3512, "nlp tasks demonstrate": 37533, "propose new type": 43516, "challenge propose simple": 8011, "using automatic metrics": 60575, "metrics human judgments": 33172, "weakly supervised approach": 61860, "high correlation human": 23720, "specific training data": 52165, "training data use": 58046, "task paper present": 55265, "shared task task": 50507, "task pre trained": 55280, "achieve macro f1": 1170, "real world question": 45134, "based real world": 5977, "propose reinforcement learning": 43602, "recent work demonstrated": 45367, "performance large margin": 40412, "results demonstrate potential": 47581, "use sequence sequence": 60012, "word embeddings pre": 62182, "shared task dataset": 50498, "models sequence sequence": 35485, "model multi hop": 34109, "multiple pieces evidence": 36262, "graph neural networks": 23154, "convolutional network gcn": 11108, "experiments standard datasets": 19531, "model convolutional neural": 33721, "dataset low resource": 12986, "performance sentence level": 40551, "data pre trained": 12553, "word embeddings language": 62174, "experiments demonstrate model": 19405, "smaller model size": 51522, "training inference time": 58130, "improve quality generated": 24912, "models attention mechanisms": 34737, "approach improve performance": 3562, "character level information": 8211, "multi hop reasoning": 35972, "vectors word embeddings": 61502, "basic building block": 6328, "results suggest approach": 47866, "shared task automatic": 50494, "f1 score 58": 20197, "results english chinese": 47610, "competitively state art": 9572, "models usually trained": 35661, "sequence level training": 49951, "reinforcement learning experiments": 45870, "f1 score achieved": 20219, "lexico syntactic patterns": 30405, "propose novel methods": 43547, "simple effective method": 51155, "external commonsense knowledge": 19929, "existing neural network": 19118, "social media post": 51583, "qualitative analysis model": 44472, "human written references": 24261, "novel model called": 37874, "help model learn": 23579, "model outperforms competitive": 34160, "propose cross lingual": 43343, "techniques deep learning": 56074, "additional contextual information": 1661, "embeddings word embeddings": 17247, "make source code": 31599, "source code models": 51749, "code models available": 8835, "sequence sequence framework": 49986, "human evaluation results": 24154, "evaluation results indicate": 18702, "proposed approach achieves": 43723, "92 f1 score": 560, "fields computer vision": 20777, "neural mt nmt": 36987, "performing zero shot": 40695, "lingual transfer learning": 30735, "zero shot classification": 63157, "neural models generate": 36976, "better performance previous": 6934, "level language models": 30147, "tf idf features": 56992, "develop deep learning": 14582, "based pre trained": 5942, "performs substantially better": 40719, "state art span": 52668, "model performance significantly": 34195, "based supervised learning": 6069, "obtain high quality": 38176, "depth error analysis": 14186, "propose multi task": 43479, "applied wide range": 3312, "representations pre trained": 46738, "language models elmo": 28248, "release code https": 46146, "results model achieves": 47722, "public benchmark datasets": 44307, "using proposed approach": 60880, "training low resource": 58161, "domain semantic parsing": 16153, "number training examples": 38050, "lstm neural network": 31278, "time experimental results": 57155, "art performance terms": 4348, "recall f1 score": 45241, "art performance model": 4336, "training data better": 57980, "amounts unlabeled text": 2564, "task specific labeled": 55398, "specific labeled data": 52097, "models trained single": 35617, "knowledge bases generate": 27413, "social media platform": 51581, "evaluate performance model": 18482, "new dataset consisting": 37165, "demonstrate effectiveness framework": 13897, "relation extraction models": 45978, "performance large scale": 40413, "language models nlms": 28290, "significantly fewer parameters": 50959, "models trained dataset": 35604, "language generation task": 28089, "recognition ner task": 45520, "given input sentence": 22749, "shows superior performance": 50809, "large amounts labelled": 28838, "amounts labelled data": 2554, "models open domain": 35278, "dialogue response generation": 14783, "demonstrate strong performance": 13980, "annotated data available": 2882, "learning extensive experiments": 29641, "extensive experiments analyses": 19878, "new self supervised": 37309, "self supervised learning": 49210, "propose novel text": 43569, "conditional generative adversarial": 9994, "adversarial network gan": 1979, "limited labelled data": 30595, "winograd schema challenge": 62074, "paper present deep": 39448, "domain adaptation tasks": 16004, "english chinese corpora": 17783, "learn cross lingual": 29355, "settings cross lingual": 50363, "terms automatic evaluation": 56266, "context aware models": 10590, "large scale multimodal": 28990, "improve overall performance": 24883, "source language english": 51778, "target language work": 54830, "dataset model outperforms": 12998, "10 percentage points": 49, "naive bayes model": 36365, "document level context": 15804, "features extensive experiments": 20579, "model proposed model": 34251, "language representation model": 28469, "bidirectional encoder representations": 7069, "encoder representations transformers": 17540, "language representation models": 28470, "representation models bert": 46557, "left right context": 29999, "pre trained bert": 41521, "trained bert model": 57682, "bert model fine": 6684, "model fine tuned": 33894, "models wide range": 35680, "conceptually simple empirically": 9956, "obtains new state": 38253, "generation paper propose": 22514, "using neural sequence": 60836, "perform ablation study": 40065, "automated metrics human": 5055, "human evaluation generated": 24148, "state art fully": 52617, "using semi supervised": 60922, "low resource indian": 31180, "language low resource": 28143, "challenging task natural": 8149, "different state art": 15079, "fashion experimental results": 20413, "paper explore use": 39366, "experiments method significantly": 19465, "paper proposes neural": 39549, "variational autoencoder vae": 61245, "gaussian mixture model": 22014, "incorporate contextual information": 25349, "sequence sequence approaches": 49980, "previous work propose": 42309, "empirical results model": 17344, "results model able": 47721, "model achieved accuracy": 33508, "level attention mechanism": 30067, "task low resource": 55195, "rich resource language": 48117, "resource language english": 47233, "demonstrate superiority method": 13986, "method state art": 32668, "existing word embeddings": 19171, "advanced deep learning": 1887, "methods pre trained": 32986, "accuracy paper introduces": 1021, "cross domain cross": 11814, "domain cross lingual": 16037, "experiments available https": 19360, "different linguistic features": 14981, "data augmentation techniques": 12163, "performance different tasks": 40295, "neural networks state": 37073, "networks state art": 36913, "shown state art": 50753, "task neural network": 55238, "language inference dataset": 28109, "models different datasets": 34915, "using large pre": 60758, "large pre trained": 28940, "language modeling lm": 28209, "publicly available large": 44345, "training time compared": 58300, "experimental results widely": 19323, "data demonstrate effectiveness": 12272, "improvements nlp tasks": 25087, "building large scale": 7452, "model trained dataset": 34469, "experimental results various": 19321, "able outperform state": 710, "achieved good results": 1236, "labeled data supervised": 27747, "data supervised learning": 12711, "tf idf based": 56991, "state art seq2seq": 52662, "multi label text": 35978, "attention mechanism allows": 4771, "mechanism allows model": 32098, "substantially outperforms strong": 53649, "strong baselines terms": 53018, "language models work": 28349, "fine tune language": 20949, "language models improve": 28262, "language models experiments": 28253, "source code data": 51745, "github com thunlp": 22713, "word sentence representations": 62305, "including named entity": 25279, "recognition speech tagging": 45539, "external knowledge resources": 19944, "resources paper propose": 47324, "achieved remarkable results": 1264, "state art architectures": 52582, "approach achieves new": 3401, "unsupervised machine translation": 59708, "term memory bilstm": 56246, "unsupervised pre training": 59722, "pre training large": 41581, "approach benchmark datasets": 3430, "outperform previous best": 38810, "terms f1 score": 56290, "sequence tagging model": 50003, "comparable results state": 9309, "propose data driven": 43348, "multi headed attention": 35968, "task demonstrate proposed": 55003, "approach outperforms strong": 3631, "model does use": 33787, "available state art": 5371, "quality generated texts": 44526, "alternative approach based": 2498, "conduct experiments different": 10045, "model new domain": 34127, "number parameters model": 38025, "achieves significantly better": 1371, "performance fine tuning": 40351, "elastic weight consolidation": 16959, "experiments proposed framework": 19495, "low resource translation": 31196, "information source sentence": 26095, "hybrid deep learning": 24315, "proposed approach shows": 43732, "different datasets paper": 14891, "used transfer learning": 60340, "different languages different": 14968, "cross lingual transferability": 11857, "languages high resource": 28685, "multiple natural language": 36254, "effective method improve": 16672, "state art approach": 52580, "long tail distribution": 31036, "extensive experiments datasets": 19884, "models achieve better": 34667, "proposed model able": 43843, "bert devlin et": 6641, "peters et al": 40801, "radford et al": 44848, "training data training": 58044, "map natural language": 31796, "using beam search": 60586, "challenge work propose": 8024, "improvements natural language": 25085, "previous approaches model": 42240, "approach outperforms prior": 3628, "train end end": 57586, "end end automatic": 17641, "end automatic speech": 17618, "cross domain settings": 11819, "task text classification": 55435, "state art datasets": 52604, "methods low resource": 32932, "truly low resource": 58825, "learning based nlp": 29540, "based nlp tasks": 5914, "text understanding tasks": 56830, "space experimental results": 51861, "experimental results methods": 19293, "word sentence level": 62303, "task zero shot": 55476, "poses great challenge": 41248, "model zero shot": 34553, "validate effectiveness method": 61175, "make good use": 31575, "paper presents neural": 39477, "novel approach multi": 37764, "achieve significant performance": 1194, "significant performance improvement": 50910, "non autoregressive models": 37637, "models attention based": 34735, "test set best": 56372, "machine translation abstractive": 31346, "modeling experimental results": 34574, "non expert users": 37654, "using multi layer": 60817, "human robot interaction": 24238, "based end end": 5702, "art performance english": 4335, "models recently achieved": 35411, "pre trained weights": 41564, "pre training data": 41574, "results f1 score": 47631, "text summarization question": 56798, "techniques machine learning": 56108, "problem short text": 42653, "domain paper explore": 16129, "approach achieves competitive": 3398, "outperforming baseline models": 38846, "graph based models": 23108, "faster inference time": 20439, "outperforms previous unsupervised": 38926, "based models paper": 5877, "english low resource": 17839, "end end systems": 17663, "classification sequence labeling": 8546, "suffer error propagation": 53764, "propose new framework": 43503, "new framework named": 37211, "demonstrate effectiveness efficiency": 13896, "neural networks hierarchical": 37051, "outperform existing approaches": 38794, "language modeling using": 28222, "open domain multi": 38423, "dataset demonstrate model": 12885, "evaluation natural language": 18661, "likelihood estimation mle": 30519, "neural networks gnns": 37050, "attention paper propose": 4805, "task existing methods": 55063, "existing methods rely": 19099, "conduct extensive analysis": 10050, "processing deep learning": 42867, "approaches cross lingual": 3790, "propose novel problem": 43556, "large scale labeled": 28982, "future research task": 21893, "task sequence tagging": 55364, "resource language low": 47235, "datasets results demonstrate": 13410, "learning based natural": 29539, "language processing model": 28416, "need large scale": 36577, "real world tasks": 45141, "introduce large scale": 26818, "text classification sequence": 56485, "approach multi task": 3606, "outperform competitive baselines": 38787, "present rule based": 42002, "settings paper propose": 50388, "cross lingual entity": 11835, "low resource target": 31193, "large scale generative": 28978, "supervised fine tuning": 53986, "compare performance model": 9355, "competitive baselines large": 9543, "approaches benchmark datasets": 3775, "training data learn": 58009, "language understanding recently": 28560, "end end methods": 17652, "weakly supervised model": 61867, "publicly available annotated": 44335, "evaluate performance models": 18483, "results model trained": 47726, "models large margin": 35165, "question natural language": 44740, "require large labeled": 46871, "labeled data train": 27749, "costly time consuming": 11605, "time consuming paper": 57135, "performance fully supervised": 40354, "available labeled data": 5316, "demonstrate large scale": 13930, "large scale unsupervised": 29007, "attention based transformer": 4726, "fine tuning training": 21030, "high quality results": 23784, "results real world": 47799, "processing nlp field": 42904, "real world problems": 45133, "proposed method conduct": 43813, "interpretable model agnostic": 26726, "model agnostic explanations": 33552, "achieved remarkable success": 1265, "dataset results method": 13070, "used data augmentation": 60136, "approach outperforms baselines": 3624, "understanding paper propose": 59379, "attention self attention": 4828, "extract relevant information": 19989, "model empirical results": 33810, "outperforming previous best": 38856, "models based deep": 34755, "using encoder decoder": 60674, "conditional text generation": 10007, "zhang et al": 63189, "large memory footprint": 28909, "art models terms": 4299, "existing training data": 19164, "natural language description": 36418, "approach achieves better": 3396, "acyclic graph dag": 1493, "benchmark datasets furthermore": 6454, "approach achieves high": 3400, "achieves high accuracy": 1332, "compared previous works": 9439, "present state art": 42023, "applied low resource": 3281, "data training data": 12742, "makes predictions based": 31633, "cross lingual monolingual": 11844, "model based convolutional": 33602, "task previous works": 55292, "provide large scale": 44097, "active research area": 1479, "method widely used": 32707, "commonly used text": 9228, "multi task settings": 36028, "multilingual pre training": 36111, "pre training fine": 41577, "training fine tuning": 58107, "domain specific datasets": 16172, "method natural language": 32586, "work propose deep": 62773, "end neural models": 17690, "document work propose": 15847, "new question answering": 37296, "test set outperforming": 56374, "paper explore task": 39365, "user generated reviews": 60418, "analysis paper propose": 2712, "art machine learning": 4279, "document level representations": 15808, "information retrieval task": 26068, "results transfer learning": 47890, "domain specific dataset": 16171, "previously proposed approaches": 42341, "downstream tasks like": 16359, "tasks like speech": 55728, "unlike previous models": 59601, "based bi lstm": 5606, "word embeddings glove": 62170, "pretrained language modeling": 42159, "data target task": 12723, "based semantic parsing": 6015, "experimental results outperforms": 19300, "models trained large": 35614, "decision making tasks": 13566, "new task named": 37335, "pointer generator network": 41057, "automatic evaluation human": 5082, "majority class baseline": 31529, "understanding question answering": 59388, "text classification models": 56477, "machine learning natural": 31327, "human like language": 24200, "wide variety nlp": 61985, "features best performing": 20532, "standard sequence sequence": 52528, "conditional variational autoencoder": 10010, "variational autoencoder based": 61244, "using automatic human": 60574, "proposed framework able": 43779, "quantitative qualitative analysis": 44625, "semi supervised text": 49467, "self training framework": 49224, "large scale unlabeled": 29006, "existing question answering": 19135, "key value memory": 27341, "conducted large scale": 10088, "terms automatic metrics": 56268, "metrics human evaluations": 33171, "reasoning natural language": 45209, "achieved new state": 1254, "consistently improves performance": 10297, "classification model based": 8495, "automated machine learning": 5049, "transfer learning paradigm": 58392, "effectiveness transfer learning": 16820, "transfer learning natural": 58389, "present open source": 41983, "model based bert": 33601, "model available https": 33594, "com google research": 9013, "tasks real world": 55835, "automatic evaluation metric": 5086, "large scale machine": 28984, "self attention models": 49183, "supervised classification task": 53968, "information language models": 25941, "language models existing": 28251, "language model named": 28179, "benchmark demonstrate effectiveness": 6462, "beam search decoding": 6368, "high computational complexity": 23714, "data code available": 12211, "multi task deep": 36018, "task deep neural": 54996, "transformer language model": 58491, "challenging problem requires": 8129, "structures paper propose": 53192, "application deep learning": 3163, "quality experimental results": 44521, "paper propose generate": 39513, "human annotated dataset": 24098, "datasets model achieves": 13335, "bert pre training": 6705, "pre training domain": 41575, "training domain specific": 58073, "domain specific fine": 16176, "specific fine tuning": 52086, "approach highly effective": 3555, "performance commonly used": 40245, "pre trained self": 41554, "sequence generation model": 49926, "competitive better performance": 9546, "demonstrate current state": 13887, "methods perform poorly": 32980, "super characters method": 53922, "paper propose semantic": 39532, "used wide range": 60351, "neural networks language": 37053, "improvements low resource": 25079, "latent embedding space": 29125, "self attention model": 49182, "transfer learning large": 58383, "language models used": 28341, "training testing data": 58296, "extensive empirical study": 19867, "neural network structure": 37026, "network model trained": 36766, "public datasets different": 44316, "demonstrate superior performance": 13983, "superior performance model": 53937, "training data recent": 58030, "achieves significant consistent": 1365, "significant consistent improvements": 50859, "model able predict": 33494, "word overlap metrics": 62258, "evaluation paper propose": 18667, "approach code available": 3447, "training data human": 58001, "new training data": 37350, "training data study": 58040, "data study propose": 12703, "supervised learning approach": 53996, "manually annotated training": 31762, "multinomial naive bayes": 36162, "random forest rf": 44878, "sequence sequence generation": 49987, "apply proposed model": 3349, "employs attention mechanism": 17405, "extraction event extraction": 20063, "pre training framework": 41578, "conducted extensive experiments": 10085, "task specific data": 55390, "data large scale": 12455, "domain specific models": 16181, "near perfect accuracy": 36510, "embeddings experimental results": 17132, "approach zero shot": 3747, "consistently outperforms previous": 10305, "pre trained neural": 41549, "language model text": 28198, "languages propose method": 28758, "approaches end end": 3808, "achieve sota results": 1202, "used state art": 60313, "used prior work": 60275, "large scale high": 28979, "scale high quality": 48577, "neural networks widely": 37080, "propose new metrics": 43507, "extensive experiments various": 19904, "datasets proposed model": 13384, "shot text classification": 50652, "transfer learning experiments": 58379, "language models pretrained": 28303, "models pretrained large": 35352, "language model objective": 28180, "learning models proposed": 29759, "graph attention network": 23097, "results datasets demonstrate": 47570, "static word embeddings": 52728, "training data domain": 57988, "scaled dot product": 48643, "dot product attention": 16319, "recent natural language": 45323, "various downstream tasks": 61334, "techniques natural language": 56115, "processing machine learning": 42886, "existing methods focus": 19095, "datasets english chinese": 13249, "effectively improve performance": 16740, "embedding based methods": 17017, "based methods knowledge": 5848, "based methods use": 5852, "based model trained": 5862, "step natural language": 52817, "language processing problems": 28424, "2019 shared task": 285, "training state art": 58271, "sentence level cross": 49583, "information pre trained": 26014, "perform fine grained": 40110, "inter sentence dependencies": 26587, "approach pre trained": 3645, "tasks source code": 55900, "data driven method": 12303, "converting natural language": 11079, "transfer learning framework": 58381, "domain pre training": 16135, "domain fine tuning": 16072, "fine tuning target": 21025, "able improve performance": 701, "propose novel learning": 43544, "small training datasets": 51509, "sequence sequence based": 49984, "training data target": 58041, "topological data analysis": 57468, "extensive experiments using": 19903, "systems perform poorly": 54587, "new evaluation framework": 37192, "cold start problem": 8929, "macro averaged f1": 31405, "task previous work": 55291, "directly fine tuning": 15317, "fine tuning pretrained": 21012, "diverse nlp tasks": 15710, "fine grained evaluation": 20935, "released https github": 46176, "paper presents unsupervised": 39485, "data driven manner": 12302, "comparison different approaches": 9494, "provides better performance": 44185, "domain specific task": 16186, "require large amounts": 46869, "context document level": 10616, "encoder decoder transformer": 17508, "decoder transformer model": 13618, "pre processing techniques": 41512, "language models successful": 28326, "set nlp tasks": 50202, "task specific models": 55402, "language model pretraining": 28188, "performance language model": 40407, "results main findings": 47711, "sub tasks sub": 53535, "tasks sub task": 55916, "widely used benchmarks": 62012, "imdb movie reviews": 24571, "downstream task performance": 16351, "existing studies focus": 19151, "downstream tasks especially": 16355, "systems achieve high": 54421, "text classification natural": 56478, "types word embeddings": 59127, "propose method generating": 43455, "large scale empirical": 28973, "including state art": 25304, "model achieves good": 33517, "large scale annotated": 28960, "pretrained language model": 42158, "lack high quality": 27892, "high quality large": 23781, "quality large scale": 44544, "code pretrained models": 8846, "pretrained models available": 42170, "models available https": 34748, "github com allenai": 22695, "pre trained monolingual": 41546, "model achieves results": 33523, "text generation problem": 56602, "state art points": 52648, "based observation propose": 5922, "training data low": 58013, "data low resource": 12471, "bert state art": 6722, "tasks multiple datasets": 55756, "achieve comparable results": 1124, "achieve strong results": 1207, "model outperform state": 34153, "existing methods mainly": 19098, "using self attention": 60917, "tasks including text": 55683, "including text classification": 25310, "zero shot text": 63182, "effectively experimental results": 16733, "zero shot scenario": 63174, "specific language models": 52101, "described natural language": 14213, "establishes new state": 18361, "knowledge distillation method": 27442, "text generation tasks": 56605, "https www youtube": 24063, "www youtube com": 63026, "semantic similarity tasks": 49349, "attention mechanism experiments": 4776, "present new corpus": 41960, "heuristic rule based": 23630, "important challenging problem": 24707, "previous studies mainly": 42289, "process paper propose": 42815, "propose multi modal": 43477, "model real world": 34276, "significant improvement previous": 50879, "different datasets different": 14890, "multilingual multi task": 36101, "languages fine tuning": 28676, "fine tuning datasets": 20983, "best knowledge existing": 6771, "fine tuning bert": 20981, "analysis experimental results": 2663, "datasets code available": 13174, "available https www": 5310, "model test time": 34453, "work publicly available": 62798, "models trained existing": 35610, "methods significantly outperform": 33041, "downstream text classification": 16371, "traditional word embeddings": 57557, "bi directional language": 7000, "directional language model": 15280, "language model elmo": 28161, "et al proposed": 18412, "text knowledge graph": 56637, "sequence sequence problem": 49994, "different types information": 15110, "large room improvement": 28957, "promising directions future": 43165, "improve model robustness": 24874, "received relatively little": 45265, "data collection pipeline": 12220, "analysis widely used": 2793, "learning ml models": 29741, "models results indicate": 35457, "framework significantly outperforms": 21601, "f1 score improvement": 20222, "multiple pre trained": 36266, "pre trained deep": 41526, "domain adversarial training": 16015, "neural networks paper": 37062, "networks paper propose": 36889, "language models nlm": 28289, "methods require large": 33018, "semi supervised model": 49464, "experiments method achieves": 19463, "standard nlp tasks": 52515, "present unified framework": 42048, "novel loss function": 37858, "methods achieve new": 32729, "state art adversarial": 52577, "modern natural language": 35714, "performances state art": 40648, "sequential decision making": 50038, "new research directions": 37303, "results suggest method": 47867, "et al 2019": 18406, "et al 2019b": 18408, "word sentence levels": 62304, "improved performance downstream": 24958, "tasks code available": 55541, "minimum description length": 33306, "english spanish french": 17880, "classification task experiments": 8564, "language cross lingual": 28013, "cross lingual setting": 11851, "neural networks multi": 37059, "networks multi task": 36878, "strong baselines future": 53013, "use long short": 59938, "penn discourse treebank": 40023, "labels distant supervision": 27816, "data proposed method": 12573, "performance extensive experiments": 40340, "extensive experiments standard": 19901, "knowledge high resource": 27517, "propose method learn": 43456, "standard benchmark dataset": 52471, "data augmentation technique": 12162, "using distant supervision": 60660, "positive negative examples": 41285, "deep language models": 13696, "language models struggle": 28324, "underlying language model": 59269, "ability language models": 616, "task specific fine": 55395, "fine tuning using": 21032, "availability training data": 5257, "multilabel text classification": 36056, "model based approach": 33599, "state art alternatives": 52579, "posts social media": 41373, "achieved competitive performance": 1228, "learn domain invariant": 29363, "introduce novel model": 26848, "information propose new": 26030, "propose new automatic": 43498, "diverse natural language": 15708, "investigate data augmentation": 26948, "extensive experiments method": 19892, "method achieves consistent": 32364, "improvements existing approaches": 25073, "present novel language": 41973, "chinese natural language": 8313, "masked language modeling": 31864, "tasks require reasoning": 55855, "require reasoning multiple": 46886, "models code available": 34821, "leading poor performance": 29296, "propose weakly supervised": 43705, "adversarial training method": 1992, "different types models": 15113, "code url https": 8866, "github com microsoft": 22706, "conduct detailed analysis": 10037, "fine tuning models": 21001, "models like bert": 35187, "empirical results benchmark": 17340, "based pretrained language": 5949, "pretrained language models": 42160, "compared human performance": 9414, "art performance multiple": 4337, "purely data driven": 44396, "achieves high performance": 1333, "paper address task": 39254, "world low resource": 62948, "task models trained": 55221, "propose multitask learning": 43485, "art nlp models": 4317, "high quality text": 23786, "graph based representation": 23111, "achieve bleu score": 1120, "multi hop question": 35970, "hop question answering": 24002, "task binary classification": 54941, "loss function training": 31095, "human performance tasks": 24215, "using supervised learning": 60970, "classifier trained using": 8606, "bert fine tuning": 6662, "english spanish english": 17879, "create training data": 11719, "shared task evaluation": 50499, "variational auto encoder": 61242, "auto encoder based": 5015, "state art relation": 52654, "representations transformers bert": 46777, "transformers bert model": 58524, "different bert models": 14855, "world use case": 62966, "based fine tuning": 5736, "fine tuning approaches": 20979, "machine learning components": 31317, "large scale semantic": 28999, "models based encoder": 34756, "proposed method benchmark": 43810, "method benchmark datasets": 32403, "proposed approach able": 43721, "zero shot transfer": 63183, "shot transfer learning": 50655, "understanding generation tasks": 59348, "pre trained using": 41563, "language modeling tasks": 28220, "transformer language models": 58492, "language models require": 28315, "specifically propose new": 52223, "significant improvements strong": 50890, "art performance chinese": 4329, "proposed method applied": 43808, "classification textual entailment": 8575, "art baselines paper": 4223, "graph neural network": 23153, "existing methods limited": 19097, "attracted wide attention": 4890, "diverse real world": 15714, "teacher student framework": 55996, "language processing researchers": 28430, "fine grained knowledge": 20937, "provided natural language": 44168, "models able capture": 34654, "curriculum learning approach": 12044, "propose sequence sequence": 43625, "significantly outperforms current": 51001, "pre trained text": 41559, "understanding nlu natural": 59375, "nlu natural language": 37566, "improve generalization ability": 24860, "benchmarks natural language": 6535, "language understanding commonsense": 28547, "performance language models": 40408, "bert language model": 6668, "level contextual representations": 30084, "models trained language": 35613, "trained language modeling": 57760, "art results standard": 4385, "best knowledge paper": 6772, "release pre trained": 46164, "trained language representation": 57762, "network cnn based": 36720, "based model called": 5858, "using neural language": 60833, "bert pre trained": 6704, "performance various nlp": 40624, "existing pre trained": 19128, "external knowledge paper": 19943, "source code paper": 51750, "code paper obtained": 8842, "paper obtained https": 39431, "masked language models": 31866, "language models specific": 28321, "language pre trained": 28383, "art performance wide": 4353, "comprehension natural language": 9772, "detection sentiment analysis": 14523, "reduces training time": 45700, "time pre trained": 57196, "improve prediction performance": 24908, "performance introduce novel": 40399, "datasets end end": 13247, "address data sparsity": 1754, "sentence level semantic": 49591, "modern nlp systems": 35718, "require high quality": 46861, "high quality annotated": 23769, "trained domain data": 57713, "supervised learning task": 54007, "human written ones": 24260, "low resource neural": 31185, "resource neural machine": 47256, "provide valuable insights": 44153, "conduct user study": 10069, "using semantic similarity": 60920, "introduce novel framework": 26845, "neural network propose": 37022, "target domain data": 54813, "multi head self": 35966, "head self attention": 23499, "received significant attention": 45267, "task specific model": 55401, "pre train language": 41517, "train language model": 57598, "downstream tasks work": 16369, "better task specific": 6976, "language modeling pre": 28216, "large batch sizes": 28850, "model agnostic meta": 33554, "agnostic meta learning": 2092, "meta learning maml": 32343, "machine translation datasets": 31354, "datasets demonstrate superiority": 13218, "available http github": 5305, "compare proposed method": 9361, "unlike prior work": 59608, "lstm based approach": 31244, "new research direction": 37302, "effective cross lingual": 16640, "text style transfer": 56794, "modern machine learning": 35711, "challenging work propose": 8164, "solve problems propose": 51689, "datasets paper present": 13363, "best published results": 6811, "using training data": 60996, "speech tagging pos": 52303, "controllable text generation": 10980, "neural network sentence": 37025, "pre training language": 41580, "applying transfer learning": 3381, "learning large scale": 29700, "datasets empirical results": 13243, "superiority proposed approach": 53954, "datasets paper describes": 13361, "english german language": 17815, "powerful pre trained": 41442, "quantitative qualitative results": 44627, "method substantially outperforms": 32673, "substantially outperforms existing": 53646, "model able learn": 33492, "research cross lingual": 47008, "inference experimental results": 25655, "time series data": 57211, "experimental results multiple": 19297, "benchmark datasets method": 6456, "consistent improvements compared": 10279, "compared baseline methods": 9384, "joint learning framework": 27176, "language modeling performance": 28215, "used computer vision": 60123, "training zero shot": 58321, "decoder pre training": 13611, "results proposed framework": 47783, "release large scale": 46156, "extensive experiment results": 19871, "compared strong baseline": 9461, "strong baseline models": 53006, "models shown remarkable": 35501, "human machine interaction": 24206, "compared models trained": 9422, "applications real world": 3243, "rule based model": 48384, "quantitative qualitative experiments": 44626, "typically require large": 59155, "domain specific data": 16170, "approach state art": 3704, "language model pre": 28185, "zero shot cross": 63158, "shot cross lingual": 50609, "used fine tune": 60192, "fine tune model": 20950, "detailed analysis reveals": 14414, "commonly used neural": 9227, "transformer based seq2seq": 58473, "learning zero shot": 29949, "provide better understanding": 44021, "language models typically": 28338, "language models different": 28245, "current best performing": 11964, "existing benchmark datasets": 19041, "shows significant performance": 50804, "employ state art": 17392, "shows promising performance": 50796, "tasks single model": 55894, "accordingly propose novel": 875, "kullback leibler kl": 27681, "leibler kl divergence": 30015, "models evaluation metrics": 34980, "improves previous state": 25152, "hierarchical neural network": 23684, "propose coarse fine": 43321, "present novel end": 41971, "sentence level word": 49597, "improvement current state": 25001, "deep learning paper": 13717, "based model pre": 5861, "domain unlabeled data": 16222, "baselines low resource": 6278, "state art terms": 52683, "tasks demonstrate approach": 55576, "recently state art": 45468, "previous work proposed": 42310, "methods cross lingual": 32806, "tasks speech pos": 55905, "state art discriminative": 52606, "adapt pre trained": 1508, "propose effective approach": 43361, "label text classification": 27732, "classification question answering": 8527, "results low resource": 47705, "large collections text": 28859, "using adversarial learning": 60555, "datasets state art": 13443, "significant performance boost": 50903, "corpus news articles": 11390, "build general purpose": 7401, "training data models": 58018, "domains paper propose": 16282, "low resource cross": 31174, "resource cross lingual": 47218, "information retrieval tasks": 26069, "content paper propose": 10545, "paper propose study": 39536, "correlations human judgments": 11537, "provide thorough analysis": 44145, "built state art": 7491, "art nlp techniques": 4319, "task requires model": 55340, "downstream classification tasks": 16336, "form knowledge graph": 21324, "existing approaches typically": 19032, "space pre trained": 51884, "learn better representations": 29349, "framework extensive experiments": 21519, "feature based neural": 20479, "pairs english german": 39184, "open source publicly": 38456, "model cross lingual": 33732, "paper provides comprehensive": 39559, "provides comprehensive overview": 44189, "achieve impressive performance": 1162, "address problems propose": 1793, "curriculum learning cl": 12045, "trained neural networks": 57829, "domain transfer learning": 16218, "transfer learning fine": 58380, "learning fine tuning": 29649, "teacher student models": 55997, "training data automatically": 57977, "make better use": 31545, "training data applying": 57974, "ablation studies demonstrate": 658, "shown promising performance": 50742, "neural architecture search": 36932, "task learning based": 55171, "bert based baseline": 6618, "experiments https github": 19442, "pipeline based approach": 40894, "universal sentence encoder": 59547, "task specific information": 55396, "model proposed paper": 34252, "learning method based": 29727, "supervised learning model": 54002, "features experimental results": 20576, "suitable real time": 53859, "cross lingual models": 11843, "problem paper proposes": 42622, "challenging task work": 8155, "task work propose": 55474, "propose data augmentation": 43347, "bert base model": 6614, "use self attention": 60004, "performance strong baseline": 40578, "strong baseline model": 53005, "knowledge distillation kd": 27441, "leverages state art": 30316, "using weakly supervised": 61022, "perform systematic comparison": 40151, "different types errors": 15109, "absolute improvement state": 745, "svm random forest": 54239, "model makes use": 34088, "question answering requires": 44708, "yang et al": 63045, "language models capable": 28237, "large language models": 28899, "synthetic data generation": 54372, "trained publicly available": 57847, "deep pre trained": 13743, "representations learned large": 46706, "unlabeled text data": 59583, "time consuming manual": 57134, "large scale sentence": 29000, "effectiveness method conduct": 16791, "improving model performance": 25186, "performance pre trained": 40488, "chinese pre trained": 8318, "language models propose": 28307, "models propose simple": 35372, "baselines including bert": 6272, "performances nlp tasks": 40644, "resources available https": 47293, "autoregressive language modeling": 5218, "similarity based methods": 51085, "fine grained labels": 20938, "model based transformer": 33607, "language agnostic sentence": 27957, "mitigate problem propose": 33390, "entity recognition task": 18134, "experimental evaluations proposed": 19265, "proposed approach performs": 43730, "memory lstm gated": 32261, "lstm gated recurrent": 31263, "achieved significant improvements": 1269, "alleviate problems propose": 2419, "novel framework named": 37828, "paper describes novel": 39326, "describes novel approach": 14229, "processing nlp text": 42915, "translation text summarization": 58691, "alleviate issues propose": 2411, "head attention mechanism": 23496, "propose novel self": 43559, "models trained data": 35603, "computationally expensive paper": 9875, "expensive paper propose": 19215, "shows competitive performance": 50770, "code available github": 8793, "language model finetuning": 28166, "dataset state art": 13101, "attain state art": 4669, "release code models": 46147, "code models https": 8836, "information extraction methods": 25862, "text experiments demonstrate": 56567, "learning models including": 29755, "linear support vector": 30671, "language understanding evaluation": 28549, "performance work propose": 40633, "work propose effective": 62774, "language models various": 28345, "fine tuning process": 21014, "nlp computer vision": 37476, "application machine learning": 3167, "learning ml techniques": 29742, "vector machines svm": 61456, "models perform best": 35310, "results effectiveness approach": 47603, "require large scale": 46873, "large scale manually": 28985, "scale manually annotated": 48595, "paper propose using": 39541, "self attention module": 49184, "f1 score 81": 20209, "human human conversations": 24169, "domain data propose": 16040, "resources low resource": 47314, "pre trained cross": 41525, "performance training data": 40606, "proposed model achieved": 43845, "fine grained coarse": 20931, "grained coarse grained": 23028, "outperforms zero shot": 38963, "zero shot fine": 63163, "systems state art": 54640, "additionally introduce novel": 1723, "social media paper": 51580, "transfer learning tasks": 58397, "sentence level embeddings": 49585, "case studies demonstrate": 7797, "models trained end": 35607, "models neural network": 35254, "generative language models": 22593, "language models like": 28275, "language model generate": 28167, "relation extraction aims": 45973, "task knowledge graph": 55154, "help improve performance": 23571, "benchmark datasets including": 6455, "approach effectively improve": 3502, "language models evaluate": 28250, "language directions english": 28030, "lingual language model": 30708, "model pre training": 34219, "propose novel graph": 43538, "novel graph neural": 37835, "social media like": 51578, "improving cross lingual": 25174, "languages demonstrate proposed": 28636, "morpho syntactic features": 35837, "greatly improve performance": 23230, "work propose end": 62776, "neural networks recurrent": 37069, "networks recurrent neural": 36904, "propose sentence level": 43622, "training data multi": 58019, "data multi task": 12498, "propose novel transformer": 43571, "novel transformer based": 37943, "transformer based architecture": 58455, "model achieves higher": 33518, "achieves higher accuracy": 1335, "using bert based": 60589, "training data generated": 57998, "compared existing datasets": 9407, "trained multi task": 57815, "far human performance": 20401, "making informed decisions": 31659, "recently attracted lot": 45410, "attention mechanism proposed": 4780, "prior work focused": 42421, "based prior work": 5953, "transfer learning approaches": 58377, "comments social media": 9148, "generation nlg tasks": 22508, "social media based": 51568, "generation experimental results": 22457, "model generate natural": 33931, "pre training method": 41582, "data model size": 12490, "different types noise": 15114, "semantic structure text": 49356, "multiple instance learning": 36230, "instance learning mil": 26426, "datasets different sizes": 13231, "codes publicly available": 8880, "recently pre trained": 45449, "training large scale": 58150, "current pre training": 12001, "pre training tasks": 41598, "trained models released": 57808, "models released https": 35426, "large neural models": 28918, "transformer based sequence": 58474, "cross lingual embedding": 11833, "end propose new": 17701, "annotated data training": 2887, "extraction knowledge graph": 20075, "state art work": 52693, "automatic natural language": 5113, "uses attention mechanism": 60492, "trained fine tuning": 57733, "paper available https": 39278, "neural networks effective": 37044, "data natural language": 12504, "paper present dataset": 39447, "nlp deep learning": 37481, "models requires large": 35445, "general purpose language": 22086, "purpose language models": 44404, "fine tuned pre": 20966, "tuned pre trained": 58883, "trained neural models": 57827, "rich semantic information": 48120, "learning based language": 29534, "level natural language": 30166, "layer recurrent neural": 29206, "incorporate domain knowledge": 25352, "model performs competitively": 34204, "current deep learning": 11970, "sequence sequence deep": 49985, "learning models perform": 29758, "models perform task": 35317, "used train models": 60335, "specifically pre train": 52220, "real world situations": 45139, "demonstrate approach achieve": 13866, "hand crafted linguistic": 23388, "model fine grained": 33892, "word representations bert": 62288, "requires reasoning multiple": 46949, "source codes available": 51755, "establish strong baselines": 18350, "provided https github": 44163, "requires deep understanding": 46923, "prior work proposed": 42424, "transformer based encoder": 58459, "extensive experiments popular": 19896, "improvement strong baselines": 25029, "collect large scale": 8947, "data expensive obtain": 12340, "self supervised tasks": 49218, "pseudo training data": 44283, "propose self supervised": 43615, "self supervised pre": 49215, "supervised pre training": 54033, "network pre trained": 36785, "experimental results commonly": 19276, "results commonly used": 47541, "training data method": 58016, "model combines multi": 33668, "hidden test set": 23650, "footnote url https": 21282, "language model use": 28201, "state art multi": 52635, "language models fail": 28254, "demonstrated state art": 14020, "art performance various": 4352, "fine tuned bert": 20958, "tasks fine tuned": 55644, "tuned bert model": 58868, "improve performance low": 24891, "models outperforms state": 35292, "hierarchical attention network": 23659, "used train evaluate": 60333, "level f1 score": 30118, "previous works usually": 42323, "solve low resource": 51682, "accuracy low resource": 1002, "conduct empirical study": 10039, "inspire future research": 26403, "existing methods generate": 19096, "terms automatic human": 56267, "human like responses": 24201, "language model proposed": 28190, "various nlp applications": 61373, "applications existing methods": 3206, "results experimental results": 47626, "compared rule based": 9449, "gpt language model": 22982, "adversarial networks gan": 1981, "based graph based": 5761, "contextualized word embedding": 10813, "recent years existing": 45385, "pre trained english": 41532, "model generate high": 33930, "processing nlp research": 42910, "weighted f1 score": 61929, "datasets recent years": 13395, "task deep learning": 54995, "masked language model": 31863, "results demonstrate models": 47580, "pre training approaches": 41568, "fine tuning stage": 21020, "performance strong baselines": 40579, "using task specific": 60979, "popular sequence sequence": 41186, "provide depth analysis": 44048, "using transformer based": 61000, "transformer based language": 58461, "case study shows": 7800, "proposed model improves": 43852, "currently state art": 12039, "proposed end end": 43765, "end end approaches": 17637, "pipeline end end": 40899, "publicly available paper": 44348, "dataset source code": 13095, "proposed method compared": 43812, "language models use": 28340, "second language learners": 49010, "method achieves significant": 32366, "language models bert": 28235, "knowledge distillation approach": 27440, "knowledge distillation methods": 27443, "language models specifically": 28322, "fine tuning technique": 21028, "fine tuning methods": 20999, "experimental results sentiment": 19311, "results sentiment analysis": 47826, "neural attention mechanism": 36935, "news articles manually": 37386, "recent developments neural": 45305, "human written text": 24263, "human written texts": 24264, "model https github": 33964, "propose new data": 43499, "deep generative models": 13694, "syntactic structures sentences": 54331, "correlation human judgment": 11523, "state art summarization": 52673, "data https github": 12409, "using cross lingual": 60637, "character level embeddings": 8208, "levels experimental results": 30239, "existing approaches consider": 19026, "gains state art": 21944, "models work propose": 35689, "network rnn based": 36797, "auto encoder vae": 5016, "consistently outperform existing": 10300, "large scale language": 28983, "based models perform": 5878, "models perform significantly": 35316, "zero shot semantic": 63176, "fine coarse grained": 20923, "based models shown": 5880, "models demonstrate effectiveness": 34893, "sample training data": 48459, "tasks finally discuss": 55641, "series state art": 50070, "results standard benchmark": 47854, "graph representation learning": 23162, "accuracy natural language": 1013, "incorporating external knowledge": 25386, "attention mechanism using": 4783, "language modeling techniques": 28221, "context language models": 10665, "automatic evaluation measures": 5083, "representations work propose": 46793, "art pretrained language": 4360, "entity recognition tasks": 18135, "models trained limited": 35615, "code data https": 8800, "cross lingual pre": 11846, "lingual pre training": 30719, "propose new pre": 43512, "new pre training": 37287, "pre training model": 41584, "time step experiments": 57225, "models encoder decoder": 34964, "language models named": 28287, "tasks including named": 55680, "achieved strong performance": 1276, "dataset document level": 12901, "model document level": 33783, "document level graph": 15806, "based reading comprehension": 5975, "art neural architectures": 4309, "proposed approach consistently": 43725, "model based graph": 33604, "graph attention networks": 23098, "tackle challenge paper": 54698, "conduct comprehensive experiments": 10033, "dataset results demonstrate": 13069, "multilingual bert mbert": 36066, "different cross lingual": 14883, "small number labeled": 51489, "recent years studies": 45397, "new evaluation metric": 37194, "propose novel automatic": 43525, "deep learning algorithms": 13701, "pretraining language models": 42207, "language models fine": 28255, "models fine tuning": 35032, "improvements strong baseline": 25103, "expert annotated dataset": 19570, "pre training step": 41593, "pre training phase": 41588, "language model gpt": 28169, "original training data": 38734, "recent work neural": 45371, "downstream applications existing": 16333, "standard encoder decoder": 52488, "models achieved impressive": 34678, "automatic evaluation shows": 5089, "language models generally": 28256, "models generally trained": 35056, "wide range topics": 61976, "multi task framework": 36020, "vision language tasks": 61639, "transfer learning paper": 58391, "external knowledge graphs": 19941, "novel knowledge aware": 37847, "based graph convolutional": 5762, "bert based models": 6624, "based models achieved": 5867, "semantically similar sentences": 49393, "used datasets demonstrate": 60139, "language inference tasks": 28114, "large scale pretrained": 28995, "scale pretrained language": 48615, "trained supervised manner": 57887, "wide range downstream": 61967, "language modeling objectives": 28213, "task learning setting": 55180, "bert masked language": 6680, "state art using": 52688, "propose simple efficient": 43634, "achieving comparable performance": 1399, "developments natural language": 14714, "pre trained transformer": 41560, "pre trained transformers": 41561, "evaluate state art": 18508, "transformer models bert": 58499, "models bert led": 34774, "outperforming strong baselines": 38863, "preliminary results suggest": 41805, "publicly release code": 44360, "prior work largely": 42423, "work largely focused": 62706, "used pre training": 60265, "tasks recent years": 55840, "aware attention mechanism": 5442, "better overall performance": 6925, "investigate different approaches": 26952, "performing model achieves": 40682, "achieves macro f1": 1344, "supervised models trained": 54022, "speech tagging dependency": 52301, "essential natural language": 18331, "sentence prediction nsp": 49622, "method consistently improves": 32437, "achieve best performance": 1113, "augmenting training set": 4991, "performance previous work": 40498, "bert bidirectional encoder": 6631, "tasks masked language": 55742, "pre training task": 41597, "bert base bert": 6613, "base bert large": 5539, "specifically proposed model": 52226, "language modeling language": 28208, "pre trained domain": 41528, "training pre trained": 58211, "large scale models": 28986, "self supervised language": 49209, "models trained standard": 35621, "entropy loss function": 18163, "meta learning method": 32344, "results fine grained": 47636, "detection shared task": 14525, "models based pre": 34757, "fine tune pre": 20953, "tune pre trained": 58862, "bert models trained": 6689, "models generalization ability": 35053, "generalization ability models": 22116, "benchmark future research": 6470, "paper propose fine": 39510, "propose fine grained": 43389, "ground truth label": 23255, "language models promising": 28306, "pretrained cross lingual": 42151, "resource languages propose": 47245, "train fine tune": 57591, "model language model": 34037, "using language model": 60753, "language model experimental": 28164, "transformer based models": 58465, "models fine tuned": 35031, "fine tuned task": 20970, "text mining information": 56662, "approach transfer learning": 3727, "detection low resource": 14498, "applications work propose": 3261, "methods zero shot": 33107, "data existing methods": 12337, "sources knowledge bases": 51833, "task f1 score": 55076, "proposed data augmentation": 43751, "et al 2019a": 18407, "significant performance degradation": 50905, "challenge propose novel": 8010, "score test set": 48879, "set low resource": 50188, "does require labeled": 15974, "language processing present": 28421, "speech translation st": 52312, "general world knowledge": 22099, "effective way improve": 16713, "improve neural machine": 24878, "pre training transformer": 41601, "using maximum likelihood": 60793, "training transformer based": 58306, "cross lingual representations": 11848, "simple efficient approach": 51162, "based publicly available": 5965, "trained bert models": 57683, "competitive results state": 9563, "comprehension mrc task": 9770, "monte carlo dropout": 35827, "evaluate approach using": 18439, "shared semantic space": 50488, "neural networks transformer": 37076, "improvements compared state": 25060, "representation model bert": 46553, "parameters pre trained": 39717, "significantly outperforms bert": 50999, "large amounts high": 28836, "amounts high quality": 2549, "parallel corpora training": 39643, "conditional language model": 9997, "dataset introduce novel": 12970, "inference nli models": 25677, "datasets diverse domains": 13235, "natural language present": 36438, "multiple downstream tasks": 36206, "aware language models": 5455, "trained models experiments": 57798, "data model achieves": 12489, "code switched language": 8859, "based experimental results": 5716, "experimental results language": 19290, "propose novel context": 43527, "novel context aware": 37789, "results approach improves": 47505, "strong inductive bias": 53035, "natural language queries": 36442, "previous works focus": 42319, "word representations based": 62287, "statistical significance testing": 52763, "facilitate cross lingual": 20264, "training significantly improves": 58260, "training paper propose": 58202, "obtain better performance": 38163, "words word embedding": 62548, "fully connected neural": 21719, "connected neural network": 10178, "important research problem": 24763, "progress machine learning": 43104, "models different domains": 34916, "self attention transformer": 49187, "different neural architectures": 15006, "produces high quality": 43029, "performance existing models": 40332, "li et al": 30420, "generation aims generate": 22414, "results automatic human": 47514, "entities natural language": 18069, "recent advances language": 45283, "labeled data scarce": 27745, "graph attention mechanism": 23096, "large neural language": 28917, "models generate better": 35058, "github com miulab": 22707, "training knowledge distillation": 58141, "single model achieves": 51318, "recent progress neural": 45339, "model paper propose": 34176, "transformer model trained": 58497, "relations paper propose": 46050, "novel generative model": 37831, "experiments conducted datasets": 19383, "datasets real world": 13393, "generated state art": 22322, "study propose new": 53443, "based approach automatically": 5571, "text classification systems": 56486, "long training time": 31047, "large scale pretraining": 28996, "datasets indicate model": 13302, "indicate model significantly": 25528, "end end task": 17664, "model paper presents": 34175, "different information sources": 14955, "mutual information mi": 36348, "10 fold cross": 43, "low data regimes": 31140, "conduct systematic study": 10066, "low resource ones": 31186, "task aims extract": 54896, "models achieved remarkable": 34680, "token level sentence": 57299, "pre trained multilingual": 41547, "models recent years": 35409, "results demonstrate framework": 47577, "new objective function": 37272, "natural language models": 36435, "performance various tasks": 40625, "controlled text generation": 10987, "input natural language": 26303, "human written summaries": 24262, "named entity types": 36375, "autoregressive language model": 5217, "used ground truth": 60201, "achieve performance comparable": 1179, "success downstream tasks": 53700, "self attention layer": 49178, "language models achieve": 28226, "hierarchical multi task": 23681, "large pretrained language": 28942, "manually annotated datasets": 31760, "pre trained general": 41534, "trained general domain": 57736, "pre defined set": 41501, "traditional supervised learning": 57549, "substantially improve performance": 53637, "improve performance compared": 24886, "creates new state": 11737, "present detailed analysis": 41890, "task end end": 55048, "test time model": 56389, "representation input sentence": 46530, "methods nlp tasks": 32960, "performance shot learning": 40559, "new data augmentation": 37161, "models capable generating": 34803, "framework state art": 21605, "model capable generating": 33645, "closely related tasks": 8707, "train single model": 57635, "model inference time": 34001, "evaluation results reveal": 18705, "data time consuming": 12735, "learning based text": 29543, "based models like": 5873, "bert widely used": 6735, "based methods like": 5849, "performance real world": 40520, "state art generation": 52618, "multilingual bert model": 36067, "bert model trained": 6686, "crucial task natural": 11914, "widely used approaches": 62009, "augment training data": 4945, "models extensive experiments": 35005, "machine translated data": 31344, "target language data": 54824, "art deep neural": 4248, "neural network text": 37028, "models bert xlnet": 34777, "capable zero shot": 7633, "extractive question answering": 20138, "language models recently": 28314, "wide variety natural": 61984, "models need large": 35249, "word embeddings large": 62176, "downstream fine tuning": 16339, "tackle issues propose": 54707, "semantic parsing model": 49312, "proposed framework enables": 43784, "language models produce": 28305, "results paper propose": 47759, "learning model based": 29744, "transfer learning models": 58387, "pre trained masked": 41542, "trained masked language": 57784, "models semi supervised": 35478, "art pre trained": 4358, "task specific language": 55399, "roberta based models": 48218, "reduce model size": 45672, "impact model performance": 24600, "networks pre trained": 36894, "pre trained fine": 41533, "trained fine tuned": 57732, "fine tuned large": 20962, "rich resource languages": 48118, "model wide range": 34535, "make dataset publicly": 31561, "mechanism experimental results": 32117, "based transformer models": 6110, "techniques significantly improve": 56137, "new test set": 37342, "learning dl models": 29601, "achieving human level": 1413, "language models led": 28273, "language models introduce": 28265, "art performance natural": 4338, "results english german": 47611, "compared prior work": 9442, "models knowledge graph": 35155, "used evaluate quality": 60171, "siamese neural network": 50821, "contextual embeddings bert": 10766, "task analysis shows": 54901, "pre trained encoders": 41531, "pre trained encoder": 41530, "improvements competitive baselines": 25063, "large labeled datasets": 28895, "resource languages work": 47248, "transfer learning tl": 58399, "datasets demonstrate approach": 13211, "propose novel sequence": 43561, "level classification task": 30076, "application pre trained": 3176, "task state art": 55411, "models large scale": 35167, "achieve high quality": 1153, "higher f1 score": 23824, "novel training framework": 37940, "work develop new": 62632, "fine tuning procedure": 21013, "method conceptually simple": 32432, "multilingual language model": 36089, "models paper describes": 35299, "bias language models": 7030, "make predictions based": 31588, "text based models": 56456, "models outperform strong": 35288, "statistically significant differences": 52771, "determinantal point processes": 14551, "pre training knowledge": 41579, "present comprehensive survey": 41874, "compared competitive baseline": 9396, "problem low resource": 42599, "provide comprehensive evaluation": 44038, "models multilingual bert": 35238, "train transformer based": 57652, "based masked language": 5834, "experimental results compared": 19277, "art transformer based": 4433, "transformer based model": 58464, "results available https": 47516, "extraction natural language": 20088, "model fine tune": 33893, "transfer learning pre": 58393, "learning pre training": 29812, "absolute f1 points": 741, "exhibits state art": 19011, "official test sets": 38313, "neural networks text": 37074, "approach text classification": 3721, "domain adaptation framework": 15998, "lottery ticket hypothesis": 31128, "bert based model": 6623, "long form text": 31013, "conduct series experiments": 10061, "building recent advances": 7466, "model reconstruct original": 34286, "transformer based neural": 58468, "superior results compared": 53943, "single end end": 51301, "best end end": 6760, "uses deep neural": 60505, "art results paper": 4384, "settings zero shot": 50406, "multilingual pre trained": 36110, "models data augmentation": 34881, "better capture long": 6858, "building natural language": 7459, "art competitive results": 4239, "models based transformer": 34760, "based transformer architecture": 6107, "mining machine learning": 33317, "recently neural models": 45442, "tasks transfer learning": 55941, "tasks fine tuning": 55645, "models new dataset": 35257, "pretrained masked language": 42167, "language models mlms": 28283, "autoregressive language models": 5219, "low resource domains": 31178, "various benchmark datasets": 61310, "improves f1 score": 25130, "outperforms previous sota": 38924, "pre training text": 41600, "generative pre trained": 22603, "systems pre trained": 54593, "multi task setting": 36027, "pre training techniques": 41599, "thorough error analysis": 57058, "learning methods require": 29735, "baselines future research": 6265, "reach high performance": 45048, "semi supervised unsupervised": 49469, "supervised unsupervised learning": 54066, "models heavily rely": 35081, "data text classification": 12730, "extraction sentiment analysis": 20109, "training single model": 58262, "graph convolutional neural": 23126, "transformer based pre": 58471, "language models proven": 28309, "fine tuning downstream": 20986, "tuning downstream tasks": 58909, "power pre trained": 41430, "models shown effective": 35497, "simple effective strategy": 51158, "source code https": 51748, "bias training data": 7046, "generate large scale": 22216, "benefit downstream tasks": 6561, "downstream tasks sentiment": 16367, "analysis propose novel": 2729, "propose context aware": 43334, "overcome limitation propose": 39067, "limitation propose novel": 30540, "word representations obtained": 62290, "capture fine grained": 7672, "cross entropy ce": 11823, "examples paper propose": 18922, "cross entropy objective": 11825, "contextualized embeddings bert": 10801, "apply proposed method": 3348, "experimental results verify": 19322, "recently large scale": 45436, "using zero shot": 61031, "language models plms": 28297, "progress pre trained": 43111, "method improve performance": 32532, "virtual adversarial training": 61624, "paper present hierarchical": 39452, "present qualitative quantitative": 41994, "pretrained bert model": 42148, "original bert model": 38705, "based generative adversarial": 5750, "paper propose adaptive": 39490, "information different modalities": 25813, "human evaluation scores": 24155, "models tend rely": 35590, "proposed framework achieves": 43780, "learning self supervised": 29865, "training multi task": 58182, "models trained english": 35608, "trained english data": 57723, "data multiple languages": 12501, "language models languages": 28269, "language models speech": 28323, "approach substantially outperforms": 3709, "substantially outperforms state": 53648, "sequence sequence task": 49997, "apply pre trained": 3346, "paper conduct systematic": 39300, "fine tuning phase": 21009, "model performance propose": 34193, "address problem introduce": 1785, "performance end end": 40318, "propose novel joint": 43542, "low dimensional embeddings": 31144, "proposed method leads": 43822, "trained transformer models": 57902, "fine tuning large": 20996, "model target domain": 34443, "establishes state art": 18363, "datasets fine tuning": 13277, "fine tuning finally": 20991, "systems machine learning": 54556, "previous studies proposed": 42290, "weakly supervised training": 61868, "lead sub optimal": 29275, "meta learning algorithm": 32338, "en en fr": 17415, "use adversarial training": 59817, "based models use": 5882, "learning language model": 29695, "error analysis results": 18214, "unlike previous studies": 59602, "self supervised manner": 49211, "paper propose contextual": 39499, "sentence level document": 49584, "nlp tasks lack": 37537, "languages pre trained": 28754, "benchmark state art": 6494, "github com thu": 22712, "com thu keg": 9026, "model trained labeled": 34472, "approach achieves comparable": 3397, "large scale benchmark": 28963, "learned pre training": 29475, "models best knowledge": 34779, "language models utilize": 28344, "datasets work present": 13487, "issues propose new": 27100, "language model representations": 28191, "study demonstrate effectiveness": 53356, "superior performance state": 53939, "approximate nearest neighbor": 3978, "according experimental results": 860, "bidirectional encoder representation": 7068, "time space complexity": 57217, "carry extensive experiments": 7778, "language models gpt": 28259, "multi task models": 36023, "applied real world": 3290, "unlike previous methods": 59600, "using external knowledge": 60688, "proposed method shows": 43828, "shows better performance": 50766, "achieved promising performance": 1259, "resource languages remains": 47246, "compared current state": 9400, "text classification experimental": 56472, "classification experimental results": 8468, "classification models using": 8499, "bring significant improvement": 7335, "performance outperforms previous": 40471, "deep learning technique": 13721, "bert based classifier": 6620, "scale human evaluation": 48580, "question answering document": 44695, "large scale public": 28997, "language models able": 28225, "long term dependency": 31040, "neural networks using": 37079, "existing publicly available": 19133, "based data augmentation": 5664, "large scale general": 28977, "fine tuning strategy": 21024, "deep transformer based": 13752, "future research paper": 21891, "data driven analysis": 12299, "weakly supervised approaches": 61861, "language pre training": 28384, "use attention mechanism": 59827, "propose unified framework": 43689, "english chinese datasets": 17784, "large pretrained models": 28943, "shared task cross": 50496, "teams participated shared": 56009, "participated shared task": 39820, "pretrained transformer language": 42188, "models bert roberta": 34775, "significantly better baselines": 50940, "bert fine tuned": 6661, "high quality diverse": 23776, "github com jzbjyb": 22704, "wide variety domains": 61981, "models bert achieved": 34771, "knowledge pre trained": 27573, "tune language model": 58856, "language model predict": 28186, "fully supervised training": 21743, "trained weak supervision": 57912, "focused english language": 21221, "text classification based": 56469, "task language modeling": 55160, "ground truth word": 23257, "method effectively improves": 32473, "problem propose use": 42634, "introduce novel multi": 26849, "context dependent word": 10612, "provide experimental results": 44067, "paper investigate extent": 39411, "experiments pre training": 19491, "dynamic programming algorithm": 16490, "knowledge paper propose": 27565, "present novel task": 41978, "dataset model significantly": 12999, "training data expensive": 57992, "data augmentation approaches": 12151, "word embeddings obtained": 62181, "publicly available corpora": 44337, "collect high quality": 8943, "graph convolution network": 23121, "rivals state art": 48171, "state art recent": 52652, "paper end end": 39348, "bidirectional lstm bilstm": 7078, "inference nli datasets": 25675, "trained transformer based": 57898, "text mining natural": 56663, "paper propose approaches": 39496, "achieves comparable performance": 1314, "bag words cbow": 5505, "open domain text": 38427, "fine tuning multilingual": 21003, "training data leads": 58008, "paper provide detailed": 39556, "critical sequence training": 11793, "attentive neural network": 4864, "joint learning approach": 27175, "performance neural network": 40454, "high inter annotator": 23743, "transformer based bert": 58457, "based bert model": 5601, "advancing state art": 1936, "model trained scratch": 34476, "supervised state art": 54052, "dataset propose new": 13038, "art performance outperforms": 4340, "case study demonstrate": 7799, "better existing methods": 6888, "task proposed model": 55306, "detailed ablation studies": 14409, "multitask learning framework": 36324, "analysis pre trained": 2721, "task propose simple": 55303, "extensive experiments benchmarks": 19882, "downstream tasks propose": 16363, "data work propose": 12779, "methods end end": 32839, "learning improve performance": 29678, "processing nlp recent": 42908, "overcome data scarcity": 39061, "data scarcity low": 12626, "scarcity low resource": 48673, "state ofthe art": 52706, "markov chain monte": 31842, "chain monte carlo": 7960, "variety real world": 61289, "black box models": 7191, "evaluation results demonstrate": 18701, "nearest neighbor knn": 36519, "lack publicly available": 27909, "fine tuning based": 20980, "create high quality": 11699, "make code available": 31549, "previous research focused": 42271, "paper propose knowledge": 39519, "range dependencies paper": 44913, "success pre trained": 53718, "pre trained representations": 41552, "fine tuning task": 21026, "zero shot evaluation": 63160, "knowledge knowledge graphs": 27537, "achieves better results": 1309, "based attention mechanisms": 5582, "features attention mechanism": 20527, "dataset method achieves": 12993, "play important roles": 40973, "lack comprehensive survey": 27879, "transformer based architectures": 58456, "native non native": 36405, "binary classification model": 7145, "models bert shown": 34776, "different downstream tasks": 14910, "significantly affect performance": 50936, "benchmark datasets results": 6460, "despite significant progress": 14389, "classification low resource": 8489, "language model specifically": 28195, "unstructured text data": 59672, "simple efficient method": 51163, "training data compared": 57983, "tasks recent studies": 55837, "dutch language model": 16478, "tasks pre trained": 55803, "leads improved performance": 29316, "cross modal representations": 11865, "applied state art": 3296, "denoising auto encoder": 14064, "encoder pre trained": 17533, "previous approaches focused": 42239, "pre training corpus": 41572, "effective pre training": 16686, "pre training using": 41602, "tasks low resource": 55733, "large scale transformer": 29005, "conditional masked language": 9999, "achieved tremendous success": 1280, "fine tuning multi": 21002, "reach new state": 45051, "outperforms strong baseline": 38949, "strong baseline methods": 53004, "main contribution paper": 31431, "level self attention": 30204, "pre trained parameters": 41551, "enables model learn": 17444, "comparable better results": 9292, "work propose unified": 62788, "benchmark datasets approach": 6447, "tasks work investigate": 55970, "unlabeled target domain": 59580, "large language model": 28898, "extensive automatic human": 19858, "pre trained lms": 41541, "address issue present": 1766, "detailed ablation study": 14410, "data machine translation": 12473, "propose novel dynamic": 43532, "surpasses previous state": 54175, "task conduct experiments": 54967, "language processing based": 28399, "trained models like": 57803, "model works better": 34547, "model publicly available": 34262, "scores state art": 48923, "question answer qa": 44688, "encoder representation transformers": 17537, "representation transformers bert": 46599, "experimental results standard": 19315, "scale language models": 48588, "corpus manually annotated": 11378, "method outperforms strong": 32606, "speech recognition errors": 52285, "approach pre train": 3644, "achieve strong performance": 1206, "joint pre training": 27184, "outperform strong baseline": 38825, "contextual word embedding": 10788, "observe fine tuning": 38134, "rule based baseline": 48381, "propose bert based": 43313, "based models applied": 5868, "general domain corpora": 22053, "multi label multi": 35977, "label multi class": 27716, "weighted average f1": 61925, "average f1 scores": 5408, "widely spoken language": 62003, "spoken language world": 52362, "domain paper propose": 16131, "commonly used datasets": 9223, "resource languages low": 47241, "improving pre trained": 25191, "present novel corpus": 41969, "attention academia industry": 4708, "deep learning recently": 13718, "recently pre training": 45450, "pre training models": 41585, "models significantly improved": 35509, "nlp tasks question": 37545, "using masked language": 60790, "methods significant margin": 33038, "aim bridge gap": 2140, "high resource low": 23794, "resource low resource": 47251, "paper propose self": 39531, "order make use": 38638, "systems real world": 54611, "recently graph neural": 45431, "multi hop questions": 35971, "simple fine tuning": 51171, "patterns paper propose": 39974, "languages experimental results": 28665, "promising results compared": 43180, "nlp tasks limited": 37539, "self supervised pretraining": 49216, "large transformer models": 29035, "different model architectures": 14994, "model architectures training": 33578, "data real world": 12586, "propose pre train": 43588, "given input text": 22750, "language models pre": 28300, "machine translation order": 31374, "experiments diverse set": 19420, "arabic natural language": 4003, "tasks like sentiment": 55727, "transformers based models": 58522, "art results nlp": 4381, "allows model learn": 2473, "datasets various sizes": 13480, "evaluation metrics human": 18650, "public large scale": 44323, "et al 2020": 18409, "facilitate future research": 20270, "understanding human language": 59350, "models self supervised": 35475, "corpora fine tuned": 11204, "micro averaged f1": 33221, "fine tuning small": 21018, "publicly available benchmark": 44336, "various deep learning": 61323, "available github repository": 5300, "generative pre training": 22604, "data available languages": 12178, "using meta learning": 60799, "supervised zero shot": 54073, "sequence level knowledge": 49950, "level knowledge distillation": 30140, "best performance using": 6792, "results zero shot": 47915, "specific bert models": 52050, "different domains languages": 14907, "models different sizes": 34917, "paper describes work": 39334, "task remains challenging": 55335, "present baseline results": 41854, "real world environment": 45128, "cross lingual representation": 11847, "unsupervised weakly supervised": 59747, "different use cases": 15117, "topic classification task": 57395, "performance proposed approach": 40505, "improve model accuracy": 24871, "pre training strategy": 41595, "experiments conducted real": 19386, "model achieves superior": 33527, "learning methods deep": 29732, "methods deep learning": 32815, "enhance state art": 17924, "data different domains": 12282, "require labeled data": 46866, "existing widely used": 19169, "contextual embedding models": 10764, "cross lingual alignment": 11829, "general natural language": 22073, "sufficient labeled data": 53805, "datasets validate effectiveness": 13477, "alleviate data scarcity": 2403, "datasets code publicly": 13175, "speech natural language": 52274, "widely spoken languages": 62004, "future research efforts": 21889, "cross lingual tasks": 11853, "cross lingual generalization": 11837, "embedding models bert": 17046, "experiments approach outperforms": 19357, "extraction aims extract": 20046, "features word level": 20697, "core natural language": 11152, "pre training objective": 41586, "achieves similar better": 1374, "studies natural language": 53286, "active learning strategies": 1477, "tasks benchmark datasets": 55520, "experimental results bert": 19274, "capture document level": 7665, "word embeddings bert": 62159, "cross lingual cross": 11830, "domain test sets": 16207, "non english languages": 37650, "fasttext word embeddings": 20449, "bert language models": 6669, "tools natural language": 57383, "feature extraction classification": 20486, "detailed analysis experiments": 14413, "plays fundamental role": 40998, "lingual representation learning": 30721, "dataset used train": 13127, "train large scale": 57601, "large scale cross": 28968, "lingual pre trained": 30718, "pre training objectives": 41587, "training data work": 58050, "data difficult obtain": 12285, "use external knowledge": 59887, "generate adversarial examples": 22177, "strong baselines large": 53015, "based pre training": 5943, "pre training based": 41569, "low data scenarios": 31141, "traditional feature based": 57519, "feature based methods": 20477, "attention past years": 4807, "order address issue": 38591, "propose multi level": 43476, "models proposed approach": 35374, "self supervised framework": 49208, "existing data augmentation": 19052, "additional pre training": 1693, "bert based language": 6622, "achieved significant progress": 1271, "different languages domains": 14969, "language models ptlms": 28312, "covid 19 pandemic": 11669, "used pre train": 60263, "art transformer models": 4434, "relation extraction methods": 45976, "models plms achieved": 35326, "proposed approach compared": 43724, "pre trained nlp": 41550, "trained nlp models": 57832, "fine tuned model": 20963, "tasks transformer based": 55943, "model multiple languages": 34114, "approaches outperform strong": 3889, "real world task": 45140, "methods code available": 32783, "present series experiments": 42009, "automatic data augmentation": 5077, "empirical results state": 17346, "evaluation metric based": 18644, "novel pre training": 37895, "contextualized language models": 10805, "using new dataset": 60838, "bert xlm roberta": 6738, "unlabeled data using": 59567, "additional unlabeled data": 1709, "pre trained contextualized": 41524, "information extraction models": 25863, "accuracy experimental results": 972, "results significantly outperform": 47847, "outperform previously reported": 38814, "large scale pre": 28994, "scale pre trained": 48612, "push state art": 44427, "use knowledge distillation": 59919, "convolutional networks gcn": 11110, "datasets approach outperforms": 13156, "diverse set tasks": 15718, "sentence sentence pair": 49642, "based transformer based": 6108, "benchmarks experimental results": 6524, "robustness adversarial attacks": 48273, "languages english low": 28656, "constructing high quality": 10420, "annotations experimental results": 2991, "paper propose query": 39530, "fine tuned language": 20961, "tuned language model": 58876, "outbreak covid 19": 38763, "data scarcity problem": 12627, "pre training bert": 41570, "technique natural language": 56040, "existing pre training": 19129, "introduce new model": 26838, "significant computational resources": 50857, "models achieved promising": 34679, "data annotation process": 12137, "method automatically construct": 32394, "tasks especially low": 55618, "training neural models": 58189, "transformer based text": 58475, "finite state machine": 21058, "amortized variational inference": 2542, "improve cross lingual": 24838, "language modeling mlm": 28211, "large margin achieves": 28904, "tasks compared previous": 55548, "use word embedding": 60074, "models shown impressive": 35499, "language understanding benchmarks": 28545, "different pre trained": 15029, "trained models task": 57809, "outperforms fine tuning": 38903, "strong baselines automatic": 53010, "baselines automatic human": 6236, "auto regressive language": 5022, "regressive language models": 45827, "task domain specific": 55033, "model able outperform": 33493, "introduced bert model": 26881, "acoustic linguistic features": 1437, "natural language statements": 36450, "pre trained lm": 41540, "data available url": 12181, "propose model called": 43465, "graph based model": 23107, "pretraining fine tuning": 42203, "paper present automatic": 39445, "effectiveness proposed techniques": 16809, "improve robustness models": 24921, "current pre trained": 12000, "fine tuning performance": 21008, "fine tuned roberta": 20967, "semeval 2020 shared": 49436, "2020 shared task": 292, "language modeling datasets": 28207, "improves zero shot": 25169, "fine tune pretrained": 20954, "language model task": 28197, "obtain large scale": 38180, "based models natural": 5874, "state art encoder": 52609, "art encoder decoder": 4254, "f1 score 88": 20212, "nlp tasks open": 37542, "processing nlp existing": 42903, "proposed multi task": 43866, "information work propose": 26165, "datasets method achieves": 13329, "able generate high": 697, "extensive ablation studies": 19853, "recent advances nlp": 45287, "art performance code": 4330, "performance code publicly": 40241, "systems paper introduce": 54580, "significant performance gap": 50909, "recent years increasing": 45387, "covering wide range": 11661, "neural models perform": 36981, "paper bridge gap": 39281, "extensive experiments widely": 19905, "based models using": 5884, "alleviates data scarcity": 2423, "source code dataset": 51746, "studies mainly focus": 53280, "samples experimental results": 48473, "experiments named entity": 19476, "large human annotated": 28887, "address problem introducing": 1786, "art models trained": 4300, "form natural language": 21330, "recent advances artificial": 45281, "advances artificial intelligence": 1907, "train bert based": 57567, "propose novel iterative": 43541, "non english language": 37649, "multi class text": 35948, "class text classification": 8413, "results compared standard": 47549, "using model trained": 60807, "zero shot model": 63169, "utilizing external knowledge": 61123, "knowledge graph extracted": 27502, "encoded pre trained": 17483, "impressive performance various": 24813, "performance various benchmarks": 40622, "art sota models": 4407, "release data code": 46150, "method fine tuning": 32511, "approaches fine tuning": 3828, "method achieves superior": 32368, "sub optimal performance": 53527, "ability pre trained": 632, "trained model fine": 57792, "data propose new": 12570, "art methods automatic": 4283, "present comprehensive study": 41873, "guided pre training": 23349, "fundamental nlp task": 21784, "resource languages english": 47239, "models achieved high": 34677, "corpora paper propose": 11230, "propose novel multilingual": 43551, "achieving high performance": 1409, "compared existing methods": 9408, "various methods proposed": 61362, "training dataset paper": 58053, "standard fine tuning": 52494, "instead fine tuning": 26450, "using integer linear": 60739, "using bi directional": 60593, "using multilingual bert": 60820, "cross lingual zero": 11859, "reinforcement learning optimize": 45876, "challenging natural language": 8117, "superior performance proposed": 53938, "performance proposed framework": 40506, "generated natural language": 22302, "automated human evaluation": 5047, "language models usually": 28343, "adversarial learning framework": 1973, "human automatic evaluation": 24111, "supervised manner using": 54016, "method significantly better": 32654, "human reading comprehension": 24230, "key value pairs": 27342, "demonstrate effectiveness model": 13901, "domain domain adaptation": 16052, "effective fine tuning": 16652, "given pre trained": 22772, "source natural language": 51787, "pretrained nlp models": 42178, "success pre training": 53719, "large annotated datasets": 28847, "model data augmentation": 33735, "task sentence level": 55358, "sentence level using": 49596, "range downstream tasks": 44918, "models pre training": 35340, "performance zero shot": 40635, "multilingual bert fine": 36065, "accuracy zero shot": 1073, "different fine tuning": 14936, "earth mover distance": 16521, "recent work proposed": 45372, "using data augmentation": 60643, "languages language families": 28705, "scale knowledge graph": 48583, "art results terms": 4388, "using human evaluation": 60732, "higher correlation human": 23818, "exact match score": 18853, "standard multi task": 52509, "single task multi": 51346, "tasks improve performance": 55672, "german french italian": 22669, "recent success large": 45356, "multilingual pretrained language": 36113, "language models provides": 28311, "effective zero shot": 16718, "existing methods adopt": 19093, "demonstrate pre training": 13961, "code datasets publicly": 8810, "models bert gpt": 34773, "address challenge present": 1744, "pre train model": 41519, "dataset fine tune": 12931, "great progress recent": 23213, "current end end": 11974, "learning models trained": 29762, "contextual language models": 10774, "capabilities language models": 7598, "entity recognition question": 18130, "recognition question answering": 45529, "multilingual transformer based": 36130, "recent embedding based": 45308, "embedding based approaches": 17016, "overcome issue propose": 39064, "works pre trained": 62902, "open source code": 38448, "answering qa tasks": 3091, "advances language modeling": 1913, "simply fine tuning": 51252, "sentence level annotations": 49579, "existing work focuses": 19174, "improve language model": 24867, "language model performance": 28182, "introduce new framework": 26836, "improve performance state": 24899, "english language models": 17833, "transfer learning multi": 58388, "improve zero shot": 24941, "reference free evaluation": 45741, "state art cross": 52602, "art cross lingual": 4243, "use large scale": 59927, "diversity training data": 15742, "useful downstream applications": 60362, "promising future research": 43167, "task specific dataset": 55391, "achieves new sota": 1349, "make use existing": 31608, "existing methods fail": 19094, "based contrastive learning": 5648, "contrastive learning based": 10900, "require hand crafted": 46859, "media platforms like": 32177, "available online https": 5336, "online https github": 38370, "propose machine learning": 43446, "available https aka": 5308, "https aka ms": 24053, "recent years previous": 45393, "reported state art": 46456, "features downstream tasks": 20566, "indicate proposed method": 25533, "proposed method based": 43809, "used different tasks": 60151, "significant performance drop": 50906, "especially training data": 18307, "train model end": 57607, "open sourced code": 38463, "attention mechanism capture": 4775, "propose non autoregressive": 43519, "agent reinforcement learning": 2059, "attention based architectures": 4715, "feature engineering based": 20484, "capture semantic syntactic": 7709, "task specific architectures": 55388, "generation extensive experiments": 22461, "sentiment analysis approaches": 49816, "level pre trained": 30179, "based shot learning": 6033, "classification models based": 8497, "issue present novel": 27074, "tasks work explore": 55969, "generation sequence sequence": 22546, "task best knowledge": 54936, "neural networks applied": 37035, "strong baselines tasks": 53017, "led state art": 29995, "art performance achieved": 4325, "attention network gat": 4796, "results benchmark dataset": 47521, "semeval 2020 task": 49437, "uses graph neural": 60513, "word embeddings represent": 62186, "relation extraction question": 45980, "recent research efforts": 45341, "yielded state art": 63107, "propose transformer based": 43682, "transformer based network": 58467, "task multi class": 55225, "task learning techniques": 55183, "demonstrate superiority model": 13987, "including low resource": 25271, "models perform poorly": 35314, "using recently developed": 60899, "training bert model": 57946, "embeddings map words": 17172, "evaluate model performance": 18473, "incorporating commonsense knowledge": 25380, "labels natural language": 27841, "text challenging task": 56464, "strong pre trained": 53044, "approach experimental results": 3526, "research low resource": 47069, "models recent work": 35408, "language models models": 28285, "model trained predict": 34474, "pre train fine": 41516, "end end transformer": 17669, "baseline future research": 6172, "open domain conversational": 38420, "data augmentation generate": 12156, "guide future work": 23333, "pre trained different": 41527, "lack annotated datasets": 27874, "f1 score 72": 20203, "proposes new approach": 43938, "data shared task": 12656, "transformer based approach": 58453, "make code models": 31551, "code models publicly": 8837, "research community paper": 47003, "entity recognition using": 18136, "variety language understanding": 61276, "models substantially outperform": 35549, "open sourced https": 38464, "sourced https github": 51825, "contextualized language representations": 10806, "open source library": 38452, "powerful language models": 41436, "models transformer based": 35630, "based models bert": 5869, "art performance compared": 4331, "models data available": 34882, "nlp tasks pre": 37544, "publicly https github": 44358, "large amounts labeled": 28837, "reaching state art": 45061, "present simple efficient": 42017, "address challenge introduce": 1743, "speech tags dependency": 52305, "shows significant improvement": 50802, "end end pipeline": 17658, "task semantic parsing": 55353, "challenging nlp task": 8121, "generation challenging task": 22433, "languages non trivial": 28740, "non commercial use": 37642, "nlp systems paper": 37529, "propose approach automatically": 43297, "significant improvement accuracy": 50874, "sentiment analysis classification": 49818, "social networks twitter": 51598, "topic modeling approach": 57416, "semantic information text": 49287, "art models large": 4296, "time consuming work": 57138, "self attention weights": 49188, "labeled data achieve": 27739, "data achieve state": 12111, "conversations social media": 11064, "growing body work": 23291, "recent studies report": 45351, "representations large scale": 46701, "tuning specific tasks": 58958, "fine tuning improves": 20994, "representation pre trained": 46568, "extended new languages": 19838, "documents experimental results": 15878, "advances pre trained": 1921, "paper aim improve": 39261, "present manually annotated": 41941, "bert based classifiers": 6621, "baselines future work": 6266, "language models significantly": 28320, "used fine tuning": 60193, "downstream tasks compared": 16354, "pretrained models publicly": 42171, "art performance popular": 4342, "paper conduct comprehensive": 39298, "using pretrained language": 60872, "information word level": 26162, "task fine tuned": 55089, "language models widely": 28347, "language models study": 28325, "corpus pre training": 11405, "implementation publicly available": 24643, "publicly available github": 44342, "training data present": 58026, "fully supervised models": 21742, "paper propose improve": 39517, "introduce transformer based": 26874, "information word embedding": 26160, "encoder experimental results": 17514, "transformer encoder decoder": 58483, "low resource situations": 31192, "labels experimental results": 27822, "tasks large scale": 55714, "scale pre training": 48613, "replaced token detection": 46406, "based contextual embeddings": 5644, "data augmentation framework": 12155, "data fine tune": 12365, "pre trained knowledge": 41537, "future research direction": 21887, "github https github": 22716, "languages work propose": 28824, "demonstrate competitive performance": 13883, "sentiment analysis dataset": 49819, "post processing technique": 41353, "bert like models": 6676, "word boundary information": 62122, "proposed method outperformed": 43823, "models shown success": 35503, "shot learning setting": 50629, "novel objective function": 37889, "data high quality": 12402, "comprehensive experiments demonstrate": 9792, "wide range languages": 61970, "recent studies demonstrated": 45350, "self supervised training": 49219, "language processing especially": 28406, "trained transformer model": 57901, "machine translation code": 31351, "supervised end end": 53983, "methods automatic human": 32762, "based models experiments": 5871, "f1 score test": 20225, "context aware representations": 10591, "experiments analysis demonstrate": 19350, "machine translation document": 31355, "different granularity levels": 14947, "world applications paper": 62929, "domain test data": 16205, "pseudo labeled data": 44276, "high quality pseudo": 23783, "suffers data scarcity": 53790, "data scarcity issue": 12625, "issue paper propose": 27071, "code mixing phenomenon": 8832, "processing existing methods": 42871, "methods mainly focus": 32937, "optimal transport ot": 38534, "pre trained sequence": 41557, "strong zero shot": 53059, "github com salesforce": 22710, "amounts labeled data": 2552, "scale unlabeled data": 48636, "work propose approach": 62772, "neural network applied": 36994, "confirm effectiveness proposed": 10130, "neural networks achieve": 37033, "art performance addition": 4326, "propose novel generative": 43537, "language models achieving": 28228, "experimental results real": 19308, "second stage fine": 49022, "challenging problem paper": 8128, "task sequence labeling": 55363, "achieves promising results": 1356, "methods neural network": 32957, "language models text": 28332, "xlm roberta model": 63031, "trained multilingual language": 57818, "related covid 19": 45894, "propose novel bert": 43526, "train machine translation": 57605, "dataset pre trained": 13032, "trained transformer language": 57900, "language experimental results": 28060, "experimental results state": 19316, "language models transfer": 28336, "nearest neighbors knn": 36523, "networks experimental results": 36852, "state art t5": 52677, "community recent years": 9275, "languages english arabic": 28652, "approach bring significant": 3435, "visual textual information": 61672, "shown pre trained": 50738, "trained models perform": 57805, "speech named entity": 52272, "contrastive learning specifically": 10910, "experimental results benchmarks": 19273, "information external knowledge": 25856, "large number labeled": 28923, "time work propose": 57239, "translation speech translation": 58681, "translation quality paper": 58665, "paper describes proposed": 39328, "results transformer based": 47892, "computer vision cv": 9895, "transformer based approaches": 58454, "paper proposes model": 39547, "fine tuned transformer": 20972, "methods proposed method": 32997, "strong baselines task": 53016, "context dependent context": 10611, "dataset human evaluation": 12955, "datasets used training": 13471, "model best model": 33622, "f1 score 92": 20215, "language models provide": 28310, "answer natural language": 3040, "utilizing pre trained": 61128, "language models downstream": 28247, "address issue paper": 1765, "exact match f1": 18852, "unlabeled target language": 59581, "covid 19 related": 11671, "learning propose novel": 29824, "language model pretrained": 28187, "model pretrained large": 34230, "sentiment analysis based": 49817, "end end multi": 17655, "text generative models": 56607, "domain knowledge base": 16094, "demonstrate proposed models": 13970, "based approaches proposed": 5573, "information used improve": 26143, "advancements deep learning": 1898, "public datasets model": 44317, "choice pre trained": 8334, "trained models used": 57810, "inspired recent progress": 26414, "level character level": 30073, "achieves higher performance": 1336, "proposed framework significantly": 43786, "commonsense knowledge graph": 9236, "best performance compared": 6791, "learning models task": 29761, "data models code": 12495, "large transformer based": 29034, "pretrained large language": 42162, "training data hand": 58000, "models multilingual models": 35239, "models different tasks": 34918, "vision language models": 61638, "domains labeled data": 16266, "adaptive fine tuning": 1575, "f1 scores 70": 20228, "propose novel training": 43570, "test set consisting": 56373, "strong baselines especially": 53012, "classification task paper": 8566, "evidence lower bound": 18813, "different deep learning": 14894, "scale training data": 48632, "task learning strategy": 55182, "consistently outperforms strong": 10307, "task field natural": 55082, "propose novel contrastive": 43528, "new method automatically": 37251, "code mixed language": 8827, "propose novel pre": 43555, "novel pre trained": 37894, "experiments conducted benchmark": 19382, "conducted benchmark datasets": 10075, "art performance downstream": 4334, "monolingual bert based": 35791, "fine tuned downstream": 20960, "help pre trained": 23585, "usually requires large": 61067, "benchmark dataset demonstrate": 6445, "dataset demonstrate superiority": 12887, "modern deep learning": 35705, "trained multilingual bert": 57817, "pre trained gpt": 41536, "respectively paper describes": 47376, "paper describes developed": 39322, "obtained f1 score": 38210, "paper addresses gap": 39256, "fine tuning work": 21033, "shared task participants": 50504, "pointer network model": 41060, "datasets used experiments": 13469, "graph based semantic": 23112, "using small set": 60949, "languages propose novel": 28759, "team semeval 2020": 56005, "specifically introduce novel": 52210, "task learning architecture": 55170, "bert achieved great": 6604, "fine tuning techniques": 21029, "better performance achieved": 6928, "models achieved excellent": 34675, "language models help": 28260, "propose model based": 43464, "context covid 19": 10602, "datasets method outperforms": 13330, "using language models": 60754, "easily incorporated existing": 16544, "model achieve better": 33503, "text classification text": 56489, "fine tuning explore": 20990, "performance compared baselines": 40251, "standard natural language": 52512, "remains poorly understood": 46346, "presents large scale": 42089, "layer pre trained": 29203, "language models prlms": 28304, "doi org 10": 15989, "models masked language": 35218, "open ended text": 38432, "ended text generation": 17740, "practical applications paper": 41459, "code dataset released": 8807, "training large models": 58149, "open ended language": 38430, "relation extraction event": 45974, "data augmentation training": 12165, "class classification task": 8397, "unsupervised graph based": 59701, "graph based ranking": 23110, "based ranking model": 5973, "multiple benchmark datasets": 36173, "processing nlp community": 42901, "ambiguity natural language": 2527, "monolingual multilingual settings": 35808, "used training data": 60338, "pre training stage": 41592, "supervised contrastive learning": 53972, "significantly higher performance": 50961, "attention research community": 4822, "new manually annotated": 37246, "text paper propose": 56691, "bert experimental results": 6656, "large parallel data": 28935, "word segmentation cws": 62293, "art sota performance": 4408, "traditional state art": 57546, "neural network gnn": 37009, "applications paper introduce": 3228, "datasets model outperforms": 13336, "machine translation experiment": 31358, "resources publicly available": 47330, "github com csebuetnlp": 22697, "models recent advances": 35407, "transformer based methods": 58463, "learning mtl framework": 29768, "models lack robustness": 35160, "domain paper present": 16130, "methods improve robustness": 32896, "develop end end": 14586, "model domain specific": 33789, "position aware attention": 41261, "social media comments": 51570, "document paper propose": 15818, "performance public datasets": 40513, "models achieved significant": 34681, "ablation study demonstrates": 660, "multi task transfer": 36031, "shot language transfer": 50623, "based models fine": 5872, "models large number": 35166, "work systematically study": 62835, "analysis provides insights": 2734, "dataset code publicly": 12842, "understanding slu tasks": 59401, "structural semantic information": 53085, "aware text generation": 5474, "task fine tune": 55088, "fine tune bert": 20948, "art results english": 4377, "experimental results illustrate": 19288, "10 training data": 54, "learning multi task": 29770, "efficient end end": 16871, "provide useful information": 44149, "texts social media": 56928, "outperforms existing approaches": 38897, "demonstrate approach effectively": 13869, "capture local global": 7692, "art performance public": 4344, "performance transformer based": 40610, "low resource high": 31179, "generated language models": 22296, "focus low resource": 21178, "better comparable performance": 6865, "simple effective data": 51152, "effective data augmentation": 16642, "data augmentation strategies": 12160, "training data fine": 57997, "benchmarks demonstrate effectiveness": 6515, "code datasets available": 8809, "improves performance strong": 25144, "cc nc sa": 7898, "nc sa license": 36499, "based distant supervision": 5685, "state art metrics": 52631, "release code data": 46145, "model predictions paper": 34225, "overview current state": 39110, "widely used language": 62017, "training large language": 58148, "self supervised models": 49212, "self supervised contrastive": 49206, "languages zero shot": 28828, "modeling natural language": 34604, "experiments datasets different": 19400, "adversarial training improve": 1990, "incorporate external knowledge": 25355, "graph structural information": 23169, "information knowledge base": 25937, "obtain promising results": 38184, "propose novel knowledge": 43543, "years existing approaches": 63060, "existing approaches focus": 19027, "modern language models": 35708, "models require large": 35442, "data costly time": 12256, "github repository https": 22720, "repository https github": 46465, "specific downstream tasks": 52075, "downstream tasks natural": 16360, "shown impressive performance": 50719, "trained models fine": 57799, "model word embedding": 34539, "small amounts data": 51463, "code mixed dataset": 8825, "proposed pre training": 43880, "scale empirical study": 48568, "bilstm crf model": 7131, "bilstm crf models": 7132, "parametric non parametric": 39733, "augmentation method improve": 4962, "models better performance": 34783, "scale labeled data": 48585, "resulting poor performance": 47473, "poor performance domain": 41141, "general domain specific": 22055, "language model results": 28192, "different parts speech": 15021, "results https github": 47662, "datasets pre trained": 13373, "model training evaluation": 34481, "word embeddings introduce": 62173, "end end setting": 17660, "trained models bert": 57796, "bert roberta albert": 6712, "novel bert based": 37778, "performance variety tasks": 40620, "integral natural language": 26503, "answer qa pairs": 3047, "improvement zero shot": 25041, "future work code": 21900, "github com swarnahub": 22711, "use graph convolutional": 59903, "data sets evaluation": 12650, "outperforms baseline approaches": 38868, "tackle issue propose": 54705, "tasks recent works": 55839, "transformer models like": 58500, "existing approaches usually": 19033, "consistently significantly outperform": 10311, "based zero shot": 6144, "neural based models": 36939, "training data neural": 58021, "employ pre trained": 17389, "future data collection": 21866, "play essential role": 40970, "case based reasoning": 7789, "datasets approach significantly": 13157, "art embedding based": 4252, "method code available": 32416, "examples paper present": 18921, "tasks zero shot": 55975, "classification tasks paper": 8570, "token level predictions": 57297, "significantly improves generalization": 50975, "models including bert": 35119, "make data code": 31559, "code freely available": 8815, "fine tuning generative": 20993, "loss fine tuning": 31092, "knowledge work propose": 27650, "training data long": 58012, "popular benchmark dataset": 41158, "mixture experts moe": 33420, "wide variety downstream": 61982, "issue propose simple": 27078, "especially natural language": 18290, "models require significant": 35443, "address challenges introduce": 1748, "generative language model": 22592, "text generation aims": 56594, "standard language modeling": 52497, "state art bert": 52590, "f1 score using": 20226, "fully supervised methods": 21741, "methods transfer learning": 33083, "feature attribution methods": 20475, "remains major challenge": 46341, "art performance unsupervised": 4350, "generation task based": 22559, "language model perform": 28181, "commonsense question answering": 9238, "unsupervised pre trained": 59721, "generalization ability different": 22115, "zero shot approaches": 63153, "training data train": 58043, "generated language model": 22295, "level paper propose": 30173, "problem fine tuning": 42569, "directions english german": 15290, "release large dataset": 46155, "information pre training": 26015, "effective way learn": 16714, "using knowledge distillation": 60746, "effectiveness approach achieving": 16767, "strong state art": 53052, "submission shared task": 53576, "framework cross lingual": 21484, "multiple languages using": 36238, "neural ranking model": 37089, "zero shot methods": 63168, "datasets widely used": 13485, "experiments ablation studies": 19345, "language model mlm": 28177, "models prior work": 35357, "pre training pre": 41589, "training pre training": 58212, "task large scale": 55164, "great success various": 23219, "paper propose address": 39491, "catastrophic forgetting problem": 7834, "metrics correlate human": 33155, "correlate human judgements": 11504, "code released https": 8853, "trained self supervised": 57858, "contrastive pre training": 10917, "prior work focuses": 42422, "motivate future research": 35860, "tasks remains unclear": 55852, "remains unclear extent": 46352, "end introduce novel": 17677, "text based methods": 56455, "work propose multi": 62782, "trained relatively small": 57853, "token level representations": 57298, "achieved comparable performance": 1225, "comparable performance compared": 9302, "impressive performance gains": 24812, "capture high level": 7677, "integrate pre trained": 26509, "context open domain": 10683, "models introduce new": 35143, "consistent significant improvement": 10286, "language processing various": 28440, "word phrase level": 62264, "f1 scores previous": 20230, "previous unsupervised methods": 42299, "trained contextual embeddings": 57693, "propose novel hybrid": 43540, "need task specific": 36593, "new method learning": 37254, "new dataset named": 37167, "good starting point": 22946, "room future work": 48339, "specific pre training": 52128, "downstream applications including": 16334, "large amounts domain": 28835, "black box nature": 7192, "increasing model size": 25455, "present empirical results": 41898, "sequence sequence transformer": 49999, "human evaluation present": 24152, "hierarchical attention model": 23658, "resource settings extensive": 47276, "settings extensive experiments": 50373, "propose novel evaluation": 43535, "widely used benchmark": 62011, "human annotated corpus": 24096, "small training data": 51507, "work propose knowledge": 62779, "baselines automatic evaluation": 6235, "issue paper proposes": 27072, "paper address challenge": 39251, "using pre training": 60867, "model xlm roberta": 34549, "cross attention mechanism": 11808, "small number examples": 51488, "model agnostic framework": 33553, "available data set": 5277, "semantic parsing paper": 49314, "multi task approach": 36016, "outperforms recent state": 38938, "self supervised objectives": 49214, "text question answering": 56725, "overcome problem propose": 39071, "model based neural": 33606, "models demonstrated strong": 34896, "previous studies focus": 42288, "mitigate issue propose": 33386, "nlp recent years": 37518, "datasets low resource": 13321, "automated evaluation metrics": 5042, "tasks entity linking": 55616, "language models unsupervised": 28339, "pretrained sequence sequence": 42184, "text text transfer": 56811, "text transfer transformer": 56822, "transfer transformer t5": 58428, "unified text text": 59481, "text text format": 56808, "available paper introduce": 5340, "roberta language model": 48224, "language models automatically": 28233, "zero shot domain": 63159, "train test sets": 57648, "significantly boosts performance": 50946, "new pre trained": 37286, "works mainly focus": 62897, "languages lack annotated": 28703, "domain adaptive pre": 16011, "adaptive pre training": 1579, "art bert based": 4229, "code data models": 8801, "available cross lingual": 5275, "pre training strategies": 41594, "pre training propose": 41591, "training propose novel": 58219, "level contrastive learning": 30087, "downstream language understanding": 16341, "training data performance": 58024, "yields substantial gains": 63135, "available training time": 5381, "novel self supervised": 37913, "self supervised approach": 49205, "datasets prior work": 13378, "existing language models": 19082, "knowledge intensive tasks": 27531, "shows significant improvements": 50803, "data active learning": 12117, "outperforms prior state": 38933, "provide insights future": 44094, "improve performance task": 24901, "best published result": 6810, "present simple method": 42018, "tuned language models": 58877, "tasks like question": 55725, "sequence prediction task": 49968, "large scale domain": 28972, "specifically propose novel": 52224, "problem domain adaptation": 42545, "present new large": 41963, "code dataset publicly": 8806, "tuned downstream tasks": 58874, "achieves similar performance": 1375, "lower resource languages": 31223, "highly domain specific": 23896, "weakly supervised dataset": 61863, "language previous work": 28388, "large volumes text": 29049, "learning ml algorithms": 29740, "dataset introduce new": 12969, "language models data": 28241, "neural models based": 36975, "training experimental results": 58100, "general language understanding": 22066, "understanding evaluation benchmark": 59343, "time consuming error": 57129, "consuming error prone": 10444, "sentence level representation": 49589, "work perform extensive": 62748, "nlp tasks information": 37536, "specific knowledge graph": 52094, "learning weak supervision": 29940, "continued pre training": 10832, "performance automatic evaluation": 40203, "word embeddings encode": 62166, "downstream tasks results": 16366, "based cnn lstm": 5624, "interactions social media": 26623, "demonstrate proposed architecture": 13965, "fine tuning strategies": 21023, "suffer catastrophic forgetting": 53760, "conduct extensive empirical": 10051, "scarcity labeled data": 48669, "quality generated data": 44525, "experiments different languages": 19416, "training examples available": 58095, "work available https": 62584, "art results outperforming": 4383, "widely used improve": 62016, "word embeddings experimental": 62169, "recent pre trained": 45334, "methods fine tuning": 32868, "years pre trained": 63070, "bert based pre": 6625, "performs better existing": 40699, "datasets used train": 13470, "new multilingual dataset": 37265, "language understanding key": 28551, "introduce novel graph": 26846, "paper describes contribution": 39321, "describes contribution semeval": 14220, "contribution semeval 2020": 10948, "zero shot scenarios": 63175, "trained human annotated": 57748, "commonly used metrics": 9226, "cross domain scenarios": 11818, "quality human evaluation": 44531, "learning methods learn": 29733, "using annotated data": 60559, "text classification framework": 56473, "information extraction natural": 25864, "sequential transfer learning": 50053, "baseline models experimental": 6189, "corpora state art": 11246, "large pretrained transformer": 28944, "pretrained transformer based": 42187, "document level tasks": 15809, "general purpose pretrained": 22088, "previous works shown": 42322, "increase computational cost": 25409, "achieves sota results": 1377, "effect data augmentation": 16612, "compare performance proposed": 9356, "answering knowledge graph": 3078, "present comprehensive review": 41872, "achieves comparable better": 1313, "semi supervised fashion": 49458, "training data zero": 58051, "shot learning tasks": 50630, "based model language": 5859, "encoder representations transformer": 17539, "human evaluation demonstrates": 24146, "low high resource": 31154, "work proposes novel": 62794, "positive negative pairs": 41287, "popular pre trained": 41179, "tasks using different": 55955, "trained encoder decoder": 57718, "contrastive learning scl": 10909, "end present novel": 17698, "aspects natural language": 4548, "recently transformer based": 45471, "paper presents study": 39484, "using random forest": 60892, "experiments fine tuning": 19437, "tuning language models": 58921, "achieve good balance": 1146, "consuming task paper": 10454, "model performs par": 34205, "performance compared baseline": 40250, "study aims develop": 53323, "using single model": 60946, "dependency parsing dp": 14132, "promising results natural": 43182, "easy use interface": 16569, "using automated metrics": 60572, "demonstrate superiority approach": 13985, "models tackle problem": 35578, "approach outperforms competitive": 3625, "information recent years": 26045, "models code publicly": 34822, "generation aims generating": 22415, "various downstream nlp": 61333, "shot zero shot": 50658, "explore different approaches": 19699, "language model improves": 28172, "meta embedding learning": 32332, "language models applied": 28229, "effectiveness superiority proposed": 16814, "implementation available https": 24639, "training data points": 58025, "applying state art": 3378, "texts paper propose": 56910, "language models knowledge": 28267, "contrastive learning cl": 10901, "propose contrastive learning": 43338, "contrastive learning framework": 10903, "efficacy proposed method": 16836, "social media important": 51575, "using crowd sourced": 60639, "data work present": 12778, "tuning large pre": 58924, "language models demonstrated": 28243, "sequence sequence architecture": 49981, "language model evaluation": 28163, "recent studies revealed": 45352, "building cross lingual": 7441, "learning models achieved": 29751, "pseudo labels unlabeled": 44279, "manner extensive experiments": 31719, "suggest future work": 53819, "fine tuning limited": 20997, "multi lingual pre": 35987, "shot learning problem": 50628, "yield better performance": 63091, "popular recent years": 41182, "propose pre training": 43589, "showing promising results": 50686, "model takes advantage": 34440, "model evaluate model": 33842, "self supervised fashion": 49207, "performance human evaluation": 40376, "private test set": 42444, "use language model": 59923, "large corpus text": 28864, "static word embedding": 52727, "pretrained transformer model": 42189, "shed light future": 50525, "light future research": 30450, "language processing requires": 28428, "recently deep neural": 45416, "paper propose joint": 39518, "presents new dataset": 42095, "rules natural language": 48393, "experiments conducted widely": 19388, "conducted widely used": 10099, "ensemble model combines": 17979, "fine tuned dataset": 20959, "enhance model performance": 17916, "consists sub tasks": 10332, "training data proposed": 58029, "bert based architectures": 6617, "accuracy downstream tasks": 963, "attention network model": 4797, "present data driven": 41882, "driven end end": 16423, "model knowledge distillation": 34031, "achieves promising performance": 1355, "increasing attention recently": 25446, "model performance various": 34197, "stage fine tuning": 52430, "fine tuning stages": 21021, "performance gains compared": 40358, "train evaluate model": 57588, "methods machine learning": 32934, "novel training method": 37941, "scale unlabeled corpora": 48635, "verify effectiveness method": 61537, "generation pre trained": 22520, "novel contrastive learning": 37791, "masked language modelling": 31865, "language understanding machine": 28552, "tasks verify effectiveness": 55962, "work develop novel": 62633, "information improves performance": 25916, "layers pre trained": 29233, "pre trained roberta": 41553, "task specific pre": 55403, "domain specific pre": 16182, "performance multi task": 40446, "systems challenging task": 54450, "art models struggle": 4298, "github com ukplab": 22714, "pretrained multilingual language": 42173, "performance multilingual model": 40448, "publicly available text": 44355, "codes available https": 8875, "pre training process": 41590, "model compression techniques": 33687, "novel data driven": 37798, "distant supervision approach": 15558, "better results existing": 6958, "obtains comparable performance": 38244, "multiple languages english": 36237, "personally identifiable information": 40768, "best f1 scores": 6765, "highest f1 score": 23853, "pretrained neural language": 42176, "tasks conduct extensive": 55553, "datasets sentiment analysis": 13417, "general nlp tasks": 22077, "original training set": 38735, "trained text text": 57894, "text text transformer": 56812, "experiments demonstrate state": 19407, "learning rl based": 29852, "capture different aspects": 7663, "fine tunes pre": 20976, "tunes pre trained": 58896, "2021 shared task": 299, "various training strategies": 61410, "propose model agnostic": 43463, "model agnostic method": 33555, "fine tuning transformer": 21031, "tuning transformer based": 58972, "model sequence sequence": 34361, "efficacy proposed approach": 16835, "language models zero": 28350, "models zero shot": 35694, "improve performance data": 24887, "improving zero shot": 25202, "limited amounts labeled": 30565, "conduct ablation study": 10026, "processing nlp approaches": 42900, "data augmentation using": 12166, "state art contextual": 52601, "contextual language model": 10773, "field machine learning": 20759, "aware graph neural": 5451, "demonstrated strong performance": 14022, "publicly released code": 44363, "released code https": 46172, "github com gt": 22702, "com gt salt": 9015, "table text generation": 54690, "given large number": 22758, "dataset verify effectiveness": 13134, "quality natural language": 44556, "classification sequence tagging": 8547, "dataset natural language": 13007, "analysis human evaluation": 2674, "wide variety applications": 61980, "word embeddings derived": 62163, "model text classification": 34457, "dataset achieve state": 12798, "training test time": 58294, "proposed recent years": 43885, "components pre trained": 9724, "language models transformer": 28337, "multi lingual models": 35986, "achieve goal propose": 1143, "model performance experiments": 34190, "results human evaluation": 47664, "improve performance natural": 24895, "development language technology": 14681, "attention computational linguistics": 4729, "computational linguistics community": 9846, "using graph neural": 60716, "model simple effective": 34386, "popular language models": 41167, "significantly improve classification": 50964, "transition based approach": 58538, "transformer based pretrained": 58472, "manually labeled data": 31782, "language processing use": 28438, "address problem paper": 1787, "finally propose method": 20877, "different languages evaluate": 14971, "previous sota methods": 42280, "future research present": 21892, "methods real world": 33006, "language models better": 28236, "evaluate language models": 18466, "social science research": 51603, "high quality labeled": 23780, "baselines terms automatic": 6310, "models learn generate": 35176, "simple model agnostic": 51194, "constructing large scale": 10423, "based self supervised": 6013, "art models benchmark": 4294, "models benchmark datasets": 34766, "real life settings": 45106, "potential future directions": 41391, "art relation extraction": 4366, "remains relatively unexplored": 46348, "shot learning model": 50627, "shot learning methods": 50626, "shared encoder decoder": 50469, "study zero shot": 53478, "tasks language models": 55710, "proposed method effectively": 43818, "language models recent": 28313, "domain adaptation propose": 16003, "neural attention model": 36936, "relevant external knowledge": 46216, "outperforms baselines terms": 38876, "text text generation": 56809, "using term frequency": 60986, "using majority voting": 60786, "comparisons state art": 9514, "models achieve new": 34672, "task extensive experiments": 55070, "types natural language": 59104, "computer vision speech": 9897, "learning models work": 29765, "tasks like semantic": 55726, "semeval 2021 task": 49439, "language models increasingly": 28264, "language models experimental": 28252, "fine tuned multilingual": 20965, "contrastive learning objectives": 10908, "open challenges future": 38414, "challenges future directions": 8051, "sequence language models": 49943, "training test datasets": 58292, "text pre trained": 56702, "based nlp models": 5913, "brings significant improvement": 7344, "demonstrate efficacy approach": 13906, "models analysis shows": 34711, "unlike existing approaches": 59595, "performance different models": 40294, "different models compared": 14997, "language models varying": 28346, "widely used text": 62022, "https doi org": 24057, "org 10 5281": 38678, "10 5281 zenodo": 34, "model different types": 33767, "performance compared strong": 40256, "terms accuracy efficiency": 56264, "promising results cross": 43181, "propose novel metric": 43548, "standard data sets": 52482, "task demonstrate approach": 55001, "various nlp problems": 61374, "word level character": 62227, "demo available https": 13847, "model performance low": 34191, "art results achieved": 4370, "classification models trained": 8498, "extracting structured information": 20041, "art results despite": 4375, "data collected using": 12217, "fine tuning nlp": 21005, "input sequence length": 26332, "low resource multilingual": 31184, "techniques fine tuning": 56090, "auxiliary task improve": 5241, "order achieve better": 38589, "automatically generated questions": 5177, "work propose generate": 62778, "word level embedding": 62228, "overcome limitations propose": 39069, "train test splits": 57649, "slow inference speed": 51451, "trained models paper": 57804, "novel machine learning": 37860, "propose new way": 43517, "transfer learning ability": 58375, "language model perplexity": 28183, "models demonstrated impressive": 34895, "word level phrase": 62235, "level phrase level": 30177, "method experimental results": 32496, "code dataset available": 8805, "traditional fine tuning": 57521, "achieve comparable better": 1122, "models lms trained": 35200, "code mixed english": 8826, "significantly outperforms multilingual": 51003, "address limitation propose": 1777, "token sentence level": 57306, "phase pre training": 40806, "effectiveness pre training": 16802, "continuing pre training": 10838, "fine grained annotation": 20927, "fine grained annotations": 20928, "experiments demonstrate superiority": 19408, "non autoregressive transformer": 37638, "effective domain adaptation": 16647, "averaged f1 scores": 5423, "models trained human": 35612, "achieves significantly higher": 1372, "low quality data": 31168, "step paper propose": 52820, "models based bert": 34754, "trained multilingual models": 57819, "propose plug play": 43584, "methods benchmark datasets": 32768, "self supervised speech": 49217, "baseline model using": 6187, "paper present submission": 39463, "representation pre training": 46569, "experimental results achieve": 19269, "achieve promising performance": 1181, "end trainable neural": 17718, "test time experiments": 56388, "improve performance model": 24892, "level contextual information": 30083, "results machine learning": 47707, "transfer low resource": 58402, "important nlp task": 24749, "language models low": 28281, "multilingual low resource": 36093, "stage pre training": 52439, "proposed address problem": 43713, "recent advances text": 45289, "task semeval 2021": 55355, "models bert albert": 34772, "various pre trained": 61379, "task specific metrics": 55400, "proposed approach improves": 43728, "models domain specific": 34937, "foster future research": 21411, "benchmark dataset containing": 6444, "based models trained": 5881, "strong baselines achieves": 53009, "increase model complexity": 25418, "layer perceptron mlp": 29200, "lower level tasks": 31215, "information fine tuning": 25878, "publicly release dataset": 44361, "dataset https github": 12953, "text generation capabilities": 56596, "performance bert based": 40217, "evaluating quality generated": 18569, "use contextual information": 59853, "analyses demonstrate effectiveness": 2594, "state art accuracies": 52575, "code open sourced": 8840, "extract meaningful information": 19987, "pretrained transformer models": 42190, "use language specific": 59925, "tuned bert models": 58869, "benefits pre trained": 6587, "model compared state": 33674, "contrastive learning method": 10904, "models specifically propose": 35527, "benchmark datasets extensive": 6453, "fine tuning paradigm": 21006, "like recurrent neural": 30497, "recently proposed method": 45456, "model achieve state": 33506, "continuous embedding space": 10845, "state art hybrid": 52621, "domain specific rules": 16184, "data self training": 12636, "data extensive experiments": 12351, "token level sequence": 57300, "different transformer based": 15106, "thanks availability large": 57000, "domain domain settings": 16054, "distant supervision method": 15559, "words given text": 62428, "languages using english": 28817, "data models available": 12494, "contextualised language models": 10794, "language models plm": 28296, "propose self training": 43616, "tasks open source": 55776, "human evaluations demonstrate": 24159, "modern transformer based": 35724, "used benchmark dataset": 60104, "learning domain specific": 29606, "release dataset code": 46152, "task oriented semantic": 55257, "oriented semantic parsing": 38700, "beam search generate": 6369, "pre trained seq2seq": 41556, "trained seq2seq models": 57867, "language models tend": 28331, "negative log likelihood": 36624, "based evaluation metrics": 5709, "evaluation shows models": 18722, "classification tasks recent": 8571, "access high quality": 823, "language models encode": 28249, "f1 score model": 20224, "language model xlm": 28204, "datasets available https": 13163, "retrieval ir systems": 47949, "zero shot settings": 63178, "pre training synthetic": 41596, "address gap propose": 1759, "dataset news articles": 13012, "effectiveness method using": 16792, "extensive experimental studies": 19875, "studies real world": 53293, "related downstream tasks": 45903, "multilingual transformer model": 36131, "performance pretrained language": 40495, "task specific datasets": 55392, "evaluate models using": 18475, "pre training effective": 41576, "transfer natural language": 58409, "like bert gpt": 30463, "existing approaches suffer": 19031, "improves performance tasks": 25145, "propose automatic evaluation": 43307, "generation question answering": 22536, "standard text classification": 52535, "shot fine tuning": 50617, "achieve excellent results": 1136, "training data annotation": 57973, "require complex reasoning": 46846, "lack parallel corpora": 27905, "methods document level": 32828, "experiments wide range": 19561, "labels extensive experiments": 27825, "paper present empirical": 39449, "simple strong baseline": 51214, "limited task specific": 30624, "summarization experimental results": 53884, "fine tuning results": 21016, "existing work usually": 19175, "extensive experiments english": 19887, "datasets demonstrate superior": 13217, "propose neuro symbolic": 43493, "input output pairs": 26309, "results indicate models": 47678, "model propose new": 34246, "domain adaptation uda": 16006, "propose fine tune": 43390, "processing nlp algorithms": 42898, "indicate proposed approach": 25532, "proposed approach effective": 43726, "methods recent years": 33008, "rise deep learning": 48153, "text fine tuning": 56581, "fine tuning experiments": 20989, "relative position encoding": 46106, "pretrained large scale": 42163, "achieved competitive results": 1229, "competitive results strong": 9564, "results strong baselines": 47859, "monte carlo sampling": 35828, "experiments various datasets": 19557, "learning fine tune": 29647, "statistically significant performance": 52774, "models trained solely": 35620, "dependence labeled data": 14103, "low resource tasks": 31195, "paper present systematic": 39464, "shot context learning": 50607, "results demonstrate superior": 47584, "improvements compared strong": 25061, "learning algorithms using": 29513, "code data publicly": 8802, "learning based systems": 29542, "learning experiments demonstrate": 29637, "massive amounts data": 31880, "task learning problem": 55179, "shed light important": 50526, "performance nlp models": 40458, "relevant natural language": 46225, "paper propose pre": 39529, "language processing aims": 28394, "achieved high performance": 1241, "weak supervision based": 61851, "outside training distribution": 39028, "data augmentation da": 12153, "proposed model shows": 43857, "model shows significant": 34377, "models multi task": 35236, "experimental results multi": 19296, "fine tuning neural": 21004, "extensive empirical evaluation": 19864, "natural language despite": 36420, "text classification approach": 56467, "data model training": 12491, "experiments commonly used": 19377, "attention nlp community": 4802, "using cosine similarity": 60634, "faster inference speed": 20438, "downstream tasks information": 16358, "play central role": 40962, "shelf language models": 50538, "paper propose automatic": 39498, "human evaluation method": 24149, "key challenge task": 27298, "annotated data model": 2884, "learning text classification": 29911, "fine tuned target": 20969, "based model using": 5863, "introduce contrastive learning": 26791, "maximize mutual information": 31959, "models achieve high": 34671, "leaving room improvement": 29985, "text classification benchmarks": 56470, "demonstrates effectiveness approach": 14030, "zero shot manner": 63167, "manually labeled training": 31784, "ablation study shows": 661, "aware machine translation": 5458, "bert roberta xlnet": 6713, "trained manually annotated": 57782, "focus sentence level": 21198, "aim improve performance": 2151, "fine tunes model": 20975, "learning models natural": 29756, "models using sequence": 35659, "shown remarkable success": 50748, "paper propose dual": 39505, "existing open source": 19125, "benchmark datasets model": 6457, "training data previous": 58027, "propose novel regularization": 43558, "response generation tasks": 47393, "challenging tasks natural": 8157, "neural generation models": 36958, "years witnessed increasing": 63082, "novel unsupervised method": 37948, "based token level": 6098, "paid little attention": 39144, "graph convolution networks": 23122, "using graph convolutional": 60715, "fine tune plms": 20952, "classification task results": 8567, "based support vector": 6073, "representation model trained": 46554, "state art evaluated": 52613, "used various downstream": 60348, "learning based solutions": 29541, "deep learning era": 13708, "trained transformer encoder": 57899, "evaluations proposed model": 18767, "multi armed bandit": 35939, "domain language model": 16099, "model work propose": 34544, "proposed method used": 43831, "collecting high quality": 8975, "provide baseline performance": 44013, "crucial downstream tasks": 11900, "model augmented data": 33589, "unlabeled data improve": 59565, "way improve performance": 61809, "improve performance strong": 24900, "art text generation": 4430, "knowledge language models": 27542, "propose multi label": 43475, "finally conduct extensive": 20846, "achieved best results": 1221, "significant performance boosts": 50904, "representation learning language": 46540, "despite recent advancements": 14382, "generation models trained": 22499, "shown remarkable performance": 50747, "classification task fine": 8565, "data data augmentation": 12268, "extensive experiments evaluate": 19888, "suffer data scarcity": 53762, "encourage future research": 17594, "question answering sentiment": 44709, "answering sentiment analysis": 3096, "test sets using": 56379, "present comprehensive analysis": 41871, "attracted increasing attention": 4882, "address limitations propose": 1779, "correlations human judgements": 11536, "dataset large scale": 12978, "contrastive learning multi": 10906, "art models datasets": 4295, "performance fine tuned": 40350, "datasets models trained": 13338, "review state art": 48041, "models available url": 34749, "machine learning data": 31318, "annotated test sets": 2922, "issue low resource": 27068, "various language models": 61351, "github com declare": 22698, "com declare lab": 9009, "language processing bert": 28400, "learning models automatically": 29752, "improve performance results": 24898, "multiple low resource": 36243, "text extensive experiments": 56572, "methods fully supervised": 32872, "github com rucaibox": 22709, "strategies improve performance": 52905, "address challenge study": 1746, "different previous methods": 15034, "generation models produce": 22498, "use parallel data": 59972, "pretrained multilingual models": 42174, "language models method": 28282, "adopted neural machine": 1871, "employ end end": 17381, "using sequence labeling": 60931, "applying pre trained": 3375, "language model downstream": 28160, "adapting pre trained": 1569, "reasoning question answering": 45220, "settings experimental results": 50370, "language modeling question": 28218, "words natural language": 62464, "samples extensive experiments": 48475, "conventional sequence sequence": 11013, "representations fine tuning": 46671, "substantially outperforms baselines": 53645, "conduct ablation studies": 10025, "tackle problems propose": 54712, "results method improves": 47716, "methods large margins": 32918, "f1 score macro": 20223, "using self supervised": 60918, "limited annotated data": 30567, "sequence model bart": 49953, "model achieve competitive": 33504, "extract structured information": 19997, "datasets transformer based": 13463, "field artificial intelligence": 20751, "plays essential role": 40996, "used transformer based": 60342, "state art character": 52592, "model source code": 34400, "model able transfer": 33495, "introduce novel dataset": 26844, "existing automatic metrics": 19037, "graph based encoder": 23104, "reflect real world": 45775, "ability models generalize": 627, "art performance nlp": 4339, "including support vector": 25306, "language models performing": 28295, "weakly supervised methods": 61866, "use transformer based": 60060, "tasks commonly used": 55545, "task demonstrate effectiveness": 55002, "point processes dpps": 41049, "model performance address": 34186, "performance address issue": 40187, "influence model performance": 25728, "use task specific": 60041, "language work propose": 28584, "term memory language": 56248, "performance automatic human": 40204, "generated social media": 22319, "available github https": 5299, "pre trained massive": 41543, "conduct exhaustive experiments": 10041, "increases model size": 25436, "languages domains paper": 28648, "datasets method significantly": 13331, "high quality automatic": 23771, "knowledge knowledge graph": 27536, "high computational costs": 23716, "transfer learning language": 58382, "model new data": 34126, "annotated data scarce": 2885, "general pre trained": 22081, "data recent years": 12589, "generation empirical results": 22452, "task learning paradigm": 55178, "work bridge gap": 62591, "release benchmark dataset": 46143, "improved performance various": 24960, "roberta based model": 48217, "language processing approaches": 28398, "model performance compared": 34187, "performance compared models": 40253, "rely supervised learning": 46303, "supervised learning technique": 54008, "manually labeled dataset": 31783, "train state art": 57639, "performances wide range": 40654, "experiments pre trained": 19490, "despite success large": 14396, "need labeled data": 36575, "compared baseline model": 9385, "easily integrated existing": 16546, "workshop asian translation": 62921, "loss experimental results": 31087, "supervised unsupervised machine": 54067, "language models llms": 28278, "research open domain": 47084, "datasets open domain": 13355, "years end end": 63058, "trained large datasets": 57768, "number training samples": 38051, "tasks proposed framework": 55824, "based logistic regression": 5820, "model knowledge graph": 34032, "medium low resource": 32219, "models trained datasets": 35605, "text language model": 56643, "zero shot prompting": 63172, "research real world": 47110, "trained models achieve": 57794, "make model learn": 31581, "trained models large": 57802, "improve data efficiency": 24841, "language models train": 28333, "based large language": 5806, "language model t5": 28196, "submissions shared task": 53579, "shared task leaderboard": 50500, "source code freely": 51747, "utilizes pre trained": 61117, "consistent performance gains": 10282, "code switched text": 8860, "scores previous state": 48914, "applications deep learning": 3197, "based bi encoder": 5605, "multilingual xlm roberta": 36136, "higher f1 scores": 23825, "affect downstream performance": 2013, "various transformer based": 61412, "performance language understanding": 40409, "propose new multi": 43509, "different pre training": 15030, "amounts text data": 2558, "available benchmark datasets": 5266, "fine tuning propose": 21015, "require fine tuning": 46857, "tuning large language": 58923, "compared standard transformer": 9457, "questions require complex": 44806, "employing pre trained": 17401, "able achieve better": 669, "shot shot settings": 50647, "achieved macro f1": 1249, "https huggingface datasets": 24061, "research direction propose": 47020, "dataset code released": 12843, "released url https": 46185, "performance improvement compared": 40384, "labeled training set": 27768, "results pre trained": 47771, "recent pretrained language": 45336, "bayes support vector": 6355, "using fine tuned": 60697, "demonstrate models trained": 13946, "aims automatically generate": 2176, "tf idf word2vec": 56994, "tasks extensive experiments": 55636, "existing works focus": 19177, "leveraging large scale": 30330, "fine grained manner": 20940, "jointly learns representations": 27203, "zero shot baselines": 63155, "social media challenging": 51569, "shared task 2021": 50492, "based bert architecture": 5600, "fine tuning existing": 20988, "applied various tasks": 3310, "uses language model": 60517, "sequence sequence pre": 49993, "pre train large": 41518, "bert based baselines": 6619, "baselines benchmark datasets": 6240, "benchmark datasets code": 6448, "baseline future work": 6173, "models fine tune": 35030, "yields best results": 63116, "joint entity relation": 27169, "language models obtained": 28291, "machine learning paper": 31329, "nature social media": 36487, "novel large scale": 37852, "gap training inference": 21983, "approaches significantly outperform": 3921, "method make use": 32569, "performance code available": 40240, "furthermore propose novel": 21835, "pattern exploiting training": 39961, "task document level": 55030, "external knowledge knowledge": 19942, "produce promising results": 43000, "data new domain": 12511, "dense passage retrieval": 14078, "language models present": 28302, "complex multi step": 9637, "work propose self": 62785, "outperforms baselines significantly": 38875, "performance existing state": 40333, "performed benchmark datasets": 40658, "reinforcement learning agent": 45865, "cross domain generalization": 11816, "new fine tuning": 37207, "validate effectiveness model": 61176, "contrastive learning objective": 10907, "language model plm": 28184, "approach publicly available": 3661, "research field natural": 47037, "language processing lack": 28413, "method improve accuracy": 32530, "macro average f1": 31403, "models new domains": 35258, "alleviate catastrophic forgetting": 2400, "need fine tuning": 36567, "fine tuning framework": 20992, "text text models": 56810, "objective pre training": 38100, "relatively small number": 46131, "language processing typically": 28437, "nlp tasks datasets": 37532, "based graph neural": 5763, "model performance using": 34196, "intent classification task": 26565, "study pre trained": 53435, "11 indic languages": 88, "question answering framework": 44698, "improvements automatic evaluation": 25048, "consistently outperforms baselines": 10303, "contrastive representation learning": 10920, "work presents novel": 62764, "tasks propose use": 55821, "pre training corpora": 41571, "pre trained natural": 41548, "work propose efficient": 62775, "high quality generated": 23778, "german italian spanish": 22673, "models text generation": 35597, "non autoregressive generation": 37634, "learning framework based": 29653, "compared baseline models": 9386, "resource target language": 47280, "best models achieve": 6784, "existing zero shot": 19179, "paper tackles problem": 39593, "models perform reasonably": 35315, "data augmentation based": 12152, "efficient fine tuning": 16874, "tuning pretrained models": 58948, "incorporated pre trained": 25371, "achieve human level": 1159, "train neural models": 57617, "research topic field": 47131, "domain labeled data": 16096, "models fine grained": 35029, "consistently outperforms vanilla": 10308, "trained models ptms": 57806, "multi task pre": 36026, "task pre training": 55281, "fine tuning plms": 21010, "machine readable format": 31339, "manual automatic evaluation": 31734, "model downstream task": 33791, "baselines cross lingual": 6247, "generate pseudo labels": 22231, "imitation reinforcement learning": 24580, "based domain specific": 5691, "domain specific model": 16180, "state art comparable": 52598, "evaluation human evaluation": 18625, "tasks end end": 55612, "paper propose controllable": 39500, "fine tuning tasks": 21027, "language fine tuning": 28073, "improve model generalization": 24872, "lm fine tuned": 30907, "logistic regression random": 30997, "regression random forest": 45818, "transformer model performance": 58496, "language models limited": 28276, "methods deep neural": 32816, "supervised contrastive loss": 53973, "publicly available resources": 44354, "language models new": 28288, "human annotation paper": 24102, "quality human annotated": 44530, "language models predict": 28301, "results language models": 47691, "outperforms baseline systems": 38872, "methods achieves state": 32735, "better performance paper": 6933, "tuning pretrained language": 58947, "work shed light": 62819, "sequence labeling problems": 49938, "models trained small": 35618, "round trip translation": 48360, "larger existing datasets": 29078, "analyze impact different": 2819, "inference nli model": 25676, "contrastive learning approach": 10899, "different domains results": 14908, "models plms bert": 35327, "monolingual multilingual cross": 35806, "compared commonly used": 9394, "nearest neighbor retrieval": 36520, "models trained general": 35611, "set real world": 50233, "pre trained generative": 41535, "trained generative language": 57740, "different word order": 15134, "natural language reasoning": 36446, "propose novel non": 43553, "propose curriculum learning": 43345, "task learning method": 55174, "meaning original text": 32008, "cross lingual experiments": 11836, "results fine tuned": 47637, "learning fine tuned": 29648, "outperform existing models": 38796, "data available task": 12179, "conduct comprehensive analysis": 10032, "competitive performance using": 9556, "end training data": 17721, "training data directly": 57987, "corpus propose novel": 11409, "accuracy current state": 954, "trained using different": 57909, "prompt based methods": 43201, "training data explore": 57995, "training data multiple": 58020, "best performing submission": 6799, "latest state art": 29159, "recent developments natural": 45304, "settings code available": 50360, "enhanced pre trained": 17937, "survey provide comprehensive": 54214, "empirically demonstrate effectiveness": 17359, "evaluation downstream tasks": 18612, "retrieval reading comprehension": 47967, "domain language models": 16100, "continual learning cl": 10823, "construct new benchmark": 10395, "training data challenging": 57981, "transformer based nlp": 58469, "domain adaptation da": 15997, "outperforms best performing": 38879, "local global context": 30939, "recent approaches use": 45293, "does improve performance": 15953, "mean absolute error": 31988, "fine grained scores": 20941, "models known suffer": 35157, "based finding propose": 5731, "present transformer based": 42046, "guide generation process": 23335, "corpus proposed model": 11411, "model generalize new": 33925, "machine translation multilingual": 31370, "small number domain": 51487, "fully fine tuned": 21733, "wide range language": 61969, "tasks including question": 55681, "method conduct extensive": 32434, "hard negative mining": 23447, "recurrent neural language": 45623, "natural language prompts": 36441, "multiple datasets demonstrate": 36194, "approach paper present": 3634, "models perform par": 35313, "low computational cost": 31134, "previous work usually": 42317, "processing nlp domain": 42902, "using bert model": 60590, "bert model pre": 6685, "training data collected": 57982, "approach substantially improves": 3708, "baselines human evaluation": 6269, "language models human": 28261, "demonstrate large language": 13929, "used train model": 60334, "learning methods using": 29737, "model achieves strong": 33526, "suffer spurious correlations": 53784, "classification multi label": 8501, "human annotated training": 24100, "paper provide comprehensive": 39555, "perform significantly worse": 40142, "applied zero shot": 3315, "helps improve model": 23609, "obtaining high quality": 38235, "provide weak supervision": 44156, "different evaluation settings": 14922, "review recent progress": 48038, "interpretable machine learning": 26722, "dataset real world": 13054, "autoregressive non autoregressive": 5224, "study problem generating": 53440, "outperforms various strong": 38960, "various strong baselines": 61400, "new sota performance": 37320, "analysis shared task": 2754, "paper propose textbf": 39537, "fine tune transformer": 20956, "popular text classification": 41194, "architecture pre trained": 4078, "provide comprehensive review": 44040, "explored paper propose": 19761, "based bidirectional long": 5608, "bert natural language": 6693, "propose multi stage": 43478, "important research topic": 24764, "model better performance": 33626, "develop natural language": 14603, "false positive rate": 20382, "model transfer knowledge": 34486, "domain specific sentiment": 16185, "new multi task": 37263, "language models robust": 28317, "different bert based": 14854, "approaches tackle problem": 3934, "conduct zero shot": 10071, "word embeddings evaluate": 62167, "work introduces novel": 62696, "modal contrastive learning": 33455, "contrastive learning model": 10905, "learning model learns": 29747, "systems end end": 54486, "ground truth datasets": 23254, "human evaluation protocol": 24153, "mask language modeling": 31858, "trained existing datasets": 57726, "global structural information": 22844, "meta learning based": 32341, "available experimental results": 5291, "verify effectiveness approach": 61536, "especially social media": 18302, "results compared baseline": 47546, "f1 score 46": 20196, "novel knowledge graph": 37848, "questions work propose": 44817, "metrics bleu rouge": 33146, "results cross domain": 47564, "labeled data source": 27746, "indo aryan language": 25594, "f1 score best": 20220, "demonstrate fine tuning": 13913, "based metrics bleu": 5855, "data pre processing": 12552, "semantic information model": 49286, "task improve performance": 55127, "semeval 2022 task": 49441, "despite promising results": 14379, "benchmark natural language": 6482, "make best use": 31543, "new target domain": 37332, "real world events": 45129, "art sota approaches": 4405, "specific pre trained": 52127, "models diverse set": 34933, "propose prompt based": 43595, "datasets extensive experimental": 13268, "source code work": 51753, "code work available": 8869, "baseline models trained": 6192, "zero shot baseline": 63154, "contrast previous studies": 10883, "domain specific training": 16190, "style experimental results": 53485, "recent advances pretrained": 45288, "dataset code models": 12841, "large neural networks": 28919, "computational resources training": 9860, "proposed model text": 43859, "generation models generate": 22497, "propose transition based": 43684, "different hyper parameters": 14952, "parameters experimental results": 39696, "experimental results depth": 19283, "results depth analysis": 47588, "experiments analyses demonstrate": 19348, "learning approach allows": 29520, "improvements benchmark datasets": 25053, "gap pre training": 21973, "sequence models achieved": 49959, "model reinforcement learning": 34296, "findings provide insights": 20913, "given knowledge graph": 22752, "seven benchmark datasets": 50416, "performance previous state": 40497, "perform manual evaluation": 40121, "neural network task": 37027, "model new task": 34129, "work present systematic": 62760, "modeling long range": 34593, "experiments datasets demonstrate": 19399, "memory lstm model": 32263, "based findings propose": 5733, "wide range existing": 61968, "f1 score 62": 20198, "training data addition": 57971, "number labeled data": 38012, "key design choices": 27307, "unsupervised supervised models": 59737, "latest deep learning": 29156, "multiple information sources": 36227, "research paper present": 47087, "guide future research": 23332, "classification fundamental task": 8475, "based large pre": 5807, "current neural models": 11992, "recent progress language": 45338, "test state art": 56383, "understanding nlu research": 59376, "series controlled experiments": 50061, "language understanding capabilities": 28546, "counterfactual data augmentation": 11618, "lead state art": 29273, "models struggle generalize": 35541, "unsupervised contrastive learning": 59688, "language models successfully": 28327, "training validation test": 58313, "models terms accuracy": 35592, "widely studied recent": 62006, "studied recent years": 53234, "data set results": 12644, "different classification models": 14864, "based models used": 5883, "models fail capture": 35016, "wide range text": 61975, "data driven systems": 12306, "novel latent variable": 37854, "used training corpus": 60337, "benchmark text classification": 6501, "better performance model": 6932, "set large scale": 50182, "corpus high quality": 11354, "representation learning approach": 46537, "paper propose automated": 39497, "multilingual code mixed": 36069, "ability large language": 618, "superior zero shot": 53947, "challenging task aims": 8144, "optimized end end": 38567, "does rely external": 15967, "question answering existing": 44696, "paper proposes multi": 39548, "input neural network": 26305, "does require training": 15977, "models using data": 35656, "advent deep learning": 1958, "information extraction documents": 25861, "tuning pretrained bert": 58946, "novel model based": 37873, "macro f1 scores": 31408, "dataset f1 score": 12925, "make correct predictions": 31557, "models automatically generate": 34746, "sequence classification task": 49916, "data code models": 12213, "training data significantly": 58037, "model improves accuracy": 33983, "strategies experimental results": 52902, "demonstrate practical utility": 13959, "open source models": 38453, "zero shot task": 63181, "text generation evaluation": 56597, "outperforming previous work": 38858, "advancements natural language": 1902, "model better capture": 33625, "especially zero shot": 18313, "furthermore propose new": 21834, "scale human annotated": 48579, "trained general purpose": 57737, "language models potential": 28298, "qualitative quantitative analyses": 44479, "demonstrate model able": 13940, "performance drops dramatically": 40311, "prompt based learning": 43199, "better low resource": 6913, "english dataset demonstrate": 17794, "generation aims produce": 22416, "data expensive time": 12341, "fine tuning shot": 21017, "train model large": 57608, "appropriate training data": 3968, "training data based": 57979, "reason paper propose": 45170, "datasets significant improvements": 13429, "combined pre trained": 9083, "transformer based large": 58462, "probability language model": 42479, "systems previous works": 54597, "inference extensive experiments": 25658, "model performance results": 34194, "using active learning": 60550, "text previous works": 56708, "leverage large scale": 30275, "fine tuning different": 20984, "achieved significant performance": 1270, "recent works demonstrated": 45377, "trained seq2seq model": 57866, "novel prompt based": 37901, "cross lingual knowledge": 11839, "language models apply": 28230, "existing approaches perform": 19028, "essential task natural": 18336, "performance compared supervised": 40257, "shown large language": 50725, "baseline f1 score": 6167, "serve baseline future": 50074, "based cosine similarity": 5655, "learning models paper": 29757, "observe fine tuned": 38133, "approaches experimental results": 3817, "based automatic human": 5589, "seq seq models": 49891, "fine tune task": 20955, "data augmentation work": 12167, "conduct comprehensive study": 10034, "non neural models": 37671, "experiments method outperforms": 19464, "heterogeneous graph based": 23621, "available github com": 5298, "based classification models": 5619, "trained models usually": 57812, "trained models language": 57801, "labeled data using": 27753, "hyper parameter tuning": 24326, "higher correlations human": 23820, "method based deep": 32399, "art baselines terms": 4225, "propose novel prompt": 43557, "shown language models": 50723, "consisting multiple sentences": 10318, "conducting human evaluation": 10105, "trained large language": 57769, "methods require significant": 33019, "challenging low resource": 8112, "limited data available": 30578, "method outperforms competitive": 32600, "based learning based": 5812, "does require additional": 15970, "prior work explored": 42420, "level entity level": 30112, "work proposes new": 62793, "multi objective optimization": 35994, "proposed method better": 43811, "literature paper propose": 30859, "token level classification": 57296, "training domain data": 58072, "general domain text": 22056, "2022 shared task": 304, "mean standard deviation": 31998, "improve final performance": 24856, "experiments conducted publicly": 19385, "conducted publicly available": 10090, "positive negative sentiment": 41289, "automated speech recognition": 5059, "non zero shot": 37692, "improve generation quality": 24862, "tasks self supervised": 55869, "language models despite": 28244, "knowledge pre training": 27574, "success large language": 53705, "compared existing approaches": 9406, "machine translation test": 31389, "provide detailed description": 44051, "design simple effective": 14300, "ability transformer based": 647, "lead improved performance": 29261, "training self supervised": 58243, "bert achieved state": 6605, "knowledge downstream tasks": 27451, "prompt tuning methods": 43211, "meta learning approaches": 32340, "models including state": 35120, "alternative fine tuning": 2502, "use pretrained language": 59980, "language understanding current": 28548, "models trained evaluated": 35609, "using large language": 60757, "bert base models": 6615, "enrich training data": 17962, "aware pre training": 5466, "propose token level": 43675, "commonly used evaluation": 9224, "seen surge research": 49064, "training work propose": 58317, "work propose framework": 62777, "detection experimental results": 14483, "methods achieved promising": 32733, "domain social media": 16163, "corpus fine tune": 11344, "evaluation demonstrates effectiveness": 18608, "important real world": 24759, "robust domain shift": 48246, "gap propose novel": 21978, "easy data augmentation": 16560, "data augmentation eda": 12154, "work compare different": 62603, "inter annotator agreements": 26577, "effectiveness method compared": 16790, "data models trained": 12496, "support research task": 54124, "supervised unsupervised approaches": 54065, "domains work propose": 16302, "previous work typically": 42314, "paper study performance": 39579, "results shot learning": 47836, "smaller language models": 51520, "nlp data augmentation": 37478, "word embeddings learnt": 62178, "remains key challenge": 46336, "code mixed languages": 8828, "language models tasks": 28330, "zero shot retrieval": 63173, "especially low data": 18285, "paper propose evaluate": 39509, "annotated test set": 2921, "use fine tuned": 59891, "github com alibaba": 22694, "extensive experiments based": 19880, "strong performance zero": 53041, "current transformer based": 12022, "leading sub optimal": 29302, "github com amazon": 22696, "language models leverage": 28274, "facilitate research direction": 20276, "semi structured data": 49454, "model self attention": 34349, "demonstrated impressive results": 14012, "prompting large language": 43217, "language model llm": 28173, "model effectively learn": 33799, "alleviate issue propose": 2409, "semantic role labelling": 49339, "conduct experiments benchmark": 10043, "using diverse set": 60665, "provide comparative analysis": 44030, "data augmentation improve": 12157, "surpass current state": 54163, "state art existing": 52614, "achieve sota performance": 1201, "performance improvement state": 40385, "perform human evaluation": 40113, "objective subjective evaluation": 38105, "human decision making": 24133, "shown fine tuning": 50710, "propose retrieval augmented": 43609, "develop data driven": 14580, "artificial intelligence applications": 4492, "data cross lingual": 12263, "propose new technique": 43514, "general purpose models": 22087, "lot training data": 31123, "semantic representation input": 49333, "method outperforms prior": 32603, "comprehensive empirical study": 9787, "capable generating diverse": 7621, "propose multi view": 43480, "empirical results analysis": 17338, "propose template based": 43665, "outperform fine tuning": 38799, "improve performance domain": 24888, "higher level semantic": 23832, "significantly outperforming previous": 50995, "given language model": 22756, "using prompt based": 60876, "code data released": 8803, "data released https": 12598, "entropy ce loss": 18160, "additional fine tuning": 1671, "automatically construct large": 5151, "20 f1 score": 226, "zero shot models": 63170, "fully supervised baselines": 21740, "consistently better performance": 10292, "extraction sequence labeling": 20111, "model achieves promising": 33522, "available large scale": 5320, "language model ii": 28170, "rich information contained": 48103, "applications text generation": 3253, "propose method called": 43454, "downstream tasks code": 16353, "previous zero shot": 42326, "important research direction": 24762, "input language model": 26289, "proposed approach state": 43734, "learn latent representation": 29389, "model uses word": 34513, "approach domain adaptation": 3497, "modal pre training": 33463, "human evaluation study": 24157, "long input sequences": 31016, "models able perform": 34657, "impressive results various": 24817, "bert models different": 6688, "gap propose new": 21977, "knowledge state art": 27619, "evaluation low resource": 18634, "dataset cross lingual": 12875, "employ multi task": 17386, "task text mining": 55436, "real use cases": 45117, "propose novel shot": 43562, "single task counterparts": 51343, "novel approach generating": 37762, "high quality entity": 23777, "automated text processing": 5063, "resource constrained devices": 47213, "recent advances large": 45284, "provides easy use": 44194, "learning based automatic": 29532, "baseline models performance": 6190, "prior work demonstrated": 42419, "adversarial training approach": 1989, "applied text classification": 3303, "learning models recently": 29760, "performance standard fine": 40574, "methods typically rely": 33087, "compared pre trained": 9432, "approach shows competitive": 3690, "adversarial training language": 1991, "art performance using": 4351, "improve performance multi": 24894, "text sentiment analysis": 56764, "play critical role": 40964, "used supervised learning": 60318, "models models trained": 35230, "understanding language models": 59358, "domain zero shot": 16230, "challenging task existing": 8147, "limitations propose novel": 30556, "processing nlp information": 42905, "techniques pre trained": 56123, "best performing method": 6796, "models learn better": 35175, "development artificial intelligence": 14669, "proposed approach reduces": 43731, "learn fine grained": 29372, "existing methods use": 19103, "real world facts": 45130, "neural text classification": 37104, "book question answering": 7247, "methods proven effective": 32999, "substantial performance gains": 53627, "improve downstream performance": 24846, "dataset social media": 13093, "scale benchmark dataset": 48556, "performance paper proposes": 40479, "previous neural models": 42266, "open question paper": 38442, "embedding state art": 17065, "result proposed model": 47449, "model outperforms best": 34159, "design pre training": 14296, "sentence level evaluation": 49586, "lingual cross domain": 30694, "representations low resource": 46714, "improve robustness model": 24920, "pre training cross": 41573, "training cross domain": 57967, "approaches paper present": 3891, "text remains challenging": 56736, "language model improve": 28171, "approach achieves accuracy": 3394, "word embeddings demonstrate": 62162, "method significantly reduces": 32658, "various downstream applications": 61332, "present novel training": 41979, "end propose method": 17700, "paper explore various": 39367, "sequence model generate": 49954, "including pre trained": 25289, "test data using": 56341, "outperform baseline model": 38781, "processing tasks recently": 42953, "datasets results proposed": 13411, "extensive experiments existing": 19889, "conditional variational auto": 10009, "art large language": 4273, "end end requires": 17659, "data centric approach": 12202, "model learns identify": 34055, "low resources languages": 31201, "word embeddings propose": 62183, "traditional sequence sequence": 57544, "approaches pre trained": 3897, "development deep neural": 14676, "adapt state art": 1511, "transformer encoder model": 58484, "neural networks generate": 37048, "paper provides overview": 39560, "sentence level accuracy": 49578, "recall f1 scores": 45242, "human loop approach": 24204, "dataset empirical results": 12906, "use end end": 59877, "explicit semantic information": 19625, "task propose multi": 55300, "previous approaches use": 42241, "present detailed error": 41891, "multi domain multi": 35956, "simple effective unsupervised": 51159, "empirical results using": 17348, "languages make code": 28724, "methods typically require": 33088, "language models llm": 28277, "based data driven": 5665, "discuss future research": 15468, "language models given": 28258, "representation amr graph": 46491, "work demonstrates feasibility": 62628, "video available https": 61580, "fully supervised baseline": 21739, "experiments public benchmarks": 19502, "paper proposed novel": 39543, "evaluation experimental results": 18619, "learning large language": 29699, "semeval 2023 task": 49443, "based multiple choice": 5893, "model new domains": 34128, "outperforms commonly used": 38884, "93 f1 score": 563, "demonstrated impressive performance": 14011, "text generation approaches": 56595, "systems conversational agents": 54461, "propose encoder decoder": 43368, "art results unsupervised": 4390, "learning data augmentation": 29579, "based hypothesis propose": 5776, "leverage external knowledge": 30269, "standard pre trained": 52518, "use fine tuning": 59892, "limited english language": 30585, "level sequence labeling": 30211, "better performance baselines": 6929, "performance compared using": 40258, "evaluation dataset consisting": 18604, "experiment results benchmark": 19248, "terms exact match": 56287, "framework end end": 21505, "bleu rouge meteor": 7212, "does require task": 15976, "annotator agreement iaa": 3011, "used benchmark datasets": 60105, "solve problem introduce": 51685, "transformer based plms": 58470, "opening new avenues": 38476, "discriminative generative models": 15445, "like bert achieved": 30462, "language models attention": 28232, "methods based pre": 32765, "positive unlabeled pu": 41301, "unlabeled pu learning": 59576, "performance paper conduct": 40474, "github com deeplearnxmu": 22699, "addition propose novel": 1638, "different previous works": 15035, "use real world": 59987, "diverse downstream tasks": 15702, "news articles using": 37387, "poor generalization ability": 41135, "data source language": 12677, "source language data": 51777, "non autoregressive manner": 37635, "learning shown great": 29875, "cases zero shot": 7819, "fine tuning entire": 20987, "improved performance compared": 24957, "languages large amounts": 28708, "text paper investigate": 56689, "tasks sentence classification": 55872, "largest human annotated": 29097, "prompt tuning method": 43210, "future research area": 21884, "text image generation": 56622, "widely used methods": 62018, "gold standard data": 22917, "baselines terms f1": 6311, "extensive experiments performed": 19895, "based fine tuned": 5735, "brown et al": 7373, "learned language models": 29464, "benchmark datasets compared": 6449, "recently attracted attention": 45409, "attracted attention researchers": 4875, "problems propose new": 42723, "respectively proposed method": 47380, "despite widespread use": 14406, "embeddings domain specific": 17117, "transformer based classification": 58458, "model annotated data": 33566, "effectiveness data augmentation": 16775, "work zero shot": 62862, "paper propose data": 39502, "recent research shows": 45343, "use bidirectional encoder": 59837, "sentiment social media": 49860, "sentiment analysis results": 49827, "attention mechanism pre": 4779, "zero shot evaluations": 63161, "inference time paper": 25701, "task language models": 55161, "model bert bidirectional": 33619, "representation learning based": 46538, "task best model": 54937, "open research question": 38445, "compared competitive baselines": 9397, "paper present hybrid": 39453, "end end evaluation": 17645, "data used improve": 12760, "leading better performance": 29289, "paper fills gap": 39374, "requires expert knowledge": 46927, "new unseen domains": 37355, "develop neural model": 14605, "performing end end": 40677, "use unsupervised methods": 60067, "including end end": 25254, "empirical results indicate": 17342, "propose natural language": 43487, "english foreign language": 17806, "large scale annotation": 28961, "foundation natural language": 21419, "future research code": 21885, "auto regressive decoder": 5021, "recently demonstrated impressive": 45418, "general domain language": 22054, "generation text summarization": 22567, "relatively small datasets": 46130, "data challenging task": 12206, "evaluation metrics model": 18651, "performs considerably better": 40706, "results various nlp": 47904, "positive negative sentiments": 41290, "respectively extensive experiments": 47370, "simple easy implement": 51149, "significant improvement existing": 50877, "models work analyze": 35688, "weakly supervised data": 61862, "label classification tasks": 27698, "training extensive experiments": 58103, "performance monolingual multilingual": 40444, "performing bert based": 40672, "hand crafted templates": 23390, "proposed framework effectively": 43783, "based hand crafted": 5765, "survey recent advances": 54217, "information unstructured text": 26140, "64 f1 score": 470, "language understanding natural": 28554, "learning framework using": 29656, "learning dl based": 29600, "specific task paper": 52154, "tasks empirical results": 55607, "multi modal multi": 35991, "perform comparably better": 40076, "construct high quality": 10388, "high quality multi": 23782, "use self supervised": 60005, "relatively little work": 46123, "provide case study": 44024, "achieves absolute improvement": 1301, "machine translation generate": 31359, "testing state art": 56414, "low resource problem": 31187, "framework zero shot": 21630, "results compared previous": 47548, "text davinci 003": 56527, "evaluation metrics paper": 18652, "benefit transfer learning": 6572, "computational resources paper": 9859, "furthermore conduct detailed": 21811, "deep learning technology": 13724, "complex state art": 9663, "datasets significant improvement": 13428, "model text davinci": 34458, "method wide range": 32705, "domain low resource": 16107, "model previous work": 34233, "processing nlp used": 42917, "present experiments using": 41912, "language models t5": 28328, "integrating state art": 26525, "address gap introduce": 1758, "paper propose stage": 39535, "transformer based multi": 58466, "end paper propose": 17693, "machine learning text": 31336, "knowledge extensive experiments": 27477, "time series forecasting": 57212, "proposes new method": 43939, "art neural based": 4310, "recent years thanks": 45398, "adapt target domain": 1513, "model different datasets": 33766, "models lms pretrained": 35199, "various types knowledge": 61414, "speakers different languages": 52005, "https bit ly": 24055, "information theoretic measures": 26122, "shelf language model": 50537, "dataset fine tuned": 12932, "trainable neural network": 57664, "models cross domain": 34875, "challenging task complexity": 8145, "parameters propose novel": 39719, "language models contain": 28240, "aware contrastive learning": 5446, "contrastive learning strategy": 10911, "scale large language": 48590, "language models shot": 28318, "information state art": 26105, "suffer significant performance": 53782, "datasets previous studies": 13376, "minimal human intervention": 33289, "finally discuss limitations": 20853, "overview state art": 39119, "social commonsense knowledge": 51557, "model shows better": 34376, "training data large": 58007, "knowledge experimental results": 27472, "tasks limited training": 55730, "models llms gpt": 35196, "task model needs": 55217, "topic recent years": 57423, "various domains including": 61330, "does require access": 15969, "rule based models": 48385, "problem recent works": 42644, "novel approach using": 37766, "strategy data augmentation": 52931, "adapts pre trained": 1586, "platforms like twitter": 40953, "structured information unstructured": 53157, "motivated recent advances": 35874, "model training scheme": 34483, "fine tuned training": 20971, "pretrained sentence encoders": 42182, "results test set": 47881, "language models performance": 28294, "algorithm natural language": 2288, "recognition ner named": 45519, "ner named entity": 36681, "recently end end": 45424, "english vice versa": 17900, "language model best": 28158, "transformer based framework": 58460, "demonstrate significant improvement": 13973, "task aims automatically": 54895, "art baselines significantly": 4224, "approaches low resource": 3866, "safety critical applications": 48430, "state art computational": 52600, "domain annotated data": 16019, "generation method based": 22491, "method automatically extract": 32395, "based tf idf": 6094, "speech recognition error": 52284, "sequenceto": 50032, "scatter": 48678, "implicated": 24652, "successor": 53754, "interchangeably": 26639, "vt": 61750, "agendas": 2052, "enlightened": 17956, "scm": 48777, "512": 433, "filed": 20794, "gaussians": 22019, "preset": 42128, "monologues": 35819, "demos": 14059, "experimenter": 19339, "235": 325, "proceeded": 42749, "impairing": 24617, "exaggerated": 18856, "wei": 61911, "maximized": 31961, "440": 408, "155": 159, "extractable": 20004, "interaction different": 26597, "classification annotation": 8433, "corpora arabic": 11178, "strength model": 52971, "training criterion": 57965, "presents hybrid": 42086, "roles paper": 48327, "conditions using": 10021, "utterances dialogue": 61146, "influence future": 25726, "function evaluate": 21754, "model effectiveness": 33800, "dataset showed": 13083, "architecture widely": 4099, "jointly performs": 27215, "technique paper": 56042, "emission probabilities": 17275, "strategy produces": 52947, "par current": 39613, "directly evaluate": 15313, "domain like": 16103, "based handcrafted": 5766, "context memory": 10673, "variables experimental": 61227, "constructed model": 10414, "opened new": 38472, "networks promising": 36896, "propose modifications": 43470, "problem single": 42657, "stronger correlation": 53062, "release publicly": 46166, "incorporates different": 25374, "type location": 59061, "associated confidence": 4621, "possible candidates": 41318, "specific individual": 52089, "generation widely": 22579, "methods topic": 33076, "transfer different": 58358, "interaction users": 26614, "irrelevant noise": 27041, "architecture transfer": 4093, "investigated performance": 27000, "propose novelty": 43575, "work inspired": 62689, "seq2seq methods": 49900, "time making": 57176, "effective self": 16692, "importance training": 24692, "existing online": 19123, "reported using": 46457, "flexible approach": 21108, "detection given": 14489, "studies confirm": 53253, "lstms achieved": 31290, "text tweets": 56826, "step achieve": 52796, "complex long": 9633, "introduce fine": 26805, "84 accuracy": 531, "control quality": 10972, "strategy generating": 52935, "nature makes": 36482, "implement framework": 24634, "named emph": 36369, "good fit": 22932, "benefit wide": 6576, "develop text": 14618, "available address": 5260, "method expand": 32494, "main difficulties": 31434, "task harder": 55113, "problem artificial": 42507, "user social": 60447, "generation difficult": 22448, "attention information": 4759, "lack supervised": 27918, "methods unable": 33089, "importance identifying": 24683, "suffer limitations": 53771, "traditional training": 57553, "apply adversarial": 3320, "prediction specifically": 41739, "distribution labels": 15642, "generates multiple": 22348, "tasks validate": 55958, "unsupervised does": 59693, "attention focused": 4750, "74 f1": 499, "entire conversation": 18020, "utterance conversation": 61134, "models majority": 35212, "sequenceto sequence": 50033, "ranking process": 44976, "exist multiple": 19015, "utterances structured": 61152, "natural structure": 36467, "characteristics natural": 8240, "generalized different": 22151, "opinion piece": 38503, "generative latent": 22594, "task user": 55461, "architecture achieve": 4020, "sub topics": 53536, "years methods": 63064, "possible combinations": 41320, "model relevant": 34303, "domains minimal": 16274, "attains better": 4674, "relations limited": 46040, "resource paper": 47259, "supervision human": 54082, "information extracting": 25859, "extracting sentence": 20038, "supervised signal": 54048, "improvement overall": 25011, "approach complex": 3461, "obtain strong": 38195, "levels hierarchy": 30241, "evaluation achieve": 18573, "rich useful": 48127, "input experiments": 26277, "choosing suitable": 8348, "supports multi": 54143, "known produce": 27665, "trained hundreds": 57749, "list relevant": 30840, "classifiers perform": 8622, "based news": 5911, "corpus building": 11290, "challenges compared": 8035, "yale lily": 63042, "sufficiently capture": 53809, "work treats": 62847, "step perform": 52821, "occurrence relations": 38277, "extracted directly": 20009, "architecture self": 4084, "central research": 7920, "representation textual": 46594, "stream nlp": 52959, "framework context": 21482, "local topic": 30952, "contexts different": 10750, "parts model": 39906, "supervision information": 54083, "specific label": 52095, "years despite": 63056, "perspectives experimental": 40779, "similar domain": 51039, "problems standard": 42731, "utilize external": 61092, "reducing accuracy": 45702, "approach adapted": 3406, "optimal choice": 38526, "samples single": 48488, "additional supervised": 1700, "layer based": 29179, "provided task": 44172, "popular ones": 41177, "various useful": 61416, "filtering mechanism": 20811, "scores 79": 48888, "recently emergence": 45422, "systems hand": 54516, "recently pretrained": 45451, "ranked 3rd": 44954, "rank best": 44949, "words makes": 62453, "trained external": 57729, "experiments unsupervised": 19550, "rapid advancement": 44987, "compared approach": 9379, "applications end": 3202, "reference set": 45746, "correlation words": 11530, "established metrics": 18357, "accelerate research": 802, "knowledge encoding": 27459, "framework fully": 21525, "metrics large": 33176, "12 improvement": 107, "models tackling": 35579, "study improve": 53389, "generative transformer": 22613, "train embedding": 57581, "ii joint": 24505, "problem set": 42650, "simple unified": 51222, "12 respectively": 111, "approach semi": 3684, "representations include": 46687, "results pipeline": 47765, "art strong": 4414, "generating corresponding": 22370, "leverage user": 30298, "supervision self": 54092, "results detailed": 47590, "gives improvement": 22807, "study create": 53350, "efficiently exploit": 16914, "respectively achieves": 47358, "transfer furthermore": 58365, "provides little": 44211, "problem annotating": 42503, "sub fields": 53518, "attracted growing": 4879, "baselines evaluated": 6256, "component generate": 9703, "cleaned dataset": 8647, "gpt radford": 22986, "architecture long": 4062, "framework guide": 21531, "module framework": 35758, "approach quite": 3663, "powerful pretrained": 41443, "considered existing": 10247, "encoder jointly": 17519, "grammatical inference": 23073, "making progress": 31666, "adopt contrastive": 1863, "advent neural": 1960, "models importance": 35106, "improves inference": 25133, "capture essential": 7668, "context detect": 10613, "approach strong": 3705, "shows comparable": 50767, "evaluations approach": 18751, "style word": 53505, "learning sample": 29856, "serves effective": 50090, "tasks separately": 55876, "level contextualized": 30085, "various learning": 61354, "effectively capturing": 16727, "tokens attention": 57322, "explore current": 19694, "minimal annotation": 33284, "closely follow": 8703, "examine model": 18866, "input making": 26296, "conventional training": 11016, "relies word": 46269, "exploit interactions": 19656, "queries extract": 44652, "results individual": 47681, "extend study": 19833, "2016 proposed": 258, "model enhances": 33826, "tasks utilizing": 55957, "generate reasonable": 22234, "encoder extract": 17516, "sensitive quality": 49503, "functions designed": 21770, "ranks second": 44983, "preserving content": 42121, "aspects related": 4551, "robustness issues": 48282, "transfer propose": 58416, "help select": 23588, "propose document": 43356, "length 512": 30024, "showed approach": 50664, "method rule": 32642, "use metrics": 59949, "tend learn": 56201, "method effectiveness": 32474, "related papers": 45922, "architecture optimization": 4071, "predicted results": 41670, "unknown training": 59559, "chosen based": 8351, "performance speed": 40572, "similar vector": 51075, "illustrate utility": 24519, "classification score": 8540, "50 improvement": 421, "combining textual": 9125, "quantify impact": 44609, "metrics rely": 33197, "provides interpretable": 44208, "systems analyze": 54430, "scenario experimental": 48685, "larger original": 29084, "novel retrieval": 37910, "greatly simplifies": 23239, "original semantics": 38726, "reference sets": 45747, "bias paper": 7036, "variables paper": 61229, "level different": 30101, "decoder paradigm": 13609, "benefits jointly": 6582, "task relations": 55330, "learning select": 29863, "make inference": 31576, "special form": 52020, "considered state": 10252, "model assign": 33581, "reconstruction task": 45584, "learning received": 29833, "generator trained": 22623, "increase inference": 25416, "task importantly": 55125, "corpus composed": 11299, "queries challenging": 44649, "based bag": 5592, "confidence estimates": 10112, "importance developing": 24680, "effectively alleviate": 16722, "leverages text": 30317, "corpus highly": 11355, "corpus general": 11348, "sampling negative": 48505, "attention design": 4735, "simply concatenating": 51250, "build predictive": 7420, "sets annotated": 50281, "employ teacher": 17393, "problem inconsistency": 42581, "manner model": 31720, "corpora improves": 11208, "solely relies": 51643, "tend overfit": 56204, "generation introducing": 22479, "methods extensive": 32858, "learning create": 29575, "distilbert model": 15564, "regarding word": 45794, "deep text": 13750, "objective maximize": 38093, "text open": 56680, "framework implement": 21538, "plms pre": 41020, "nli based": 37449, "existing words": 19172, "fusion models": 21859, "recent decade": 45299, "level segment": 30200, "models followed": 35041, "data codes": 12215, "raw textual": 45039, "automatically aligning": 5139, "phonetic semantic": 40828, "domain important": 16082, "work tried": 62848, "domain experimental": 16063, "audio samples": 4929, "attention trained": 4838, "framework demonstrates": 21489, "related previous": 45927, "score task": 48877, "conditioning input": 10018, "detecting implicit": 14449, "form multi": 21328, "underlying causes": 59263, "propose refine": 43599, "available proposed": 5350, "range current": 44909, "shows excellent": 50776, "problems apply": 42694, "adjacency matrix": 1839, "strategy leverages": 52940, "model checkpoint": 33653, "loss computed": 31085, "development text": 14708, "domains datasets": 16244, "results sub": 47861, "baselines standard": 6301, "simpler approach": 51229, "module predict": 35767, "metrics focus": 33167, "new technologies": 37340, "nodes based": 37589, "systems contain": 54458, "features graph": 20593, "type different": 59052, "generation furthermore": 22467, "works adopt": 62875, "components called": 9716, "process output": 42813, "increasing interests": 25453, "sequential generation": 50042, "finally validate": 20887, "techniques particularly": 56118, "experiment real": 19246, "overall effectiveness": 39038, "approach linguistic": 3591, "model textbf": 34459, "inter document": 26581, "helps learn": 23610, "set learning": 50184, "community attention": 9259, "methods release": 33012, "obtained promising": 38219, "model textit": 34460, "proposes solution": 43943, "handle wide": 23420, "metrics trained": 33204, "word utterance": 62333, "approaches combined": 3784, "experiments reported": 19511, "studies achieved": 53242, "iteratively improves": 27131, "chinese benchmark": 8298, "make errors": 31570, "faster baseline": 20433, "benefit pre": 6568, "development technology": 14705, "generate faithful": 22200, "margin especially": 31820, "context better": 10595, "labeling demonstrate": 27783, "capturing dependencies": 7732, "self improvement": 49198, "outputs human": 39016, "perform exhaustive": 40101, "methods difficult": 32825, "end zero": 17728, "level identification": 30131, "detection word": 14540, "length complexity": 30026, "design efficient": 14276, "tasks entailment": 55614, "interpretable features": 26720, "level train": 30224, "improving entity": 25179, "simple generic": 51175, "indicating possible": 25543, "dataset considering": 12859, "baselines improved": 6270, "leveraging graph": 30325, "share insights": 50458, "labeling accuracy": 27776, "accuracy 65": 902, "examples human": 18909, "used resources": 60292, "text summarization text": 56799, "paper presents hybrid": 39475, "introduces novel method": 26895, "architecture widely used": 4100, "novel approach learning": 37763, "learning methods applied": 29731, "paper explore novel": 39363, "neural networks promising": 37063, "learning widely used": 29942, "new domains paper": 37177, "performs competitively state": 40704, "model obtain state": 34138, "multi task neural": 36024, "research paper presents": 47088, "hierarchical structure model": 23693, "showed proposed approach": 50671, "different annotation schemes": 14837, "prior works focus": 42428, "problem artificial intelligence": 42508, "74 f1 score": 500, "systems usually rely": 54665, "challenging research problem": 8138, "model able extract": 33490, "make use knowledge": 31609, "characteristics natural language": 8241, "level representation learning": 30193, "novel approach task": 37765, "does require human": 15973, "recent years methods": 45388, "neural networks provide": 37067, "background knowledge paper": 5493, "obtain strong results": 38196, "problem proposing novel": 42637, "compared baselines including": 9388, "cost human annotation": 11585, "explore deep learning": 19697, "information different sources": 25814, "neural generation model": 36957, "architecture self attention": 4085, "knowledge graphs paper": 27507, "perspectives experimental results": 40780, "multi choice questions": 35944, "easily extended new": 16541, "proposed framework compared": 43781, "recently pretrained language": 45452, "model learns generate": 34054, "novel encoder decoder": 37813, "convolution neural networks": 11098, "language models multi": 28286, "propose multi agent": 43474, "explore transfer learning": 19745, "widely used evaluation": 62015, "achieves significant gains": 1366, "human evaluation compared": 24144, "tuning task specific": 58966, "pre existing knowledge": 41503, "generation models based": 22496, "state art strong": 52672, "art attention based": 4217, "attracted growing attention": 4880, "extensive experiments demonstrated": 19886, "various neural network": 61371, "gpt radford et": 22987, "data semi supervised": 12639, "models jointly learn": 35151, "training data achieves": 57970, "shows comparable performance": 50768, "does require fine": 15972, "models successfully used": 35555, "existing datasets limited": 19055, "using naive bayes": 60825, "self supervised neural": 49213, "information recent studies": 26044, "experimental results presented": 19304, "paper propose graph": 39514, "comparable performance previous": 9303, "training data given": 57999, "task adaptive pretraining": 54883, "knowledge distillation technique": 27444, "method rule based": 32643, "automatic evaluation methods": 5085, "correlation human evaluation": 11521, "scenario experimental results": 48686, "demonstrating effectiveness proposed": 14052, "demonstrate effectiveness using": 13904, "construct new datasets": 10397, "encoder decoder paradigm": 17506, "test sets respectively": 56378, "does require domain": 15971, "datasets perform poorly": 13369, "considered state art": 10253, "paper proposes effective": 39545, "corpus training data": 11449, "self training approach": 49223, "based bag words": 5593, "small set labeled": 51500, "insights future research": 26389, "model proposed approach": 34250, "employ teacher student": 17394, "plms pre trained": 41021, "previous sota models": 42281, "mutual information based": 36345, "relatively small scale": 46132, "results framework significantly": 47640, "level segment level": 30201, "contrastive learning experimental": 10902, "models new tasks": 35259, "art supervised approaches": 4417, "domain experimental results": 16064, "collect large number": 8946, "intelligence ai systems": 26536, "different experimental settings": 14926, "utilize external knowledge": 61093, "publicly available propose": 44352, "achieves superior results": 1386, "obtained promising results": 38220, "better results fine": 6959, "relational graph convolutional": 46008, "large margin especially": 28905, "end zero shot": 17729, "method outperforms standard": 32604, "method fine tunes": 32510, "accuracy compared existing": 949, "based knowledge distillation": 5797, "semisupervised": 49475, "nt": 37972, "mic": 33216, "revolve": 48063, "reshaping": 47181, "angular": 2861, "mixes": 33413, "representation feature": 46516, "research persian": 47093, "words included": 62434, "performance algorithms": 40190, "multimedia content": 36140, "processing makes": 42888, "drastically reduced": 16394, "multilayer networks": 36058, "corpus previous": 11407, "exploiting semantic": 19675, "improves scores": 25158, "consider multi": 10214, "proposed lstm": 43801, "models hmm": 35091, "explore parameter": 19723, "task computationally": 54965, "change data": 8168, "component results": 9713, "set patterns": 50215, "scale state": 48626, "filling gap": 20802, "techniques despite": 56076, "input distribution": 26267, "require supervision": 46891, "representations auxiliary": 46621, "systems great": 54515, "augmenting neural": 4988, "typical approaches": 59132, "architectures present": 4120, "baseline achieve": 6151, "units grus": 59532, "progress achieved": 43091, "solution improve": 51654, "recognition key": 45509, "model negative": 34122, "shown natural": 50729, "trained study": 57885, "work comparing": 62604, "task fashion": 55079, "capability paper": 7612, "lstm using": 31287, "obtain significantly": 38191, "segments text": 49096, "framework unsupervised": 21618, "evaluated test": 18551, "embeddings state": 17220, "cnn trained": 8776, "set order": 50208, "evaluated recent": 18546, "knowledge best": 27417, "task architectures": 54911, "semantic nature": 49302, "retrieved based": 47982, "sentences analysis": 49679, "large publicly": 28947, "samples produced": 48486, "lack supervision": 27919, "discuss methods": 15475, "area recent": 4147, "outperform lstm": 38801, "complex contextual": 9619, "resources evaluation": 47301, "investigated approaches": 26998, "efforts shown": 16943, "support online": 54122, "progress generating": 43098, "production use": 43053, "automatically finding": 5173, "better single": 6966, "simplify model": 51242, "motivated development": 35864, "vector quantized": 61460, "word network": 62251, "apply domain": 3327, "representation quality": 46573, "step ranking": 52826, "errors systems": 18250, "embeddings additional": 17080, "usually evaluated": 61048, "attempt mitigate": 4690, "standard baseline": 52468, "specifically incorporate": 52208, "levels accuracy": 30237, "public english": 44319, "robustness learned": 48283, "end achieve": 17611, "expensive limited": 19211, "method utilize": 32699, "independent specific": 25504, "embedding paper": 17051, "distillation techniques": 15580, "different design": 14897, "data difficulty": 12286, "big gap": 7090, "head attentions": 23497, "networks bi": 36834, "generic architecture": 22626, "problem utilizing": 42685, "long span": 31029, "degrade model": 13805, "powerful representations": 41445, "catalyze research": 7831, "specific adaptation": 52041, "novel modeling": 37875, "input bert": 26258, "settings outperforming": 50386, "demonstrate outperforms": 13953, "effectiveness real": 16810, "task primary": 55294, "text followed": 56582, "earlier studies": 16507, "improves increasing": 25132, "using approximate": 60563, "utilize text": 61103, "structured machine": 53163, "trained pseudo": 57845, "languages corpus": 28624, "better terms": 6977, "words short": 62512, "predicted language": 41668, "datasets special": 13440, "segment length": 49074, "developing text": 14664, "dataset 12": 12791, "especially difficult": 18272, "new record": 37298, "refine output": 45765, "provides intuitive": 44209, "argue performance": 4166, "including character": 25242, "identify models": 24431, "language far": 28068, "12 14": 104, "evaluate context": 18447, "experiments adding": 19346, "mixup method": 33426, "larger domain": 29076, "time word": 57237, "avoid information": 5432, "needed improve": 36601, "dependent text": 14152, "works focusing": 62892, "problem words": 42688, "possible word": 41342, "applications medical": 3221, "discuss importance": 15470, "advances unsupervised": 1931, "used arabic": 60093, "guide network": 23340, "essential tool": 18338, "relies large": 46267, "mechanism reduce": 32140, "based relevance": 5991, "baselines bert": 6241, "learning underlying": 29923, "terms proposed": 56311, "data bias": 12190, "algorithmic bias": 2315, "accelerate development": 800, "decoding constraints": 13628, "corpora despite": 11191, "pretraining domain": 42200, "study intrinsic": 53394, "training recently": 58224, "directions improving": 15296, "case task": 7802, "size representations": 51397, "results just": 47685, "predictions compared": 41756, "popularity research": 41204, "language constraints": 28004, "supervised transformer": 54063, "15 respectively": 151, "trained experimental": 57727, "modify input": 35737, "performed manually": 40664, "unavailable low": 59214, "datasets set": 13418, "method attains": 32388, "evaluation despite": 18609, "solution data": 51653, "words strong": 62522, "cnn layer": 8768, "relying human": 46308, "applying various": 3382, "progress present": 43112, "successful text": 53739, "similarity loss": 51100, "languages standard": 28795, "automatically mining": 5191, "domain lack": 16097, "aspect multi": 4531, "reduces memory": 45692, "gaining momentum": 21928, "usually contains": 61043, "evaluations benchmark": 18752, "directly encode": 15312, "scheme model": 48729, "overall training": 39052, "incorporating large": 25389, "works data": 62881, "including arabic": 25239, "works suffer": 62910, "characters common": 8253, "models relying": 35432, "performance applied": 40194, "effectiveness technique": 16817, "propagation errors": 43244, "linear non linear": 30663, "language models domain": 28246, "models achieve comparable": 34668, "prediction task predicting": 41743, "good performance task": 22937, "combination data augmentation": 9038, "neural networks improve": 37052, "different writing systems": 15137, "relatively large corpus": 46119, "learn long term": 29393, "recurrent units grus": 45631, "language models code": 28239, "task model achieves": 55216, "multi task fashion": 36019, "use semantic information": 60007, "open source platform": 38454, "embeddings state art": 17221, "supervised neural network": 54026, "paper propose semi": 39533, "large publicly available": 28948, "low resource task": 31194, "models findings suggest": 35027, "propose knowledge distillation": 43429, "layers deep neural": 29221, "evaluation proposed approach": 18685, "consistently significantly outperforms": 10312, "input text output": 26346, "train encoder decoder": 57584, "task learning improve": 55173, "different design choices": 14898, "text based features": 56454, "make model robust": 31582, "address data scarcity": 1753, "multi head attentions": 35965, "neural networks bi": 37038, "practical use cases": 41477, "web search engines": 61895, "model does need": 33785, "supervised neural models": 54025, "manually designed features": 31776, "model performance domain": 34189, "data sets english": 12649, "contain rich information": 10470, "encoder decoder networks": 17504, "recent years end": 45384, "cross domain evaluation": 11815, "multi task architectures": 36017, "challenging task recent": 8153, "recent advances unsupervised": 45290, "performance different datasets": 40293, "available publicly https": 5354, "language processing works": 28442, "text based approach": 56453, "based representation learning": 5994, "methods heavily rely": 32885, "challenging task previous": 8152, "model experiments proposed": 33853, "representation learning specifically": 46545, "particularly challenging task": 39878, "unavailable low resource": 59215, "opens new avenues": 38483, "14 f1 score": 140, "pre trained unsupervised": 41562, "accuracy f1 scores": 977, "better human judgment": 6899, "space extensive experiments": 51864, "propose compare methods": 43325, "inhibits": 26212, "discreteness": 15431, "sas": 48520, "modeling existing": 34572, "memory store": 32283, "features long": 20619, "learning useful": 29929, "method similar": 32659, "different candidate": 14856, "gain improvements": 21908, "fixed context": 21074, "recent baselines": 45296, "analyze current": 2808, "derived corpus": 14200, "strong bert": 53019, "tree neural": 58753, "matching given": 31912, "random samples": 44886, "large networks": 28915, "rise new": 48154, "information inter": 25929, "method self": 32646, "robust overfitting": 48261, "tasks systematic": 55922, "empirically effective": 17360, "classifiers performance": 8623, "base task": 5552, "processing important": 42874, "various augmentation": 61305, "achieved far": 1233, "terms size": 56315, "brings consistent": 7340, "space leverage": 51873, "model lightweight": 34062, "effectively combines": 16729, "utilizing bert": 61119, "learning solutions": 29885, "network components": 36724, "text achieved": 56423, "various advanced": 61298, "comprehensive studies": 9799, "50 training": 422, "networks research": 36906, "method general": 32514, "mechanism ensure": 32114, "evaluations multiple": 18763, "encoder effectively": 17510, "models affect": 34700, "solution called": 51652, "parameter inference": 39670, "crucial success": 11912, "different graph": 14948, "bert domain": 6645, "techniques knowledge": 56102, "available end": 5287, "used conduct": 60124, "lexical structural": 30387, "covers diverse": 11665, "attempt automatically": 4681, "graph extensive": 23135, "question remains": 44749, "mechanism does": 32108, "methods categories": 32779, "pipeline called": 40895, "scale synthetic": 48629, "efficient interpretable": 16879, "19 datasets": 184, "assigning labels": 4604, "sequences time": 50027, "unfortunately models": 59453, "streaming data": 52963, "approaches real": 3906, "contrastive framework": 10897, "expert models": 19584, "successfully perform": 53748, "framework knowledge": 21552, "attempt apply": 4680, "weak supervised": 61849, "perturbed data": 40795, "scenarios study": 48709, "case propose": 7795, "popular english": 41163, "examples extensive": 18901, "extract sentence": 19992, "english finally": 17803, "task findings": 55085, "superior baselines": 53931, "decomposition method": 13659, "large transformers": 29036, "learning contrastive": 29572, "does come": 15937, "augmentation introduce": 4958, "especially suited": 18304, "dataset encourage": 12908, "challenges evaluating": 8043, "content original": 10543, "discussion future": 15493, "version paper": 61555, "networks widely used": 36924, "deep multi task": 13730, "language processing important": 28409, "recently achieved great": 45402, "method achieve comparable": 32357, "approach compares favorably": 3458, "results compared existing": 47547, "nature language processing": 36481, "domain specific bert": 16167, "address problem develop": 1784, "previous attention based": 42244, "furthermore existing methods": 21820, "approach able outperform": 3389, "downstream tasks rely": 16365, "high performance nlp": 23759, "range text classification": 44940, "prompt based method": 43200, "graph extensive experiments": 23136, "train task specific": 57645, "demonstrates strong performance": 14045, "training transformer language": 58307, "approaches real world": 3907, "methods code publicly": 32784, "significant room improvement": 50924, "model recent work": 34281, "domain specific downstream": 16173, "recent work natural": 45370, "paper https github": 39390, "models perform competitively": 35312, "model best knowledge": 33621, "inspired observation propose": 26409, "examples extensive experiments": 18902, "work presents simple": 62765, "learning contrastive learning": 29573, "empirical results suggest": 17347, "significantly improves zero": 50979, "pursues": 44420, "conveyed text": 11085, "approach case": 3444, "architecture modeling": 4064, "recently word": 45475, "set document": 50137, "identifies important": 24409, "programming based": 43084, "focus main": 21179, "document discourse": 15785, "significant problems": 50914, "huge volumes": 24080, "embeddings graph": 17144, "lexical choices": 30357, "learning encourage": 29621, "architecture encoder": 4047, "systems involving": 54536, "use formal": 59894, "develop open": 14610, "portions text": 41223, "generation focuses": 22464, "iterative algorithm": 27122, "document experimental": 15792, "meaningful results": 32026, "seek better": 49049, "evaluations verify": 18772, "faithful original": 20366, "applies attention": 3317, "various document": 61327, "text guide": 56612, "datasets superiority": 13448, "outperformed simple": 38841, "target pairs": 54835, "contains semantic": 10504, "domains current": 16242, "metrics capture": 33147, "generation lack": 22481, "created novel": 11731, "produce domain": 42980, "novel global": 37832, "importance modeling": 24685, "topics provide": 57460, "memory using": 32287, "verify robustness": 61544, "sentence labels": 49574, "based abstract": 5554, "novel weakly": 37954, "seeks generate": 49057, "20 human": 227, "scores multiple": 48910, "single short": 51337, "challenge identifying": 7985, "genetic algorithms": 22638, "keywords topics": 27355, "study complex": 53343, "constraints control": 10373, "based denoising": 5675, "edges nodes": 16588, "using case": 60599, "512 tokens": 434, "insights propose": 26393, "evaluation future": 18622, "metric text": 33126, "languages czech": 28631, "level latent": 30148, "documents evaluate": 15875, "establishing strong": 18367, "models extracted": 35008, "results evaluating": 47619, "approach delivers": 3479, "dataset related": 13058, "closer natural": 8712, "propose hierarchy": 43410, "use structural": 60030, "specifically framework": 52205, "general news": 22075, "baseline scores": 6208, "documents sentences": 15912, "review existing work": 48033, "paper present unsupervised": 39465, "proposed framework effective": 43782, "propose latent variable": 43435, "work present neural": 62755, "apply deep learning": 3325, "learning methods task": 29736, "novel method automatically": 37864, "significantly improve quality": 50967, "lack suitable datasets": 27917, "propose novel paradigm": 43554, "world applications work": 62930, "trained deep learning": 57707, "source target pairs": 51804, "correlates better human": 11513, "set experiments demonstrate": 50154, "reinforcement learning models": 45875, "generating training data": 22404, "specifically design new": 52192, "previous studies usually": 42292, "results model better": 47723, "domain introduce new": 16088, "novel weakly supervised": 37955, "approach outperforms standard": 3629, "propose graph based": 43406, "covid 19 paper": 11670, "language model allowing": 28154, "effectively improves performance": 16742, "using case study": 60600, "resources machine translation": 47316, "datasets large scale": 13313, "sequence neural networks": 49964, "machine generated human": 31300, "sentence level training": 49595, "limitation propose new": 30539, "recent works focus": 45378, "generation task using": 22561, "experiments public benchmark": 19501, "perform extensive empirical": 40105, "annotated data work": 2888, "prosperity": 43962, "981": 573, "executor": 18988, "containment": 10487, "dataset presents": 13034, "features contrast": 20547, "contribution task": 10949, "technique creating": 56031, "stop word": 52865, "information encode": 25829, "text identifying": 56619, "41 f1": 402, "automated techniques": 5061, "events entities": 18792, "determining given": 14563, "content representations": 10553, "generally focus": 22166, "provide sentence": 44127, "present significant": 42013, "focus finding": 21164, "networks gnn": 36862, "worse human": 62972, "popularity deep": 41201, "design specific": 14301, "proposed select": 43889, "model ranks": 34271, "expensive task": 19219, "score indicating": 48856, "task 2019": 54869, "learn sparse": 29426, "aim detect": 2143, "respect training": 47352, "work adversarial": 62561, "implement baseline": 24632, "used bag": 60099, "representation making": 46548, "complex set": 9661, "offers flexible": 38302, "single type": 51353, "large majority": 28901, "using distantly": 60661, "art level": 4276, "wikipedia provide": 62054, "set ranking": 50231, "2019 dataset": 282, "problem steps": 42668, "apply fine": 3328, "provides flexibility": 44199, "relations use": 46060, "solutions propose": 51671, "seven existing": 50419, "learn subtle": 29431, "datasets short": 13420, "based classical": 5617, "related performance": 45923, "account present": 879, "task little": 55191, "practice existing": 41484, "understanding cognitive": 59330, "pairs derived": 39179, "tasks relevant": 55848, "unordered set": 59623, "information space": 26098, "dataset hindi": 12950, "variants outperform": 61237, "task obtained": 55247, "f1 score 74": 20205, "using general purpose": 60703, "obtains competitive performance": 38246, "paper focus extracting": 39378, "aware word embeddings": 5478, "neural networks gnn": 37049, "popularity deep learning": 41202, "attracted increasing research": 4883, "increasing research attention": 25462, "corpora used train": 11255, "advantage large scale": 1941, "extensive experiments publicly": 19899, "shared task 2019": 50491, "new graph based": 37216, "layer multi head": 29192, "learning model trained": 29748, "state art level": 52625, "propose model named": 43466, "large number studies": 28926, "experimental results pre": 19303, "studies mainly focused": 53281, "f1 score 63": 20199, "supervised unsupervised settings": 54069, "improvement compared baselines": 24999, "fun": 21750, "mining important": 33314, "collected web": 8971, "results easily": 47601, "labeled tweets": 27769, "terms order": 56303, "words tweet": 62537, "discuss benefits": 15461, "additionally paper": 1727, "classification sub": 8559, "spanish respectively": 51947, "analyses dataset": 2592, "idf model": 24478, "information events": 25843, "regression support": 45819, "robustness terms": 48296, "sets tasks": 50308, "capture explicit": 7669, "language arabic": 27966, "solve text": 51693, "mechanism text": 32144, "detecting words": 14452, "features considering": 20545, "employ attention": 17374, "label assignment": 27690, "score identifying": 48853, "research identify": 47050, "information written": 26166, "leverages contextual": 30302, "shallow neural": 50442, "set label": 50176, "involves extracting": 27018, "words comparison": 62383, "sentence methods": 49601, "sets previous": 50302, "helpful low": 23597, "language studies": 28509, "extraction fundamental": 20069, "mechanism task": 32143, "day day life": 13502, "terms classification accuracy": 56276, "networks bi lstm": 36835, "tf idf model": 56993, "bi lstm layer": 7014, "regression support vector": 45820, "analysis demonstrates model": 2647, "recent years online": 45391, "art methods cross": 4285, "helpful low resource": 23598, "task order improve": 55250, "761": 508, "undetected": 59435, "tagger trained": 54731, "manually extracted": 31779, "images difficult": 24553, "methods representing": 33016, "identification use": 24401, "data automated": 12170, "study date": 53354, "completion methods": 9612, "performance predictive": 40492, "description language": 14244, "achieve decent": 1131, "learned textual": 29485, "graphs use": 23192, "lack unified": 27926, "decisions model": 13574, "trained german": 57742, "use encoder decoder": 59875, "results paper proposes": 47760, "results reported literature": 47807, "art methods benchmark": 4284, "model generalization capability": 33923, "word based context": 62118, "illustrate effectiveness proposed": 24516, "art methods use": 4290, "raises important question": 44860, "suffers problem": 53793, "meanings different": 32033, "supervised human": 53989, "embedding represent": 17056, "approach largely": 3583, "diversity generation": 15736, "includes text": 25236, "task alignment": 54898, "differ languages": 14814, "decade witnessed": 13539, "workshop shared task": 62923, "reducing training time": 45715, "used previous works": 60273, "propose novel structure": 43565, "language parallel data": 28373, "term loss": 56242, "help existing": 23560, "text respect": 56744, "based manner": 5830, "present shared": 42011, "use study": 60032, "making dataset": 31649, "proposed approach generate": 43727, "present shared task": 42012, "tackle problem generating": 54709, "time novel": 57185, "twitter using": 59043, "corpus twitter": 11451, "posts tweets": 41374, "attracted researchers": 4887, "years social": 63076, "facebook youtube": 20248, "public safety": 44327, "report best": 46429, "typically represent": 59153, "research effective": 47024, "language framework": 28079, "thorough study": 57065, "text proposed model": 56718, "years social media": 63077, "accurate natural language": 1083, "art results languages": 4378, "task best performing": 54938, "flags": 21091, "550": 445, "develop approaches": 14573, "compare baseline": 9331, "unsupervised nature": 59716, "extracted twitter": 20024, "complexity modeling": 9684, "level develop": 30100, "demonstrate knowledge": 13926, "limited specific": 30618, "ensemble combining": 17973, "studies zero": 53314, "particular dataset": 39839, "dataset key": 12972, "illustrate model": 24518, "detection important task": 14493, "use dataset train": 59861, "obtain good performance": 38174, "features usually": 20693, "significant increases": 50892, "features crucial": 20549, "versions english": 61560, "content makes": 10536, "study exploring": 53377, "work addition": 62554, "obtains competitive results": 38247, "suffer high computational": 53766, "based benchmark datasets": 5598, "level level": 30149, "scores introduce": 48905, "recasting": 45251, "important social": 24772, "grained aspects": 23024, "fine grained aspects": 20929, "textual datasets": 56958, "used baselines": 60101, "everyday lives": 18805, "task strong": 55412, "showing results": 50688, "task tasks": 55430, "applied study": 3297, "level given": 30125, "words change": 62377, "approaches topic": 3942, "sentences differ": 49704, "individual text": 25583, "components proposed": 9726, "providing feedback": 44243, "art survey": 4419, "state art survey": 52675, "create hybrid": 11702, "network models including": 36768, "bi encoder model": 7006, "paraphrase data": 39738, "combining visual": 9126 } } }