diff --git "a/ctfidf_config.json" "b/ctfidf_config.json" new file mode 100644--- /dev/null +++ "b/ctfidf_config.json" @@ -0,0 +1,12358 @@ +{ + "ctfidf_model": { + "bm25_weighting": false, + "reduce_frequent_words": false + }, + "vectorizer_model": { + "params": { + "analyzer": "word", + "binary": false, + "decode_error": "strict", + "encoding": "utf-8", + "input": "content", + "lowercase": true, + "max_df": 1.0, + "max_features": null, + "min_df": 2, + "ngram_range": [ + 1, + 5 + ], + "stop_words": "english", + "strip_accents": null, + "token_pattern": "(?u)\\b\\w\\w+\\b", + "vocabulary": null + }, + "vocab": { + "bert": 1152, + "language": 5758, + "understanding": 11761, + "latest": 6156, + "work": 12246, + "representations": 9731, + "carefully": 1398, + "integrates": 5458, + "contextualized": 2156, + "features": 4014, + "model": 7098, + "training": 11539, + "enables": 3310, + "series": 10292, + "success": 10909, + "especially": 3471, + "various": 12050, + "machine": 6751, + "reading": 9368, + "comprehension": 1923, + "natural": 7707, + "inference": 5270, + "tasks": 11157, + "existing": 3677, + "representation": 9725, + "models": 7250, + "including": 5175, + "gpt": 4665, + "exploit": 3829, + "plain": 8511, + "character": 1515, + "word": 12237, + "embeddings": 3233, + "rarely": 9346, + "consider": 2081, + "incorporating": 5215, + "structured": 10819, + "semantic": 10226, + "information": 5288, + "provide": 9149, + "rich": 10003, + "semantics": 10252, + "promote": 8979, + "propose": 9054, + "incorporate": 5210, + "explicit": 3825, + "contextual": 2155, + "pretrained": 8742, + "role": 10050, + "labeling": 5733, + "introduce": 5534, + "improved": 5139, + "capable": 1375, + "explicitly": 3827, + "backbone": 1002, + "keeps": 5625, + "convenient": 2195, + "usability": 11880, + "light": 6314, + "finetuning": 4119, + "way": 12174, + "substantial": 10892, + "taskspecific": 11307, + "modifications": 7573, + "compared": 1837, + "simple": 10458, + "concept": 1992, + "powerful": 8652, + "obtains": 7999, + "new": 7805, + "stateoftheart": 10701, + "substantially": 10899, + "improves": 5147, + "results": 9877, + "zeroshot": 12309, + "paraphrase": 8300, + "generation": 4511, + "multilingual": 7616, + "leveraging": 6293, + "parallel": 8276, + "texts": 11423, + "automatically": 950, + "generate": 4439, + "paraphrases": 8301, + "drawn": 3093, + "attention": 888, + "size": 10492, + "highquality": 4908, + "corpus": 2232, + "limited": 6349, + "translation": 11634, + "known": 5721, + "pivoting": 8508, + "method": 6932, + "typical": 11720, + "approach": 728, + "end": 3346, + "notice": 7912, + "process": 8881, + "involves": 5576, + "multiple": 7649, + "likely": 6336, + "incur": 5235, + "drift": 3097, + "twostep": 11714, + "translations": 11646, + "paper": 8207, + "inspired": 5373, + "transformerbased": 11616, + "unified": 11800, + "paraphrasing": 8302, + "purely": 9216, + "trained": 11533, + "data": 2380, + "conduct": 2019, + "step": 10744, + "generated": 4471, + "semantically": 10247, + "similar": 10453, + "input": 5346, + "sentence": 10261, + "shares": 10334, + "architecture": 789, + "radford": 9303, + "et": 3488, + "al": 558, + "2018": 37, + "able": 173, + "pretrain": 8741, + "largescale": 6126, + "fluency": 4170, + "output": 8165, + "sentences": 10266, + "addition": 372, + "mechanism": 6889, + "denoising": 2710, + "autoencoder": 930, + "improve": 5118, + "diversity": 3031, + "robustness": 10045, + "experimental": 3736, + "surpasses": 11012, + "terms": 11356, + "relevance": 9625, + "efficiency": 3184, + "knowledgeenhanced": 5713, + "pretraining": 8772, + "commonsense": 1800, + "story": 10766, + "generating": 4495, + "reasonable": 9397, + "leading": 6170, + "context": 2139, + "important": 5099, + "challenging": 1493, + "task": 11111, + "spite": 10658, + "modeling": 7243, + "local": 6687, + "coherence": 1757, + "neural": 7797, + "gpt2": 4671, + "suffer": 10935, + "repetition": 9702, + "logic": 6698, + "conflicts": 2059, + "lack": 5739, + "stories": 10765, + "conjecture": 2063, + "difficulty": 2924, + "relevant": 9627, + "knowledge": 5649, + "causal": 1423, + "relationships": 9610, + "planning": 8513, + "entities": 3423, + "events": 3593, + "proper": 9048, + "temporal": 11346, + "order": 8107, + "devise": 2847, + "utilize": 11988, + "external": 3914, + "bases": 1079, + "capture": 1390, + "dependencies": 2715, + "employ": 3283, + "multitask": 7674, + "learning": 6183, + "combines": 1782, + "discriminative": 2967, + "objective": 7974, + "distinguish": 3001, + "true": 11676, + "fake": 3986, + "automatic": 935, + "manual": 6817, + "evaluation": 3538, + "shows": 10390, + "baselines": 1072, + "particularly": 8312, + "global": 4647, + "effect": 3134, + "paragraph": 8275, + "sequence": 10279, + "tokens": 11489, + "text": 11381, + "read": 9366, + "article": 809, + "study": 10849, + "implicit": 5094, + "affect": 487, + "quality": 9236, + "specifically": 10622, + "better": 1172, + "stage": 10674, + "english": 3379, + "lead": 6166, + "higher": 4879, + "bleu": 1224, + "score": 10156, + "lower": 6738, + "perplexity": 8474, + "experiments": 3761, + "selfcollected": 10216, + "chinese": 1620, + "essay": 3475, + "dataset": 2475, + "level": 6269, + "lm": 6683, + "challenge": 1468, + "closedbook": 1683, + "science": 10146, + "exam": 3610, + "based": 1025, + "question": 9264, + "answering": 664, + "prior": 8834, + "standardized": 10685, + "exams": 3622, + "requires": 9765, + "support": 10998, + "large": 6000, + "targeted": 11108, + "retrieving": 9958, + "timeconsuming": 11476, + "questions": 9287, + "embedded": 3230, + "complex": 1895, + "retrieval": 9944, + "dual": 3103, + "theory": 11449, + "cognitive": 1753, + "framework": 4235, + "intuitive": 5555, + "reasoning": 9401, + "module": 7578, + "efficiently": 3200, + "solve": 10547, + "problems": 8871, + "related": 9598, + "example": 3616, + "relying": 9644, + "evaluate": 3499, + "arc": 787, + "yields": 12304, + "considerable": 2082, + "classification": 1651, + "performance": 8360, + "emerging": 3261, + "types": 11717, + "provided": 9170, + "significantly": 10422, + "accuracy": 230, + "competitive": 1875, + "advantage": 472, + "retrievalbased": 9952, + "qa": 9226, + "methods": 6974, + "multihop": 7611, + "long": 6703, + "main": 6769, + "problem": 8859, + "lies": 6308, + "sentencelevel": 10265, + "traditional": 11515, + "generative": 4591, + "address": 385, + "mrg": 7600, + "incorporates": 5214, + "graph": 4729, + "learn": 6176, + "consists": 2100, + "realization": 9379, + "responsible": 9859, + "searching": 10183, + "paths": 8324, + "imitate": 5070, + "imagination": 5068, + "human": 4948, + "writing": 12285, + "transfer": 11594, + "inferred": 5281, + "generates": 4490, + "complete": 1886, + "unlike": 11831, + "previous": 8805, + "blackbox": 1217, + "infers": 5283, + "path": 8323, + "provides": 9173, + "explanatory": 3824, + "views": 12132, + "proposed": 9111, + "works": 12271, + "representative": 9736, + "review": 9972, + "product": 8921, + "description": 2739, + "informative": 5321, + "strong": 10803, + "design": 2744, + "implementation": 5087, + "chatbot": 1529, + "using": 11935, + "learningbased": 6249, + "corresponding": 2250, + "levels": 6271, + "systematically": 11052, + "speech": 10648, + "recognition": 9510, + "correction": 2239, + "specific": 10606, + "domain": 3051, + "conversation": 2200, + "simulation": 10476, + "highest": 4886, + "communication": 1807, + "agent": 495, + "academic": 191, + "contribution": 2183, + "implement": 5086, + "explain": 3817, + "following": 4185, + "explainable": 3819, + "artificial": 812, + "intelligence": 5468, + "connections": 2075, + "network": 7784, + "perspective": 8480, + "integrated": 5457, + "wechat": 12193, + "finetuned": 4109, + "backend": 1005, + "interpret": 5521, + "responses": 9849, + "consistency": 2090, + "coherency": 1759, + "enhanced": 3398, + "demands": 2640, + "maintain": 6778, + "characters": 1521, + "shown": 10372, + "achieved": 263, + "good": 4658, + "observe": 7987, + "issues": 5593, + "exist": 3674, + "categorized": 1418, + "folds": 4182, + "hand": 4797, + "guarantee": 4772, + "usually": 11978, + "contain": 2123, + "errors": 3466, + "does": 3045, + "account": 222, + "discourse": 2957, + "relations": 9605, + "directly": 2945, + "enhance": 3385, + "twostage": 11711, + "organize": 8115, + "outline": 8126, + "depicts": 2720, + "second": 10184, + "expand": 3718, + "controlled": 2191, + "supervision": 10993, + "signals": 10398, + "incorporated": 5213, + "reduce": 9542, + "auxiliary": 969, + "relation": 9603, + "outperforms": 8145, + "baseline": 1063, + "approaches": 766, + "metrics": 7023, + "datatotext": 2555, + "augmentation": 913, + "application": 693, + "domains": 3056, + "obstacle": 7991, + "numbers": 7958, + "instances": 5387, + "available": 972, + "samples": 10089, + "novel": 7914, + "fewshot": 4028, + "setting": 10315, + "augments": 926, + "replacing": 9709, + "values": 12025, + "alternative": 605, + "ones": 8023, + "category": 1420, + "ii": 5053, + "iii": 5054, + "proposing": 9127, + "noise": 7888, + "use": 11884, + "make": 6787, + "sure": 11005, + "given": 4630, + "sample": 10086, + "correctly": 2241, + "reconstructed": 9527, + "having": 4829, + "formulated": 4209, + "benchmarks": 1134, + "weakly": 12182, + "supervised": 10984, + "paradigm": 8272, + "outperform": 8132, + "fully": 4301, + "seq2seq": 10277, + "10": 1, + "annotations": 649, + "utilizing": 11998, + "annotated": 642, + "boost": 1238, + "standard": 10681, + "points": 8556, + "establishing": 3484, + "datasets": 2512, + "llm": 6401, + "helps": 4855, + "optimize": 8098, + "crystal": 2333, + "surface": 11006, + "conventional": 2196, + "optimization": 8092, + "expert": 3814, + "physics": 8492, + "algorithms": 563, + "trend": 11662, + "automation": 957, + "entire": 3422, + "industry": 5263, + "drawback": 3089, + "relatively": 9613, + "laborintensive": 5736, + "suboptimal": 10885, + "refinement": 9563, + "technical": 11326, + "dilemma": 2931, + "remained": 9647, + "emergence": 3245, + "llms": 6440, + "openais": 8038, + "chatgpt": 1534, + "googles": 4663, + "bard": 1016, + "explores": 3857, + "possibility": 8603, + "applying": 722, + "gpt35": 4684, + "gpt4": 4688, + "simply": 10471, + "conversations": 2206, + "assisted": 861, + "difference": 2871, + "time": 11472, + "code": 1699, + "deep": 2596, + "reinforcement": 9591, + "acquire": 321, + "optimized": 8099, + "solution": 10541, + "spanning": 10585, + "proposition": 9128, + "ideas": 5040, + "perform": 8349, + "detailed": 2793, + "break": 1266, + "converse": 2208, + "posing": 8593, + "openended": 8049, + "heuristic": 4860, + "definitive": 2625, + "commands": 1788, + "guide": 4780, + "processes": 8897, + "conceptual": 1995, + "humanai": 4993, + "strategies": 10775, + "practical": 8662, + "implications": 5093, + "achieve": 237, + "significant": 10400, + "milestone": 7036, + "automated": 933, + "production": 8922, + "pipeline": 8502, + "rank": 9324, + "math": 6857, + "critical": 2309, + "processing": 8898, + "recent": 9448, + "studies": 10836, + "adopted": 435, + "sequencetosequence": 10287, + "transform": 11603, + "descriptions": 2741, + "mathematical": 6863, + "expressions": 3873, + "prone": 9045, + "minor": 7056, + "mistakes": 7066, + "handle": 4800, + "limitation": 6338, + "ranking": 9328, + "joint": 5608, + "learns": 6251, + "correct": 2233, + "incorrect": 5221, + "treebased": 11658, + "specially": 10604, + "designed": 2759, + "online": 8025, + "update": 11866, + "demonstrate": 2645, + "effectiveness": 3167, + "benchmark": 1110, + "consistently": 2094, + "classical": 1649, + "784": 99, + "improving": 5157, + "ability": 133, + "focus": 4173, + "structure": 10817, + "general": 4400, + "numerical": 7959, + "properties": 9050, + "robustly": 10044, + "measurement": 6884, + "estimation": 3487, + "leverages": 6283, + "embedding": 3231, + "encode": 3320, + "number": 7948, + "individual": 5250, + "loss": 6723, + "function": 4309, + "integrate": 5455, + "extensive": 3883, + "different": 2874, + "experiment": 3733, + "range": 9316, + "comparison": 1862, + "magnitude": 6768, + "ablation": 167, + "conducted": 2040, + "impact": 5075, + "topic": 11502, + "transferable": 11599, + "table": 11073, + "weaklysupervised": 12184, + "transformer": 11608, + "jointly": 5610, + "encoding": 3332, + "produce": 8915, + "query": 9261, + "settings": 10316, + "systems": 11055, + "deployed": 2722, + "corpora": 2230, + "distributions": 3006, + "quite": 9301, + "distinct": 2998, + "simulate": 10472, + "shift": 10346, + "scenario": 10120, + "designing": 2769, + "consisting": 2099, + "splits": 10660, + "groups": 4764, + "popular": 8570, + "empirically": 3282, + "despite": 2777, + "opendomain": 8043, + "degrades": 2628, + "evaluated": 3517, + "unseen": 11847, + "topics": 11504, + "response": 9843, + "pragmatic": 8677, + "adaptation": 353, + "comprising": 1958, + "vocabulary": 12161, + "injection": 5340, + "texttotext": 11434, + "generator": 4623, + "t5": 11069, + "focused": 4178, + "logical": 6699, + "form": 4194, + "reasonably": 9399, + "believe": 1107, + "split": 10659, + "robust": 10041, + "solutions": 10545, + "suited": 10954, + "deployment": 2724, + "synthetic": 11043, + "books": 1237, + "ways": 12176, + "written": 12286, + "aided": 528, + "ai": 508, + "technologies": 11337, + "like": 6320, + "gpt3": 4679, + "eventually": 3594, + "replace": 9705, + "authored": 928, + "publications": 9207, + "kind": 5643, + "tools": 11497, + "purpose": 9217, + "introduced": 5548, + "stands": 10688, + "created": 2299, + "deploying": 2723, + "technology": 11338, + "precisely": 8681, + "autoregressive": 963, + "humanlike": 5005, + "supported": 11001, + "case": 1404, + "value": 12022, + "discussed": 2977, + "emphasizes": 3270, + "artistic": 819, + "issue": 5586, + "comes": 1787, + "aigenerated": 532, + "content": 2130, + "introduces": 5549, + "projects": 8958, + "interactive": 5495, + "andor": 638, + "combined": 1781, + "focuses": 4179, + "aesthetics": 486, + "art": 808, + "search": 10174, + "decoder": 2580, + "transformers": 11625, + "continued": 2160, + "increasing": 5226, + "scale": 10106, + "reaching": 9364, + "hundreds": 5025, + "billions": 1202, + "parameters": 8291, + "sets": 10312, + "prompting": 9006, + "foundation": 4217, + "remain": 9646, + "fields": 4055, + "prevents": 8804, + "possibly": 8607, + "organizations": 8114, + "train": 11526, + "separate": 10273, + "58": 82, + "billion": 1199, + "previously": 8822, + "best": 1163, + "margin": 6829, + "175": 28, + "measured": 6883, + "result": 9868, + "files": 4060, + "freely": 4284, + "endtoend": 3361, + "unpaired": 11841, + "technique": 11331, + "encoderdecoder": 3328, + "acoustic": 320, + "units": 11820, + "pseudo": 9195, + "codes": 1738, + "derived": 2732, + "offline": 8019, + "predict": 8685, + "masked": 6837, + "encoder": 3322, + "lets": 6268, + "reconstruct": 9526, + "autoregressively": 968, + "instead": 5390, + "textual": 11436, + "scripts": 10165, + "original": 8118, + "comprehensive": 1924, + "error": 3459, + "rate": 9347, + "20": 35, + "subsets": 10891, + "release": 9617, + "sources": 10578, + "enriching": 3413, + "wikidata": 12229, + "completion": 1893, + "augmenting": 921, + "additional": 378, + "divides": 3036, + "steps": 10752, + "subject": 10879, + "suggestion": 10946, + "populating": 8580, + "gap": 4376, + "filling": 4061, + "remaining": 9648, + "present": 8713, + "idea": 5038, + "combining": 1783, + "base": 1021, + "interpretation": 5524, + "free": 4280, + "suggest": 10940, + "metadata": 6931, + "headers": 4833, + "property": 9051, + "linking": 6375, + "candidate": 1333, + "synthesize": 11039, + "prompts": 9028, + "finally": 4069, + "verify": 12113, + "synthesized": 11040, + "linked": 6373, + "web": 12188, + "source": 10571, + "wikipedia": 12230, + "prototypical": 9139, + "calibration": 1328, + "incontext": 5203, + "gptlike": 4706, + "recognized": 9516, + "handcrafted": 4798, + "templates": 11345, + "demonstration": 2706, + "adaptively": 366, + "decision": 2572, + "boundary": 1255, + "zero": 12305, + "greedy": 4755, + "decoding": 2583, + "concretely": 2011, + "adopts": 438, + "mixture": 7082, + "distribution": 3005, + "estimate": 3485, + "clusters": 1694, + "categories": 1416, + "assign": 850, + "cluster": 1693, + "label": 5728, + "solving": 10558, + "weighted": 12197, + "matching": 6854, + "prediction": 8689, + "calibrated": 1326, + "likelihood": 6335, + "improvement": 5142, + "diverse": 3011, + "set": 10308, + "analysis": 617, + "scales": 10112, + "indicates": 5244, + "expected": 3725, + "greatly": 4753, + "class": 1646, + "imbalance": 5069, + "selfsupervised": 10223, + "autoencoders": 931, + "extensively": 3909, + "explored": 3850, + "years": 12292, + "seen": 10197, + "wide": 12205, + "adoption": 437, + "contrastive": 2174, + "heavily": 4840, + "relies": 9639, + "structural": 10815, + "complicated": 1911, + "dominant": 3068, + "progress": 8944, + "graphs": 4737, + "far": 3998, + "reached": 9362, + "potential": 8615, + "identify": 5045, + "examine": 3613, + "negatively": 7777, + "development": 2833, + "reconstruction": 9529, + "metric": 7022, + "mitigates": 7074, + "reconstructing": 9528, + "structures": 10821, + "feature": 4009, + "masking": 6841, + "strategy": 10783, + "scaled": 10111, + "benefit": 1148, + "21": 42, + "public": 9199, + "careful": 1397, + "outperformance": 8139, + "demonstrates": 2693, + "allinone": 592, + "lowresource": 6745, + "nlp": 7859, + "leverage": 6272, + "rules": 10072, + "synonym": 11033, + "replacement": 9707, + "finetune": 4103, + "generalpurpose": 4436, + "consequently": 2080, + "trivial": 11675, + "yielding": 12303, + "lowquality": 6740, + "combat": 1777, + "goal": 4652, + "single": 10482, + "quickly": 9300, + "grasp": 4742, + "inherent": 5327, + "synthesis": 11038, + "law": 6161, + "target": 11104, + "reformulates": 9572, + "examples": 3618, + "heterogeneous": 4859, + "format": 4197, + "employs": 3290, + "objectives": 7979, + "granularity": 4728, + "partial": 8306, + "attempt": 884, + "apply": 721, + "100": 4, + "produced": 8918, + "successfully": 10929, + "deberta": 2563, + "transfers": 11602, + "confidence": 2050, + "speaker": 10596, + "key": 5627, + "variability": 12028, + "compact": 1813, + "dependent": 2717, + "hidden": 4863, + "unit": 11819, + "contributions": 2184, + "used": 11897, + "facilitate": 3952, + "adaptive": 365, + "sat": 10093, + "test": 11366, + "unsupervised": 11855, + "sensitivity": 10259, + "reduced": 9545, + "selection": 10208, + "trustworthy": 11680, + "subset": 10890, + "smooth": 10521, + "probabilities": 8853, + "serving": 10305, + "scores": 10159, + "increased": 5224, + "sparsity": 10593, + "addressed": 413, + "outperformed": 8140, + "independent": 5237, + "adapted": 359, + "12": 10, + "absolute": 181, + "90": 110, + "79": 100, + "89": 109, + "relative": 9611, + "respectively": 9836, + "consistent": 2092, + "improvements": 5144, + "lstm": 6749, + "rescoring": 9772, + "impressive": 5107, + "huge": 4946, + "generally": 4434, + "incurs": 5236, + "high": 4866, + "cost": 2257, + "recently": 9490, + "augment": 911, + "smaller": 10512, + "retriever": 9956, + "demonstrated": 2670, + "promising": 8964, + "capabilities": 1337, + "remains": 9649, + "unclear": 11737, + "competitively": 1880, + "counterparts": 2277, + "generalization": 4424, + "downstream": 3074, + "heldout": 4845, + "prompted": 9005, + "parametric": 8298, + "taskagnostic": 11152, + "unlabeled": 11825, + "potentially": 8641, + "noisy": 7889, + "retrieved": 9954, + "fusion": 4342, + "perceiver": 8344, + "gated": 4392, + "crossattention": 2316, + "notably": 7907, + "16": 26, + "seven": 10323, + "scaling": 10114, + "backtranslation": 1009, + "aims": 542, + "translate": 11632, + "spoken": 10661, + "scarcity": 10117, + "labeled": 5729, + "translating": 11633, + "indomain": 5254, + "applied": 716, + "alleviate": 584, + "limits": 6362, + "overcome": 8177, + "prompt": 8985, + "randomly": 9314, + "concatenates": 1989, + "induce": 5256, + "style": 10873, + "varied": 12037, + "languages": 5994, + "increases": 5225, + "demonstrating": 2702, + "facilitating": 3959, + "future": 4345, + "research": 9773, + "field": 4044, + "variational": 12034, + "tags": 11093, + "node": 7885, + "associated": 863, + "ideal": 5039, + "integrating": 5460, + "networks": 7793, + "gnns": 4651, + "computational": 1969, + "complexity": 1908, + "brought": 1295, + "efficient": 3190, + "effective": 3136, + "fusing": 4341, + "em": 3227, + "called": 1329, + "simultaneously": 10481, + "big": 1194, + "proposes": 9124, + "modules": 7581, + "procedure": 8878, + "allows": 596, + "separately": 10275, + "allowing": 594, + "interact": 5484, + "business": 1319, + "helped": 4851, + "advance": 440, + "numerous": 7961, + "opened": 8048, + "door": 3072, + "modalities": 7094, + "images": 5065, + "music": 7689, + "argue": 800, + "unique": 11815, + "characteristics": 1516, + "mining": 7055, + "making": 6794, + "tackle": 11080, + "challenges": 1474, + "include": 5172, + "multimodal": 7622, + "privacy": 8843, + "concerns": 1999, + "memorizing": 6910, + "cross": 2315, + "codebook": 1736, + "findings": 4085, + "memorization": 6908, + "capability": 1367, + "contributes": 2181, + "lot": 6726, + "inspires": 5383, + "bring": 1281, + "memory": 6911, + "memorize": 6909, + "uses": 11932, + "place": 8509, + "phases": 8486, + "addressing": 417, + "restoring": 9862, + "named": 7692, + "reaches": 9363, + "superior": 10972, + "means": 6881, + "obtain": 7993, + "gains": 4370, + "importance": 5098, + "feasibility": 4006, + "sheds": 10344, + "direction": 2937, + "crosstask": 2322, + "highly": 4899, + "sensitive": 10258, + "choice": 1636, + "selecting": 10207, + "highperforming": 4907, + "labels": 5734, + "zps": 12328, + "selects": 10211, + "gradient": 4713, + "humanwritten": 5022, + "ensemble": 3414, + "extend": 3876, + "advantages": 474, + "tuning": 11688, + "translator": 11647, + "yes": 12298, + "engine": 3373, + "report": 9712, + "preliminary": 8705, + "adopt": 434, + "trigger": 11668, + "differences": 2872, + "evaluating": 3521, + "performs": 8466, + "commercial": 1791, + "products": 8925, + "google": 4661, + "highresource": 4914, + "explore": 3838, + "interesting": 5505, + "asks": 824, + "pivot": 8505, + "biomedical": 1212, + "abstracts": 186, + "reddit": 9541, + "comments": 1790, + "exhibits": 3667, + "launch": 6158, + "boosted": 1241, + "comparable": 1816, + "words": 12244, + "hallucination": 4789, + "interactivity": 5500, + "quantitatively": 9252, + "publicly": 9208, + "carry": 1403, + "23": 46, + "covering": 2286, + "common": 1792, + "aspects": 829, + "newly": 7848, + "nonlatin": 7895, + "script": 10163, + "intermediate": 5511, + "accurate": 234, + "average": 980, + "unreliable": 11844, + "reasoner": 9400, + "deductive": 2595, + "inductive": 5259, + "suffers": 10938, + "hallucinations": 4796, + "access": 201, + "collaboration": 1767, + "underlying": 11748, + "rouge1": 10059, + "summarization": 10958, + "chrf": 1642, + "multiturn": 7682, + "engineering": 3374, + "fashion": 4000, + "extraction": 3927, + "construction": 2117, + "financial": 4081, + "effort": 3203, + "built": 1315, + "approximately": 781, + "raw": 9359, + "glue": 4648, + "superglue": 10971, + "driven": 3098, + "advancements": 457, + "enabling": 3314, + "comparisons": 1865, + "drawing": 3091, + "inspiration": 5368, + "includes": 5174, + "aim": 535, + "released": 9623, + "project": 8954, + "understand": 11753, + "comparative": 1829, + "attracted": 896, + "great": 4743, + "fluent": 4171, + "attains": 883, + "remarkable": 9665, + "quantitative": 9248, + "chatgpts": 1609, + "little": 6382, + "comparing": 1860, + "bertstyle": 1161, + "falls": 3991, + "short": 10349, + "handling": 4805, + "similarity": 10457, + "achieves": 278, + "sentiment": 10268, + "questionanswering": 9284, + "additionally": 381, + "advanced": 443, + "systematic": 11047, + "adversarial": 478, + "normal": 7901, + "pushed": 9222, + "toxic": 11508, + "risk": 10017, + "undesired": 11791, + "alter": 602, + "demanding": 2639, + "computation": 1963, + "requirements": 9762, + "rely": 9641, + "rulebased": 10071, + "promptbased": 9001, + "token": 11485, + "elimination": 3225, + "overall": 8171, + "meaning": 6878, + "center": 1439, + "probability": 8854, + "ultimately": 11727, + "considered": 2086, + "point": 8552, + "rl": 10022, + "literature": 6380, + "cover": 2283, + "uncertain": 11734, + "outcomes": 8124, + "utilizes": 11992, + "share": 10330, + "importantly": 5103, + "require": 9753, + "internal": 5517, + "crucial": 2325, + "servers": 10300, + "accessible": 205, + "apis": 684, + "techniques": 11333, + "showcasing": 10365, + "abilities": 121, + "complexities": 1907, + "open": 8031, + "world": 12278, + "assessing": 841, + "stability": 10669, + "aspect": 826, + "exploring": 3858, + "transformations": 11605, + "nlu": 7879, + "indicate": 5240, + "encounters": 3341, + "degradation": 2626, + "faces": 3949, + "instability": 5385, + "insights": 5364, + "valuable": 12018, + "limitations": 6340, + "guiding": 4786, + "meets": 6906, + "feedback": 4020, + "oracle": 8105, + "realworld": 9386, + "applications": 702, + "cases": 1407, + "assessed": 838, + "rlhf": 10027, + "prominent": 8961, + "guidance": 4776, + "algorithm": 561, + "theoretical": 11444, + "random": 9311, + "descent": 2733, + "proven": 9144, + "policy": 8558, + "reward": 9994, + "makes": 6791, + "optimizes": 8101, + "precollected": 8683, + "furthermore": 4324, + "diffusion": 2927, + "rounds": 10064, + "advances": 466, + "optimizing": 8102, + "functions": 4314, + "offers": 8015, + "aligning": 570, + "intentions": 5482, + "know": 5647, + "intent": 5480, + "conversational": 2202, + "rewriting": 10000, + "aggregating": 504, + "represent": 9723, + "users": 11921, + "real": 9370, + "59": 83, + "wrt": 12290, + "highlighting": 4895, + "vast": 12103, + "survey": 11024, + "legal": 6256, + "transformed": 11607, + "computer": 1980, + "vision": 12136, + "increasingly": 5230, + "utilized": 11991, + "automate": 932, + "document": 3041, + "integration": 5464, + "raised": 9307, + "bias": 1187, + "explainability": 3818, + "discuss": 2969, + "arise": 801, + "resources": 9828, + "directions": 2941, + "conclude": 2006, + "doing": 3050, + "hope": 4932, + "overview": 8195, + "current": 2345, + "state": 10695, + "highlight": 4890, + "benefits": 1150, + "aigc": 530, + "need": 7763, + "goes": 4655, + "headlines": 4834, + "analyze": 630, + "create": 2296, + "media": 6893, + "coverage": 2284, + "impossible": 5104, + "miss": 7063, + "opportunity": 8084, + "certain": 1442, + "era": 3451, + "pure": 9215, + "creation": 2303, + "worth": 12282, + "noting": 7913, + "just": 5619, + "tool": 11491, + "people": 8339, + "variants": 12033, + "help": 4846, + "unify": 11812, + "diversified": 3030, + "needed": 7770, + "offering": 8010, + "look": 6719, + "ranging": 9323, + "modern": 7565, + "gan": 4374, + "introducing": 5551, + "fundamental": 4316, + "type": 11715, + "videos": 12129, + "3d": 62, + "summarize": 10964, + "mainstream": 6774, + "industries": 5262, + "education": 3132, + "creativity": 2305, + "currently": 2365, + "faced": 3948, + "outlook": 8129, + "evolve": 3603, + "near": 7756, + "learners": 6181, + "evidence": 3597, + "narrative": 7699, + "unknown": 11824, + "really": 9383, + "prominently": 8962, + "basis": 1088, + "updating": 11869, + "reveal": 9965, + "chatbots": 1531, + "analyzed": 634, + "components": 1913, + "special": 10598, + "instrument": 5449, + "analyzing": 637, + "revealed": 9968, + "performed": 8462, + "referential": 9556, + "worse": 12281, + "syntactic": 11035, + "simplicity": 10469, + "initial": 5333, + "version": 12120, + "updated": 11867, + "resulting": 9874, + "facilitated": 3957, + "lagged": 5752, + "correlation": 2247, + "suggests": 10948, + "correlated": 2246, + "group": 4763, + "surprising": 11020, + "constructed": 2115, + "inputoutput": 5357, + "variations": 12036, + "formats": 4199, + "appropriate": 778, + "essential": 3476, + "revisit": 9977, + "view": 12130, + "fixed": 4161, + "attributes": 905, + "unsatisfactory": 11846, + "observation": 7982, + "interpretable": 5523, + "manner": 6815, + "grammatical": 4722, + "cuttingedge": 2374, + "developed": 2827, + "openai": 8036, + "surprisingly": 11021, + "followup": 4191, + "compare": 1834, + "gec": 4397, + "testing": 11377, + "outputs": 8168, + "change": 1510, + "expression": 3872, + "maintaining": 6779, + "correctness": 2242, + "confirms": 2057, + "produces": 8919, + "unleashing": 11830, + "incorporation": 5220, + "particular": 8310, + "vital": 12157, + "immersive": 5074, + "experiences": 3732, + "gaining": 4367, + "dynamic": 3106, + "personalized": 8478, + "possible": 8605, + "legitimate": 6262, + "ethical": 3492, + "readers": 9367, + "influence": 5284, + "effectively": 3149, + "engaging": 3371, + "virtual": 12135, + "environment": 3435, + "opportunities": 8080, + "obstacles": 7992, + "signal": 10396, + "rethinking": 9940, + "established": 3481, + "age": 493, + "cognition": 1752, + "subjective": 10880, + "intelligent": 5476, + "needs": 7771, + "chat": 1523, + "initially": 5336, + "realized": 9381, + "massive": 6845, + "researchers": 9809, + "answer": 655, + "mathematically": 6866, + "accurately": 235, + "described": 2734, + "machines": 6766, + "truly": 11677, + "starts": 10694, + "basic": 1086, + "concepts": 1993, + "presents": 8728, + "investigate": 5560, + "relationship": 9609, + "transformation": 11604, + "decomposition": 2589, + "composition": 1919, + "scheme": 10142, + "conversion": 2210, + "implementing": 5091, + "knowledgebased": 5711, + "instruction": 5398, + "empirical": 3273, + "efforts": 3204, + "replicate": 9711, + "instructiontuning": 5444, + "factor": 3967, + "achieving": 308, + "enhances": 3402, + "patterns": 8329, + "amounts": 611, + "major": 6783, + "merely": 6926, + "leads": 6172, + "continuous": 2162, + "flat": 4163, + "causes": 1432, + "phenomena": 8487, + "specialized": 10600, + "hard": 4807, + "checkpoints": 1617, + "informed": 5322, + "clauses": 1667, + "linguistics": 6372, + "fail": 3978, + "investigates": 5566, + "linguistic": 6368, + "difficulties": 2922, + "modified": 7574, + "widely": 12215, + "scenarios": 10121, + "35": 60, + "knowledgeable": 5710, + "solver": 10556, + "investigation": 5568, + "wellknown": 12202, + "gpts": 4707, + "aware": 999, + "struggle": 10824, + "required": 9760, + "raise": 9306, + "mechanisms": 6892, + "disentangled": 2984, + "symbolic": 11031, + "discovered": 2960, + "dnns": 3039, + "sparse": 10591, + "encodes": 3331, + "disentangle": 2983, + "dialogue": 2859, + "small": 10505, + "states": 10734, + "transferability": 11598, + "encoded": 3321, + "exhibit": 3656, + "exact": 3607, + "reasons": 9441, + "accountable": 224, + "dearth": 2561, + "area": 797, + "showcase": 10359, + "chainofthought": 1458, + "cot": 2266, + "official": 8017, + "evaluations": 3583, + "excellent": 3627, + "detection": 2801, + "corrected": 2238, + "overcorrection": 8185, + "tendencies": 11350, + "adhering": 427, + "principle": 8832, + "minimal": 7049, + "edits": 3130, + "nonenglish": 7891, + "highlights": 4897, + "adapter": 360, + "family": 3997, + "parameterefficient": 8284, + "led": 6254, + "costeffective": 2262, + "alternatives": 606, + "alpaca": 600, + "peft": 8336, + "undoubtedly": 11792, + "enable": 3307, + "easytouse": 3115, + "adapters": 361, + "execute": 3647, + "llama": 6387, + "bloom": 1231, + "opt": 8085, + "gptj": 4705, + "lora": 6722, + "researchfriendly": 9817, + "modular": 7576, + "largerscale": 6125, + "7b": 101, + "extra": 3918, + "trainable": 11531, + "175b": 31, + "arithmetic": 804, + "emerged": 3239, + "calculate": 1323, + "revolutionizing": 9989, + "cell": 1435, + "power": 8644, + "annotation": 646, + "rna": 10029, + "sequencing": 10289, + "annotating": 645, + "biology": 1211, + "bing": 1208, + "2023": 40, + "revolutionized": 9982, + "scientific": 10151, + "providing": 9182, + "breakthrough": 1268, + "reviews": 9976, + "uncover": 11742, + "annotate": 640, + "rare": 9345, + "differentiation": 2919, + "trajectories": 11591, + "overlooked": 8190, + "cancer": 1332, + "discovery": 2962, + "cells": 1438, + "pathway": 8325, + "life": 6309, + "sciences": 10150, + "history": 4925, + "harnessing": 4824, + "endeavor": 3356, + "highlighted": 4894, + "analyses": 615, + "requiring": 9770, + "construct": 2112, + "outofdistribution": 8130, + "roberta": 10032, + "early": 3110, + "api": 682, + "drops": 3101, + "suite": 10953, + "rrhf": 10067, + "align": 564, + "facilitates": 3958, + "alignment": 572, + "preferences": 8701, + "enhancing": 3404, + "interactions": 5493, + "humans": 5016, + "instructgpt": 5396, + "implements": 5092, + "stages": 10678, + "sft": 10327, + "proximal": 9189, + "ppo": 8661, + "minimum": 7054, + "contrast": 2167, + "sampling": 10092, + "policies": 8557, + "extension": 3882, + "simpler": 10468, + "coding": 1751, + "accomplished": 214, + "session": 10307, + "helpful": 4852, + "stock": 10760, + "movement": 7596, + "variety": 12040, + "predicting": 8688, + "market": 6834, + "tweets": 11708, + "historical": 4924, + "underperforms": 11749, + "linear": 6364, + "regression": 9584, + "subpar": 10887, + "suggesting": 10944, + "serves": 10301, + "aimed": 540, + "social": 10524, + "giant": 4628, + "agi": 506, + "plus": 8550, + "november": 7941, + "2022": 39, + "unprecedented": 11842, + "motivated": 7593, + "according": 218, + "500": 75, + "articles": 810, + "titles": 11484, + "mentioning": 6924, + "considering": 2087, + "urgently": 11877, + "realize": 9380, + "ranked": 9327, + "susceptible": 11030, + "biases": 1191, + "unfairness": 11795, + "consequences": 2079, + "ethics": 3497, + "ensuring": 3419, + "primarily": 8824, + "employed": 3286, + "guided": 4783, + "inefficiencies": 5265, + "frequently": 4287, + "successful": 10928, + "sufficient": 10939, + "behavior": 1100, + "subsequently": 10889, + "filtered": 4063, + "identifying": 5048, + "detect": 2797, + "eye": 3941, + "growing": 4766, + "lexical": 6303, + "stylistic": 10876, + "teaching": 11320, + "balanced": 1014, + "machinegenerated": 6764, + "paired": 8202, + "roughly": 10062, + "equal": 3442, + "matched": 6852, + "hire": 4923, + "exposed": 3867, + "61": 87, + "detecting": 2800, + "67": 91, + "round": 10063, + "tend": 11348, + "detectors": 2812, + "build": 1301, + "exponential": 3863, + "growth": 4771, + "electronic": 3209, + "health": 4835, + "records": 9532, + "poses": 8588, + "clinicians": 1676, + "clinical": 1674, + "management": 6807, + "concise": 2002, + "summaries": 10956, + "distill": 2991, + "documents": 3044, + "rapid": 9332, + "advancement": 452, + "plms": 8543, + "raising": 9310, + "uptodate": 11874, + "begin": 1092, + "foundational": 4230, + "followed": 4184, + "indepth": 5238, + "community": 1811, + "line": 6363, + "leaderboard": 6169, + "useful": 11904, + "resource": 9822, + "track": 11511, + "guidelines": 4784, + "gptbased": 4704, + "identification": 5042, + "addresses": 415, + "rapidly": 9343, + "evolving": 3605, + "database": 2470, + "multistep": 7672, + "included": 5173, + "filtering": 4064, + "keyword": 5639, + "precision": 8682, + "recall": 9442, + "finding": 4084, + "captured": 1392, + "94": 114, + "publication": 9206, + "volume": 12162, + "trends": 11663, + "revealing": 9969, + "degree": 2629, + "countries": 2278, + "institutions": 5393, + "identified": 5043, + "scholarly": 10144, + "interdisciplinary": 5503, + "nature": 7752, + "players": 8532, + "investigating": 5567, + "reranking": 9771, + "generalize": 4430, + "ir": 5583, + "properly": 9049, + "instructed": 5395, + "deliver": 2632, + "27": 50, + "delve": 2636, + "distilling": 2996, + "reproduce": 9745, + "equipped": 3445, + "emotional": 3265, + "evaluates": 3520, + "avenues": 978, + "democratizing": 2644, + "opensource": 8055, + "excelling": 3630, + "beneficial": 1147, + "restrictions": 9866, + "empowering": 3300, + "follow": 4183, + "instructions": 5430, + "brings": 1284, + "manually": 6824, + "creating": 2302, + "avenue": 977, + "varying": 12099, + "starting": 10692, + "evolinstruct": 3600, + "rewrite": 9999, + "mix": 7078, + "humancreated": 4999, + "preferred": 8703, + "capacity": 1380, + "17": 27, + "skills": 10498, + "httpsgithubcomnlpxucanwizardlm": 4945, + "brains": 1261, + "customized": 2372, + "prevalent": 8802, + "room": 10055, + "unstable": 11852, + "inability": 5164, + "think": 11450, + "randomness": 9315, + "thinking": 11451, + "possess": 8599, + "perspectives": 8483, + "consolidating": 2103, + "decisionmaking": 2575, + "objectively": 7977, + "comprehensively": 1950, + "languagebased": 5991, + "backpropagation": 1008, + "devised": 2848, + "problemsolving": 8876, + "texttosql": 11433, + "converts": 2213, + "sql": 10665, + "retrieve": 9953, + "syntax": 11037, + "llmbased": 6432, + "retrieves": 9957, + "schemes": 10143, + "queries": 9260, + "similarities": 10456, + "demonstrations": 2708, + "extracts": 3935, + "schema": 10140, + "items": 5601, + "tables": 11076, + "filter": 4062, + "adapts": 368, + "balance": 1013, + "length": 6263, + "fallback": 3990, + "fails": 3980, + "crossdomain": 2317, + "constrained": 2108, + "lengthy": 6265, + "inputs": 5358, + "unleash": 11827, + "composed": 1917, + "stream": 10790, + "controller": 2192, + "iteratively": 5605, + "longterm": 6716, + "shortterm": 10357, + "precise": 8679, + "coherent": 1760, + "memories": 6907, + "activated": 333, + "modification": 7572, + "involving": 5579, + "supply": 10997, + "covers": 2288, + "longtext": 6718, + "intellectual": 5467, + "protection": 9134, + "revolutionary": 9980, + "expensive": 3727, + "computing": 1986, + "hardware": 4810, + "architectures": 793, + "costly": 2263, + "assets": 849, + "protect": 9132, + "reproduction": 9750, + "abuse": 188, + "evolution": 3601, + "watermarking": 12172, + "taxonomy": 11313, + "190": 34, + "definition": 2624, + "threats": 11468, + "merits": 6929, + "discussion": 2981, + "13": 13, + "reliable": 9632, + "tailored": 11095, + "template": 11344, + "icl": 5034, + "establish": 3480, + "proficient": 8931, + "recognize": 9515, + "poorly": 8569, + "parsing": 8304, + "ml": 7085, + "gained": 4360, + "widespread": 12226, + "demand": 2637, + "adapting": 362, + "nontrivial": 7899, + "predominant": 8696, + "consuming": 2121, + "developers": 2828, + "engineers": 3377, + "incredible": 5233, + "reason": 9396, + "experience": 3730, + "difficult": 2921, + "bridge": 1270, + "develop": 2822, + "extending": 3879, + "comprehend": 1921, + "thorough": 11454, + "dedicated": 2593, + "competitiveness": 1881, + "mt": 7602, + "brainstorm": 1262, + "stylized": 10877, + "privacypreserving": 8848, + "mitigate": 7068, + "risks": 10020, + "illustrate": 5055, + "mentioned": 6923, + "achievements": 277, + "fullysupervised": 4308, + "shortcomings": 10353, + "low": 6728, + "regarding": 9578, + "entity": 3425, + "inclination": 5171, + "wrongly": 12289, + "classify": 1659, + "predefined": 8684, + "aforementioned": 492, + "gold": 4656, + "widelyused": 12222, + "sota": 10565, + "performances": 8459, + "derivativefree": 2729, + "lacks": 5751, + "versatility": 12119, + "inappropriate": 5170, + "assumption": 868, + "nearly": 7757, + "optimal": 8087, + "confirm": 2055, + "regardless": 9581, + "refining": 9566, + "answers": 677, + "iterative": 5604, + "iterations": 5603, + "removing": 9694, + "intervention": 5527, + "par": 8271, + "surpass": 11011, + "superiority": 10982, + "stepbystep": 10749, + "decompose": 2584, + "procedures": 8879, + "completing": 1891, + "obtained": 7997, + "tune": 11684, + "sizes": 10494, + "everyday": 3596, + "plan": 8512, + "actions": 331, + "goaloriented": 4653, + "exploited": 3831, + "lms": 6684, + "abstract": 182, + "goals": 4654, + "activities": 339, + "leaves": 6252, + "constraints": 2111, + "understudied": 11790, + "define": 2622, + "constraint": 2110, + "faithfulness": 3985, + "endowing": 3359, + "chatgptlike": 1608, + "plays": 8535, + "industrial": 5260, + "maintenance": 6782, + "failures": 3981, + "necessary": 7759, + "measures": 6885, + "taken": 11098, + "service": 10303, + "reliability": 9630, + "reducing": 9548, + "costs": 2265, + "energy": 3367, + "condition": 2014, + "monitoring": 7586, + "fault": 4005, + "marks": 6835, + "entry": 3434, + "evolved": 3604, + "represents": 9743, + "landmark": 5754, + "achievement": 276, + "consensus": 2078, + "respond": 9842, + "roadmap": 10031, + "developments": 2844, + "answered": 663, + "applicable": 691, + "interpreter": 5525, + "noncausal": 7890, + "upgrading": 11870, + "express": 3870, + "excels": 3631, + "capturing": 1394, + "causality": 1428, + "event": 3591, + "density": 2713, + "distance": 2990, + "aiassisted": 526, + "forms": 4204, + "tagging": 11092, + "direct": 2934, + "mapping": 6827, + "errorprone": 3465, + "limiting": 6361, + "scalability": 10102, + "automating": 956, + "coder": 1737, + "grammar": 4721, + "approaching": 777, + "aid": 527, + "scalable": 10103, + "keywords": 5640, + "uncovering": 11744, + "shallow": 10328, + "highlevel": 4888, + "segmentation": 10201, + "adapt": 347, + "paradigms": 8274, + "chain": 1448, + "thought": 11462, + "reveals": 9970, + "annotator": 651, + "refine": 9561, + "typically": 11722, + "static": 10736, + "closed": 1680, + "fall": 3988, + "emerges": 3260, + "necessitates": 7761, + "extract": 3920, + "dynamically": 3107, + "changing": 1513, + "retraining": 9941, + "convert": 2211, + "principles": 8833, + "expansion": 3722, + "vertical": 12123, + "hybrid": 5027, + "uie": 11726, + "dubbed": 3105, + "contains": 2128, + "prefix": 8704, + "instructor": 5448, + "vanilla": 12026, + "knowledgeintensive": 5715, + "attempted": 885, + "outcome": 8123, + "latent": 6153, + "adding": 371, + "try": 11683, + "inject": 5338, + "consolidation": 2104, + "proves": 9148, + "stored": 10763, + "misuse": 7067, + "passive": 8317, + "specificity": 10643, + "whitebox": 12204, + "embed": 3229, + "watermarks": 12173, + "dividing": 3037, + "list": 6377, + "adjusting": 430, + "watermarked": 12171, + "instance": 5386, + "providers": 9172, + "interests": 5507, + "allow": 593, + "autonomously": 962, + "usage": 11882, + "binary": 1206, + "compute": 1978, + "computed": 1979, + "conform": 2060, + "representing": 9742, + "selectively": 10209, + "contextbased": 2151, + "statistical": 10737, + "retranslation": 9942, + "polishing": 8565, + "substitution": 10903, + "attacks": 881, + "arduous": 796, + "remove": 9693, + "compromising": 1962, + "maybe": 6876, + "exploration": 3834, + "unlock": 11836, + "tens": 11352, + "millions": 7039, + "unaffordable": 11730, + "decrease": 2591, + "conducts": 2049, + "identifies": 5044, + "observations": 7985, + "specialization": 10599, + "taskrelated": 11155, + "protecting": 9133, + "copyright": 2226, + "backdoor": 1004, + "companies": 1814, + "begun": 1096, + "offer": 8006, + "vulnerable": 12169, + "cause": 1429, + "losses": 6725, + "extremely": 3939, + "containing": 2127, + "weight": 12196, + "insertion": 5360, + "transferred": 11600, + "verification": 12110, + "minimizing": 7053, + "utility": 11982, + "propagation": 9046, + "core": 2227, + "insight": 5361, + "nodes": 7887, + "edges": 3122, + "building": 1308, + "blocks": 1230, + "passes": 8316, + "graphstructured": 4740, + "empower": 3292, + "domainspecific": 3064, + "popularity": 8579, + "microsoft": 7033, + "encountered": 3340, + "interaction": 5487, + "commonly": 1798, + "perceived": 8343, + "perceive": 8342, + "gender": 4399, + "preregistered": 8711, + "identity": 5049, + "summarizing": 10968, + "absence": 179, + "eliciting": 3218, + "asking": 823, + "default": 2617, + "perception": 8346, + "reverse": 9971, + "user": 11908, + "economic": 3117, + "rationality": 9358, + "assess": 833, + "examines": 3614, + "instructing": 5397, + "budgetary": 1299, + "decisions": 2577, + "food": 4192, + "measure": 6882, + "maximization": 6873, + "classic": 1648, + "preference": 8700, + "largely": 6120, + "rational": 9353, + "subjects": 10883, + "slightly": 10499, + "heterogeneity": 4858, + "contexts": 2152, + "frames": 4234, + "situations": 10491, + "forward": 4212, + "boosts": 1243, + "exhibited": 3661, + "emergent": 3255, + "ordinary": 8112, + "hardly": 4809, + "extended": 3878, + "singleturn": 10488, + "analogy": 614, + "exploiting": 3832, + "divide": 3033, + "times": 11482, + "accumulated": 226, + "manipulating": 6811, + "keyvalue": 5638, + "matrices": 6868, + "selfattention": 10214, + "takes": 11099, + "concatenating": 1990, + "applies": 720, + "learned": 6179, + "multiplechoice": 7664, + "assistance": 855, + "owing": 8197, + "broad": 1287, + "choose": 1638, + "testbed": 11374, + "collect": 1770, + "reframe": 9575, + "instructionfollowing": 5426, + "llmempowered": 6437, + "patient": 8326, + "mental": 6919, + "receiving": 9447, + "developing": 2829, + "collaborate": 1766, + "closely": 1686, + "recruit": 9536, + "patients": 8327, + "engage": 3368, + "diagnostic": 2855, + "collecting": 1773, + "ratings": 9351, + "assessment": 844, + "designs": 2770, + "treat": 11652, + "black": 1215, + "boxes": 1259, + "accessing": 208, + "gradients": 4719, + "extractor": 3933, + "classifier": 1657, + "augmented": 920, + "parameter": 8279, + "ease": 3111, + "powered": 8651, + "enhancement": 3399, + "connected": 2067, + "availability": 970, + "collected": 1772, + "customize": 2371, + "active": 335, + "interfaces": 5509, + "options": 8104, + "flexibility": 4165, + "meet": 6904, + "accelerate": 195, + "contemplation": 2129, + "reliance": 9636, + "grounded": 4758, + "roles": 10054, + "student": 10831, + "teacher": 11317, + "accordingly": 220, + "maximize": 6874, + "increase": 5223, + "rise": 10015, + "bertscore": 1160, + "applicability": 690, + "experts": 3816, + "aligned": 569, + "treated": 11653, + "crafting": 2295, + "elicit": 3215, + "ask": 820, + "conditioned": 2016, + "background": 1006, + "assistant": 856, + "96": 117, + "counterfactual": 2276, + "strengths": 10795, + "weaknesses": 12186, + "areas": 799, + "underexplored": 11746, + "factors": 3968, + "slms": 10503, + "enhancements": 3401, + "pivotal": 8506, + "interestingly": 5506, + "illustrates": 5056, + "regulate": 9588, + "plenty": 8540, + "storytelling": 10769, + "jobs": 5607, + "replaced": 9706, + "divergent": 3009, + "opinions": 8079, + "conclusion": 2008, + "conducting": 2048, + "regard": 9576, + "databases": 2471, + "professional": 8926, + "discussions": 2982, + "shed": 10337, + "reach": 9361, + "medicine": 6903, + "deficiency": 2621, + "inadequate": 5169, + "resolve": 9820, + "continual": 2157, + "add": 369, + "extracted": 3924, + "nlg": 7856, + "references": 9555, + "poor": 8567, + "actually": 345, + "expressed": 3871, + "reflect": 9567, + "hypotheses": 5031, + "reference": 9553, + "image": 5058, + "caption": 1387, + "782": 98, + "ratio": 9352, + "reformulation": 9574, + "turn": 11705, + "determine": 2817, + "avoid": 987, + "mimicking": 7044, + "rewritten": 10001, + "limit": 6337, + "reformulate": 9571, + "infusion": 5325, + "intuition": 5554, + "adequately": 422, + "prevailing": 8801, + "component": 1912, + "unexplored": 11794, + "welldesigned": 12200, + "utterance": 12005, + "channels": 1514, + "recursively": 9540, + "mild": 7035, + "assumptions": 869, + "rendering": 9696, + "compatible": 1867, + "probabilistic": 8852, + "utterances": 12006, + "bounds": 1257, + "simulated": 10473, + "alleviating": 591, + "situation": 10490, + "confounders": 2061, + "unresolved": 11845, + "fair": 3982, + "evaluators": 3590, + "adopting": 436, + "modelsllms": 7563, + "easily": 3113, + "altering": 604, + "appearance": 689, + "manipulation": 6812, + "appear": 688, + "considerably": 2084, + "80": 102, + "tested": 11375, + "evaluator": 3589, + "assigning": 851, + "position": 8594, + "orders": 8111, + "final": 4065, + "humanintheloop": 5001, + "entropy": 3433, + "seeks": 10196, + "vicuna": 12127, + "closer": 1687, + "judgments": 5618, + "dont": 3071, + "excel": 3626, + "accommodate": 209, + "referred": 9557, + "paramount": 8299, + "methodology": 6973, + "uncertainty": 11735, + "answerable": 662, + "discovering": 2961, + "intrinsic": 5533, + "proficiency": 8930, + "recognizing": 9517, + "humancomputer": 4997, + "psychological": 9197, + "behavioral": 1101, + "displayed": 2987, + "employing": 3287, + "personality": 8476, + "crosslingual": 2319, + "effects": 3180, + "changes": 1512, + "cues": 2335, + "maintains": 6781, + "shedding": 10342, + "anticipate": 680, + "serve": 10295, + "catalyst": 1412, + "explanation": 3821, + "discrepancy": 2964, + "unstructured": 11853, + "finetunes": 4118, + "textbased": 11419, + "prove": 9141, + "insufficient": 5452, + "bridging": 1277, + "synthesizing": 11042, + "margins": 6831, + "actual": 344, + "commonsenseqa": 1805, + "openbookqa": 8041, + "functioning": 4313, + "behave": 1097, + "responsibly": 9860, + "helping": 4854, + "personal": 8475, + "depth": 2727, + "completeness": 1890, + "security": 10191, + "executors": 3652, + "empowered": 3295, + "verifying": 12116, + "generic": 4625, + "learnable": 6178, + "ui": 11725, + "homepage": 4931, + "international": 5519, + "april": 784, + "china": 1619, + "diagnosis": 2853, + "investigations": 5569, + "humanlevel": 5002, + "validation": 12016, + "interpretability": 5522, + "doctors": 3040, + "preserving": 8739, + "integrity": 5466, + "remote": 9692, + "mitigating": 7076, + "concern": 1996, + "straightforward": 10770, + "practice": 8674, + "invoking": 5572, + "primary": 8829, + "directed": 2936, + "thoughts": 11465, + "concluding": 2007, + "statement": 10699, + "activity": 340, + "batch": 1089, + "students": 10834, + "divided": 3035, + "rendered": 9695, + "run": 10073, + "anomaly": 653, + "logs": 6702, + "play": 8525, + "software": 10535, + "fruitful": 4294, + "notable": 7905, + "cloud": 1689, + "face": 3945, + "consumption": 2122, + "adaptability": 350, + "lightweight": 6319, + "tda": 11314, + "realtime": 9385, + "log": 6696, + "runs": 10075, + "2x": 51, + "faster": 4003, + "pandalm": 8206, + "hyperparameter": 5030, + "involved": 5574, + "tuned": 11685, + "judge": 5615, + "extends": 3880, + "adherence": 426, + "ensure": 3416, + "humanannotated": 4994, + "gpt4s": 4701, + "f1score": 3944, + "evidenced": 3598, + "alpacas": 601, + "depend": 2714, + "avoiding": 994, + "leakage": 6174, + "generalized": 4431, + "3000": 54, + "embodied": 3234, + "relied": 9638, + "attain": 882, + "creates": 2300, + "multiagent": 7604, + "400": 65, + "50": 74, + "feasible": 4008, + "draw": 3087, + "conclusions": 2009, + "unfortunately": 11797, + "comprises": 1956, + "tabular": 11078, + "papers": 8270, + "verifier": 12112, + "locate": 6693, + "shared": 10331, + "flexible": 4166, + "measuring": 6887, + "psychology": 9198, + "f1": 3942, + "reported": 9719, + "inefficient": 5266, + "inaccurate": 5167, + "adjusts": 433, + "fewer": 4025, + "reports": 9720, + "behaves": 1098, + "guessing": 4775, + "finegrained": 4100, + "instructiontuned": 5442, + "programming": 8938, + "tests": 11380, + "norm": 7900, + "subtask": 10904, + "stems": 10743, + "yielded": 12300, + "centered": 1440, + "pattern": 8328, + "thoroughly": 11458, + "firmly": 4153, + "distillation": 2993, + "necessitate": 7760, + "intricate": 5530, + "commence": 1789, + "elicitation": 3217, + "multimodality": 7647, + "dimensions": 2933, + "conditional": 2015, + "textonly": 11421, + "concerning": 1998, + "unveiling": 11864, + "return": 9960, + "trading": 11514, + "revolves": 9993, + "investment": 5570, + "portfolio": 8583, + "adjustments": 432, + "implementations": 5089, + "subsequent": 10888, + "rigorous": 10012, + "encompassing": 3336, + "aiming": 541, + "efficacy": 3181, + "news": 7852, + "distinctive": 3000, + "languagespecific": 5999, + "volumes": 12163, + "summary": 10969, + "proceed": 8880, + "running": 10074, + "realistic": 9374, + "constructing": 2116, + "element": 3212, + "extracting": 3925, + "expertise": 3815, + "beings": 1105, + "squad": 10666, + "strengthens": 10794, + "generalizing": 4433, + "race": 9302, + "keeping": 5624, + "beginning": 1094, + "emotion": 3263, + "multimodalities": 7646, + "visual": 12148, + "clues": 1692, + "assume": 865, + "emotions": 3267, + "explanations": 3822, + "predictions": 8694, + "predicted": 8687, + "plausible": 8524, + "necessity": 7762, + "multifaceted": 7608, + "deal": 2559, + "longstanding": 6713, + "ambiguity": 609, + "chart": 1522, + "revolutionize": 9981, + "proprietary": 9129, + "leveraged": 6282, + "strides": 10800, + "finance": 4080, + "digital": 2930, + "vector": 12105, + "solely": 10539, + "opensourced": 8065, + "curated": 2343, + "holistically": 4930, + "societal": 10528, + "100k": 7, + "stereotypes": 10754, + "14": 21, + "culture": 2338, + "curation": 2344, + "ambiguous": 610, + "stringent": 10801, + "control": 2185, + "exhibiting": 3666, + "extent": 3912, + "harmful": 4814, + "moral": 7588, + "exceptional": 3632, + "threestage": 11470, + "llmdriven": 6436, + "datadriven": 2473, + "laws": 6162, + "universal": 11821, + "variables": 12030, + "recovering": 9534, + "pioneering": 8500, + "emphasize": 3269, + "frontier": 4290, + "opening": 8051, + "encourage": 3342, + "misleading": 7062, + "emphasizing": 3271, + "pro": 8850, + "pairwise": 8205, + "contrasting": 2173, + "prioritize": 8841, + "progressively": 8950, + "transforms": 11628, + "longer": 6709, + "sequences": 10286, + "regarded": 9577, + "recommendation": 9518, + "individuals": 5253, + "specified": 10644, + "narrowed": 7702, + "discover": 2959, + "existence": 3676, + "thanks": 11441, + "stable": 10670, + "decades": 2567, + "sam": 10085, + "generalizable": 4423, + "blank": 1223, + "scope": 10155, + "curate": 2342, + "clear": 1670, + "clean": 1668, + "meta": 6930, + "instantiation": 5389, + "communities": 1810, + "15": 23, + "setups": 10322, + "safety": 10080, + "fostering": 4216, + "uniquely": 11818, + "separates": 10276, + "pairs": 8203, + "total": 11506, + "questionanswer": 9281, + "gathered": 4393, + "contributing": 2182, + "safe": 10079, + "page": 8199, + "url": 11879, + "blueprint": 1234, + "assumes": 866, + "significance": 10399, + "pursuit": 9220, + "routes": 10065, + "coupled": 2280, + "trial": 11664, + "barrier": 1017, + "motivate": 7592, + "landing": 5753, + "puzzle": 9223, + "dissect": 2988, + "inner": 5342, + "workings": 12270, + "parts": 8313, + "posed": 8587, + "modest": 7571, + "retrievalaugmented": 9949, + "strict": 10798, + "operators": 8077, + "sharing": 10336, + "intents": 5483, + "assisting": 862, + "firstly": 4155, + "unifying": 11813, + "clarifying": 1645, + "executable": 3646, + "encompass": 3333, + "architectural": 788, + "innovations": 5343, + "benchmarking": 1133, + "regular": 9587, + "breakthroughs": 1269, + "bigger": 1195, + "picture": 8493, + "imperative": 5084, + "treatment": 11655, + "details": 2796, + "pay": 8333, + "accounting": 225, + "broader": 1292, + "discusses": 2979, + "intended": 5478, + "quick": 9299, + "practitioners": 8676, + "differentiable": 2917, + "action": 327, + "innovative": 5344, + "verb": 12108, + "truth": 11681, + "assignment": 852, + "matrix": 6869, + "moderate": 7564, + "submission": 10884, + "top1": 11501, + "discourseaware": 2958, + "overlook": 8189, + "totally": 11507, + "literary": 6379, + "judgment": 5617, + "professionals": 8929, + "teach": 11315, + "teaches": 11319, + "fact": 3964, + "concentrate": 1991, + "acts": 343, + "justification": 5620, + "pose": 8584, + "negative": 7772, + "impacts": 5083, + "mainly": 6772, + "neglecting": 7779, + "criteria": 2307, + "induced": 5257, + "github": 4629, + "strategic": 10773, + "expanding": 3720, + "exciting": 3644, + "synergistic": 11032, + "seeking": 10195, + "contribute": 2179, + "central": 1441, + "services": 10304, + "credibility": 2306, + "considerations": 2085, + "transformative": 11606, + "mutual": 7691, + "proposal": 9053, + "factual": 3972, + "prowess": 9188, + "boundaries": 1254, + "affects": 489, + "examining": 3615, + "awareness": 1000, + "formulating": 4210, + "games": 4373, + "presented": 8726, + "caused": 1431, + "overcoming": 8182, + "treats": 11656, + "game": 4371, + "voting": 12164, + "cooperative": 2223, + "accompanied": 211, + "actorcritic": 342, + "editing": 3126, + "showcased": 10360, + "discrepancies": 2963, + "refines": 9565, + "taking": 11101, + "performing": 8465, + "criticizing": 2314, + "24": 47, + "induction": 5258, + "98": 119, + "ood": 8030, + "catalyzed": 1413, + "smallscale": 10519, + "stark": 10689, + "embarks": 3228, + "focusing": 4181, + "65b": 90, + "indistribution": 5248, + "unveil": 11863, + "detector": 2811, + "outperforming": 8142, + "intriguing": 5531, + "phenomenon": 8488, + "spaces": 10582, + "anisotropic": 639, + "observed": 7989, + "environments": 3438, + "restricted": 9864, + "bilingual": 1196, + "atomic": 874, + "restrict": 9863, + "decent": 2568, + "advent": 477, + "past": 8318, + "couple": 2279, + "gradually": 4720, + "statistically": 10740, + "option": 8103, + "remarkably": 9688, + "zerofewshot": 12307, + "fewshort": 4027, + "hoping": 4935, + "kg": 5642, + "recommending": 9525, + "friendly": 4289, + "behaviors": 1104, + "tail": 11094, + "mines": 7047, + "recommendations": 9522, + "prefer": 8699, + "tackling": 11089, + "unable": 11728, + "weak": 12179, + "boosting": 1242, + "builds": 1314, + "reflecting": 9568, + "backward": 1010, + "majority": 6785, + "calculation": 1324, + "matters": 6871, + "sequentially": 10291, + "concatenated": 1988, + "locality": 6688, + "modeled": 7242, + "forgetting": 4193, + "shifting": 10347, + "concurrently": 2012, + "13b": 19, + "97": 118, + "played": 8530, + "occur": 8004, + "softmax": 10533, + "establishes": 3482, + "avoids": 998, + "autonomous": 958, + "utilities": 11981, + "pass": 8314, + "reduces": 9546, + "calls": 1331, + "mind": 7046, + "vs": 12165, + "inconsistency": 5201, + "researches": 9816, + "constructs": 2118, + "twolevel": 11710, + "conscious": 2077, + "statements": 10700, + "biased": 1190, + "contradicts": 2166, + "corroborate": 2254, + "emerge": 3238, + "strengthen": 10793, + "theories": 11448, + "closedsource": 1685, + "instrumental": 5450, + "depends": 2719, + "deeply": 2615, + "utilization": 11983, + "arising": 803, + "strictly": 10799, + "culminating": 2336, + "variant": 12032, + "reliant": 9637, + "reality": 9378, + "determined": 2818, + "player": 8531, + "steer": 10742, + "texttoimage": 11427, + "craft": 2292, + "narratives": 7700, + "shape": 10329, + "elements": 3213, + "gameplay": 4372, + "possibilities": 8602, + "fresh": 4288, + "labourintensive": 5738, + "acceleration": 198, + "chemistry": 1618, + "material": 6855, + "eliminates": 3220, + "injects": 5341, + "showcases": 10362, + "overarching": 8176, + "prosperity": 9131, + "mllm": 7087, + "mllms": 7088, + "closedloop": 1684, + "bridges": 1276, + "loop": 6721, + "weakness": 12185, + "incremental": 5234, + "collection": 1774, + "targeting": 11109, + "multiround": 7668, + "participation": 8309, + "implied": 5097, + "freeform": 4283, + "valid": 12010, + "infusing": 5324, + "convergence": 2199, + "instructional": 5424, + "reflects": 9570, + "curriculum": 2366, + "selfinstruction": 10221, + "ignores": 5051, + "multistage": 7671, + "selfinstruct": 10220, + "introspective": 5553, + "tuningfree": 11704, + "claude": 1662, + "gpt4tools": 4702, + "facial": 3951, + "encapsulate": 3319, + "conveying": 2217, + "arbitrary": 786, + "styles": 10875, + "eliminating": 3223, + "termed": 11355, + "yield": 12299, + "expressive": 3874, + "controllable": 2189, + "epa": 3441, + "accomplishes": 215, + "desired": 2772, + "thousand": 11466, + "windows": 12232, + "sophisticated": 10563, + "lacking": 5750, + "struggles": 10830, + "compression": 1954, + "counseling": 2273, + "decisionsupport": 2579, + "landscape": 5756, + "underscores": 11752, + "profound": 8934, + "counselors": 2274, + "interventions": 5528, + "pressing": 8740, + "assist": 853, + "harness": 4817, + "meaningful": 6879, + "affirm": 490, + "compelling": 1868, + "lays": 6165, + "organization": 8113, + "elaborate": 3207, + "spider": 10656, + "execution": 3650, + "bar": 1015, + "explorations": 3836, + "disadvantages": 2954, + "deeper": 2611, + "refers": 9560, + "expanded": 3719, + "possesses": 8601, + "infer": 5269, + "subtasks": 10905, + "dealing": 2560, + "teams": 11325, + "decides": 2571, + "considers": 2088, + "invokes": 5571, + "chosen": 1641, + "bootstrapping": 1247, + "sparked": 10588, + "modality": 7095, + "quantities": 9255, + "bootstraps": 1248, + "frozen": 4291, + "segment": 10198, + "transcript": 11592, + "transcripts": 11593, + "obtaining": 7998, + "accomplish": 212, + "equips": 3447, + "connect": 2066, + "controllers": 2193, + "userfriendly": 11920, + "library": 6306, + "seamless": 10169, + "equip": 3444, + "1000": 6, + "localized": 6690, + "sketch": 10495, + "cut": 2373, + "shortage": 10352, + "trees": 11659, + "transforming": 11627, + "hierarchy": 4865, + "divideandconquer": 3034, + "chose": 1640, + "depths": 2728, + "degrees": 2630, + "secondly": 10187, + "impractical": 5105, + "highdimensional": 4878, + "spirit": 10657, + "selfdriven": 10218, + "grounding": 4759, + "agents": 497, + "skill": 10496, + "hinders": 4919, + "generality": 4421, + "ground": 4757, + "hypothesis": 5032, + "subgoals": 10878, + "interacting": 5486, + "verified": 12111, + "phase": 8485, + "imitation": 5071, + "proving": 9186, + "showing": 10368, + "chatgptbased": 1606, + "aipowered": 553, + "indicated": 5243, + "proved": 9142, + "supplementary": 10995, + "complementing": 1885, + "operations": 8076, + "multiplication": 7666, + "billionparameter": 1201, + "surpassing": 11016, + "43": 68, + "budget": 1298, + "successes": 10927, + "consolidate": 2102, + "knowledgeoriented": 5720, + "rule": 10070, + "minimize": 7051, + "glm130b": 4646, + "checkpoint": 1616, + "verbalizer": 12109, + "space": 10580, + "cloze": 1691, + "mask": 6836, + "labor": 5735, + "nonlinear": 7898, + "locally": 6692, + "neighborhood": 7780, + "preserves": 8738, + "32": 58, + "stimulates": 10757, + "exists": 3717, + "existed": 3675, + "facing": 3963, + "implemented": 5090, + "encounter": 3337, + "managing": 6809, + "request": 9751, + "targets": 11110, + "edge": 3121, + "decomposed": 2586, + "manage": 6805, + "configuration": 2053, + "processed": 8896, + "decomposing": 2588, + "artificially": 818, + "molecule": 7584, + "cornerstone": 2229, + "materials": 6856, + "drug": 3102, + "crossmodal": 2320, + "molecular": 7583, + "descriptive": 2743, + "advancing": 471, + "inconsistencies": 5200, + "socalled": 10523, + "appealing": 687, + "selfevaluation": 10219, + "operates": 8073, + "updates": 11868, + "modify": 7575, + "attack": 875, + "defense": 2618, + "19": 33, + "postprocessing": 8611, + "practically": 8673, + "shortens": 10355, + "postprocessed": 8610, + "post": 8608, + "versatile": 12118, + "seamlessly": 10171, + "preceding": 8678, + "219": 43, + "68": 93, + "book": 1236, + "inhouse": 5332, + "wordlevel": 12243, + "biasing": 1192, + "private": 8849, + "tutoring": 11707, + "chaining": 1452, + "course": 2282, + "cater": 1421, + "interconnected": 5502, + "reflection": 9569, + "reaction": 9365, + "storage": 10761, + "gets": 4627, + "testify": 11376, + "connecting": 2068, + "optimizers": 8100, + "crafted": 2293, + "discrete": 2965, + "fast": 4001, + "humanreadable": 5014, + "population": 8581, + "25": 48, + "inspire": 5369, + "combination": 1778, + "spotting": 10664, + "names": 7698, + "texttospeech": 11432, + "convolutional": 2218, + "cnn": 1695, + "match": 6850, + "englishonly": 3384, + "codeswitching": 1749, + "llmgenerated": 6438, + "satisfactory": 10094, + "lean": 6175, + "daily": 2376, + "paid": 8200, + "classifying": 1660, + "multilevel": 7613, + "correspondingly": 2253, + "depending": 2718, + "surrounding": 11023, + "purposes": 9219, + "characterized": 1520, + "catastrophic": 1414, + "rewards": 9998, + "prevent": 8803, + "strategically": 10774, + "rates": 9348, + "investigated": 5564, + "studied": 10835, + "mixing": 7080, + "tendency": 11351, + "distractors": 3003, + "defined": 2623, + "threshold": 11471, + "compose": 1916, + "educational": 3133, + "adeptly": 420, + "navigate": 7754, + "alongside": 599, + "15b": 24, + "functionality": 4312, + "amazing": 607, + "intertask": 5526, + "openchat": 8042, + "nowadays": 7943, + "mixed": 7079, + "equally": 3443, + "rankingbased": 9331, + "proportion": 9052, + "regards": 9582, + "coarsegrained": 1696, + "complementary": 1884, + "solved": 10555, + "validate": 12011, + "qualitative": 9232, + "contract": 2164, + "saving": 10099, + "represented": 9741, + "constrain": 2107, + "nested": 7782, + "captures": 1393, + "llmassisted": 6431, + "contracts": 2165, + "promise": 8963, + "emulate": 3305, + "solid": 10540, + "acting": 326, + "extraordinary": 3936, + "plans": 8518, + "flaws": 4164, + "hindered": 4917, + "irrelevant": 5584, + "inaccuracies": 5166, + "barriers": 1018, + "encouraging": 3344, + "scratch": 10161, + "realm": 9384, + "discern": 2955, + "recognise": 9509, + "dialogues": 2869, + "taskoriented": 11154, + "spectrum": 10646, + "capacities": 1379, + "zhou": 12327, + "fund": 4315, + "generalizability": 4422, + "superficial": 10970, + "develops": 2845, + "advice": 483, + "healthcare": 4839, + "sourced": 10577, + "internet": 5520, + "cleansing": 1669, + "origins": 8121, + "supports": 11004, + "aids": 529, + "propelling": 9047, + "communications": 1809, + "fms": 4172, + "physical": 8490, + "profoundly": 8935, + "avoidance": 993, + "sorting": 10564, + "qualitatively": 9235, + "preprocessing": 8710, + "missing": 7065, + "incomplete": 5198, + "hinder": 4916, + "usefulness": 11906, + "aroused": 806, + "attempting": 886, + "layers": 6164, + "multihead": 7610, + "qformer": 9230, + "encoders": 3330, + "segments": 10204, + "exceeding": 3624, + "humangenerated": 5000, + "selected": 10206, + "humancrafted": 4998, + "greater": 4752, + "rationale": 9354, + "competence": 1870, + "exclusively": 3645, + "injecting": 5339, + "calibrating": 1327, + "gptstyle": 4708, + "ubiquitous": 11724, + "devices": 2846, + "societies": 10529, + "contextaware": 2150, + "enabled": 3309, + "autoagents": 929, + "requests": 9752, + "sensor": 10260, + "later": 6155, + "follows": 4190, + "foster": 4214, + "operating": 8074, + "mobile": 7091, + "scheduling": 10139, + "naturally": 7751, + "wonder": 12236, + "fairly": 3983, + "endows": 3360, + "deepen": 2609, + "satisfy": 10096, + "peoples": 8341, + "lives": 6386, + "nonetheless": 7893, + "empowers": 3304, + "suit": 10949, + "criterion": 2308, + "constitute": 2106, + "team": 11324, + "vulnerability": 12167, + "formulation": 4211, + "quantifiable": 9245, + "guarantees": 4774, + "theoretically": 11446, + "oversight": 8194, + "misalignment": 7060, + "adaptable": 352, + "specify": 10645, + "versus": 12122, + "indicating": 5245, + "believed": 1109, + "hold": 4926, + "pilot": 8497, + "necessarily": 7758, + "merge": 6927, + "mimic": 7042, + "calibrate": 1325, + "aligns": 583, + "merges": 6928, + "markedly": 6833, + "88": 108, + "rectifies": 9537, + "elevating": 3214, + "standalone": 10680, + "intensive": 5479, + "consist": 2089, + "language understanding": 5984, + "latest work": 6157, + "language model": 5779, + "model training": 7231, + "training enables": 11551, + "various machine": 12076, + "machine reading": 6757, + "reading comprehension": 9369, + "natural language": 7708, + "language inference": 5768, + "tasks existing": 11201, + "existing language": 3691, + "models including": 7358, + "gpt bert": 4666, + "word embeddings": 12238, + "semantic information": 10235, + "rich semantics": 10008, + "semantics language": 10254, + "propose incorporate": 9072, + "incorporate explicit": 5211, + "representation model": 9730, + "compared bert": 1841, + "obtains new": 8000, + "new stateoftheart": 7838, + "substantially improves": 10900, + "tasks zeroshot": 11305, + "multilingual language": 7617, + "language models": 5809, + "automatically generate": 953, + "involves multiple": 5578, + "machine translation": 6758, + "translation models": 11637, + "paper inspired": 8233, + "transformerbased language": 11617, + "models propose": 7499, + "propose simple": 9098, + "parallel data": 8277, + "data conduct": 2397, + "semantically similar": 10250, + "radford et": 9304, + "et al": 3489, + "al 2018": 559, + "model largescale": 7174, + "addition introduce": 374, + "robustness model": 10048, + "model experimental": 7142, + "experimental results": 3739, + "results model": 9915, + "model surpasses": 7224, + "pretraining model": 8790, + "story generation": 10767, + "generation generating": 4533, + "important challenging": 5100, + "challenging task": 1505, + "neural language": 7799, + "language generation": 5764, + "generation models": 4553, + "models gpt2": 7346, + "coherence generated": 1758, + "generated stories": 4488, + "commonsense knowledge": 1801, + "knowledge understanding": 5708, + "causal relationships": 1427, + "paper devise": 8219, + "generation propose": 4567, + "knowledge external": 5670, + "external knowledge": 3915, + "knowledge bases": 5654, + "generate reasonable": 4465, + "multitask learning": 7678, + "learning combines": 6200, + "automatic manual": 941, + "evaluation shows": 3579, + "model generate": 7153, + "stateoftheart baselines": 10705, + "sequence tokens": 10284, + "text generation": 11395, + "generation specifically": 4579, + "pretrained language": 8746, + "model gpt2": 7157, + "learning generate": 6211, + "finetuning stage": 4145, + "lead higher": 6167, + "bleu score": 1225, + "conduct experiments": 2028, + "pretraining experimental": 8778, + "question answering": 9267, + "prior work": 8840, + "large text": 6119, + "text corpus": 11387, + "retrieving knowledge": 9959, + "knowledge large": 5684, + "large corpus": 6005, + "semantic representation": 10239, + "specifically method": 10636, + "method based": 6939, + "method large": 6956, + "large language": 6008, + "evaluate method": 3510, + "question types": 9278, + "information provided": 5311, + "significantly improves": 10435, + "improves accuracy": 5148, + "multihop reasoning": 7612, + "long text": 6707, + "generation long": 4549, + "generation important": 4536, + "generative models": 4605, + "models suffer": 7539, + "address problem": 409, + "problem propose": 8868, + "reasoning generation": 9421, + "generation mrg": 4554, + "approach incorporates": 746, + "knowledge graph": 5673, + "process human": 8885, + "human writing": 4992, + "unlike previous": 11833, + "proposed model": 9122, + "experiments representative": 3797, + "representative tasks": 9740, + "tasks including": 11221, + "description generation": 2740, + "generation automatic": 4516, + "proposed method": 9119, + "method generate": 6952, + "strong baselines": 10807, + "models design": 7300, + "language learning": 5778, + "transfer learning": 11596, + "learning paper": 6231, + "finetuning dataset": 4123, + "learning including": 6218, + "speech recognition": 10650, + "semantic level": 10237, + "specific domain": 10609, + "level language": 6270, + "artificial intelligence": 816, + "neural network": 7801, + "learning agent": 6186, + "gpt2 model": 4675, + "task demands": 11124, + "previous works": 8821, + "works shown": 12277, + "largescale language": 6134, + "model achieved": 7102, + "good performance": 4659, + "generation observe": 4557, + "usually contain": 11979, + "propose twostage": 9107, + "generation framework": 4532, + "second stage": 10185, + "supervision signals": 10994, + "errors improve": 3468, + "dataset model": 2502, + "model outperforms": 7190, + "outperforms baseline": 8146, + "baseline approaches": 1064, + "terms automatic": 11358, + "automatic metrics": 943, + "metrics human": 7026, + "human evaluation": 4962, + "new application": 7806, + "training neural": 11573, + "training data": 11545, + "propose novel": 9084, + "data available": 2390, + "based gpt2": 1039, + "data samples": 2455, + "order make": 8110, + "weakly supervised": 12183, + "training paradigm": 11576, + "able outperform": 177, + "fully supervised": 4307, + "annotated data": 643, + "data model": 2434, + "model boost": 7117, + "boost performance": 1239, + "performance standard": 8431, + "seq2seq model": 10278, + "model bleu": 7116, + "design optimization": 2753, + "emergence large": 3251, + "models llms": 7388, + "openais chatgpt": 8039, + "googles bard": 4664, + "paper explores": 8229, + "applying llms": 727, + "specifically utilize": 10641, + "gpt35 gpt4": 4685, + "deep reinforcement": 2606, + "reinforcement learning": 9592, + "learning code": 6199, + "perform better": 8350, + "converse gpt": 2209, + "paper shows": 8267, + "llms chatgpt": 6474, + "technical level": 11327, + "propose new": 9081, + "practical implications": 8667, + "achieve significant": 260, + "math word": 6860, + "word problems": 12242, + "word problem": 12241, + "critical task": 2313, + "task natural": 11134, + "language processing": 5960, + "recent studies": 9476, + "generation task": 4581, + "problem descriptions": 8861, + "address limitation": 402, + "framework based": 4238, + "based generative": 1034, + "generative pretrained": 4606, + "training generation": 11557, + "ranking model": 9330, + "model learns": 7176, + "correct incorrect": 2237, + "specially designed": 10605, + "demonstrate effectiveness": 2651, + "effectiveness proposed": 3174, + "method benchmark": 6940, + "benchmark results": 1129, + "results method": 9911, + "method consistently": 6944, + "consistently outperforms": 2097, + "outperforms baselines": 8148, + "pretrained models": 8763, + "models gpt": 7345, + "modeling language": 7246, + "language structure": 5976, + "texts models": 11426, + "models consider": 7283, + "numerical reasoning": 7960, + "reasoning tasks": 9439, + "paper propose": 8251, + "pretrained model": 8762, + "model explicitly": 7145, + "specifically leverages": 10635, + "loss function": 6724, + "pretraining objective": 8791, + "conduct extensive": 2029, + "extensive experiments": 3893, + "experiments different": 3778, + "different datasets": 2879, + "datasets evaluate": 2528, + "experiment results": 3734, + "baseline models": 1069, + "ablation studies": 169, + "studies conducted": 10837, + "conducted evaluate": 2041, + "evaluate impact": 3508, + "table question": 11074, + "models achieved": 7254, + "performance using": 8442, + "using pretrained": 11963, + "pretraining corpus": 8773, + "pretraining large": 8787, + "opendomain text": 8047, + "performance models": 8411, + "response propose": 9846, + "t5 gpt2": 11071, + "gpt2 based": 4673, + "based natural": 1049, + "question generation": 9276, + "logical form": 6700, + "better suited": 1182, + "practical deployment": 8665, + "ai technology": 524, + "autoregressive language": 964, + "able generate": 175, + "generate humanlike": 4450, + "humanlike text": 5010, + "case studies": 1405, + "aigenerated content": 533, + "models end": 7314, + "end paper": 3350, + "paper focuses": 8231, + "sentence embeddings": 10262, + "increasing scale": 5229, + "hundreds billions": 5026, + "billions parameters": 1204, + "sets stateoftheart": 10314, + "stateoftheart results": 10726, + "results various": 9934, + "various language": 12072, + "language tasks": 5978, + "finetuning large": 4128, + "large foundation": 6006, + "foundation models": 4223, + "related fields": 9600, + "separate models": 10274, + "end propose": 3352, + "billion parameters": 1200, + "best sentence": 1170, + "175 billion": 29, + "code models": 1722, + "pretraining transformer": 8799, + "transformer decoder": 11610, + "data paper": 2441, + "automatic speech": 948, + "learning framework": 6210, + "pretraining tasks": 8798, + "masked language": 6838, + "language modeling": 5807, + "instead generating": 5391, + "generating textual": 4510, + "speech information": 10649, + "generate correct": 4442, + "comprehensive experiments": 1940, + "word error": 12239, + "error rate": 3464, + "release code": 9618, + "code model": 1720, + "completion task": 1894, + "standard benchmark": 10682, + "knowledge base": 5652, + "free text": 4281, + "better prompts": 1181, + "prompts text": 9041, + "linked knowledge": 6374, + "fewshot learning": 4033, + "models incontext": 7360, + "incontext learning": 5204, + "work propose": 12260, + "zero fewshot": 12306, + "fewshot classification": 4030, + "greedy decoding": 4756, + "method adopts": 6937, + "diverse set": 3027, + "tasks extensive": 11209, + "extensive analysis": 3885, + "different scales": 2904, + "selfsupervised learning": 10224, + "extensively explored": 3911, + "explored recent": 3853, + "recent years": 9485, + "success natural": 10920, + "wide adoption": 12206, + "bert gpt": 1153, + "heavily relies": 4842, + "data augmentation": 2386, + "dominant approach": 3069, + "paper identify": 8232, + "impact development": 5078, + "generative selfsupervised": 4621, + "masking strategy": 6842, + "public datasets": 9202, + "datasets different": 2526, + "learning tasks": 6246, + "tasks results": 11276, + "study provides": 10865, + "demonstrates potential": 2696, + "model data": 7129, + "nlp tasks": 7872, + "tasks training": 11294, + "training set": 11581, + "limited existing": 6352, + "existing solutions": 3708, + "heuristic rules": 4861, + "synonym replacement": 11034, + "gpt2 using": 4677, + "limited training": 6359, + "produce new": 8917, + "synthetic data": 11044, + "knowledge limited": 5692, + "issue propose": 5592, + "propose knowledge": 9076, + "model pretrained": 7201, + "novel framework": 7921, + "framework knowledge": 4263, + "knowledge single": 5701, + "target task": 11107, + "tasks unified": 11295, + "unified texttotext": 11805, + "texttotext format": 11435, + "training objectives": 11574, + "objectives different": 7980, + "best knowledge": 1164, + "multitask training": 7680, + "data produced": 2444, + "improves performance": 5150, + "performance strong": 8434, + "models bert": 7266, + "large margin": 6106, + "nlp benchmark": 7861, + "parameters experiments": 8294, + "performance improvements": 8399, + "models used": 7555, + "models multiple": 7469, + "multiple tasks": 7663, + "tasks large": 11235, + "achieved impressive": 266, + "impressive zeroshot": 5117, + "zeroshot ability": 12310, + "model size": 7218, + "high cost": 4870, + "smaller language": 10513, + "model external": 7149, + "demonstrated promising": 2684, + "modeling capabilities": 7244, + "capabilities remains": 1357, + "remains unclear": 9663, + "models perform": 7487, + "perform competitively": 8351, + "generalization downstream": 4428, + "downstream tasks": 3078, + "tasks work": 11301, + "work introduce": 12254, + "model best": 7112, + "model demonstrate": 7131, + "strong zeroshot": 10812, + "zeroshot performance": 12319, + "performance wide": 8455, + "wide range": 12209, + "unseen tasks": 11849, + "shows significant": 10393, + "significant improvement": 10412, + "fusion module": 4344, + "notably proposed": 7909, + "evaluation tasks": 3580, + "spoken language": 10662, + "texts challenging": 11424, + "alleviate data": 585, + "data scarcity": 2456, + "scarcity problem": 10118, + "lack largescale": 5745, + "largescale highquality": 6132, + "language text": 5981, + "text data": 11388, + "overcome limitation": 8178, + "limitation propose": 6339, + "largescale indomain": 6133, + "data specifically": 2462, + "texts generated": 11425, + "significantly outperforms": 10444, + "outperforms compared": 8150, + "achieve improvements": 253, + "demonstrating effectiveness": 2703, + "effectiveness approach": 3168, + "approach release": 756, + "code data": 1705, + "data facilitating": 2410, + "facilitating future": 3960, + "future research": 4350, + "research field": 9791, + "inference paper": 5273, + "solution problem": 10543, + "integrating text": 5463, + "graph structure": 4735, + "information large": 5302, + "graph neural": 4732, + "neural networks": 7802, + "networks gnns": 7795, + "high computational": 4868, + "computational complexity": 1970, + "training large": 11561, + "propose efficient": 9064, + "efficient effective": 3194, + "learning large": 6221, + "framework called": 4242, + "allows training": 597, + "training modules": 11571, + "experiments multiple": 3789, + "multiple data": 7652, + "efficiency effectiveness": 3185, + "proposed approach": 9112, + "stateoftheart performance": 10721, + "performance numerous": 8419, + "numerous natural": 7962, + "data modalities": 2433, + "unique characteristics": 11817, + "tasks like": 11242, + "decision making": 2573, + "unique challenges": 11816, + "challenges applying": 1475, + "privacy concerns": 8844, + "processing nlp": 8905, + "nlp demonstrate": 7864, + "success large": 10914, + "models llm": 7387, + "model learn": 7175, + "representations paper": 9734, + "model named": 7186, + "extensive experimental": 3890, + "results public": 9927, + "superior performance": 10975, + "performance stateoftheart": 8432, + "stateoftheart approaches": 10703, + "scaling law": 10115, + "performance gains": 8388, + "demonstrates importance": 2695, + "sheds light": 10345, + "promising research": 8973, + "research direction": 9784, + "language prompts": 5972, + "crosstask generalization": 2323, + "models limited": 7385, + "limited labeled": 6354, + "highly sensitive": 4906, + "challenging given": 1497, + "address issue": 394, + "labeled data": 5730, + "gradient update": 4716, + "unlabeled data": 11826, + "fewshot setting": 4039, + "prompt tuning": 9000, + "model tuning": 7232, + "chatgpt good": 1568, + "report provides": 9718, + "preliminary evaluation": 8706, + "evaluation chatgpt": 3546, + "including translation": 5197, + "multilingual translation": 7621, + "translation ability": 11635, + "candidate prompts": 1335, + "minor performance": 7057, + "performance differences": 8378, + "evaluating number": 3531, + "number benchmark": 7949, + "benchmark test": 1131, + "test sets": 11372, + "chatgpt performs": 1587, + "performs competitively": 8469, + "google translate": 4662, + "target language": 11105, + "improves translation": 5153, + "translation performance": 11639, + "performance significantly": 8428, + "chatgpt does": 1552, + "good results": 4660, + "performance chatgpt": 8369, + "chatgpt significantly": 1598, + "multilingual multimodal": 7619, + "paper proposes": 8259, + "evaluating interactive": 3525, + "interactive llms": 5497, + "chatgpt using": 1601, + "using publicly": 11965, + "publicly available": 9210, + "covering different": 2287, + "tasks evaluate": 11196, + "aspects chatgpt": 830, + "based data": 1028, + "newly designed": 7849, + "multimodal dataset": 7627, + "dataset chatgpt": 2482, + "chatgpt outperforms": 1585, + "llms zeroshot": 6682, + "zeroshot learning": 12316, + "finetuned models": 4116, + "models tasks": 7542, + "nonlatin script": 7896, + "script languages": 10164, + "generate multimodal": 4456, + "multimodal content": 7625, + "textual prompts": 11439, + "intermediate code": 5512, + "code generation": 1714, + "10 different": 2, + "different reasoning": 2903, + "logical reasoning": 6701, + "commonsense reasoning": 1803, + "access external": 202, + "llm improve": 6415, + "improve performance": 5132, + "prompt engineering": 8990, + "evaluation set": 3578, + "new chinese": 7813, + "pretraining language": 8786, + "model based": 7111, + "based t5": 1059, + "t5 model": 11072, + "different sources": 2907, + "general domain": 4403, + "comprehensive benchmarks": 1929, + "benchmarks like": 1140, + "glue superglue": 4650, + "significant advancements": 10401, + "model pretraining": 7205, + "drawing inspiration": 3092, + "understanding generation": 11772, + "evaluation benchmark": 3543, + "datasets covering": 2522, + "generation tasks": 4582, + "aim facilitate": 539, + "facilitate research": 3956, + "research development": 9783, + "benchmark released": 1128, + "largescale pretrained": 6140, + "chatgpt understand": 1600, + "comparative study": 1832, + "study chatgpt": 10850, + "chatgpt finetuned": 1562, + "finetuned bert": 4110, + "recently chatgpt": 9491, + "chatgpt attracted": 1538, + "great attention": 4745, + "generate fluent": 4447, + "highquality responses": 4912, + "responses human": 9855, + "prior studies": 8837, + "studies shown": 10845, + "shown chatgpt": 10373, + "generation ability": 4513, + "compared existing": 1845, + "existing models": 3703, + "quantitative analysis": 9249, + "understanding ability": 11762, + "little attention": 6383, + "ability chatgpt": 134, + "chatgpt evaluating": 1555, + "glue benchmark": 4649, + "bertstyle models": 1162, + "models chatgpt": 7276, + "falls short": 3992, + "short handling": 10350, + "tasks chatgpt": 11171, + "outperforms bert": 8149, + "bert models": 1156, + "models inference": 7364, + "chatgpt achieves": 1536, + "achieves comparable": 283, + "comparable performance": 1823, + "performance compared": 8374, + "sentiment analysis": 10269, + "questionanswering tasks": 9286, + "tasks additionally": 11163, + "combining advanced": 1784, + "prompting strategies": 9024, + "existing large": 3692, + "llms generating": 6549, + "training llm": 11565, + "computation requirements": 1967, + "methods rely": 7007, + "learning rl": 6240, + "approach called": 732, + "significantly smaller": 10450, + "method does": 6946, + "does require": 3049, + "internal representations": 5518, + "llm token": 6427, + "probability distribution": 8855, + "applied various": 719, + "various llms": 12074, + "llms including": 6562, + "approach significantly": 762, + "compared base": 1838, + "comprehensive study": 1947, + "study language": 10858, + "understanding tasks": 11785, + "models demonstrated": 7297, + "demonstrated impressive": 2679, + "impressive performance": 5111, + "performance various": 8446, + "various natural": 12079, + "showcasing strong": 10366, + "understanding reasoning": 11781, + "reasoning capabilities": 9409, + "handle various": 4803, + "explored especially": 3851, + "comprehensive experimental": 1937, + "test samples": 11370, + "understanding nlu": 11778, + "nlu tasks": 7882, + "findings indicate": 4090, + "outperforms existing": 8152, + "average performance": 984, + "inference sentiment": 5274, + "analysis tasks": 628, + "challenges including": 1483, + "addressing challenges": 418, + "overall performance": 8173, + "performance generalization": 8393, + "generalization abilities": 4425, + "human feedback": 4969, + "paper focus": 8230, + "realworld applications": 9387, + "applications particularly": 711, + "function assessed": 4310, + "assessed human": 839, + "learning human": 6214, + "feedback rlhf": 4023, + "recent works": 9484, + "improve quality": 5135, + "llms human": 6559, + "guidance propose": 4778, + "policy search": 8562, + "search problem": 10179, + "problem reinforcement": 8869, + "promising alternative": 8965, + "furthermore demonstrate": 4328, + "improving quality": 5163, + "images generated": 5066, + "generative model": 4603, + "ranking feedback": 9329, + "feedback experiments": 4021, + "significantly enhance": 10429, + "generated images": 4478, + "overall work": 8175, + "effective approach": 3137, + "human machine": 4980, + "code released": 1731, + "paper present": 8244, + "leverages large": 6284, + "prompting methods": 9019, + "methods generate": 6990, + "generate multiple": 4458, + "datasets including": 2534, + "approach achieves": 729, + "achieves significant": 298, + "significant improvements": 10413, + "existing baselines": 3679, + "significantly outperform": 10442, + "outperform stateoftheart": 8137, + "potential large": 8628, + "models conversational": 7289, + "including natural": 5189, + "processing computer": 8900, + "computer vision": 1981, + "learning models": 6230, + "models significant": 7524, + "significant impact": 10411, + "impact field": 5080, + "integration llms": 5465, + "problems including": 8873, + "llms field": 6537, + "various applications": 12051, + "applications llms": 707, + "challenges arise": 1476, + "data resources": 2454, + "finally discuss": 4073, + "promising directions": 8968, + "current state": 2361, + "highlight potential": 4893, + "potential benefits": 8617, + "generative ai": 4593, + "chatgpt gpt4": 1569, + "text images": 11400, + "worth noting": 12283, + "model gpt4": 7158, + "help chatgpt": 4847, + "content creation": 2133, + "answering question": 673, + "comprehensive review": 1946, + "review existing": 9974, + "techniques applications": 11334, + "model architecture": 7108, + "pretraining generative": 8784, + "generative modeling": 4604, + "methods like": 6998, + "diffusion models": 2928, + "models introducing": 7367, + "tasks based": 11168, + "including text": 5195, + "3d content": 63, + "discuss challenges": 2970, + "chatgpt deep": 1547, + "generate texts": 4470, + "given topics": 4644, + "chatgpt chinese": 1542, + "results revealed": 9930, + "performed better": 8463, + "chatgpt human": 1573, + "fewshot prompting": 4037, + "prompting large": 9012, + "models large": 7372, + "ability perform": 160, + "models directly": 7305, + "prior research": 8836, + "appropriate prompt": 779, + "improving performance": 5162, + "specifically introduce": 10633, + "introduce metric": 5543, + "lead unsatisfactory": 6168, + "quality based": 9237, + "based observation": 1052, + "observation propose": 7983, + "strategy based": 10784, + "mainstream models": 6777, + "models gpt3": 7347, + "various downstream": 12061, + "results indicate": 9909, + "enhance models": 3391, + "learning performance": 6233, + "evaluating chatgpt": 3523, + "grammatical error": 4724, + "error correction": 3461, + "chatgpt cuttingedge": 1546, + "lot attention": 6727, + "strong ability": 10805, + "report aim": 9713, + "evaluate chatgpt": 3504, + "stateoftheart models": 10719, + "benchmark dataset": 1114, + "baselines terms": 1078, + "automatic evaluation": 936, + "evaluation metrics": 3568, + "grammatical correctness": 4723, + "evaluation quantitatively": 3575, + "results demonstrate": 9891, + "demonstrate chatgpt": 2648, + "intelligence ai": 5470, + "chatgpt large": 1576, + "model trained": 7230, + "support dynamic": 11000, + "ethical issues": 3494, + "effectively create": 3151, + "technology based": 11339, + "information content": 5290, + "chat generative": 1524, + "pretrained transformer": 8766, + "massive data": 6846, + "years researchers": 12296, + "basic concepts": 1087, + "information knowledge": 5301, + "semantic communication": 10229, + "furthermore propose": 4336, + "verify proposed": 12115, + "instruction data": 5399, + "models empirical": 7308, + "empirical study": 3281, + "realworld use": 9395, + "success chatgpt": 10910, + "chatgpt recently": 1592, + "achieving remarkable": 316, + "remarkable results": 9685, + "significantly enhances": 10430, + "models performance": 7489, + "generated results": 4486, + "consistent human": 2093, + "current research": 2359, + "impact different": 5079, + "model performance": 7197, + "performance especially": 8384, + "paper explore": 8226, + "explore performance": 3842, + "performance large": 8403, + "models based": 7265, + "based instruction": 1040, + "instruction tuning": 5414, + "tuning different": 11691, + "data evaluation": 2407, + "evaluation dataset": 3550, + "dataset consisting": 2488, + "base model": 1023, + "model results": 7209, + "tasks openended": 11250, + "openended generation": 8050, + "data size": 2460, + "potential future": 8623, + "research directions": 9787, + "base models": 1024, + "models training": 7547, + "training methods": 11568, + "tasks release": 11270, + "evaluation datasets": 3551, + "model checkpoints": 7123, + "case study": 1406, + "tools fail": 11499, + "paper investigates": 8239, + "aims enhance": 544, + "novel twostep": 7940, + "prompt strategy": 8997, + "chatgpt currently": 1545, + "widely used": 12220, + "zeroshot scenarios": 12325, + "scenarios demonstrated": 10125, + "improve average": 5120, + "problem large": 8862, + "significant progress": 10417, + "llms remains": 6631, + "commonsense questions": 1802, + "effectively leverage": 3158, + "answering questions": 674, + "conduct series": 2035, + "series experiments": 10293, + "experiments evaluate": 3780, + "evaluate chatgpts": 3505, + "results gpts": 9906, + "tasks struggle": 11282, + "knowledge using": 5709, + "answer question": 659, + "knowledge llms": 5693, + "llms instruction": 6568, + "instruction following": 5404, + "concepts paper": 1994, + "llms set": 6645, + "usually encode": 11980, + "llm parameters": 6421, + "open problem": 8033, + "problem paper": 8866, + "dialogue tasks": 2866, + "small number": 10511, + "exhibit high": 3657, + "errors chatgpt": 3467, + "chatgpt highly": 1572, + "comprehensive evaluation": 1933, + "shown remarkable": 10386, + "remarkable potential": 9682, + "potential various": 8639, + "exploring potential": 3861, + "correction gec": 2240, + "zeroshot chainofthought": 12312, + "chainofthought cot": 1459, + "using incontext": 11948, + "evaluation involves": 3559, + "chatgpts performance": 1612, + "official test": 8018, + "different languages": 2886, + "results human": 9908, + "human evaluations": 4966, + "evaluations demonstrate": 3585, + "chatgpt excellent": 1557, + "correct errors": 2236, + "performance nonenglish": 8418, + "lowresource settings": 6748, + "highlights potential": 4898, + "gec tasks": 4398, + "tasks analysis": 11164, + "various types": 12098, + "chatgpt effectively": 1553, + "parameterefficient finetuning": 8285, + "models success": 7535, + "llms like": 6580, + "like gpt3": 6330, + "gpt3 chatgpt": 4680, + "taskspecific data": 11308, + "various finetuning": 12067, + "finetuning methods": 4134, + "finetuning peft": 4139, + "requires finetuning": 9766, + "llms achieving": 6449, + "achieving comparable": 312, + "comparable better": 1817, + "better performance": 1180, + "peft methods": 8337, + "llms paper": 6604, + "paper presents": 8248, + "framework integrates": 4262, + "integrates various": 5459, + "llms different": 6505, + "different tasks": 2909, + "framework includes": 4261, + "llms llama": 6589, + "framework designed": 4245, + "evaluation new": 3570, + "furthermore evaluate": 4330, + "evaluate effectiveness": 3506, + "math reasoning": 6858, + "reasoning datasets": 9419, + "datasets results": 2547, + "trainable parameters": 11532, + "powerful llms": 8660, + "provide promising": 9164, + "framework finetuning": 4252, + "llms downstream": 6509, + "practical applications": 8664, + "systems large": 11060, + "models emerged": 7307, + "step step": 10748, + "solving math": 10559, + "problems requires": 8875, + "focus evaluating": 4175, + "ability large": 149, + "models work": 7560, + "including gpt4": 5180, + "gpt4 chatgpt": 4693, + "llama various": 6393, + "provide detailed": 9153, + "detailed analysis": 2794, + "power large": 8645, + "cell type": 1436, + "type annotation": 11716, + "rna sequencing": 10030, + "task requires": 11144, + "chatgpt new": 1583, + "researchers conduct": 9811, + "efficiently accurately": 3201, + "new insights": 7822, + "using chatgpt": 11939, + "potentially lead": 8642, + "reasoning ability": 9403, + "ability comprehensive": 136, + "transformer gpt4": 11612, + "advanced reasoning": 451, + "tasks report": 11272, + "popular benchmarks": 8571, + "comparison chatgpt": 1863, + "results chatgpt": 9884, + "significantly better": 10424, + "reasoning benchmarks": 9408, + "results gpt4": 9904, + "higher performance": 4883, + "datasets benchmarks": 2518, + "performance drops": 8383, + "newly released": 7851, + "reasoning remains": 9436, + "remains challenging": 9651, + "benchmark suite": 1130, + "align language": 567, + "models human": 7352, + "human preferences": 4984, + "significantly enhancing": 10431, + "interactions humans": 5494, + "humans models": 5020, + "supervised finetuning": 10986, + "finetuning sft": 4143, + "reward model": 9997, + "proximal policy": 9190, + "policy optimization": 8560, + "optimization ppo": 8095, + "novel learning": 7924, + "learning paradigm": 6232, + "responses generated": 9853, + "generated different": 4477, + "align human": 565, + "model output": 7191, + "robust finetuning": 10043, + "alignment process": 579, + "performance comparable": 8372, + "recently large": 9499, + "like chatgpt": 6322, + "chatgpt demonstrated": 1548, + "demonstrated remarkable": 2685, + "remarkable performance": 9676, + "performance variety": 8445, + "variety natural": 12043, + "processing tasks": 8912, + "tasks effectiveness": 11193, + "domain specifically": 3055, + "remains explored": 9655, + "explored paper": 3852, + "paper conduct": 8214, + "capabilities multimodal": 1349, + "indicate chatgpt": 5241, + "stateoftheart methods": 10717, + "traditional methods": 11520, + "despite potential": 2784, + "potential chainofthought": 8618, + "chainofthought prompting": 1463, + "need specialized": 7769, + "training finetuning": 11555, + "provides insights": 9176, + "foundation future": 4218, + "future work": 4358, + "social media": 10526, + "recently released": 9505, + "artificial general": 813, + "general intelligence": 4406, + "intelligence agi": 5469, + "november 2022": 7942, + "chatgpt quickly": 1591, + "various aspects": 12053, + "500 articles": 76, + "urgently needed": 11878, + "applications challenges": 703, + "challenges present": 1488, + "foundation model": 4222, + "model alignment": 7106, + "essential step": 3478, + "models finetuned": 7332, + "rl algorithms": 10023, + "end introduce": 3349, + "introduce new": 5545, + "new framework": 7820, + "effectively utilizing": 3166, + "approach selects": 757, + "model finetuning": 7151, + "effectively improve": 3155, + "improve model": 5130, + "performance reward": 8427, + "metrics large": 7029, + "models diffusion": 7303, + "gpt models": 4669, + "ai generated": 516, + "generated content": 4475, + "content aigc": 2131, + "presents considerable": 8731, + "detect text": 2798, + "text generated": 11394, + "generated large": 4479, + "growing need": 4769, + "address challenges": 389, + "machinegenerated texts": 6765, + "linguistic analyses": 6369, + "sentences complex": 10267, + "syntactic structures": 11036, + "results suggest": 9931, + "finetuned training": 4117, + "comprehensive analysis": 1926, + "generative large": 4597, + "models publicly": 7504, + "text summarization": 11416, + "pretrained large": 8754, + "models exponential": 7326, + "exponential growth": 3864, + "electronic health": 3210, + "health records": 4837, + "poses significant": 8591, + "significant challenge": 10405, + "clinical information": 1675, + "tackle challenge": 11081, + "support clinical": 10999, + "information retrieval": 5313, + "aims generating": 548, + "generating concise": 4497, + "concise summaries": 2005, + "key information": 5633, + "rapid advancement": 9333, + "nlp techniques": 7878, + "models plms": 7491, + "methods datasets": 6981, + "datasets evaluation": 2529, + "need comprehensive": 7765, + "present systematic": 8725, + "systematic review": 11049, + "recent advancements": 9453, + "llms help": 6557, + "challenges future": 1481, + "future directions": 4348, + "available datasets": 973, + "discuss existing": 2971, + "existing challenges": 3681, + "promising future": 8969, + "era llms": 3456, + "research community": 9780, + "study presents": 10861, + "presents comprehensive": 8730, + "rapidly evolving": 9344, + "field artificial": 4046, + "processing capabilities": 8899, + "pretrained transformers": 8769, + "transformers gpt": 11626, + "effectiveness method": 3173, + "research area": 9776, + "llms demonstrated": 6495, + "remarkable ability": 9667, + "tasks paper": 11252, + "generative llms": 4602, + "retrieval ir": 9946, + "experiments reveal": 3799, + "superior results": 10980, + "supervised methods": 10990, + "lowresource languages": 6746, + "capabilities chatgpt": 1338, + "model small": 7220, + "chatgpt generated": 1567, + "generated data": 4476, + "code reproduce": 1732, + "reproduce results": 9746, + "results available": 9880, + "report presents": 9717, + "dialogue understanding": 2868, + "supervised models": 10991, + "promising results": 8975, + "results generating": 9903, + "responses furthermore": 9852, + "potential avenues": 8616, + "avenues future": 979, + "languages paper": 5998, + "chatgpt language": 1575, + "achieving competitive": 313, + "competitive performance": 1877, + "english chinese": 3380, + "limited resources": 6357, + "believe work": 1108, + "people use": 8340, + "use chatgpt": 11885, + "data code": 2392, + "models available": 7263, + "empowering large": 3301, + "complex instructions": 1897, + "data brings": 2391, + "struggle produce": 10828, + "large amounts": 6003, + "varying levels": 12102, + "using llm": 11958, + "starting initial": 10693, + "set instructions": 10309, + "instructions use": 5441, + "data finetune": 2411, + "finetune llama": 4104, + "resulting model": 9875, + "evaluation results": 3576, + "gpt4 automatic": 4690, + "findings suggest": 4098, + "llms code": 6478, + "data public": 2446, + "public httpsgithubcomnlpxucanwizardlm": 9203, + "impressive ability": 5108, + "interact users": 5485, + "challenging tasks": 1507, + "models like": 7380, + "room improvement": 10056, + "responses questions": 9858, + "based chatgpt": 1026, + "objectively comprehensively": 7978, + "feedback mechanism": 4022, + "datasets demonstrate": 2523, + "task converts": 11120, + "converts natural": 2214, + "llms work": 6679, + "work natural": 12257, + "tasks specifically": 11278, + "propose llmbased": 9077, + "llmbased framework": 6435, + "demonstration examples": 2707, + "prompt llms": 8996, + "questions different": 9291, + "valuable information": 12019, + "outperforms stateoftheart": 8160, + "demonstrates strong": 2697, + "strong generalization": 10809, + "generalization ability": 4426, + "capacity largescale": 1383, + "agent memory": 496, + "longterm memory": 6717, + "generate precise": 4462, + "memory activated": 6912, + "model input": 7165, + "finetuning experimental": 4125, + "enables llms": 3311, + "multiturn dialogue": 7686, + "comparable chatgpt": 1819, + "scenarios involving": 10129, + "test set": 11371, + "abilities llms": 127, + "survey deep": 11025, + "deep neural": 2603, + "networks dnns": 7794, + "various fields": 12066, + "high performance": 4874, + "highquality data": 4909, + "data expensive": 2409, + "methods proposed": 7005, + "rapid evolution": 9341, + "paper provide": 8261, + "provide comprehensive": 9152, + "comprehensive survey": 1948, + "research chatgpt": 9778, + "relations paper": 9608, + "paper aims": 8209, + "quantitatively evaluate": 9253, + "evaluate performance": 3511, + "promising performance": 8970, + "various tasks": 12094, + "tasks conduct": 11180, + "extensive evaluations": 3889, + "13 datasets": 15, + "downstream applications": 3075, + "prompt templates": 8999, + "zeroshot prompt": 12320, + "prompt template": 8998, + "learning icl": 6217, + "classification tasks": 1655, + "time chatgpt": 11473, + "chatgpt exhibits": 1559, + "exhibits strong": 3672, + "strong performance": 10811, + "reasoning causal": 9416, + "performs poorly": 8472, + "parsing task": 8305, + "models solving": 7528, + "machine learning": 6753, + "learning ml": 6228, + "significant demand": 10409, + "predominant approaches": 8697, + "understand human": 11757, + "human developers": 4960, + "ability understand": 166, + "paper aim": 8208, + "aim bridge": 536, + "bridge gap": 1271, + "machine intelligence": 6752, + "leverages stateoftheart": 6291, + "stateoftheart llms": 10715, + "llms develop": 6503, + "novel tasks": 7934, + "capability llms": 1372, + "perform thorough": 8359, + "results new": 9918, + "new tasks": 7845, + "achieve high": 249, + "translation using": 11645, + "using large": 11951, + "translation mt": 11638, + "using deep": 11941, + "deep learning": 2597, + "llms gpt3": 6551, + "chatgpt brings": 1540, + "new challenges": 7812, + "challenges opportunities": 1485, + "using llms": 11959, + "new evaluation": 7819, + "mitigate risks": 7072, + "new directions": 7817, + "opportunities challenges": 8081, + "relation extraction": 9604, + "shortcomings llms": 10354, + "gap llms": 4379, + "widelyused datasets": 12223, + "achieves sota": 299, + "competitive performances": 1878, + "blackbox prompt": 1221, + "derivativefree optimization": 2730, + "network large": 7788, + "tasks llms": 11243, + "llms believe": 6467, + "tasks target": 11288, + "shares similarities": 10335, + "task experiments": 11127, + "achieves competitive": 286, + "responses llms": 9857, + "simple efficient": 10462, + "efficient approach": 3192, + "approach based": 731, + "based prompt": 1057, + "models introduce": 7366, + "output quality": 8167, + "need manual": 7768, + "manual intervention": 6821, + "refinement framework": 9564, + "demonstrate superiority": 2668, + "superiority proposed": 10983, + "instructions instruction": 5436, + "able improve": 176, + "models challenging": 7275, + "tasks following": 11214, + "following instructions": 4186, + "instructions general": 5433, + "general lack": 4411, + "intermediate steps": 5516, + "steps address": 10753, + "decompose tasks": 2585, + "tasks provide": 11262, + "different model": 2892, + "model sizes": 7219, + "analysis indicates": 623, + "stepbystep instruction": 10750, + "facilitate future": 3954, + "research release": 9807, + "human quality": 4986, + "quality evaluation": 9240, + "language planning": 5959, + "previous work": 8820, + "models lms": 7462, + "paper define": 8218, + "time propose": 11475, + "approach improve": 745, + "llms task": 6667, + "task use": 11148, + "planning dataset": 8516, + "empirical results": 3278, + "demonstrate method": 2659, + "method significantly": 6965, + "ability llms": 154, + "llms especially": 6519, + "critical role": 2312, + "remarkable achievements": 9668, + "data widely": 2469, + "various industries": 12070, + "new era": 7818, + "deep models": 2602, + "models rapidly": 7506, + "research paradigm": 9803, + "represents landmark": 9744, + "general artificial": 4401, + "future development": 4347, + "gap paper": 4381, + "paper systematically": 8269, + "key components": 5629, + "causal reasoning": 1426, + "ability crucial": 137, + "nlp applications": 7860, + "despite impressive": 2783, + "various nlp": 12085, + "unclear chatgpt": 11738, + "reasoning paper": 9431, + "conduct comprehensive": 2021, + "experiments chatgpt": 3766, + "cot techniques": 2271, + "performs better": 8467, + "high accuracy": 4867, + "manual annotation": 6818, + "timeconsuming errorprone": 11478, + "study explores": 10853, + "compare chatgpt": 1835, + "successfully deployed": 10932, + "making process": 6803, + "approaches large": 771, + "chatbot chatgpt": 1530, + "potential chatgpt": 8620, + "summarization performance": 10960, + "higher level": 4881, + "study investigates": 10857, + "varying difficulty": 12100, + "difficulty levels": 2926, + "tasks propose": 11260, + "discriminative generative": 2968, + "chain thought": 1449, + "thought cot": 11463, + "cot approach": 2267, + "chatgpt achieve": 1535, + "comparable stateoftheart": 1826, + "methods reveals": 7010, + "complex tasks": 1906, + "understanding complex": 11768, + "complex structures": 1905, + "indepth analysis": 5239, + "difficulties understanding": 2923, + "findings provide": 4095, + "graph construction": 4730, + "information extraction": 5294, + "closed set": 1682, + "fall short": 3989, + "domains new": 3061, + "automatically extract": 952, + "new task": 7844, + "existing datasets": 3682, + "datasets based": 2517, + "simple effective": 10459, + "hope proposed": 4933, + "code datasets": 1712, + "datasets available": 2516, + "models previous": 7496, + "previous studies": 8818, + "studies revealed": 10844, + "lack capacity": 5740, + "capacity handle": 1382, + "works attempted": 12272, + "knowledge plms": 5696, + "despite promising": 2785, + "rich knowledge": 10005, + "knowledge pretrained": 5697, + "knowledgeintensive tasks": 5717, + "new paradigm": 7829, + "prompt like": 8994, + "model knowledge": 7169, + "including roberta": 5192, + "tasks glue": 11217, + "benchmarks demonstrate": 1136, + "knowledge stored": 5705, + "performance code": 8371, + "code available": 1700, + "blackbox language": 1218, + "llms exhibit": 6524, + "generated text": 4489, + "detection methods": 2806, + "adversarial robustness": 480, + "method proposed": 6962, + "generation method": 4550, + "realworld scenarios": 9392, + "probability distributions": 8856, + "scenarios specifically": 10133, + "used identify": 11901, + "experiments demonstrate": 3771, + "chinese english": 1624, + "english datasets": 3382, + "datasets furthermore": 2533, + "retranslation polishing": 9943, + "low training": 6736, + "data instruction": 2424, + "tuning large": 11693, + "llms gained": 6540, + "gained attention": 4361, + "unlock potential": 11837, + "potential llms": 8632, + "offers advantages": 8016, + "adaptation large": 354, + "tasks finetuning": 11212, + "finetuning approach": 4120, + "training models": 11570, + "millions billions": 7040, + "parameters large": 8295, + "amounts data": 612, + "computational costs": 1972, + "data used": 2467, + "training costs": 11544, + "improve data": 5122, + "data efficiency": 2405, + "paper conducts": 8217, + "llm training": 6428, + "regarding task": 9580, + "performance specific": 8429, + "specific task": 10620, + "instruction types": 5422, + "tuning data": 11689, + "taskspecific models": 11311, + "models results": 7517, + "models trained": 7545, + "trained using": 11538, + "taskrelated data": 11156, + "powerful capabilities": 8653, + "capabilities text": 1359, + "text understanding": 11417, + "based llms": 1048, + "cause significant": 1430, + "llms training": 6671, + "method called": 6942, + "effectively transferred": 3164, + "experiments various": 3810, + "various datasets": 12057, + "datasets method": 2537, + "method effectively": 6948, + "representation learning": 9728, + "presents novel": 8733, + "novel transformer": 7938, + "transformer architecture": 11609, + "method fully": 6951, + "fully consider": 4303, + "edges graph": 3123, + "attention module": 891, + "specifically propose": 10638, + "attention mechanism": 890, + "graphstructured data": 4741, + "architecture named": 792, + "graph data": 4731, + "experiments benchmark": 3764, + "benchmark datasets": 1115, + "method outperforms": 6959, + "models better": 7269, + "empower large": 3293, + "model perform": 7196, + "answering large": 669, + "model llm": 7178, + "llm gained": 6412, + "gained popularity": 4364, + "achieved remarkable": 268, + "results opendomain": 9919, + "domainspecific scenarios": 3066, + "specific knowledge": 10613, + "attracted widespread": 901, + "widespread attention": 12227, + "benchmarks available": 1135, + "provide benchmark": 9150, + "answering qa": 672, + "dataset named": 2503, + "technical problems": 11328, + "dataset contains": 2492, + "addition propose": 376, + "llm achieve": 6402, + "achieve better": 240, + "domainspecific tasks": 3067, + "demonstrate approach": 2646, + "model fusion": 7152, + "framework outperforms": 4271, + "commonly used": 1799, + "llm retrieval": 6426, + "retrieval methods": 9947, + "chatgpt likely": 1578, + "different methods": 2890, + "emotional support": 3266, + "like gpt": 6329, + "capabilities language": 1341, + "processing paper": 8909, + "paper examines": 8225, + "score human": 10157, + "slightly different": 10501, + "different human": 2883, + "age gender": 494, + "based language": 1042, + "llms make": 6590, + "understand capabilities": 11755, + "capabilities limitations": 1346, + "llms exhibited": 6526, + "emergent incontext": 3258, + "models solve": 7527, + "solve complex": 10549, + "propose effective": 9063, + "effective efficient": 3138, + "twostage framework": 11712, + "boost reasoning": 1240, + "reasoning abilities": 9402, + "llms test": 6669, + "demonstrations multiple": 2709, + "query input": 9262, + "llms effectively": 6511, + "effectively efficiently": 3152, + "method achieves": 6936, + "terms accuracy": 11357, + "accuracy efficiency": 232, + "multitask instruction": 7676, + "tuning llama": 11697, + "preliminary study": 8708, + "attracted substantial": 898, + "academic industrial": 193, + "fewshot zeroshot": 4041, + "ability handle": 144, + "tasks recent": 11267, + "recent work": 9483, + "data recently": 2451, + "recently proposed": 9504, + "exhibits impressive": 3670, + "broad range": 1289, + "range tasks": 9321, + "performance llms": 8409, + "explore capabilities": 3839, + "capabilities llms": 1347, + "scenarios choose": 10123, + "data tasks": 2463, + "data significantly": 2459, + "insights future": 5366, + "application evaluation": 695, + "mental health": 6920, + "increasing attention": 5227, + "developing evaluating": 2831, + "focus exploring": 4176, + "scenarios evaluation": 10126, + "evaluation experiments": 3554, + "assessment findings": 847, + "findings demonstrate": 4087, + "demonstrate feasibility": 2657, + "feasibility using": 4007, + "impact prompt": 5081, + "prompt designs": 8989, + "user experience": 11910, + "text classification": 11384, + "promptbased data": 9002, + "requires substantial": 9769, + "computation resources": 1968, + "recent efforts": 9464, + "tasks practical": 11256, + "area research": 798, + "paper investigate": 8238, + "llms achieve": 6444, + "blackbox model": 1220, + "model feature": 7150, + "feature extractor": 4011, + "data data": 2399, + "using promptbased": 11964, + "smaller parameter": 10516, + "parameter size": 8280, + "model extensive": 7147, + "experiments text": 3805, + "datasets approach": 2514, + "performs par": 8471, + "ai systems": 522, + "annotated datasets": 644, + "designed specific": 2766, + "specific tasks": 10621, + "tasks difficult": 11191, + "active learning": 336, + "learning mechanism": 6225, + "cases address": 1409, + "address limitations": 404, + "limitations present": 6343, + "learning prompt": 6238, + "models conduct": 7282, + "annotation process": 648, + "process language": 8887, + "exhibited remarkable": 3664, + "finetuning models": 4136, + "expensive timeconsuming": 3728, + "timeconsuming obtain": 11479, + "paper introduces": 8236, + "introduces novel": 5550, + "unsupervised method": 11858, + "improves llms": 5149, + "approach grounded": 744, + "text quality": 11411, + "generate text": 4469, + "building insight": 1311, + "dual roles": 3104, + "student teacher": 10833, + "llm generates": 6414, + "generates answers": 4492, + "model parameters": 7195, + "using reinforcement": 11967, + "tasks reasoning": 11266, + "reasoning problems": 9434, + "effectively improves": 3157, + "translation tasks": 11642, + "tasks furthermore": 11215, + "models different": 7302, + "different sizes": 2906, + "prompts paper": 9039, + "llms answer": 6454, + "ask llms": 821, + "llms provide": 6622, + "answer conditioned": 657, + "prompting strategy": 9025, + "strategy produce": 10789, + "instructionfollowing data": 5428, + "opensource chat": 8056, + "higher quality": 4884, + "existing opensource": 3705, + "chatgpts capability": 1611, + "model publicly": 7206, + "strengths weaknesses": 10797, + "performance range": 8424, + "range natural": 9318, + "tasks ability": 11158, + "ability generate": 143, + "remains underexplored": 9664, + "aims investigate": 551, + "generation capabilities": 4520, + "llms analysis": 6453, + "factors influence": 3969, + "small language": 10507, + "models slms": 7526, + "named entity": 7693, + "entity recognition": 3427, + "recognition relation": 9513, + "various settings": 12093, + "struggle complex": 10825, + "analysis reveals": 626, + "pivotal role": 8507, + "instructions llms": 5439, + "llms generate": 6545, + "provides comprehensive": 9174, + "generation abilities": 4512, + "novel perspective": 7929, + "utilizing llms": 12003, + "llms data": 6491, + "domains tasks": 3063, + "including context": 5178, + "context understanding": 2148, + "understanding code": 11766, + "generation language": 4540, + "work aim": 12247, + "data analysis": 2384, + "propose framework": 9067, + "tackle problems": 11087, + "design taskspecific": 2756, + "compare performance": 1836, + "professional human": 8927, + "gpt4 achieve": 4689, + "achieve comparable": 242, + "performance humans": 8396, + "humans provide": 5021, + "shed light": 10338, + "technical report": 11329, + "report large": 9714, + "like llama": 6333, + "performances various": 8461, + "specific domains": 10610, + "domainspecific knowledge": 3065, + "problems paper": 8874, + "domain knowledge": 3054, + "training stage": 11584, + "stage design": 10675, + "model tackle": 7227, + "practical issues": 8668, + "alleviate hallucination": 586, + "hallucination problem": 4795, + "release data": 9620, + "nlg evaluation": 7857, + "generation nlg": 4556, + "evaluation benchmarks": 3544, + "benchmarks limited": 1141, + "result poor": 9870, + "forms evaluation": 4205, + "issue paper": 5591, + "novel method": 7926, + "method named": 6958, + "existing evaluation": 3685, + "leverage large": 6276, + "nlg tasks": 7858, + "translation text": 11643, + "image caption": 5059, + "correlation human": 2248, + "query reformulation": 9263, + "existing methods": 3698, + "models ability": 7252, + "ability produce": 162, + "question paper": 9277, + "retrieval performance": 9948, + "performance propose": 8422, + "crucial aspect": 2327, + "nlp research": 7869, + "adequately addressed": 423, + "including large": 5181, + "remains largely": 9657, + "largely unexplored": 6121, + "model paper": 7193, + "methods propose": 7004, + "propose probabilistic": 9097, + "addresses issue": 416, + "demonstrate proposed": 2663, + "realworld datasets": 9388, + "finally analyze": 4070, + "analyze performance": 633, + "issue large": 5588, + "language modelsllms": 5957, + "chatgpt evaluator": 1556, + "effective strategies": 3146, + "human assistance": 4952, + "responses chatgpt": 9850, + "evaluation bias": 3545, + "alignment human": 576, + "human judgments": 4976, + "human annotation": 4949, + "research large": 9796, + "research focuses": 9793, + "enhancing performance": 3409, + "performance existing": 8385, + "existing knowledge": 3689, + "llms limited": 6588, + "aims evaluate": 545, + "evaluate llms": 3509, + "assessing ability": 842, + "ability identify": 146, + "introduce automated": 5535, + "questions diverse": 9293, + "diverse categories": 3013, + "gpt3 instructgpt": 4681, + "models demonstrate": 7296, + "findings highlight": 4089, + "capabilities models": 1348, + "llms remarkable": 6633, + "advancements field": 460, + "llms explore": 6532, + "behavioral characteristics": 1102, + "behavioral patterns": 1103, + "furthermore experiments": 4332, + "llms study": 6663, + "shedding light": 10343, + "llms anticipate": 6455, + "generation generative": 4534, + "generative pretraining": 4618, + "task aims": 11113, + "response user": 9847, + "user input": 11912, + "reasoning process": 9435, + "task challenging": 11118, + "significant discrepancy": 10410, + "user queries": 11916, + "limited scale": 6358, + "bridging gap": 1278, + "text structured": 11415, + "graphs paper": 4739, + "limitations propose": 6345, + "novel pretrained": 7930, + "task specifically": 11146, + "task pretrain": 11140, + "model goal": 7155, + "additionally propose": 384, + "propose automatic": 9058, + "large scale": 6118, + "methods experimental": 6984, + "baseline systems": 1071, + "systems remarkable": 11067, + "analysis demonstrates": 620, + "task automation": 11116, + "recent success": 9478, + "shown promising": 10384, + "completing tasks": 1892, + "user instructions": 11913, + "increasing number": 5228, + "number tasks": 7953, + "explore question": 3847, + "framework facilitate": 4251, + "users privacy": 11929, + "generic knowledge": 4626, + "evaluate proposed": 3513, + "diverse scenarios": 3026, + "llm chatgpt": 6404, + "chatgpt bring": 1539, + "data science": 2458, + "questions large": 9294, + "potential risks": 8634, + "risks llms": 10021, + "like gpt4": 6332, + "traditional ai": 11516, + "ai tools": 525, + "llms specifically": 6660, + "remarkable capabilities": 9669, + "humanlevel performance": 5004, + "directly used": 2953, + "specialized domains": 10602, + "explore potential": 3844, + "llms gpt4": 6553, + "results real": 9929, + "demonstrate potential": 2662, + "future advancements": 4346, + "launch chatgpt": 6159, + "employ chatgpt": 3284, + "prompts responses": 9040, + "question accuracy": 9265, + "anomaly detection": 654, + "detection based": 2802, + "play critical": 8526, + "reliability software": 9631, + "software systems": 10538, + "studies explored": 10841, + "achieved notable": 267, + "face limitations": 3947, + "resource consumption": 9823, + "detection framework": 2804, + "framework referred": 4275, + "accuracy response": 233, + "log data": 6697, + "chatgpt provide": 1590, + "comparable human": 1821, + "human experts": 4968, + "reduce manual": 9544, + "manual verification": 6823, + "extensively evaluate": 3910, + "baseline methods": 1067, + "methods terms": 7016, + "tuned models": 11686, + "reliable evaluation": 9633, + "challenges associated": 1477, + "privacy protection": 8847, + "response challenges": 9844, + "challenges introduce": 1484, + "superior model": 10974, + "given llms": 4633, + "evaluation ability": 3540, + "models tuned": 7548, + "avoiding potential": 995, + "potential data": 8621, + "data leakage": 2431, + "crucial achieving": 2326, + "intelligence existing": 5471, + "existing approaches": 3678, + "extremely large": 3940, + "models gpt4": 7349, + "zeroshot manner": 12317, + "supervised learning": 10988, + "train limited": 11527, + "models remains": 7513, + "remains uncertain": 9662, + "models achieve": 7253, + "address question": 412, + "designed automatically": 2760, + "generate diverse": 4445, + "models minimal": 7466, + "minimal human": 7050, + "human intervention": 4974, + "spanning 50": 10586, + "distinct categories": 2999, + "resulting models": 9876, + "respectively finally": 9840, + "finally evaluate": 4076, + "evaluate ability": 3500, + "ability models": 157, + "unseen tools": 11850, + "training experimental": 11552, + "like gpt35": 6331, + "novel task": 7933, + "task propose": 11142, + "new benchmark": 7809, + "tabular data": 11079, + "academic papers": 194, + "introduce metrics": 5544, + "metrics evaluate": 7025, + "aims identify": 549, + "modern large": 7566, + "llms propose": 6620, + "openais gpt4": 8040, + "code benchmark": 1701, + "benchmark publicly": 1127, + "cognitive ability": 1755, + "chatgpt shown": 1596, + "cognitive abilities": 1754, + "abilities different": 122, + "different models": 2894, + "different fields": 2882, + "test results": 11369, + "traditional metrics": 11521, + "evaluating llms": 3529, + "propose adaptive": 9055, + "llm evaluation": 6408, + "evaluation using": 3582, + "dynamically adjusts": 3108, + "questions difficulty": 9292, + "models abilities": 7251, + "abilities using": 130, + "llms compared": 6479, + "compared humans": 1849, + "humans easily": 5017, + "nlp models": 7868, + "models aim": 7257, + "diagnostic reports": 2856, + "behaves like": 1099, + "questions conduct": 9290, + "llms aspects": 6460, + "mathematical reasoning": 6865, + "models significantly": 7525, + "models using": 7556, + "evaluating large": 3526, + "models chinese": 7277, + "specifically designed": 10626, + "financial text": 4082, + "availability data": 971, + "developing effective": 2830, + "effective text": 3147, + "text processing": 11408, + "advancements large": 462, + "yielded remarkable": 12301, + "performance natural": 8412, + "tasks primarily": 11259, + "analysis dataset": 619, + "opensource llms": 8062, + "llms using": 6676, + "firmly believe": 4154, + "serve valuable": 10297, + "valuable resource": 12021, + "tasks focus": 11213, + "dataset publicly": 2506, + "reasoning capacity": 9415, + "multimodal comprehension": 7624, + "study explore": 10852, + "student model": 10832, + "intermediate reasoning": 5514, + "reasoning steps": 9437, + "llms cot": 6489, + "cot prompts": 2269, + "present novel": 8719, + "distillation method": 2994, + "stateoftheart accuracy": 10702, + "crossdomain generalization": 2318, + "advancement large": 453, + "llms led": 6579, + "regarding potential": 9579, + "llms extract": 6534, + "financial texts": 4083, + "development chinese": 2835, + "provide rigorous": 9166, + "efficacy various": 3183, + "specialized domain": 10601, + "news text": 7853, + "models generative": 7343, + "generative llm": 4601, + "pretrained llm": 8758, + "finetuned llm": 4115, + "extraction large": 3929, + "comparative analysis": 1830, + "improving llms": 5160, + "llms performance": 6607, + "llms evaluated": 6521, + "benchmark following": 1119, + "existing systems": 3711, + "performance human": 8395, + "human beings": 4954, + "reasoning methods": 9430, + "rely external": 9643, + "structures paper": 10822, + "highly effective": 4903, + "pretraining task": 8797, + "models help": 7350, + "achieves stateoftheart": 301, + "different pretrained": 2899, + "general language": 4412, + "testing tasks": 11379, + "era large": 3452, + "chatgpt comparison": 1543, + "emotion recognition": 3264, + "research topic": 9808, + "states current": 10735, + "current works": 2364, + "datasets lack": 2535, + "enhance reliability": 3396, + "annotations paper": 650, + "contrast previous": 2172, + "takes step": 11100, + "providing explanations": 9183, + "introduce benchmark": 5536, + "metrics observe": 7030, + "observe necessity": 7988, + "multimodal large": 7632, + "longstanding challenge": 6714, + "understanding capabilities": 11764, + "capabilities recent": 1356, + "multimodal llm": 7639, + "legal large": 6258, + "bases large": 1081, + "llms shown": 6648, + "shown potential": 10382, + "potential revolutionize": 8633, + "tasks various": 11299, + "various domains": 12058, + "large models": 6109, + "data quality": 2450, + "carefully designed": 1402, + "overcome problem": 8181, + "legal data": 6257, + "effectively reduce": 3161, + "relying solely": 9645, + "enhance ability": 3386, + "capabilities large": 1342, + "models opensourced": 7481, + "models crucial": 7291, + "highly capable": 4900, + "ai models": 518, + "work present": 12259, + "dataset consists": 2489, + "generative language": 4595, + "culture values": 2339, + "context generation": 2141, + "quality control": 9239, + "coverage high": 2285, + "effectiveness dataset": 3169, + "dataset detecting": 2495, + "model bias": 7115, + "chinese large": 1628, + "certain extent": 1444, + "avoid generating": 990, + "research opportunities": 9801, + "data large": 2429, + "recent research": 9475, + "given rise": 4640, + "framework combines": 4243, + "structure learning": 10818, + "leverage power": 6280, + "statistical analysis": 10738, + "build novel": 1307, + "learning introduce": 6219, + "set prompts": 10311, + "data demonstrate": 2400, + "demonstrate significant": 2664, + "critical challenges": 2310, + "pioneering study": 8501, + "llms contain": 6487, + "emphasizing need": 3272, + "human values": 4990, + "model typically": 7233, + "llm responses": 6425, + "aligning llms": 571, + "generating responses": 4509, + "generated llm": 4481, + "experiments shown": 3801, + "comparable results": 1825, + "enhance performance": 3392, + "alignment chatgpt": 573, + "study recent": 10868, + "numerous tasks": 7964, + "based given": 1038, + "given text": 4642, + "text considering": 11385, + "remarkable abilities": 9666, + "abilities various": 132, + "provide preliminary": 9160, + "task generating": 11131, + "variety prompting": 12047, + "explore chatgpts": 3840, + "chatgpts ability": 1610, + "chatgpt analyzing": 1537, + "reveal chatgpt": 9966, + "chatgpt zeroshot": 1602, + "prompting performance": 9021, + "performance gap": 8389, + "corresponding stateoftheart": 2251, + "stateoftheart model": 10718, + "sentiment classification": 10272, + "learning better": 6196, + "structured data": 10820, + "data forms": 2413, + "present despite": 8717, + "large pretrained": 6113, + "domains chatgpt": 3057, + "common knowledge": 1794, + "data remains": 2453, + "work identify": 12253, + "identify crucial": 5046, + "research challenges": 9777, + "data pretraining": 2442, + "work folds": 12252, + "pretraining dubbed": 8777, + "propose implement": 9070, + "vision natural": 12141, + "extensive empirical": 3888, + "performance supervised": 8435, + "paper introduce": 8234, + "dataset aimed": 2477, + "questionanswer qa": 9283, + "qa pairs": 9228, + "safety measures": 10082, + "dataset provides": 2505, + "development deployment": 2836, + "deployment llms": 2725, + "project page": 8956, + "model outputs": 7192, + "stepbystep reasoning": 10751, + "design environment": 2748, + "alignment safe": 580, + "improve training": 5138, + "training stability": 11583, + "opensource implementations": 8057, + "significant challenges": 10406, + "llms alignment": 6452, + "given natural": 4634, + "language questions": 5973, + "prompt learning": 8993, + "llms emerged": 6513, + "emerged recent": 3244, + "prompts lead": 9036, + "llms understand": 6673, + "input question": 5353, + "generate corresponding": 4443, + "faces challenges": 3950, + "existing work": 3715, + "prompts llms": 9038, + "semantic gap": 10234, + "prompting method": 9018, + "related given": 9601, + "given question": 4639, + "questions propose": 9298, + "propose strategies": 9103, + "leverage llms": 6279, + "generate executable": 4446, + "design dynamic": 2747, + "previously generated": 8823, + "strong baseline": 10806, + "models comprehensive": 7281, + "comprehensive overview": 1944, + "llms recently": 6628, + "recently demonstrated": 9492, + "capabilities natural": 1350, + "tasks success": 11284, + "success llms": 10919, + "encompass diverse": 3334, + "context length": 2143, + "alignment training": 582, + "training datasets": 11548, + "rapid development": 9337, + "llm research": 6424, + "overview recent": 8196, + "recent developments": 9463, + "systematic treatment": 11051, + "existing literature": 3695, + "models datasets": 7294, + "broader research": 1294, + "researchers practitioners": 9815, + "insights extensive": 5365, + "existing works": 3716, + "domain adaptation": 3052, + "action recognition": 329, + "findings study": 4097, + "generate logic": 4455, + "specifically models": 10637, + "models predictions": 7492, + "measures consistency": 6886, + "compared baseline": 1839, + "framework enhance": 4249, + "potential challenges": 8619, + "llms knowledge": 6574, + "terms top1": 11365, + "fundamental challenging": 4318, + "aspect natural": 827, + "gap propose": 4385, + "benchmark evaluate": 1116, + "tasks covering": 11184, + "understanding translation": 11787, + "contain rich": 2124, + "analysis design": 621, + "test suite": 11373, + "models learn": 7378, + "based transformer": 1061, + "llms results": 6636, + "consistently improves": 2095, + "datasets pretrained": 2542, + "teaching large": 11321, + "legal professionals": 6261, + "simple prompting": 10466, + "models produce": 7498, + "performed zeroshot": 8464, + "gpt3 models": 4682, + "results llms": 9910, + "thought prompting": 11464, + "enables model": 3312, + "methods method": 7002, + "method enables": 6950, + "evolution large": 3602, + "llms growing": 6556, + "evaluation human": 3557, + "increasingly important": 5232, + "knowledge reasoning": 5698, + "chinese context": 1623, + "context paper": 2145, + "chinese llms": 1631, + "llms conduct": 6482, + "conduct human": 2031, + "evaluation findings": 3555, + "llms perform": 6606, + "automatic human": 938, + "alignment different": 574, + "different aspects": 2875, + "user information": 11911, + "information needs": 5307, + "demonstrated exceptional": 2673, + "exceptional capabilities": 3634, + "generation knowledge": 4539, + "knowledge inference": 5679, + "research llms": 9800, + "model evaluation": 7140, + "models provide": 7501, + "relevant information": 9628, + "information llms": 5305, + "challenges exist": 1479, + "ethical considerations": 3493, + "research chinese": 9779, + "valuable insights": 12020, + "paper provides": 8262, + "enhancement llms": 3400, + "open challenges": 8032, + "factual knowledge": 3976, + "models retrieval": 7518, + "opendomain question": 8045, + "require substantial": 9759, + "solving wide": 10561, + "world knowledge": 12279, + "knowledge including": 5678, + "tasks remains": 11271, + "unclear llms": 11740, + "llms able": 6442, + "study present": 10860, + "opendomain qa": 8044, + "primary research": 8830, + "research questions": 9804, + "llms possess": 6612, + "quality results": 9244, + "evaluating models": 3530, + "evaluation methods": 3566, + "models paper": 7482, + "novel approach": 7915, + "overcoming limitations": 8183, + "limitations previous": 6344, + "previous methods": 8809, + "various forms": 12068, + "capabilities various": 1360, + "llms providing": 6624, + "abilities solve": 129, + "complex problems": 1899, + "editing large": 3127, + "model large": 7170, + "llms showcased": 6646, + "showcased remarkable": 10361, + "automatic prompt": 946, + "leverages llms": 6290, + "taking account": 11102, + "process helps": 8884, + "helps llms": 4856, + "llms better": 6468, + "better align": 1174, + "thinking llms": 11453, + "tasks experimental": 11203, + "performance highquality": 8394, + "exhibits notable": 3671, + "prompt generation": 8991, + "generation good": 4535, + "outofdistribution ood": 8131, + "plays vital": 8538, + "vital role": 12158, + "role enhancing": 10051, + "ml models": 7086, + "diverse natural": 3021, + "existing research": 3707, + "like bert": 6321, + "bert roberta": 1157, + "roberta gpt2": 10033, + "scales pretraining": 10113, + "pretraining objectives": 8792, + "paper embarks": 8221, + "empirical investigation": 3277, + "llama series": 6392, + "demonstrates superior": 2699, + "detectors provide": 2814, + "provide intriguing": 9158, + "models new": 7475, + "understanding llms": 11777, + "sequence understanding": 10285, + "understanding large": 11774, + "shown impressive": 10380, + "ability opendomain": 158, + "input format": 5349, + "prompts demonstrations": 9031, + "tasks event": 11198, + "event extraction": 3592, + "extraction entity": 3928, + "end present": 3351, + "bilingual english": 1197, + "model instructiontuned": 7167, + "capable performing": 1378, + "unseen domains": 11848, + "conduct empirical": 2024, + "empirical studies": 3280, + "transfer tasks": 11597, + "tasks model": 11245, + "model accessible": 7099, + "broad applications": 1288, + "significantly boost": 10425, + "models consistently": 7286, + "achieve best": 238, + "best results": 1169, + "results different": 9897, + "different benchmarks": 2877, + "benchmarks recent": 1143, + "zerofewshot learning": 12308, + "learning chainofthought": 6198, + "models present": 7493, + "present paper": 8721, + "paper comprehensively": 8212, + "comprehensively investigate": 1952, + "investigate llms": 5562, + "aspects including": 831, + "pose potential": 8585, + "recommendation systems": 9521, + "systems traditional": 11068, + "methods usually": 7018, + "recommendation results": 9520, + "long tail": 6706, + "users address": 11922, + "address issues": 398, + "general framework": 4405, + "llm knowledge": 6419, + "knowledge graphs": 5675, + "graphs kg": 4738, + "semantic representations": 10240, + "order improve": 8109, + "improve semantic": 5137, + "semantic understanding": 10245, + "use llms": 11891, + "llms powerful": 6614, + "rich semantic": 10006, + "addition method": 375, + "structural information": 10816, + "various traditional": 12097, + "traditional models": 11522, + "framework significantly": 4276, + "personalized recommendations": 8479, + "field code": 4049, + "ensemble learning": 3415, + "llms prompting": 6619, + "prompting recently": 9022, + "abilities variety": 131, + "llms existing": 6528, + "paradigm requires": 8273, + "substantial manual": 10895, + "manual effort": 6819, + "limitations specifically": 6348, + "given fact": 4632, + "based llm": 1047, + "effect evaluation": 3135, + "majority voting": 6786, + "types tasks": 11719, + "significant margin": 10414, + "code publicly": 1729, + "sequence generation": 10280, + "generation large": 4541, + "llms capable": 6473, + "instruction finetuning": 5403, + "task instruction": 11132, + "instruction input": 5410, + "selfattention mechanism": 10215, + "mechanism llms": 6891, + "llms models": 6593, + "risk instruction": 10018, + "instruction forgetting": 5408, + "mitigate issue": 7069, + "theoretical analysis": 11445, + "models learning": 7379, + "instructionfollowing capabilities": 5427, + "approach consistently": 735, + "data annotation": 2385, + "notably method": 7908, + "improves zeroshot": 5155, + "research applications": 9775, + "data models": 2436, + "network architecture": 7785, + "called attention": 1330, + "paper large": 8240, + "softmax regression": 10534, + "regression problem": 9586, + "regression function": 9585, + "exhibit impressive": 3658, + "learning abilities": 6184, + "knowledge solving": 5702, + "realworld tasks": 9394, + "unleash potential": 11828, + "enabling llms": 3318, + "mechanism designed": 6890, + "optimal solution": 8090, + "dataset demonstrate": 2493, + "10 improvement": 3, + "diverse tasks": 3028, + "api calls": 683, + "highlighting effectiveness": 4896, + "effectiveness efficiency": 3170, + "social bias": 10525, + "models recent": 7509, + "prompting researchers": 9023, + "explicit implicit": 3826, + "bias propose": 1189, + "llms known": 6576, + "llms capabilities": 6472, + "data generation": 2418, + "generation using": 4587, + "instrumental enabling": 5451, + "various opendomain": 12087, + "highquality instruction": 4911, + "quality human": 9243, + "models generate": 7341, + "generate instruction": 4451, + "work explore": 12250, + "generate highquality": 4449, + "various existing": 12065, + "instruction generation": 5409, + "generation methods": 4551, + "novel strategies": 7932, + "enhance quality": 3393, + "models hope": 7351, + "generating highquality": 4500, + "models language": 7371, + "using generative": 11946, + "ai paper": 519, + "using advanced": 11936, + "advanced ai": 444, + "tools like": 11500, + "stable diffusion": 10671, + "compared original": 1851, + "models natural": 7471, + "natural science": 7748, + "field natural": 4050, + "new capabilities": 7810, + "tailored llms": 11097, + "llms natural": 6595, + "opensource llm": 8061, + "llm incorporating": 6417, + "scientific knowledge": 10152, + "factual correctness": 3974, + "model automating": 7110, + "generation scientific": 4578, + "eliminates need": 3221, + "model explore": 7146, + "training strategies": 11585, + "models research": 7515, + "showcases ability": 10363, + "ability llm": 153, + "despite great": 2780, + "great advance": 4744, + "models mllms": 7467, + "instruction dataset": 5400, + "dataset building": 2480, + "makes current": 6792, + "current mllms": 2357, + "relatively low": 9615, + "cost paper": 2260, + "generation model": 4552, + "dataset training": 2510, + "enhance model": 3390, + "model capability": 7119, + "compared previous": 1852, + "data collection": 2395, + "data generated": 2416, + "different types": 2912, + "dataset based": 2479, + "gpt4 generate": 4696, + "data type": 2466, + "correctness prompt": 2244, + "prompt design": 8988, + "generation results": 4577, + "results previous": 9923, + "propose interactive": 9074, + "interactive prompt": 5498, + "interaction human": 5489, + "correctness generated": 2243, + "general solution": 4417, + "model instruction": 7166, + "generation despite": 4527, + "despite superior": 2789, + "generate natural": 4459, + "according given": 219, + "given task": 4641, + "models capture": 7274, + "capture information": 1391, + "language instructions": 5770, + "knowledge language": 5682, + "models finally": 7331, + "efficient compared": 3193, + "compared traditional": 1858, + "models despite": 7301, + "fewer parameters": 4026, + "approach generate": 743, + "models improves": 7356, + "augmenting large": 922, + "llms external": 6533, + "external tools": 3917, + "emerged promising": 3243, + "promising approach": 8966, + "learning task": 6245, + "task trained": 11147, + "llms learn": 6578, + "learning model": 6229, + "applications existing": 704, + "methods train": 7017, + "train model": 11528, + "novel tool": 7937, + "learning method": 6226, + "use various": 11896, + "propose iterative": 9075, + "experiments conducted": 3769, + "realworld settings": 9393, + "settings demonstrate": 10317, + "application scenarios": 701, + "semantic alignment": 10227, + "methods depend": 6982, + "user intent": 11914, + "research introduce": 9795, + "benefits terms": 1151, + "annotation method": 647, + "model termed": 7228, + "data introduce": 2427, + "introduce effective": 5538, + "prompt augmentation": 8986, + "method accomplishes": 6933, + "desired style": 2774, + "multitask benchmark": 7675, + "long context": 6704, + "llms demonstrate": 6493, + "demonstrate impressive": 2658, + "performance language": 8402, + "works proposed": 12275, + "proposed methods": 9121, + "methods improve": 6992, + "improve llms": 5128, + "context windows": 2149, + "memory mechanisms": 6917, + "rigorous evaluation": 10013, + "datasets task": 2550, + "average length": 983, + "tasks code": 11172, + "code completion": 1704, + "standardized unified": 10686, + "unified format": 11801, + "evaluation llms": 3563, + "llms comprehensive": 6481, + "opensourced models": 8067, + "compression technique": 1955, + "understanding capability": 11765, + "users express": 11925, + "effective mental": 3141, + "timeconsuming task": 11480, + "leveraging capabilities": 6294, + "recent advances": 9459, + "advances large": 467, + "models offers": 7479, + "challenge paper": 1472, + "capable analyzing": 1376, + "application large": 696, + "models field": 7330, + "health support": 4838, + "empowered large": 3296, + "benchmark evaluation": 1118, + "evaluation large": 3560, + "emerged new": 3240, + "address challenge": 386, + "methods including": 6993, + "including question": 5191, + "based findings": 1031, + "findings propose": 4094, + "execution accuracy": 3651, + "sets new": 10313, + "various scenarios": 12092, + "advantages disadvantages": 475, + "hope work": 4934, + "work provides": 12264, + "deeper understanding": 2613, + "model multimodal": 7185, + "model mllm": 7182, + "multimodal data": 7626, + "data current": 2398, + "individual pretrained": 5251, + "specific subtasks": 10619, + "llms integrate": 6569, + "task realworld": 11143, + "common practice": 1796, + "inspired study": 5381, + "results multiple": 9917, + "result obtained": 9869, + "performance mllm": 8410, + "models parallel": 7485, + "process input": 8886, + "input data": 5348, + "data generate": 2415, + "study using": 10871, + "sparked significant": 10589, + "language capabilities": 5759, + "modality alignment": 7096, + "remains open": 9660, + "used inputs": 11902, + "data difficult": 2403, + "issues propose": 5597, + "encoder llm": 3327, + "llm exhibits": 6409, + "training process": 11579, + "prompts llm": 9037, + "llm generate": 6413, + "endtoend manner": 3362, + "demonstrate straightforward": 2665, + "straightforward process": 10772, + "extend capabilities": 3877, + "opensource large": 8060, + "human intentions": 4973, + "unleash power": 11829, + "power llms": 8649, + "equips llms": 3448, + "training multiple": 11572, + "llms enabling": 6516, + "seamless integration": 10170, + "model apis": 7107, + "unified way": 11807, + "comprehensive framework": 1941, + "framework proposed": 4273, + "finally showcase": 4078, + "gaining increasing": 4368, + "attention potential": 893, + "learning techniques": 6247, + "expected results": 3726, + "propose approach": 9056, + "approach transform": 765, + "llms traditional": 6670, + "approach fewshot": 740, + "fewshot incontext": 4031, + "correct answer": 2234, + "using technique": 11975, + "experiments method": 3788, + "method achieve": 6934, + "achieve correct": 247, + "method provides": 6963, + "provides solution": 9179, + "large number": 6112, + "process model": 8891, + "deep learningbased": 2601, + "learningbased methods": 6250, + "methods face": 6986, + "face challenges": 3946, + "domains lack": 3058, + "application chatgpt": 694, + "aims explore": 546, + "knowledge largescale": 5689, + "largescale corpora": 6128, + "detection conduct": 2803, + "detection task": 2810, + "grounding large": 4760, + "model agents": 7105, + "automatic reasoning": 947, + "reasoning planning": 9433, + "planning capability": 8515, + "semantic knowledge": 10236, + "human world": 4991, + "hinders applications": 4920, + "existing studies": 3710, + "studies try": 10847, + "finetune llm": 4105, + "utilize predefined": 11990, + "bridge llms": 1275, + "human efforts": 4961, + "single task": 10487, + "strengths llms": 10796, + "llms autonomously": 6465, + "framework automatically": 4237, + "employs llm": 3291, + "guidance successfully": 4779, + "performance challenging": 8367, + "tasks compared": 11179, + "learning methods": 6227, + "proving effectiveness": 9187, + "generate responses": 4467, + "responses given": 9854, + "compared conventional": 1843, + "translation quality": 11640, + "linguistic features": 6370, + "proved effective": 9143, + "outcomes indicate": 8125, + "mathematical problems": 6864, + "studies typically": 10848, + "models unable": 7550, + "surpassing gpt4": 11018, + "similar performance": 10455, + "llms achieved": 6445, + "remarkable success": 9686, + "success nlp": 10923, + "multimodal tasks": 7642, + "tasks despite": 11189, + "despite successes": 2788, + "main challenges": 6770, + "challenges remain": 1492, + "developing llms": 2832, + "computational cost": 1971, + "paper report": 8265, + "significantly reduce": 10448, + "training cost": 11543, + "strategy demonstrate": 10787, + "existing evaluations": 3688, + "potential impact": 8627, + "achieves performance": 294, + "explored use": 3855, + "study propose": 10863, + "propose tuningfree": 9106, + "tuning parameters": 11700, + "parameter tuning": 8282, + "models static": 7533, + "approach llm": 751, + "various realworld": 12090, + "existing llm": 3696, + "methods mainly": 7000, + "widely exists": 12218, + "llms address": 6450, + "proposed framework": 9116, + "framework llms": 4268, + "llms performances": 6608, + "interaction llms": 5491, + "furthermore proposed": 4338, + "framework general": 4255, + "evaluation method": 3565, + "translation code": 11636, + "generation demonstrate": 4526, + "llmbased autonomous": 6433, + "autonomous agents": 960, + "handling diverse": 4806, + "diverse data": 3014, + "data learning": 2432, + "efficient manner": 3197, + "designed diverse": 2761, + "despite success": 2787, + "encounter limitations": 3339, + "architecture design": 790, + "prior knowledge": 8835, + "propose use": 9110, + "use large": 11887, + "learning process": 6236, + "diverse realworld": 3024, + "node graph": 7886, + "method dubbed": 6947, + "performance different": 8380, + "humanlike decisions": 5007, + "pseudo data": 9196, + "models lowresource": 7463, + "serves cornerstone": 10302, + "llms introduce": 6571, + "construct highquality": 2113, + "experiments using": 3807, + "data domain": 2404, + "methods requiring": 7009, + "model scale": 7212, + "efficiency furthermore": 3186, + "furthermore method": 4335, + "great potential": 4750, + "models align": 7259, + "previous research": 8811, + "human preference": 4983, + "finetuning step": 4146, + "frozen llms": 4293, + "llms directly": 6508, + "introduce novel": 5546, + "inference method": 5272, + "pretrained llms": 8759, + "llms evaluate": 6520, + "generation ai": 4514, + "ai safety": 520, + "need extra": 7767, + "gradient computation": 4714, + "computation parameter": 1966, + "parameter updates": 8283, + "eliminating need": 3224, + "results evaluated": 9899, + "evaluated gpt4": 3518, + "establishes new": 3483, + "attack success": 877, + "success rate": 10924, + "draw inspiration": 3088, + "integrating multiple": 5462, + "tasks related": 11269, + "errors resulting": 3470, + "including contextual": 5179, + "tasks achieve": 11159, + "achieve objective": 257, + "model offers": 7189, + "seamlessly integrates": 10173, + "context information": 2142, + "inspired propose": 5376, + "systems achieve": 11056, + "text encoder": 11392, + "text prompts": 11410, + "utterances content": 12007, + "68 relative": 94, + "prompt given": 8992, + "chaining large": 1453, + "learning approaches": 6193, + "stateoftheart large": 10711, + "tool usage": 11492, + "connecting large": 2070, + "llms excel": 6523, + "rely carefully": 9642, + "carefully crafted": 1399, + "crafted prompts": 2294, + "process paper": 8893, + "fast convergence": 4002, + "approach allows": 730, + "powerful language": 8656, + "efficient optimization": 3198, + "llms based": 6466, + "respectively furthermore": 9841, + "connecting llms": 2073, + "inspire research": 5372, + "increasingly crucial": 5231, + "crucial efficiently": 2329, + "including named": 5186, + "dialogue systems": 2865, + "systems recently": 11066, + "achieved significant": 273, + "nlp downstream": 7865, + "tasks lack": 11233, + "lack specialized": 5747, + "proposed improve": 9117, + "parameterefficient tuning": 8289, + "different domains": 2880, + "results tasks": 9932, + "significant margins": 10415, + "work provide": 12263, + "provide insights": 9156, + "technical terms": 11330, + "model performs": 7199, + "convolutional neural": 2219, + "features entities": 4015, + "incorporating predicted": 5217, + "model significantly": 7217, + "significantly improved": 10434, + "datasets cover": 2521, + "generate summaries": 4468, + "develop new": 2824, + "new datasets": 7816, + "datasets conduct": 2520, + "generation capability": 4521, + "summarization tasks": 10962, + "summaries generated": 10957, + "models specifically": 7532, + "factual consistency": 3973, + "tasks surpassing": 11286, + "reference summaries": 9554, + "works field": 12273, + "field text": 4054, + "novel datasets": 7919, + "chinese language": 1627, + "propose comprehensive": 9061, + "create largescale": 2298, + "largescale chinese": 6127, + "multiple domains": 7654, + "ability existing": 141, + "models explore": 7325, + "limitations conduct": 6341, + "conduct evaluations": 2027, + "using different": 11943, + "chatgpt results": 1594, + "semantic features": 10233, + "relatively good": 9614, + "improved providing": 5141, + "work serve": 12265, + "serve essential": 10296, + "textual context": 11437, + "llms helpful": 6558, + "information corresponding": 5291, + "corresponding textual": 2252, + "text representation": 11413, + "application llms": 700, + "knowledge improve": 5677, + "representations llms": 9733, + "network structure": 7792, + "promising avenues": 8967, + "combining llms": 1785, + "advantage model": 473, + "llms revolutionized": 6637, + "revolutionized natural": 9985, + "catastrophic forgetting": 1415, + "achieve higher": 250, + "text llm": 11404, + "paper explored": 8228, + "representation ability": 9726, + "ability different": 139, + "powerful large": 8658, + "text prompt": 11409, + "prompt dataset": 8987, + "high quality": 4875, + "different ways": 2913, + "ways data": 12177, + "curriculum learning": 2367, + "experiments ablation": 3762, + "augmentation methods": 918, + "methods data": 6980, + "multiplechoice questions": 7665, + "explanations generated": 3823, + "generated questions": 4484, + "crucial step": 2332, + "related concepts": 9599, + "ensure quality": 3417, + "gpt4 exhibited": 4695, + "represent significant": 9724, + "enhancing capabilities": 3405, + "assistant large": 857, + "demonstrated great": 2677, + "framework named": 4270, + "pretraining supervised": 8795, + "pretraining dataset": 8775, + "dataset pretraining": 2504, + "dataset tailored": 2507, + "tailored distinct": 11096, + "instruction pairs": 5411, + "llms augmented": 6462, + "additional modules": 379, + "tasks especially": 11194, + "especially text": 3474, + "text generative": 11398, + "generative tasks": 4622, + "leads high": 6173, + "cost model": 2258, + "online deployment": 8028, + "address multiple": 407, + "multiple nlp": 7658, + "tasks order": 11251, + "applications specifically": 712, + "model capture": 7121, + "twostage training": 11713, + "training method": 11567, + "tasks proposed": 11261, + "performance based": 8364, + "models various": 7558, + "opensource language": 8058, + "methods require": 7008, + "specifically consider": 10623, + "different data": 2878, + "data sources": 2461, + "leverage complementary": 6274, + "costly human": 2264, + "experiments standard": 3802, + "standard benchmarks": 10683, + "models use": 7554, + "generalization performance": 4429, + "finally conduct": 4072, + "effectiveness robustness": 3177, + "utilizing large": 11999, + "strategies construct": 10776, + "finetuning datasets": 4124, + "datasets chinese": 2519, + "finetune llms": 4106, + "reasoning capability": 9414, + "augment llms": 912, + "objective subjective": 7975, + "subjective dimensions": 10881, + "quantitative qualitative": 9250, + "qualitative results": 9234, + "users diverse": 11924, + "resources available": 9829, + "llms presents": 6616, + "lack domain": 5742, + "domain expertise": 3053, + "approach captures": 733, + "nested structure": 7783, + "pipeline achieves": 8503, + "review essential": 9973, + "current methods": 2356, + "shown promise": 10383, + "revolutionizing natural": 9990, + "issues paper": 5595, + "approach leverages": 750, + "knowledge enhance": 5666, + "use natural": 11892, + "practical implementation": 8666, + "models employ": 7311, + "recommendations future": 9523, + "reasoning path": 9432, + "retrievalaugmented large": 9950, + "extraordinary performance": 3937, + "tasks question": 11264, + "qa tasks": 9229, + "knowledge existing": 5669, + "generate reasoning": 4466, + "approaches inherent": 769, + "low quality": 6732, + "quality generated": 9242, + "llm easily": 6405, + "interaction ir": 5490, + "approach enables": 739, + "selects appropriate": 10212, + "answering datasets": 666, + "datasets outperform": 2540, + "answer accuracy": 656, + "ai ability": 509, + "leveraging diverse": 6295, + "compared llms": 1850, + "zeroshot fewshot": 12314, + "llms incontext": 6564, + "taskspecific finetuning": 11309, + "errors llm": 3469, + "llm predictions": 6423, + "extent llms": 3913, + "recognition capabilities": 9511, + "tuning present": 11702, + "present new": 8718, + "carefully curated": 1401, + "exam questions": 3611, + "shows strong": 10395, + "strong capabilities": 10808, + "models gpt35": 7348, + "nlp benchmarks": 7862, + "using small": 11974, + "practical perspective": 8669, + "capability understanding": 1374, + "release model": 9622, + "domains remains": 3062, + "paper evaluates": 8224, + "models specialized": 7529, + "certain domains": 1443, + "processing ensure": 8902, + "vertical domains": 12124, + "learning research": 6239, + "semantic communications": 10230, + "models fms": 7334, + "models increasingly": 7363, + "research explored": 9789, + "semantic extraction": 10232, + "different levels": 2887, + "computation memory": 1965, + "study focuses": 10854, + "universal knowledge": 11822, + "study highlights": 10855, + "comprehensive benchmark": 1927, + "benchmark evaluating": 1117, + "comprehensively evaluate": 1951, + "hallucination detection": 4790, + "domains llms": 3060, + "discuss key": 2974, + "analyze current": 632, + "point future": 8555, + "prompts code": 9030, + "significantly advanced": 10423, + "llms use": 6675, + "community remains": 1812, + "usefulness hand": 11907, + "timeconsuming costly": 11477, + "issue introduce": 5587, + "designed enhance": 2762, + "comprises components": 1957, + "corpora demonstrate": 2231, + "framework generate": 4256, + "encoder large": 3324, + "model series": 7215, + "series llms": 10294, + "indomain training": 5255, + "enable llms": 3308, + "learning despite": 6204, + "fewshot ability": 4029, + "llms standard": 6661, + "paper raise": 8264, + "instead using": 5392, + "asks llms": 825, + "llms create": 6490, + "final output": 4067, + "flexible framework": 4167, + "icl chainofthought": 5035, + "arithmetic reasoning": 805, + "generation benchmarks": 4519, + "learning strategy": 6243, + "performance paper": 8420, + "knowledge learned": 5691, + "llms factual": 6535, + "llms output": 6602, + "output generation": 8166, + "llms fewshot": 6536, + "learning scenarios": 6241, + "scenarios introduce": 10128, + "framework improve": 4260, + "proposed approaches": 9114, + "autoregressive llms": 967, + "gptstyle models": 4709, + "answering tasks": 676, + "outperforms strong": 8162, + "context modeling": 2144, + "reasoning llms": 9429, + "wide spectrum": 12213, + "social network": 10527, + "network services": 7791, + "contexts using": 2154, + "using natural": 11960, + "context reasoning": 2147, + "finetuning model": 4135, + "users requests": 11931, + "users request": 11930, + "stage does": 10676, + "data help": 2419, + "help llms": 4848, + "llms reasoning": 6627, + "reasoning large": 9424, + "foundation language": 4219, + "language technologies": 5980, + "great success": 4751, + "data training": 2465, + "training llms": 11566, + "impact code": 5076, + "different stages": 2908, + "results provide": 9926, + "text significantly": 11414, + "enhance llms": 3389, + "general reasoning": 4415, + "mixing strategy": 7081, + "strategy code": 10786, + "deepen understanding": 2610, + "llms regarding": 6630, + "source code": 10572, + "satisfy users": 10097, + "users information": 11926, + "tasks important": 11220, + "responses lack": 9856, + "effectiveness llms": 3172, + "issues present": 5596, + "learning contrastive": 6202, + "suit needs": 10950, + "specifically construct": 10624, + "reward function": 9995, + "teach llms": 11316, + "conducted experiments": 2042, + "experiments typical": 3806, + "typical applications": 11721, + "consistency llms": 2091, + "llms outputs": 6603, + "prompts vulnerability": 9042, + "vulnerability detection": 12168, + "approaches lack": 770, + "optimization llms": 8094, + "semantic space": 10243, + "technique solve": 11332, + "attack strategies": 876, + "outperforming existing": 8143, + "foundational framework": 4231, + "concerns potential": 2000, + "llms requires": 6634, + "dialogue dataset": 2861, + "value alignment": 12023, + "alignment llms": 578, + "evaluate representative": 3515, + "representative llms": 9737, + "high level": 4872, + "suggest llms": 10942, + "based provided": 1058, + "indicating potential": 5246, + "nlp large": 7867, + "performance limited": 8408, + "input length": 5350, + "pilot experiments": 8498, + "improved performance": 5140, + "insight propose": 5362, + "relative improvement": 9612, + "llms datasets": 6492, + "achieve competitive": 244, + "competitive results": 1879, + "assessing quality": 843, + "answers generated": 679, + "generated ai": 4472, + "used evaluate": 11899, + "candidate answers": 1334, + "mimic human": 7043, + "manner specifically": 6816, + "llms conducted": 6485, + "conducted extensive": 2043, + "experiments diverse": 3779, + "rates models": 9349, + "evaluations indicate": 3587, + "diverse applications": 3012, + "human labor": 4977, + "knowledge design": 5660, + "search space": 10180, + "space search": 10581, + "strategy paper": 10788, + "gpt4 based": 4692, + "design new": 2751, + "gpt4 generative": 4697, + "generates accurate": 4491, + "natural language inference": 7714, + "natural language understanding": 7739, + "obtains new stateoftheart": 8001, + "multilingual language models": 7618, + "machine translation models": 6759, + "language models propose": 5936, + "radford et al": 9305, + "et al 2018": 3490, + "model experimental results": 7143, + "experimental results model": 3749, + "story generation generating": 10768, + "language generation models": 5765, + "pretrained language model": 8747, + "language model gpt2": 5788, + "pretraining experimental results": 8779, + "method large language": 6957, + "large language model": 6009, + "significantly improves accuracy": 10436, + "address problem propose": 410, + "largescale language model": 6135, + "terms automatic metrics": 11361, + "automatic metrics human": 944, + "metrics human evaluation": 7027, + "emergence large language": 3252, + "large language models": 6019, + "language models llms": 5853, + "deep reinforcement learning": 2607, + "math word problems": 6862, + "math word problem": 6861, + "task natural language": 11135, + "natural language processing": 7722, + "based generative pretrained": 1035, + "generative pretrained language": 4607, + "demonstrate effectiveness proposed": 2654, + "effectiveness proposed method": 3176, + "proposed method benchmark": 9120, + "results method consistently": 9912, + "method consistently outperforms": 6945, + "generative pretrained models": 4612, + "pretrained language models": 8749, + "language models gpt": 5838, + "generative pretrained model": 4611, + "conduct extensive experiments": 2030, + "outperforms baseline models": 8147, + "ablation studies conducted": 170, + "table question answering": 11075, + "based natural language": 1050, + "autoregressive language models": 966, + "stateoftheart results various": 10729, + "large foundation models": 6007, + "new stateoftheart results": 7841, + "models end propose": 7315, + "175 billion parameters": 30, + "automatic speech recognition": 949, + "masked language modeling": 6839, + "word error rate": 12240, + "release code model": 9619, + "language models incontext": 5841, + "models incontext learning": 7361, + "explored recent years": 3854, + "success natural language": 10921, + "language models gpt2": 5839, + "language model pretrained": 5798, + "tasks unified texttotext": 11296, + "unified texttotext format": 11806, + "training objectives different": 11575, + "language models bert": 5814, + "language models used": 5951, + "language models multiple": 5922, + "models multiple tasks": 7470, + "tasks large language": 11236, + "language models achieved": 5810, + "models achieved impressive": 7255, + "language model external": 5785, + "capabilities remains unclear": 1358, + "language models perform": 5931, + "tasks work introduce": 11302, + "model best knowledge": 7113, + "language model demonstrate": 5783, + "performance wide range": 8456, + "shows significant improvement": 10394, + "data scarcity problem": 2457, + "lack largescale highquality": 5746, + "overcome limitation propose": 8179, + "text data specifically": 11389, + "facilitating future research": 3961, + "future research field": 4356, + "information large language": 5303, + "graph neural networks": 4733, + "neural networks gnns": 7804, + "training large language": 11562, + "paper propose efficient": 8253, + "propose efficient effective": 9065, + "effectiveness proposed approach": 3175, + "numerous natural language": 7963, + "natural language tasks": 7737, + "language processing nlp": 5964, + "processing nlp demonstrate": 8906, + "success large language": 10915, + "language models llm": 5852, + "extensive experimental results": 3891, + "experimental results public": 3756, + "results public datasets": 9928, + "performance stateoftheart approaches": 8433, + "promising research direction": 8974, + "natural language prompts": 7734, + "language models limited": 5851, + "address issue propose": 397, + "preliminary evaluation chatgpt": 8707, + "minor performance differences": 7058, + "evaluating number benchmark": 3532, + "number benchmark test": 7950, + "benchmark test sets": 1132, + "improves translation performance": 5154, + "using publicly available": 11966, + "nonlatin script languages": 7897, + "generate multimodal content": 4457, + "access external knowledge": 203, + "external knowledge base": 3916, + "llm improve performance": 6416, + "language model based": 5780, + "language model pretraining": 5801, + "language understanding generation": 5985, + "understanding generation tasks": 11773, + "largescale pretrained language": 6141, + "comparative study chatgpt": 1833, + "chatgpt finetuned bert": 1563, + "prior studies shown": 8838, + "studies shown chatgpt": 10846, + "understanding ability chatgpt": 11763, + "falls short handling": 3993, + "tasks large margin": 11239, + "achieves comparable performance": 284, + "existing large language": 3693, + "reinforcement learning rl": 9596, + "approach significantly improves": 763, + "language understanding tasks": 5989, + "demonstrated impressive performance": 2681, + "impressive performance various": 5113, + "performance various natural": 8448, + "various natural language": 12080, + "processing nlp tasks": 8907, + "understanding reasoning capabilities": 11782, + "language understanding nlu": 5987, + "understanding nlu tasks": 11779, + "language inference sentiment": 5769, + "sentiment analysis tasks": 10271, + "reinforcement learning human": 9593, + "learning human feedback": 6215, + "human feedback rlhf": 4970, + "policy search problem": 8563, + "problem reinforcement learning": 8870, + "leverages large language": 6285, + "prompting methods generate": 9020, + "significantly outperform stateoftheart": 10443, + "outperform stateoftheart baselines": 8138, + "potential large language": 8629, + "including natural language": 5190, + "language processing computer": 5961, + "processing computer vision": 8901, + "language model gpt4": 5789, + "diffusion models introducing": 2929, + "including text images": 5196, + "prompting large language": 9013, + "language models large": 5844, + "models large language": 7373, + "language models demonstrated": 5821, + "based observation propose": 1053, + "observation propose novel": 7984, + "various downstream tasks": 12064, + "incontext learning performance": 5209, + "grammatical error correction": 4725, + "terms automatic evaluation": 11359, + "automatic evaluation metrics": 937, + "human evaluation quantitatively": 4964, + "results demonstrate chatgpt": 9892, + "artificial intelligence ai": 817, + "chatgpt large language": 1577, + "chat generative pretrained": 1525, + "generative pretrained transformer": 4613, + "language models empirical": 5827, + "models empirical study": 7309, + "performance large language": 8404, + "language models based": 5813, + "based instruction tuning": 1041, + "instruction tuning different": 5417, + "potential future research": 8624, + "future research directions": 4355, + "furthermore propose novel": 4337, + "propose novel twostep": 9095, + "problem large language": 8863, + "models llms chatgpt": 7394, + "llms chatgpt gpt4": 6475, + "llms remains unclear": 6632, + "problem paper propose": 8867, + "remarkable potential various": 9683, + "error correction gec": 3462, + "using incontext learning": 11949, + "experimental results human": 3745, + "human evaluations demonstrate": 4967, + "parameterefficient finetuning large": 8286, + "finetuning large language": 4129, + "models success large": 7536, + "models llms like": 7428, + "llms like gpt3": 6586, + "parameterefficient finetuning peft": 8287, + "comparable better performance": 1818, + "llms different tasks": 6507, + "math reasoning datasets": 6859, + "datasets results demonstrate": 2548, + "framework finetuning large": 4253, + "llms downstream tasks": 6510, + "systems large language": 11061, + "solving math word": 10560, + "ability large language": 150, + "language models work": 5954, + "models work propose": 7561, + "models including gpt4": 7359, + "provide detailed analysis": 9154, + "power large language": 8646, + "cell type annotation": 1437, + "challenging task requires": 1506, + "language models chatgpt": 5816, + "ability chatgpt gpt4": 135, + "pretrained transformer gpt4": 8768, + "supervised finetuning sft": 10987, + "proximal policy optimization": 9191, + "policy optimization ppo": 8561, + "propose novel learning": 9089, + "align language model": 568, + "recently large language": 9500, + "llms like chatgpt": 6581, + "like chatgpt demonstrated": 6323, + "chatgpt demonstrated remarkable": 1550, + "demonstrated remarkable performance": 2689, + "variety natural language": 12044, + "language processing tasks": 5971, + "remains explored paper": 9656, + "artificial general intelligence": 814, + "general intelligence agi": 4407, + "improve model performance": 5131, + "language models diffusion": 5824, + "models diffusion models": 7304, + "generated large language": 4480, + "generative large language": 4598, + "models publicly available": 7505, + "pretrained large language": 8755, + "language models exponential": 5832, + "models exponential growth": 7327, + "electronic health records": 3211, + "poses significant challenge": 8592, + "processing nlp techniques": 8908, + "language models plms": 5933, + "challenges future directions": 1482, + "finally discuss existing": 4074, + "discuss existing challenges": 2972, + "field artificial intelligence": 4047, + "generative pretrained transformers": 4616, + "pretrained transformers gpt": 8770, + "models llms demonstrated": 7400, + "llms demonstrated remarkable": 6499, + "information retrieval ir": 5314, + "code reproduce results": 1733, + "reproduce results available": 9747, + "promising results generating": 8976, + "achieving competitive performance": 314, + "code models available": 1723, + "empowering large language": 3302, + "human evaluation results": 4965, + "gpt4 automatic evaluation": 4691, + "data public httpsgithubcomnlpxucanwizardlm": 2447, + "language models like": 5848, + "models like chatgpt": 7381, + "task converts natural": 11121, + "converts natural language": 2215, + "models llms work": 7461, + "work natural language": 12258, + "natural language generation": 7712, + "outperforms stateoftheart models": 8161, + "demonstrates strong generalization": 2698, + "capacity largescale language": 1384, + "largescale language models": 6136, + "address limitation propose": 403, + "finetuning experimental results": 4126, + "deep neural networks": 2604, + "neural networks dnns": 7803, + "quantitatively evaluate performance": 9254, + "promising performance various": 8971, + "performance various tasks": 8454, + "tasks conduct extensive": 11182, + "incontext learning icl": 5207, + "chatgpt exhibits strong": 1560, + "remains challenging task": 9652, + "machine learning tasks": 6756, + "machine learning ml": 6754, + "aim bridge gap": 537, + "using large language": 11952, + "machine translation mt": 6760, + "using deep learning": 11942, + "models llms gpt3": 7424, + "llms gpt3 chatgpt": 6552, + "bridge gap llms": 1272, + "network large language": 7789, + "tasks target task": 11289, + "achieves competitive performance": 287, + "paper propose simple": 8258, + "propose simple efficient": 9100, + "simple efficient approach": 10463, + "demonstrate superiority proposed": 2669, + "instructions instruction tuning": 5437, + "different model sizes": 2893, + "facilitate future research": 3955, + "future research release": 4357, + "human quality evaluation": 4987, + "knowledge large language": 5685, + "language models lms": 5919, + "empirical results demonstrate": 3279, + "results demonstrate method": 9894, + "method significantly improves": 6966, + "general artificial intelligence": 4402, + "reasoning ability crucial": 9404, + "various nlp tasks": 12086, + "unclear chatgpt performs": 11739, + "paper conduct comprehensive": 8215, + "conduct comprehensive evaluation": 2023, + "approaches large language": 772, + "varying difficulty levels": 12101, + "chain thought cot": 1450, + "comparable stateoftheart methods": 1827, + "knowledge graph construction": 5674, + "propose simple effective": 9099, + "comprehensive experimental results": 1939, + "code datasets available": 1713, + "language models previous": 5935, + "knowledgeintensive tasks paper": 5718, + "tasks paper propose": 11253, + "paper propose new": 8255, + "propose new paradigm": 9083, + "various language models": 12073, + "commonsense reasoning tasks": 1804, + "models llms exhibit": 7413, + "blackbox language model": 1219, + "experiments demonstrate effectiveness": 3773, + "demonstrate effectiveness method": 2653, + "data instruction tuning": 2425, + "instruction tuning large": 5418, + "tuning large language": 11694, + "models llms gained": 7419, + "adaptation large language": 355, + "models llms downstream": 7406, + "downstream tasks finetuning": 3083, + "millions billions parameters": 7041, + "performance specific task": 8430, + "instruction tuning data": 5415, + "powerful capabilities text": 8654, + "paper presents novel": 8250, + "novel transformer architecture": 7939, + "experiments benchmark datasets": 3765, + "results method outperforms": 9913, + "method outperforms stateoftheart": 6960, + "empower large language": 3294, + "language model perform": 5797, + "question answering large": 9271, + "answering large language": 670, + "language model llm": 5793, + "model llm gained": 7179, + "attracted widespread attention": 902, + "question answering qa": 9273, + "addition propose new": 377, + "achieve better performance": 241, + "extensive experiments demonstrate": 3899, + "experiments demonstrate approach": 3772, + "llms like gpt": 6585, + "language processing paper": 5968, + "models llms exhibited": 7415, + "emergent incontext learning": 3259, + "decision making process": 2574, + "downstream tasks extensive": 3081, + "tasks extensive experiments": 11210, + "datasets method achieves": 2538, + "multitask instruction tuning": 7677, + "broad range tasks": 1290, + "tasks conduct experiments": 11181, + "exploring potential chatgpt": 3862, + "findings demonstrate feasibility": 4088, + "smaller parameter size": 10517, + "model extensive experiments": 7148, + "significantly outperforms stateoftheart": 10447, + "active learning mechanism": 337, + "address limitations present": 405, + "language models conduct": 5819, + "llms exhibited remarkable": 6527, + "remarkable performance various": 9678, + "nlp tasks finetuning": 7875, + "expensive timeconsuming obtain": 3729, + "paper introduces novel": 8237, + "using reinforcement learning": 11968, + "model publicly available": 7207, + "performance range natural": 8425, + "range natural language": 9319, + "small language models": 10508, + "language models slms": 5947, + "named entity recognition": 7694, + "entity recognition relation": 3429, + "recognition relation extraction": 9514, + "llms generate reasonable": 6546, + "tasks including context": 11223, + "understanding code generation": 11767, + "results gpt4 achieve": 9905, + "achieve comparable performance": 243, + "report large language": 9715, + "language generation nlg": 5766, + "address issue paper": 396, + "leverage large language": 6277, + "machine translation text": 6762, + "translation text summarization": 11644, + "demonstrate method effectively": 2661, + "method effectively improve": 6949, + "answer question paper": 660, + "datasets demonstrate effectiveness": 2524, + "including large language": 5182, + "remains largely unexplored": 9658, + "experiments demonstrate proposed": 3776, + "significantly outperforms existing": 10445, + "outperforms existing methods": 8153, + "issue large language": 5589, + "large language modelsllms": 6104, + "research large language": 9797, + "current research focuses": 2360, + "models llms remarkable": 7443, + "response user input": 9848, + "language model small": 5802, + "natural language text": 7738, + "limitations propose novel": 6346, + "methods experimental results": 6985, + "recent success large": 9479, + "questions large language": 9295, + "paper explore potential": 8227, + "experimental results real": 3758, + "play critical role": 8527, + "avoiding potential data": 996, + "potential data leakage": 8622, + "smaller language models": 10514, + "training experimental results": 11553, + "experimental results demonstrate": 3740, + "models like gpt35": 7383, + "propose new benchmark": 9082, + "modern large language": 7567, + "code benchmark publicly": 1702, + "like chatgpt shown": 6328, + "abilities different models": 123, + "evaluating large language": 3527, + "language models chinese": 5817, + "recent advancements large": 9456, + "advancements large language": 463, + "yielded remarkable performance": 12302, + "performance natural language": 8413, + "paper propose novel": 8256, + "sentiment analysis dataset": 10270, + "existing opensource llms": 3706, + "reasoning ability large": 9405, + "intermediate reasoning steps": 5515, + "rapid advancement large": 9334, + "advancement large language": 454, + "models llms led": 7427, + "pretrained models help": 8765, + "achieves stateoftheart performance": 302, + "different pretrained models": 2900, + "general language understanding": 4414, + "era large language": 3453, + "metrics observe necessity": 7031, + "multimodal large language": 7633, + "code data available": 1706, + "legal large language": 6259, + "knowledge bases large": 5656, + "bases large language": 1082, + "models llms shown": 7450, + "llms shown potential": 6651, + "language models crucial": 5820, + "generative language models": 4596, + "chinese large language": 1629, + "align human values": 566, + "exhibited remarkable abilities": 3665, + "provide preliminary evaluation": 9161, + "large pretrained models": 6117, + "propose novel framework": 9087, + "computer vision natural": 1983, + "vision natural language": 12142, + "language models significant": 5945, + "given natural language": 4635, + "natural language questions": 7735, + "learning large language": 6222, + "models llms emerged": 7409, + "baseline models comprehensive": 1070, + "models llms recently": 7441, + "llms recently demonstrated": 6629, + "demonstrated remarkable capabilities": 2686, + "remarkable capabilities natural": 9670, + "capabilities natural language": 1351, + "comprehensive overview recent": 1945, + "models llms generate": 7423, + "model performance compared": 7198, + "aspect natural language": 828, + "bridge gap propose": 1274, + "datasets pretrained models": 2543, + "teaching large language": 11322, + "chain thought prompting": 1451, + "knowledge reasoning abilities": 5699, + "alignment human values": 577, + "conduct human evaluation": 2032, + "automatic human evaluation": 939, + "llms demonstrated exceptional": 6496, + "language models retrieval": 5942, + "opendomain question answering": 8046, + "chatgpt demonstrated impressive": 1549, + "wide range tasks": 12211, + "existing evaluation methods": 3686, + "overcoming limitations previous": 8184, + "solve complex problems": 10550, + "editing large language": 3128, + "language model large": 5790, + "model large language": 7171, + "models llms showcased": 7448, + "llms showcased remarkable": 6647, + "helps llms better": 4857, + "llms conduct extensive": 6483, + "tasks experimental results": 11204, + "experimental results indicate": 3746, + "plays vital role": 8539, + "diverse natural language": 3022, + "bert roberta gpt2": 1158, + "understanding large language": 11775, + "llms shown impressive": 6650, + "nlp tasks llms": 7877, + "tasks event extraction": 11199, + "bilingual english chinese": 1198, + "data model size": 2435, + "significantly boost performance": 10426, + "achieve best results": 239, + "benchmarks recent years": 1144, + "incontext learning chainofthought": 5206, + "paper comprehensively investigate": 8213, + "users address issues": 11923, + "knowledge graphs kg": 5676, + "rich semantic information": 10007, + "existing methods usually": 3700, + "code publicly available": 1730, + "generation large language": 4542, + "sequence generation tasks": 10281, + "risk instruction forgetting": 10019, + "mitigate issue propose": 7070, + "significantly improves zeroshot": 10439, + "improves zeroshot performance": 5156, + "data models trained": 2439, + "llms exhibit impressive": 6525, + "incontext learning abilities": 5205, + "language models recent": 5938, + "generate instruction data": 4452, + "machine learning models": 6755, + "language models natural": 5924, + "eliminates need manual": 3222, + "achieves stateoftheart results": 305, + "despite great advance": 2781, + "language models mllms": 5921, + "cost paper propose": 2261, + "despite superior performance": 2790, + "superior performance large": 10978, + "language models generate": 5837, + "generate natural language": 4460, + "natural language instructions": 7716, + "knowledge language models": 5683, + "effective efficient compared": 3139, + "language models despite": 5823, + "augmenting large language": 923, + "extensive experiments conducted": 3898, + "long context understanding": 6705, + "models llms demonstrate": 7398, + "llms demonstrate impressive": 6494, + "impressive performance language": 5112, + "works proposed methods": 12276, + "tasks code completion": 11173, + "evaluation llms comprehensive": 3564, + "comprehensive evaluation llms": 1935, + "recent advances large": 9460, + "advances large language": 468, + "language models offers": 5926, + "application large language": 697, + "language models field": 5835, + "mental health support": 6922, + "empowered large language": 3297, + "evaluation large language": 3561, + "emerged new paradigm": 3241, + "address challenge paper": 388, + "based findings propose": 1032, + "language model multimodal": 5796, + "language model mllm": 5795, + "individual pretrained models": 5252, + "address issues propose": 400, + "spoken language understanding": 10663, + "gaining increasing attention": 4369, + "models llms traditional": 7458, + "fewshot incontext learning": 4032, + "shown promising results": 10385, + "framework based chatgpt": 4239, + "knowledge largescale corpora": 5690, + "grounding large language": 4761, + "models llms powerful": 7437, + "performance challenging tasks": 8368, + "previous studies typically": 8819, + "models llms achieved": 7389, + "llms achieved remarkable": 6446, + "achieved remarkable success": 271, + "remarkable success nlp": 9687, + "multimodal tasks despite": 7643, + "high computational cost": 4869, + "achieves performance comparable": 295, + "recent studies explored": 9477, + "language models static": 5948, + "various realworld tasks": 12091, + "evaluate ability llms": 3501, + "issues propose novel": 5598, + "llmbased autonomous agents": 6434, + "use large language": 11888, + "models llms introduce": 7426, + "gradient computation parameter": 4715, + "results evaluated gpt4": 9900, + "attack success rate": 878, + "processing tasks related": 8913, + "tasks including contextual": 11224, + "chaining large language": 1454, + "stateoftheart large language": 10712, + "connecting large language": 2071, + "models llms excel": 7412, + "carefully crafted prompts": 1400, + "remarkable capabilities various": 9673, + "capabilities various tasks": 1364, + "various tasks including": 12095, + "including named entity": 5187, + "llms achieved significant": 6447, + "nlp downstream tasks": 7866, + "furthermore evaluate performance": 4331, + "demonstrates superior performance": 2700, + "outperforms existing models": 8154, + "convolutional neural network": 2220, + "llms generate summaries": 6548, + "ability existing models": 142, + "different methods including": 2891, + "models llms revolutionized": 7444, + "llms revolutionized natural": 6639, + "revolutionized natural language": 9986, + "powerful large language": 8659, + "different ways data": 2914, + "ways data augmentation": 12178, + "experiments ablation studies": 3763, + "data augmentation methods": 2387, + "capabilities large language": 1343, + "assistant large language": 858, + "llms demonstrated great": 6497, + "demonstrated great potential": 2678, + "pretraining supervised finetuning": 8796, + "language models better": 5815, + "nlp tasks especially": 7873, + "tasks especially text": 11195, + "cost model training": 2259, + "multiple nlp tasks": 7659, + "language models various": 5952, + "opensource language models": 8059, + "models like llama": 7384, + "present novel framework": 8720, + "extensive experiments standard": 3906, + "code data models": 1707, + "data models publicly": 2437, + "utilizing large language": 12000, + "models llms provide": 7440, + "objective subjective dimensions": 7976, + "quantitative qualitative results": 9251, + "models llms presents": 7439, + "process paper introduces": 8894, + "llms shown promise": 6652, + "revolutionizing natural language": 9991, + "use natural language": 11893, + "language models employ": 5829, + "recommendations future research": 9524, + "retrievalaugmented large language": 9951, + "question answering datasets": 9268, + "datasets outperform stateoftheart": 2541, + "llms incontext learning": 6565, + "instruction tuning present": 5421, + "large models gpt4": 6110, + "exceptional capabilities various": 3635, + "capabilities various domains": 1361, + "various domains remains": 12060, + "existing large models": 3694, + "foundation models fms": 4224, + "previous research explored": 8812, + "research explored use": 9790, + "comprehensive benchmark evaluating": 1928, + "various domains llms": 12059, + "models llms use": 7459, + "address issue introduce": 395, + "encoder large language": 3325, + "icl chainofthought cot": 5036, + "code generation benchmarks": 1715, + "achieves comparable results": 285, + "data code available": 2393, + "fewshot learning scenarios": 4034, + "question answering tasks": 9275, + "outperforms strong baselines": 8163, + "using natural language": 11961, + "reasoning large language": 9425, + "impact code data": 5077, + "source code model": 10574, + "code model parameters": 1721, + "users information needs": 11927, + "address issues present": 399, + "llms generate responses": 6547, + "effectively improve performance": 3156, + "improve performance llms": 5133, + "prompts vulnerability detection": 9043, + "evaluate representative llms": 3516, + "building insight propose": 1312, + "conducted extensive experiments": 2044, + "search space search": 10181, + "model experimental results model": 7144, + "pretrained language model gpt2": 8748, + "terms automatic metrics human": 11362, + "automatic metrics human evaluation": 945, + "emergence large language models": 3253, + "large language models llms": 6047, + "task natural language processing": 11136, + "based generative pretrained language": 1036, + "generative pretrained language model": 4608, + "demonstrate effectiveness proposed method": 2656, + "generative pretrained language models": 4609, + "language models incontext learning": 5842, + "success natural language processing": 10922, + "tasks unified texttotext format": 11297, + "pretrained language models bert": 8751, + "language models multiple tasks": 5923, + "tasks large language models": 11237, + "large language models achieved": 6020, + "information large language models": 5304, + "graph neural networks gnns": 4734, + "training large language models": 11563, + "natural language processing nlp": 7726, + "language processing nlp demonstrate": 5965, + "success large language models": 10916, + "large language models llm": 6046, + "experimental results public datasets": 3757, + "large language models limited": 6045, + "evaluating number benchmark test": 3533, + "number benchmark test sets": 7951, + "largescale pretrained language model": 6142, + "prior studies shown chatgpt": 8839, + "demonstrated impressive performance various": 2682, + "impressive performance various natural": 5114, + "performance various natural language": 8449, + "various natural language processing": 12081, + "language processing nlp tasks": 5966, + "natural language understanding nlu": 7742, + "language understanding nlu tasks": 5988, + "natural language inference sentiment": 7715, + "reinforcement learning human feedback": 9594, + "learning human feedback rlhf": 6216, + "leverages large language models": 6287, + "potential large language models": 8630, + "natural language processing computer": 7723, + "language processing computer vision": 5962, + "prompting large language models": 9014, + "large language models large": 6043, + "language models large language": 5845, + "models large language models": 7374, + "large language models demonstrated": 6028, + "terms automatic evaluation metrics": 11360, + "chat generative pretrained transformer": 1526, + "large language models empirical": 6033, + "language models empirical study": 5828, + "performance large language models": 8405, + "large language models based": 6022, + "potential future research directions": 8625, + "problem large language models": 8864, + "language models llms chatgpt": 5859, + "models llms chatgpt gpt4": 7395, + "grammatical error correction gec": 4726, + "finetuning large language models": 4130, + "models success large language": 7537, + "language models llms like": 5890, + "models llms like gpt3": 7434, + "systems large language models": 11062, + "large language models perform": 6091, + "ability large language models": 151, + "large language models work": 6101, + "language models work propose": 5955, + "power large language models": 8647, + "large language models chatgpt": 6023, + "generative pretrained transformer gpt4": 4615, + "proximal policy optimization ppo": 9192, + "recently large language models": 9501, + "models llms like chatgpt": 7429, + "like chatgpt demonstrated remarkable": 6324, + "chatgpt demonstrated remarkable performance": 1551, + "variety natural language processing": 12045, + "natural language processing tasks": 7733, + "artificial general intelligence agi": 815, + "large language models diffusion": 6030, + "language models diffusion models": 5825, + "generative large language models": 4599, + "pretrained large language models": 8756, + "large language models exponential": 6036, + "language models exponential growth": 5833, + "language processing nlp techniques": 5967, + "pretrained language models plms": 8753, + "finally discuss existing challenges": 4075, + "field artificial intelligence ai": 4048, + "generative pretrained transformers gpt": 4617, + "language models llms demonstrated": 5865, + "models llms demonstrated remarkable": 7403, + "code reproduce results available": 1734, + "empowering large language models": 3303, + "language models like chatgpt": 5849, + "task converts natural language": 11122, + "language models llms work": 5918, + "capacity largescale language models": 1385, + "largescale language models llms": 6137, + "deep neural networks dnns": 2605, + "using large language models": 11955, + "language models llms gpt3": 5886, + "models llms gpt3 chatgpt": 7425, + "network large language models": 7790, + "propose simple efficient approach": 9101, + "leverages large language model": 6286, + "knowledge large language models": 5687, + "approaches large language models": 773, + "knowledgeintensive tasks paper propose": 5719, + "demonstrate effectiveness proposed approach": 2655, + "language models llms exhibit": 5876, + "experiments demonstrate effectiveness method": 3774, + "instruction tuning large language": 5419, + "tuning large language models": 11696, + "language models llms gained": 5882, + "adaptation large language models": 356, + "language models llms downstream": 5869, + "models llms downstream tasks": 7407, + "results method outperforms stateoftheart": 9914, + "question answering large language": 9272, + "large language model llm": 6015, + "language model llm gained": 5794, + "extensive experiments demonstrate approach": 3900, + "models llms like gpt": 7433, + "language models llms exhibited": 5878, + "downstream tasks extensive experiments": 3082, + "large language models conduct": 6026, + "models llms exhibited remarkable": 7416, + "remarkable performance various natural": 9679, + "llms demonstrated remarkable performance": 6500, + "performance range natural language": 8426, + "natural language understanding generation": 7740, + "language understanding generation tasks": 5986, + "small language models slms": 10509, + "named entity recognition relation": 7696, + "entity recognition relation extraction": 3430, + "report large language models": 9716, + "natural language generation nlg": 7713, + "leverage large language models": 6278, + "machine translation text summarization": 6763, + "extensive experiments demonstrate proposed": 3903, + "issue large language models": 5590, + "research large language models": 9798, + "language models llms remarkable": 5902, + "recent success large language": 9480, + "questions large language models": 9296, + "avoiding potential data leakage": 997, + "language models like gpt35": 5850, + "modern large language models": 7568, + "llms like chatgpt shown": 6584, + "evaluating large language models": 3528, + "large language models chinese": 6024, + "recent advancements large language": 9457, + "advancements large language models": 464, + "performance natural language processing": 8414, + "reasoning ability large language": 9406, + "rapid advancement large language": 9335, + "advancement large language models": 455, + "language models llms led": 5889, + "era large language models": 3454, + "multimodal large language model": 7634, + "legal large language model": 6260, + "knowledge bases large language": 5657, + "bases large language models": 1083, + "language models llms shown": 5908, + "models llms shown potential": 7453, + "large language models crucial": 6027, + "extensive experiments demonstrate effectiveness": 3901, + "chinese large language models": 1630, + "provide preliminary evaluation chatgpt": 9162, + "computer vision natural language": 1984, + "vision natural language processing": 12143, + "large language models significant": 6098, + "learning large language models": 6223, + "language models llms emerged": 5872, + "language models llms recently": 5900, + "models llms recently demonstrated": 7442, + "demonstrated remarkable capabilities natural": 2687, + "remarkable capabilities natural language": 9671, + "capabilities natural language processing": 1352, + "language models llms generate": 5885, + "teaching large language models": 11323, + "models llms demonstrated exceptional": 7401, + "large language models retrieval": 6097, + "large language model large": 6013, + "language model large language": 5791, + "model large language models": 7172, + "language models llms showcased": 5906, + "models llms showcased remarkable": 7449, + "llms conduct extensive experiments": 6484, + "diverse natural language processing": 3023, + "understanding large language models": 11776, + "models llms shown impressive": 7452, + "generation large language models": 4543, + "significantly improves zeroshot performance": 10440, + "models llms exhibit impressive": 7414, + "large language models recent": 6094, + "large language models natural": 6088, + "multimodal large language models": 7637, + "large language models mllms": 6086, + "despite superior performance large": 2791, + "superior performance large language": 10979, + "large language models generate": 6040, + "augmenting large language models": 924, + "language models llms demonstrate": 5863, + "models llms demonstrate impressive": 7399, + "recent advances large language": 9461, + "advances large language models": 469, + "application large language models": 698, + "large language models field": 6039, + "empowered large language models": 3299, + "evaluation large language models": 3562, + "large language model multimodal": 6018, + "large language model mllm": 6017, + "using large language model": 11953, + "language models llms traditional": 5915, + "language models llms powerful": 5896, + "language models llms achieved": 5854, + "models llms achieved remarkable": 7390, + "achieved remarkable success nlp": 272, + "address issues propose novel": 401, + "use large language models": 11889, + "language models llms introduce": 5888, + "chaining large language models": 1455, + "stateoftheart large language models": 10713, + "connecting large language models": 2072, + "language models llms excel": 5875, + "paper propose novel framework": 8257, + "including named entity recognition": 5188, + "models llms achieved significant": 7391, + "language models llms revolutionized": 5903, + "models llms revolutionized natural": 7446, + "llms revolutionized natural language": 6640, + "revolutionized natural language processing": 9987, + "different ways data augmentation": 2915, + "capabilities large language models": 1344, + "assistant large language model": 859, + "nlp tasks especially text": 7874, + "code data models publicly": 1708, + "data models publicly available": 2438, + "utilizing large language models": 12001, + "language models llms provide": 5899, + "language models llms presents": 5898, + "models llms shown promise": 7454, + "revolutionizing natural language processing": 9992, + "various natural language tasks": 12084, + "exceptional capabilities various domains": 3636, + "including large language models": 5183, + "language models llms use": 5916, + "encoder large language model": 3326, + "tasks experimental results demonstrate": 11205, + "reasoning large language models": 9426, + "source code model parameters": 10575, + "large language model based": 6010, + "terms automatic metrics human evaluation": 11363, + "emergence large language models llms": 3254, + "based generative pretrained language model": 1037, + "natural language processing nlp demonstrate": 7727, + "success large language models llm": 10917, + "evaluating number benchmark test sets": 3534, + "demonstrated impressive performance various natural": 2683, + "impressive performance various natural language": 5115, + "performance various natural language processing": 8450, + "various natural language processing nlp": 12082, + "natural language processing nlp tasks": 7728, + "natural language understanding nlu tasks": 7743, + "reinforcement learning human feedback rlhf": 9595, + "natural language processing computer vision": 7724, + "prompting large language models large": 9015, + "large language models large language": 6044, + "language models large language models": 5846, + "large language models empirical study": 6034, + "models large language models llms": 7375, + "large language models llms chatgpt": 6050, + "language models llms chatgpt gpt4": 5860, + "models success large language models": 7538, + "success large language models llms": 10918, + "large language models llms like": 6069, + "language models llms like gpt3": 5893, + "large language models work propose": 6102, + "recently large language models llms": 9502, + "language models llms like chatgpt": 5891, + "like chatgpt demonstrated remarkable performance": 6325, + "variety natural language processing tasks": 12046, + "large language models diffusion models": 6031, + "large language models exponential growth": 6037, + "natural language processing nlp techniques": 7729, + "large language models llms demonstrated": 6052, + "language models llms demonstrated remarkable": 5868, + "training large language models llms": 11564, + "large language models llms work": 6084, + "large language models llms gpt3": 6066, + "language models llms gpt3 chatgpt": 5887, + "using large language models llms": 11957, + "instruction tuning large language models": 5420, + "large language models llms gained": 6063, + "large language models llms downstream": 6053, + "language models llms downstream tasks": 5870, + "large language model llm gained": 6016, + "language models llms like gpt": 5892, + "large language models llms exhibited": 6060, + "language models llms exhibited remarkable": 5879, + "remarkable performance various natural language": 9680, + "models llms demonstrated remarkable performance": 7404, + "natural language understanding generation tasks": 7741, + "named entity recognition relation extraction": 7697, + "generative pretrained language models plms": 4610, + "large language models llms remarkable": 6077, + "recent success large language models": 9481, + "modern large language models llms": 7569, + "models llms like chatgpt shown": 7432, + "recent advancements large language models": 9458, + "advancements large language models llms": 465, + "performance natural language processing tasks": 8415, + "reasoning ability large language models": 9407, + "ability large language models llms": 152, + "rapid advancement large language models": 9336, + "advancement large language models llms": 456, + "large language models llms led": 6068, + "multimodal large language model llm": 7635, + "knowledge bases large language models": 5658, + "bases large language models llms": 1084, + "large language models llms shown": 6080, + "language models llms shown potential": 5911, + "large language models llms exhibit": 6059, + "various natural language processing tasks": 12083, + "computer vision natural language processing": 1985, + "large language models llms emerged": 6055, + "large language models llms recently": 6076, + "language models llms recently demonstrated": 5901, + "demonstrated remarkable capabilities natural language": 2688, + "pretrained large language models llms": 8757, + "large language models llms generate": 6065, + "language models llms demonstrated exceptional": 5866, + "large language model large language": 6014, + "language model large language models": 5792, + "model large language models llms": 7173, + "large language models llms showcased": 6079, + "language models llms showcased remarkable": 5907, + "language models llms shown impressive": 5910, + "power large language models llms": 8648, + "generation large language models large": 4544, + "language models llms exhibit impressive": 5877, + "multimodal large language models mllms": 7638, + "despite superior performance large language": 2792, + "large language models llms demonstrate": 6051, + "language models llms demonstrate impressive": 5864, + "recent advances large language models": 9462, + "multimodal large language model mllm": 7636, + "large language models llms traditional": 6081, + "large language models llms powerful": 6072, + "large language models llms achieved": 6048, + "language models llms achieved remarkable": 5855, + "use large language models llms": 11890, + "large language models llms introduce": 6067, + "stateoftheart large language models llms": 10714, + "large language models llms excel": 6058, + "language models llms achieved significant": 5856, + "large language models llms revolutionized": 6078, + "language models llms revolutionized natural": 5905, + "models llms revolutionized natural language": 7447, + "llms revolutionized natural language processing": 6641, + "code data models publicly available": 1709, + "utilizing large language models llms": 12002, + "large language models llms provide": 6075, + "large language models llms presents": 6074, + "language models llms shown promise": 5912, + "performance various natural language tasks": 8451, + "large language models llms use": 6082, + "reasoning large language models llms": 9427, + "captioning": 1388, + "bertbased": 1159, + "generators": 4624, + "coco": 1698, + "identical": 5041, + "sound": 10570, + "ending": 3358, + "bbc": 1090, + "classified": 1656, + "assuming": 867, + "invalid": 5557, + "implicitly": 5095, + "clip": 1677, + "gpu": 4710, + "head": 4832, + "cv": 2375, + "se": 10168, + "competent": 1872, + "singular": 10489, + "normalization": 7902, + "philosophy": 8489, + "block": 1228, + "combine": 1780, + "imagenet": 5064, + "traffic": 11525, + "practices": 8675, + "tracking": 11512, + "5th": 84, + "mrr": 7601, + "visionandlanguage": 12145, + "huggingface": 4947, + "repository": 9721, + "object": 7966, + "scene": 10134, + "abstractive": 184, + "53": 80, + "rouge2": 10060, + "51": 77, + "rougel": 10061, + "locations": 6695, + "bidirectional": 1193, + "visionlanguage": 12146, + "frameworks": 4279, + "fidelity": 4043, + "quantization": 9257, + "formulate": 4208, + "145": 22, + "million": 7037, + "driving": 3099, + "piece": 8494, + "spatial": 10594, + "norms": 7904, + "quantized": 9259, + "standards": 10687, + "confined": 2054, + "motion": 7589, + "gpt23": 4678, + "trainingfree": 11589, + "magic": 6767, + "plugandplay": 8547, + "offtheshelf": 8021, + "involve": 5573, + "operation": 8075, + "computationally": 1976, + "speedup": 10652, + "frequency": 4285, + "recover": 9533, + "drop": 3100, + "dramatically": 3085, + "masks": 6843, + "expect": 3724, + "respect": 9831, + "dalle": 2378, + "video": 12128, + "hierarchical": 4864, + "write": 12284, + "modal": 7093, + "stronger": 10813, + "tells": 11342, + "pair": 8201, + "presenting": 8727, + "vectors": 12107, + "altered": 603, + "close": 1679, + "benefiting": 1149, + "bart": 1019, + "edit": 3124, + "languageimage": 5992, + "exhaustive": 3655, + "viewpoint": 12131, + "shifts": 10348, + "sacrifices": 10077, + "weights": 12198, + "contained": 2126, + "kullbackleibler": 5725, + "divergence": 3007, + "kld": 5646, + "id": 5037, + "lines": 6366, + "sparser": 10592, + "suitable": 10952, + "patches": 8322, + "directional": 2940, + "objects": 7981, + "proximity": 9193, + "detected": 2799, + "webscale": 12190, + "arises": 802, + "branch": 1263, + "complement": 1883, + "forming": 4203, + "tunes": 11687, + "adhere": 425, + "musical": 7690, + "producing": 8920, + "controllability": 2188, + "medical": 6894, + "dialog": 2857, + "licensing": 6307, + "deeplearning": 2614, + "understandable": 11760, + "attracting": 903, + "interface": 5508, + "competency": 1871, + "sending": 10256, + "opens": 8053, + "narrowing": 7703, + "transferring": 11601, + "location": 6694, + "prototype": 9137, + "dimension": 2932, + "uniformly": 11810, + "reject": 9597, + "academia": 189, + "vlms": 12160, + "featuring": 4017, + "accountability": 223, + "compress": 1953, + "fed": 4018, + "imperfect": 5085, + "nextgeneration": 7854, + "kinds": 5644, + "gaps": 4387, + "mean": 6877, + "strongly": 10814, + "probe": 8857, + "accuracies": 229, + "suitability": 10951, + "lowlevel": 6739, + "localization": 6689, + "abundant": 187, + "exploitation": 3830, + "map": 6826, + "simultaneous": 10480, + "robots": 10040, + "naturallanguage": 7750, + "usages": 11883, + "navigation": 7755, + "entails": 3421, + "intention": 5481, + "finer": 4102, + "engines": 3378, + "supporting": 11002, + "multiview": 7688, + "scenes": 10137, + "neglect": 7778, + "weigh": 12195, + "prototypes": 9138, + "scoring": 10160, + "crossmodel": 2321, + "entail": 3420, + "inferential": 5279, + "mere": 6925, + "patch": 8320, + "deception": 2569, + "surgery": 11010, + "exponentially": 3865, + "unidirectional": 11798, + "robotic": 10038, + "expands": 3721, + "tokenizer": 11487, + "publically": 9204, + "holistic": 4929, + "lowrank": 6741, + "adaption": 363, + "unexpected": 11793, + "harder": 4808, + "demo": 2641, + "narrow": 7701, + "dense": 2712, + "tremendous": 11661, + "indispensable": 5247, + "thousands": 11467, + "hours": 4943, + "diagnose": 2851, + "speed": 10651, + "blip2": 1227, + "rationales": 9355, + "resourceintensive": 9826, + "processor": 8914, + "refer": 9552, + "term": 11354, + "acquiring": 323, + "act": 325, + "equipping": 3446, + "exploits": 3833, + "distills": 2997, + "competing": 1873, + "easier": 3112, + "fulfilling": 4298, + "preservation": 8735, + "formation": 4198, + "spatiotemporal": 10595, + "inconsistent": 5202, + "severe": 10325, + "obviously": 8003, + "affected": 488, + "humanmachine": 5012, + "undergone": 11747, + "diagnoses": 2852, + "ad": 346, + "largest": 6147, + "minigpt4": 7048, + "unbiased": 11733, + "transparency": 11649, + "website": 12191, + "accelerated": 196, + "immense": 5072, + "treating": 11654, + "flexibly": 4168, + "managed": 6806, + "customization": 2370, + "noteworthy": 7911, + "60": 86, + "hallucinate": 4788, + "accessed": 204, + "reviewed": 9975, + "timedependent": 11481, + "triplets": 11674, + "frame": 4233, + "attribute": 904, + "decomposes": 2587, + "fuse": 4339, + "highfidelity": 4887, + "communicate": 1806, + "vivid": 12159, + "preserve": 8736, + "start": 10690, + "refined": 9562, + "repeated": 9701, + "collections": 1775, + "enriched": 3412, + "activate": 332, + "interference": 5510, + "tree": 11657, + "urgent": 11875, + "decoderonly": 2582, + "conditions": 2018, + "failing": 3979, + "sequential": 10290, + "temperature": 11343, + "opt175b": 8086, + "85": 105, + "mass": 6844, + "gain": 4359, + "134x": 18, + "paving": 8331, + "openvocabulary": 8068, + "cold": 1765, + "contents": 2138, + "robotics": 10039, + "launched": 6160, + "showed": 10367, + "finegained": 4099, + "manipulations": 6814, + "confirmed": 2056, + "prohibitively": 8952, + "excessive": 3643, + "affordable": 491, + "routing": 10066, + "holds": 4927, + "assistants": 860, + "restricts": 9867, + "websites": 12192, + "ensures": 3418, + "audio": 910, + "creative": 2304, + "exacerbates": 3606, + "adversaries": 482, + "evade": 3498, + "subtly": 10908, + "seek": 10194, + "llava": 6400, + "examination": 3612, + "live": 6385, + "layer": 6163, + "bag": 1012, + "aggregate": 502, + "questionandanswer": 9279, + "mode": 7097, + "select": 10205, + "tackles": 11088, + "parse": 8303, + "variable": 12029, + "feedforward": 4024, + "prohibitive": 8951, + "inaccessible": 5165, + "assemble": 832, + "overhead": 8188, + "obvious": 8002, + "redundancy": 9550, + "deploys": 2726, + "box": 1258, + "coordinates": 2224, + "taskaware": 11153, + "formatting": 4200, + "upsurge": 11873, + "supervising": 10992, + "astonishing": 871, + "counter": 2275, + "redundant": 9551, + "localizing": 6691, + "easy": 3114, + "ego4d": 3205, + "attempts": 887, + "reformulating": 9573, + "45": 69, + "bloomz": 1233, + "ct": 2334, + "bypassing": 1321, + "chatglm6b": 1533, + "competition": 1874, + "clouds": 1690, + "extensible": 3881, + "threefold": 11469, + "productivity": 8923, + "programs": 8940, + "regions": 9583, + "leaving": 6253, + "unaffected": 11729, + "movie": 7597, + "movies": 7598, + "captivating": 1389, + "transitions": 11631, + "fitting": 4158, + "platform": 8521, + "47": 70, + "openworld": 8071, + "wrong": 12287, + "larger": 6122, + "date": 2556, + "exceptionally": 3641, + "03": 0, + "wellestablished": 12201, + "77": 97, + "poems": 8551, + "rising": 10016, + "hotspot": 4937, + "brain": 1260, + "trace": 11509, + "delineate": 2631, + "akin": 555, + "body": 1235, + "links": 6376, + "referring": 9558, + "iv": 5606, + "absent": 180, + "plugin": 8548, + "chains": 1465, + "surfaces": 11007, + "overly": 8192, + "succinct": 10934, + "array": 807, + "methodologies": 6972, + "ignore": 5050, + "instruct": 5394, + "compliance": 1910, + "generalizes": 4432, + "adjust": 428, + "visualization": 12156, + "designer": 2767, + "beginners": 1093, + "lighting": 6317, + "hindering": 4918, + "display": 2986, + "meticulously": 7021, + "choices": 1637, + "lasting": 6150, + "multiscale": 7670, + "interested": 5504, + "landmarks": 5755, + "inherit": 5329, + "raises": 9308, + "computations": 1977, + "onthefly": 8029, + "gradientbased": 4717, + "planting": 8520, + "seed": 10193, + "prominence": 8960, + "confident": 2051, + "recipe": 9508, + "positions": 8596, + "dependency": 2716, + "abstraction": 183, + "days": 2558, + "64": 89, + "v100": 12009, + "gpus": 4712, + "clicks": 1673, + "comprehending": 1922, + "synthesizes": 11041, + "harnesses": 4818, + "underscore": 11750, + "marked": 6832, + "surge": 11008, + "predominantly": 8698, + "hypothesize": 5033, + "presence": 8712, + "quantify": 9246, + "added": 370, + "kept": 5626, + "confidential": 2052, + "multidimensional": 7606, + "outstanding": 8169, + "textrich": 11422, + "contributed": 2180, + "native": 7706, + "suffering": 10937, + "false": 3994, + "guides": 4785, + "promotes": 8982, + "practicality": 8672, + "studying": 10872, + "approximate": 780, + "95": 116, + "suggestions": 10947, + "soon": 10562, + "weaker": 12181, + "simulating": 10475, + "learner": 6180, + "assesses": 840, + "shot": 10358, + "auc": 909, + "941": 115, + "concerned": 1997, + "cooperate": 2222, + "respective": 9834, + "bootstrap": 1246, + "unlocking": 11840, + "gptassisted": 4703, + "turns": 11706, + "926": 112, + "illustrating": 5057, + "sense": 10257, + "rigorously": 10014, + "annotators": 652, + "segmenting": 10203, + "completely": 1889, + "lexicons": 6304, + "exceptions": 3642, + "brief": 1279, + "decouple": 2590, + "enrich": 3411, + "aggregation": 505, + "distinguishes": 3002, + "aggregated": 503, + "cast": 1410, + "sc": 10101, + "latency": 6152, + "mlm": 7089, + "modelbased": 7241, + "recovery": 9535, + "promoted": 8981, + "nonexistent": 7894, + "pinpoint": 8499, + "gating": 4395, + "tedious": 11341, + "11": 8, + "instructionbased": 5425, + "mixtureofexpert": 7083, + "astounding": 872, + "moe": 7582, + "constrains": 2109, + "unification": 11799, + "quantifying": 9247, + "attributing": 907, + "categorize": 1417, + "deficiencies": 2620, + "everincreasing": 3595, + "wild": 12231, + "progressive": 8949, + "soft": 10532, + "enduring": 3366, + "handles": 4804, + "stimulated": 10756, + "cohesive": 1763, + "confronted": 2062, + "graphic": 4736, + "engagement": 3369, + "overlooking": 8191, + "initialization": 5334, + "html": 4944, + "cc": 1434, + "completed": 1887, + "transparent": 11650, + "maps": 6828, + "inferior": 5280, + "supplementing": 10996, + "flow": 4169, + "extractors": 3934, + "unsolved": 11851, + "vulnerabilities": 12166, + "surrogate": 11022, + "mislead": 7061, + "22": 44, + "26": 49, + "86": 106, + "ernie": 3457, + "bot": 1249, + "defenses": 2619, + "proliferation": 8959, + "claims": 1643, + "nsfw": 7944, + "substituting": 10902, + "sections": 10189, + "assessments": 848, + "hazards": 4830, + "inadequacy": 5168, + "golden": 4657, + "499": 73, + "opinion": 8078, + "iqa": 5582, + "imprecise": 5106, + "untapped": 11861, + "potentials": 8643, + "moving": 7599, + "speaking": 10597, + "collaborative": 1768, + "suggested": 10943, + "cortex": 2256, + "datacentric": 2472, + "insightful": 5363, + "garnered": 4388, + "scarce": 10116, + "laborious": 5737, + "factchecking": 3965, + "extant": 3875, + "image captioning": 5060, + "excellent results": 3629, + "results downstream": 9898, + "new method": 7826, + "results benchmark": 9881, + "generate new": 4461, + "different words": 2916, + "model used": 7236, + "tasks natural": 11247, + "proposes new": 9125, + "words sentences": 12245, + "search optimal": 10178, + "method tackle": 6968, + "main contribution": 6771, + "propose method": 9078, + "analysis visual": 629, + "vision language": 12138, + "text image": 11399, + "implicitly model": 5096, + "focus chinese": 4174, + "model called": 7118, + "contrastive learning": 2177, + "adopts simple": 439, + "building large": 1313, + "negative samples": 7776, + "gpu resources": 4711, + "dataset called": 2481, + "transformer transformer": 11615, + "transformer models": 11614, + "vision cv": 12137, + "works focus": 12274, + "transformer model": 11613, + "rich information": 10004, + "methods study": 7012, + "improves stateoftheart": 5152, + "benchmarks including": 1139, + "based pretrained": 1054, + "language transformers": 5983, + "boosts performance": 1245, + "language vision": 5990, + "natural languagebased": 7745, + "language description": 5761, + "new challenge": 7811, + "jointly train": 5611, + "train stateoftheart": 11530, + "vision models": 12140, + "design training": 2757, + "training strategy": 11586, + "experiments verify": 3811, + "verify effectiveness": 12114, + "method achieved": 6935, + "using language": 11950, + "systems code": 11057, + "learning pretrained": 6235, + "sequencetosequence model": 10288, + "answer questions": 661, + "models need": 7474, + "model t5": 7226, + "task based": 11117, + "including masked": 5185, + "models multimodal": 7468, + "abstractive summarization": 185, + "extract essential": 3921, + "essential information": 3477, + "data internet": 2426, + "recently largescale": 9503, + "largescale generative": 6131, + "shown effective": 10374, + "research gap": 9794, + "information paper": 5308, + "present simple": 8722, + "effective method": 3142, + "task using": 11149, + "original text": 8120, + "results best": 9883, + "best model": 1165, + "surpasses prior": 11015, + "conduct thorough": 2036, + "thorough ablation": 11455, + "effectiveness various": 3178, + "fusion methods": 4343, + "conventional methods": 2197, + "generated samples": 4487, + "visionlanguage pretraining": 12147, + "greatly improved": 4754, + "tasks largescale": 11240, + "largescale pretraining": 6144, + "texttoimage synthesis": 11431, + "pretraining framework": 8783, + "quantization models": 9258, + "generation text": 4584, + "texttoimage generation": 11428, + "generation process": 4565, + "endtoend training": 3365, + "largescale dataset": 6129, + "million chinese": 7038, + "aims generate": 547, + "number training": 7956, + "significantly increase": 10441, + "introduce lightweight": 5542, + "number trainable": 7954, + "design novel": 2752, + "decoder gpt2": 2581, + "training framework": 11556, + "results conducted": 9889, + "benchmarks reveal": 1146, + "models contain": 7288, + "compared stateoftheart": 1853, + "highly challenging": 4901, + "tackle challenges": 11083, + "challenges propose": 1489, + "transformer gpt": 11611, + "unsupervised manner": 11857, + "generation remains": 4573, + "open question": 8034, + "semantically related": 10249, + "does involve": 3046, + "task zeroshot": 11151, + "stateoftheart method": 10716, + "image text": 5063, + "great breakthroughs": 4746, + "performance drop": 8382, + "solve problem": 10552, + "information using": 5320, + "training phase": 11578, + "respect various": 9832, + "reasonable results": 9398, + "adversarial loss": 479, + "challenges potential": 1487, + "computation cost": 1964, + "available models": 976, + "learners recent": 6182, + "tasks making": 11244, + "multimodal foundation": 7628, + "new unified": 7846, + "modeling framework": 7245, + "tasks strong": 11281, + "multimodal understanding": 7644, + "tasks demonstrates": 11187, + "code pretrained": 1726, + "training work": 11588, + "using automatic": 11937, + "generation recently": 4572, + "tasks number": 11249, + "number studies": 7952, + "model text": 7229, + "image processing": 5061, + "address problems": 411, + "performance proposed": 8423, + "model using": 7237, + "results proposed": 9924, + "multilingual text": 7620, + "stateoftheart performances": 10723, + "tasks suggesting": 11285, + "models code": 7278, + "largescale datasets": 6130, + "models recently": 7511, + "recently gained": 9495, + "gained significant": 4365, + "multimodal models": 7641, + "models intuitive": 7368, + "leverage pretrained": 6281, + "semantically consistent": 10248, + "text descriptions": 11390, + "bert gpt2": 1154, + "gpt2 bart": 4672, + "processing task": 8911, + "terms bleu": 11364, + "model better": 7114, + "better understand": 1183, + "contrastive languageimage": 2176, + "models zeroshot": 7562, + "ability pretrained": 161, + "specifically use": 10640, + "kullbackleibler divergence": 5726, + "divergence kld": 3008, + "tasks achieves": 11160, + "achieves higher": 289, + "indistribution id": 5249, + "achieves superior": 306, + "superior robustness": 10981, + "surpasses previous": 11013, + "models nlp": 7476, + "performance textonly": 8438, + "selfsupervised training": 10225, + "retrieval generation": 9945, + "input text": 5355, + "approach generally": 741, + "generally applied": 4435, + "using retrieved": 11969, + "results approach": 9878, + "performance bert": 8366, + "bart t5": 1020, + "outperform competitive": 8135, + "competitive baselines": 1876, + "baselines tasks": 1077, + "tasks codes": 11177, + "codes data": 1740, + "data publicly": 2448, + "object detection": 7969, + "diverse knowledge": 3019, + "object categories": 7968, + "encoderdecoder architecture": 3329, + "achieves best": 279, + "text information": 11401, + "motivated propose": 7594, + "directly generate": 2948, + "natural question": 7747, + "framework leverages": 4267, + "data ii": 2420, + "studies demonstrate": 10838, + "model specifically": 7221, + "techniques including": 11336, + "sequence length": 10282, + "generation time": 4586, + "evaluation demonstrates": 3553, + "linear complexity": 6365, + "provides novel": 9178, + "model code": 7124, + "providing valuable": 9184, + "medical knowledge": 6901, + "medical licensing": 6902, + "processing images": 8903, + "making challenging": 6797, + "significant success": 10421, + "integrating llms": 5461, + "llms enhance": 6518, + "llms medical": 6591, + "medical domain": 6899, + "capability existing": 1368, + "models create": 7290, + "language interface": 5771, + "capabilities domains": 1340, + "showing great": 10369, + "inputs outputs": 5359, + "end build": 3347, + "model information": 7164, + "chatgpt opens": 1584, + "achieved great": 264, + "narrowing gap": 7704, + "current visual": 2363, + "methods designed": 6983, + "models lack": 7370, + "forms pretraining": 4206, + "pretraining downstream": 8776, + "tasks explore": 11207, + "learning generative": 6212, + "pretrained masked": 8760, + "model achieve": 7101, + "achieves excellent": 288, + "human instructions": 4971, + "drawn widespread": 3094, + "models vlms": 7559, + "construct new": 2114, + "method propose": 6961, + "variational autoencoder": 12035, + "comprehensive analyses": 1925, + "results terms": 9933, + "image quality": 5062, + "findings contribute": 4086, + "generating natural": 4503, + "language descriptions": 5762, + "guidance given": 4777, + "control signals": 2186, + "novel promptbased": 7931, + "prompts different": 9032, + "different kinds": 2884, + "inspired recent": 5377, + "denoising autoencoders": 2711, + "intermediate layers": 5513, + "suggesting potential": 10945, + "models mainstream": 7464, + "segmentation object": 10202, + "object localization": 7971, + "direct use": 2935, + "performance unsupervised": 8441, + "unsupervised settings": 11860, + "tackle issues": 11085, + "architectures extensive": 794, + "based large": 1043, + "technology enables": 11340, + "including semantic": 5193, + "semantic text": 10244, + "understand natural": 11758, + "provide guidance": 9155, + "based generated": 1033, + "language navigation": 5958, + "opens new": 8054, + "significant attention": 10404, + "remarkable progress": 9684, + "information present": 5310, + "generation leverages": 4547, + "analysis capabilities": 618, + "llms gpt": 6550, + "design prompts": 2755, + "information textual": 5317, + "classification problem": 1654, + "effectively generates": 3153, + "offering new": 8011, + "new perspective": 7830, + "methods commonly": 6978, + "scene representation": 10136, + "thorough experiments": 11457, + "ones different": 8024, + "task settings": 11145, + "visual grounding": 12149, + "knowledge text": 5707, + "linguistic knowledge": 6371, + "knowledge different": 5661, + "text features": 11393, + "performance benchmarks": 8365, + "learning systems": 6244, + "attention paid": 892, + "examples different": 3619, + "adversarial samples": 481, + "address gap": 391, + "patch generation": 8321, + "reasoning visual": 9440, + "visual question": 12152, + "answering image": 667, + "tasks require": 11273, + "processing models": 8904, + "advancements gpt": 461, + "endtoend trainable": 3364, + "generate coherent": 4441, + "mimicking human": 7045, + "human thought": 4989, + "understanding question": 11780, + "publically available": 9205, + "analysis furthermore": 622, + "multimodal abilities": 7623, + "foundation llm": 4221, + "llm visual": 6430, + "frozen llm": 4292, + "lowrank adaption": 6743, + "adaption lora": 364, + "multiturn conversation": 7683, + "conversation ability": 2201, + "makes possible": 6793, + "instructiontuned models": 5443, + "models evaluation": 7318, + "online demo": 8026, + "demo available": 2642, + "practical value": 8671, + "models struggle": 7534, + "perform poorly": 8355, + "network based": 7786, + "detection performance": 2808, + "performance demonstrating": 8376, + "llms developing": 6504, + "reduce cost": 9543, + "key factors": 5631, + "simple highly": 10464, + "significantly speed": 10451, + "data compared": 2396, + "intriguing findings": 5532, + "rationales provided": 9357, + "network designed": 7787, + "various human": 12069, + "llms contains": 6488, + "information evaluate": 5293, + "benchmarks demonstrating": 1137, + "inspired success": 5382, + "performance work": 8457, + "representation facilitates": 9727, + "summarization method": 10959, + "experiments public": 3793, + "baselines furthermore": 1073, + "small datasets": 10506, + "datasets limited": 2536, + "emerged popular": 3242, + "produce highquality": 8916, + "input prompts": 5352, + "annotate new": 641, + "new dataset": 7815, + "transfer knowledge": 11595, + "knowledge distillation": 5662, + "experiments integrating": 3783, + "llms popular": 6611, + "popular pretrained": 8578, + "models understand": 7551, + "concise natural": 2003, + "language image": 5767, + "better user": 1185, + "style transfer": 10874, + "exceptional ability": 3633, + "computational resources": 1975, + "resources training": 9830, + "directly applying": 2946, + "remains difficult": 9653, + "challenging paper": 1503, + "utilizes generative": 11993, + "employ sampling": 3285, + "previous solutions": 8813, + "understanding systems": 11784, + "broad spectrum": 1291, + "hallucination large": 4792, + "models inspired": 7365, + "abilities large": 124, + "llms improving": 6561, + "performance complex": 8375, + "complex multimodal": 1898, + "tend generate": 11349, + "systematic study": 11050, + "conduct evaluation": 2026, + "suffer severe": 10936, + "humanmachine interaction": 5013, + "model medical": 7180, + "pretrained vision": 8771, + "largescale medical": 6138, + "questionanswering dataset": 9285, + "best models": 1166, + "struggle solve": 10829, + "languageimage pretraining": 5993, + "achieve goal": 248, + "pretrained image": 8744, + "model achieves": 7103, + "sota performance": 10568, + "performance zeroshot": 8458, + "evaluation approach": 3541, + "methods generating": 6991, + "highly correlated": 4902, + "compared human": 1847, + "evaluation models": 3569, + "validate effectiveness": 12012, + "stateoftheart sota": 10731, + "previous evaluation": 8808, + "generation prompts": 4566, + "project website": 8957, + "immense potential": 5073, + "range applications": 9317, + "applications field": 705, + "framework provides": 4274, + "tasks language": 11234, + "experiments proposed": 3790, + "model set": 7216, + "new baseline": 7808, + "benchmark large": 1121, + "samples evaluating": 10090, + "evaluating performance": 3535, + "hallucination generate": 4791, + "samples propose": 10091, + "suggest chatgpt": 10941, + "existing llms": 3697, + "great challenges": 4748, + "experiments prove": 3791, + "models empirically": 7310, + "pretraining methods": 8789, + "making large": 6800, + "gap narrowed": 4380, + "instructiontuning dataset": 5446, + "obtain intriguing": 7995, + "obtain new": 7996, + "human language": 4978, + "incorporate knowledge": 5212, + "including chatgpt": 5176, + "results highlight": 9907, + "method code": 6943, + "models pretrained": 7495, + "generalization capabilities": 4427, + "method zeroshot": 6971, + "different perspectives": 2896, + "significantly boosts": 10427, + "chainofthought method": 1462, + "demonstrated effectiveness": 2672, + "models shown": 7521, + "problem data": 8860, + "aigc technology": 531, + "core idea": 2228, + "diverse models": 3020, + "achieve controllable": 246, + "make attempt": 6788, + "finally present": 4077, + "codes available": 1739, + "coherent text": 1762, + "ai assistant": 512, + "specifically start": 10639, + "factual errors": 3975, + "paper make": 8241, + "supervised manner": 10989, + "stage propose": 10677, + "instruction prompts": 5413, + "prompts activate": 9029, + "finetuned large": 4114, + "training model": 11569, + "model develop": 7135, + "achieve promising": 259, + "enhance reasoning": 3394, + "shown excellent": 10376, + "excellent performance": 3628, + "contrast large": 2168, + "llms emerge": 6512, + "model zeroshot": 7240, + "prompt llm": 8995, + "llm inference": 6418, + "final result": 4068, + "urgent need": 11876, + "taskspecific lack": 11310, + "lack comprehensive": 5741, + "gpt demonstrated": 4667, + "capabilities pretrained": 1355, + "token sequence": 11486, + "unified framework": 11802, + "evaluate efficacy": 3507, + "datasets experimental": 2531, + "object location": 7972, + "vision tasks": 12144, + "tasks example": 11200, + "work aims": 12248, + "tasks visual": 11300, + "interaction world": 5492, + "random guessing": 9312, + "achieve humanlevel": 251, + "achieving performance": 315, + "performance gain": 8387, + "dataset available": 2478, + "development large": 2839, + "models enabled": 7312, + "paving way": 8332, + "novel techniques": 7936, + "intelligence paper": 5475, + "unlike conventional": 11832, + "specific object": 10616, + "object names": 7973, + "openvocabulary object": 8069, + "object detectors": 7970, + "detectors perform": 2813, + "perform reasoning": 8356, + "reasoning context": 9418, + "users instructions": 11928, + "object based": 7967, + "autonomous driving": 961, + "provide inspiration": 9157, + "detection systems": 2809, + "latent space": 6154, + "methods limited": 6999, + "zeroshot reasoning": 12321, + "perform complex": 8352, + "opening new": 8052, + "approach outperforms": 754, + "outperforms previous": 8157, + "recently growing": 9498, + "capability large": 1369, + "prohibitively expensive": 8953, + "multimodal instructions": 7631, + "llm called": 6403, + "science question": 10148, + "demonstrate competitive": 2649, + "training efficiency": 11550, + "multimodal llms": 7640, + "llms integration": 6570, + "holds great": 4928, + "medical advice": 6895, + "diverse domains": 3015, + "provide reliable": 9165, + "reliable medical": 9634, + "advice additionally": 484, + "generation performance": 4560, + "model leverages": 7177, + "recent large": 9465, + "multimodal inputs": 7630, + "brings emergent": 1285, + "newly proposed": 7850, + "tuning dataset": 11690, + "covers wide": 2289, + "tasks text": 11292, + "data different": 2402, + "data image": 2421, + "robustness large": 10047, + "unprecedented performance": 11843, + "response generation": 9845, + "safety concerns": 10081, + "high success": 4876, + "scene based": 10135, + "based text": 1060, + "text use": 11418, + "model synthesize": 7225, + "conditioned input": 2017, + "way finally": 12175, + "tasks using": 11298, + "evaluation demonstrate": 3552, + "utilization large": 11985, + "limited number": 6355, + "framework tailored": 4277, + "specifically leverage": 10634, + "fewshot prompt": 4036, + "learning based": 6194, + "codes publicly": 1746, + "publicly accessible": 9209, + "given texts": 4643, + "single perspective": 10486, + "framework employs": 4247, + "various perspectives": 12088, + "sentence multiple": 10263, + "framework effectively": 4246, + "achieving stateoftheart": 317, + "popular datasets": 8572, + "temporal information": 11347, + "techniques improve": 11335, + "use pretrained": 11894, + "llms augment": 6461, + "alleviate problem": 588, + "semantic consistency": 10231, + "model use": 7234, + "use tools": 11895, + "gpt4 shown": 4698, + "shown great": 10378, + "models typically": 7549, + "data address": 2382, + "llama opt": 6391, + "instructionfollowing dataset": 5429, + "prompting advanced": 9007, + "lowrank adaptation": 6742, + "adaptation lora": 358, + "optimization approach": 8093, + "llms solve": 6658, + "enables zeroshot": 3313, + "tools code": 11498, + "realistic images": 9375, + "methods fail": 6987, + "score measuring": 10158, + "given prompt": 4637, + "measuring likelihood": 6888, + "reward functions": 9996, + "guide model": 4782, + "texttoimage models": 11429, + "benchmark proposed": 1126, + "semantic similarity": 10242, + "input prompt": 5351, + "performance improved": 8397, + "require additional": 9754, + "memory overhead": 6918, + "tasks inspired": 11226, + "model inference": 7163, + "learning approach": 6192, + "module obtain": 7579, + "search algorithm": 10175, + "plms achieve": 8544, + "representative plms": 9738, + "plms bert": 8545, + "instruction prompt": 5412, + "introduce extra": 5540, + "images text": 5067, + "recently shown": 9506, + "promising potential": 8972, + "generated answers": 4473, + "requirements propose": 9764, + "analyses demonstrate": 616, + "learning community": 6201, + "achieve new": 254, + "variety benchmarks": 12041, + "ai model": 517, + "model conduct": 7126, + "high memory": 4873, + "memory computational": 6913, + "large model": 6108, + "visual perception": 12150, + "propose enhance": 9066, + "taking advantage": 11103, + "new learning": 7824, + "knowledge extracted": 5671, + "models utilized": 7557, + "descriptions pretrained": 2742, + "pretrained encoder": 8743, + "representations learned": 9732, + "learn better": 6177, + "higher accuracy": 4880, + "segment model": 10199, + "systems like": 11063, + "model sam": 7211, + "model image": 7160, + "ability downstream": 140, + "detection paper": 2807, + "models presents": 7494, + "challenge propose": 1473, + "embedding space": 3232, + "ai assistants": 513, + "precise information": 8680, + "current datasets": 2349, + "ego4d dataset": 3206, + "models especially": 7317, + "understanding generating": 11771, + "promote development": 8980, + "wellknown chinese": 12203, + "diversity quality": 3032, + "chinese benchmarks": 1622, + "conduct indepth": 2033, + "research develop": 9782, + "develop better": 2823, + "achieves new": 290, + "stateoftheart result": 10725, + "instruction understanding": 5423, + "representations textual": 9735, + "challenging address": 1494, + "utilizes large": 11995, + "synthetic text": 11046, + "effectively mitigates": 3159, + "effectiveness versatility": 3179, + "bypassing need": 1322, + "generalpurpose foundation": 4437, + "coherent accurate": 1761, + "specific focus": 10611, + "approach introduce": 747, + "model model": 7183, + "model utilizes": 7238, + "components model": 1915, + "crucial factors": 2330, + "parameterefficient training": 8288, + "prediction task": 8692, + "framework benchmark": 4241, + "achieving artificial": 309, + "point clouds": 8554, + "point cloud": 8553, + "experiments validate": 3808, + "provide primary": 9163, + "observations analysis": 7986, + "codes datasets": 1744, + "text instructions": 11403, + "perform tasks": 8358, + "productivity paper": 8924, + "highlevel textual": 4889, + "dataset constructed": 2490, + "instructions generated": 5434, + "model chatgpt": 7122, + "specific regions": 10618, + "single forward": 10483, + "forward pass": 4213, + "instructions despite": 5432, + "limited data": 6350, + "model enhanced": 7138, + "ability recently": 164, + "advanced large": 446, + "straightforward effective": 10771, + "data despite": 2401, + "widely explored": 12219, + "model capable": 7120, + "language general": 5763, + "framework achieve": 4236, + "ability specifically": 165, + "model designed": 7134, + "unified multilingual": 11803, + "data including": 2423, + "conversations humans": 2207, + "effective multilingual": 3144, + "natural languages": 7746, + "fully automated": 4302, + "using simple": 11972, + "text inputs": 11402, + "surpassing existing": 11017, + "leverage chatgpt": 6273, + "model new": 7187, + "seamlessly fitting": 10172, + "textual information": 11438, + "module seamlessly": 7580, + "dataset terms": 2509, + "complex realworld": 1900, + "dominant role": 3070, + "available large": 975, + "large multimodal": 6111, + "models building": 7270, + "embodied artificial": 3236, + "current evaluation": 2350, + "evaluation metric": 3567, + "wrong answers": 12288, + "evaluation framework": 3556, + "light developing": 6315, + "llms key": 6572, + "key idea": 5632, + "capacity llms": 1386, + "fully exploited": 4306, + "limited domain": 6351, + "13b parameters": 20, + "success general": 10912, + "general domains": 4404, + "scenarios limited": 10131, + "diagnosis relies": 2854, + "paper study": 8268, + "realworld medical": 9390, + "medical dialogue": 6898, + "model complete": 7125, + "performs exceptionally": 8470, + "dataset code": 2483, + "entity linking": 3426, + "mainly focus": 6773, + "require finetuning": 9755, + "adapt llms": 349, + "offtheshelf language": 8022, + "llm perform": 6422, + "emergent abilities": 3256, + "evaluation paper": 3571, + "avoid data": 988, + "manually designed": 6825, + "existing mllms": 3702, + "directions subsequent": 2944, + "llms brain": 6469, + "summarize recent": 10965, + "recent progress": 9471, + "applications including": 706, + "akin human": 556, + "data largescale": 2430, + "model handle": 7159, + "handle multiple": 4802, + "specifically employ": 10628, + "vector quantization": 12106, + "tokens building": 11490, + "specific language": 10614, + "questionandanswer tasks": 9280, + "performances multiple": 8460, + "motion prediction": 7590, + "prediction motion": 8691, + "model addition": 7104, + "addition existing": 373, + "instructions performing": 5440, + "referring expression": 9559, + "generation work": 4589, + "step artificial": 10745, + "ability dialogue": 138, + "performance furthermore": 8386, + "chains thoughts": 1467, + "model dataset": 7130, + "finetuning multimodal": 4137, + "enhances performance": 3403, + "transforms raw": 11629, + "understanding response": 11783, + "document understanding": 3043, + "models tend": 7543, + "understanding evaluation": 11770, + "models capabilities": 7271, + "propose instruction": 9073, + "new conversational": 7814, + "model supports": 7223, + "allows users": 598, + "process obtain": 8892, + "result shows": 9871, + "create better": 2297, + "broader range": 1293, + "perception reasoning": 8347, + "offer comprehensive": 8007, + "comprehensive evaluations": 1936, + "evaluations models": 3588, + "incorporating human": 5216, + "evaluation pipeline": 3572, + "similar benchmarks": 10454, + "variety evaluation": 12042, + "robust evaluation": 10042, + "evaluating various": 3537, + "better evaluating": 1177, + "generation paper": 4558, + "develop scalable": 2826, + "scalable approach": 10104, + "dataset large": 2498, + "furthermore introduce": 4334, + "model demonstrates": 7133, + "generation research": 4576, + "medical data": 6897, + "remains limited": 9659, + "limited paper": 6356, + "dialogue model": 2863, + "dialogue data": 2860, + "exhibits excellent": 3668, + "incorporating visual": 5219, + "models make": 7465, + "scale language": 10107, + "gradientbased methods": 4718, + "methods various": 7019, + "tasks fewshot": 11211, + "fewshot settings": 4040, + "settings furthermore": 10318, + "learning different": 6205, + "emergence incontext": 3249, + "model present": 7200, + "emergent ability": 3257, + "compared blip2": 1842, + "generation compared": 4524, + "textual representations": 11440, + "semantics consistent": 10253, + "able perform": 178, + "pretraining instruction": 8785, + "study emphasizes": 10851, + "instructions leading": 5438, + "endtoend multimodal": 3363, + "provides flexible": 9175, + "based existing": 1030, + "furthermore design": 4329, + "capabilities demonstrated": 1339, + "current methodologies": 2355, + "datasets training": 2553, + "datasets exhibit": 2530, + "generative capabilities": 4594, + "mitigate limitations": 7071, + "novel data": 7918, + "harnesses power": 4823, + "conducted various": 2047, + "datasets using": 2554, + "using opensource": 11962, + "surge generative": 11009, + "current benchmarks": 2347, + "novel llmbased": 7925, + "dataset task": 2508, + "direction release": 2938, + "gpt4 significantly": 4699, + "models leading": 7377, + "english data": 3381, + "data collected": 2394, + "model demonstrated": 7132, + "significant advantages": 10403, + "generation question": 4569, + "gap present": 4384, + "generating questionanswer": 4506, + "questionanswer pairs": 9282, + "dataset designed": 2494, + "answering openended": 671, + "outstanding performance": 8170, + "generation various": 4588, + "metrics outperforming": 7032, + "current stateoftheart": 2362, + "sota models": 10567, + "text detection": 11391, + "rich world": 10009, + "tasks context": 11183, + "explored work": 3856, + "performance individual": 8400, + "recently significant": 9507, + "lowresource nature": 6747, + "effective training": 3148, + "strong multilingual": 10810, + "build large": 1303, + "achieve stateoftheart": 261, + "stateoftheart opensource": 10720, + "opensource performance": 8064, + "performance chinese": 8370, + "model weights": 7239, + "instructiontuning data": 5445, + "remains challenge": 9650, + "current leading": 2351, + "generate data": 4444, + "tasks worth": 11304, + "false information": 3995, + "framework enables": 4248, + "data ensure": 2406, + "generation quality": 4568, + "diverse highquality": 3016, + "success existing": 10911, + "existing visual": 3714, + "tuning methods": 11699, + "qualitative analysis": 9233, + "data released": 2452, + "evaluation based": 3542, + "low cost": 6729, + "privacy preservation": 8846, + "furthermore analyze": 4325, + "helpful suggestions": 4853, + "despite strong": 2786, + "strong abilities": 10804, + "common objects": 1795, + "design prompt": 2754, + "multiturn dialogues": 7687, + "impressive fewshot": 5110, + "task previous": 11141, + "methods suffer": 7013, + "insufficient knowledge": 5453, + "model novel": 7188, + "respective strengths": 9835, + "uses llm": 11933, + "final answer": 4066, + "results datasets": 9890, + "datasets prove": 2544, + "models exhibit": 7320, + "models extend": 7328, + "involving multiple": 5581, + "training introduce": 11560, + "furthermore construct": 4326, + "dialogue turns": 2867, + "reasoning task": 9438, + "common sense": 1797, + "llm effectively": 6406, + "dataset comprising": 2487, + "evaluation traditional": 3581, + "human annotators": 4951, + "semantic segmentation": 10241, + "novel object": 7928, + "inference time": 5276, + "practical scenarios": 8670, + "issues work": 5600, + "proposes novel": 9126, + "inspired human": 5375, + "human cognition": 4958, + "class names": 1647, + "strategies designed": 10777, + "target object": 11106, + "datasets attribute": 2515, + "generation instruction": 4537, + "finetuning techniques": 4151, + "exhibits superior": 3673, + "task address": 11112, + "accomplish task": 213, + "fully exploit": 4305, + "knowledge generate": 5672, + "methods consistently": 6979, + "consistently significantly": 2098, + "large ai": 6001, + "model empowered": 7137, + "semantic ambiguity": 10228, + "potential solutions": 8636, + "framework present": 4272, + "effectively addresses": 3150, + "finally apply": 4071, + "generative adversarial": 4592, + "state information": 10696, + "approach effectively": 737, + "mitigates impact": 7075, + "demonstrate superior": 2666, + "contrastive instruction": 2175, + "method better": 6941, + "better instruction": 1179, + "tuning method": 11698, + "tuning extensive": 11692, + "gating mechanism": 4396, + "exhibit superior": 3660, + "quality code": 9238, + "training samples": 11580, + "method improve": 6953, + "improve prompt": 5134, + "incorporating pretrained": 5218, + "model context": 7128, + "generated llms": 4482, + "llms underexplored": 6672, + "introduce pretrained": 5547, + "baseline code": 1065, + "manipulation tasks": 6813, + "tasks models": 11246, + "complexity diversity": 1909, + "mixtureofexpert moe": 7084, + "generate large": 4453, + "dataset using": 2511, + "form specifically": 4195, + "llms suffer": 6666, + "llms previous": 6617, + "finetuning process": 4142, + "process llms": 8888, + "task essential": 11125, + "task visual": 11150, + "datasets obtain": 2539, + "dataset method": 2501, + "revolutionized field": 9983, + "larger language": 6123, + "encoder decoder": 3323, + "models release": 7512, + "release dataset": 9621, + "challenges paper": 1486, + "entities target": 3424, + "key insight": 5634, + "harnessing capabilities": 4825, + "framework framework": 4254, + "plays significant": 8536, + "significant role": 10418, + "methods primarily": 7003, + "optimization task": 8096, + "generation code": 4522, + "enhance semantic": 3397, + "code code": 1703, + "completed code": 1888, + "highly interpretable": 4905, + "performance 50": 8361, + "improvements multiple": 5146, + "learning finetune": 6209, + "visual programming": 12151, + "training performance": 11577, + "employing finetuning": 3288, + "significant performance": 10416, + "overall task": 8174, + "task performance": 11138, + "distill knowledge": 2992, + "extensive comprehensive": 3886, + "experimental evaluations": 3737, + "achieve substantial": 262, + "substantial performance": 10896, + "performance improvement": 8398, + "methods large": 6997, + "large margins": 6107, + "provide valuable": 9168, + "process method": 8890, + "information loss": 5306, + "capable generating": 1377, + "experiments highlight": 3781, + "text modalities": 11405, + "security risks": 10192, + "work study": 12268, + "design corresponding": 2746, + "models dalle": 7293, + "generate highly": 4448, + "concerns regarding": 2001, + "nsfw content": 7945, + "subjective objective": 10882, + "specialized models": 10603, + "systematically evaluate": 11053, + "evaluate potential": 3512, + "attributes measure": 906, + "specifically design": 10625, + "evaluation abilities": 3539, + "pipeline harnesses": 8504, + "harnesses large": 4819, + "information introduce": 5300, + "model gpt": 7156, + "language semantics": 5975, + "information code": 5289, + "building ai": 1309, + "existing detectors": 3684, + "weak generalization": 12180, + "llms garnered": 6541, + "garnered widespread": 4391, + "applications various": 713, + "content generated": 2136, + "method automatically": 6938, + "automatically constructing": 951, + "stateoftheart results benchmark": 10727, + "results benchmark datasets": 9882, + "generative pretraining transformer": 4620, + "autoregressive language model": 965, + "tasks natural language": 11248, + "paper proposes new": 8260, + "paper propose method": 8254, + "contrastive learning framework": 2178, + "computer vision cv": 1982, + "significantly improves stateoftheart": 10438, + "based pretrained language": 1055, + "transformerbased language model": 11618, + "experiments verify effectiveness": 3812, + "text generation tasks": 11397, + "information paper present": 5309, + "present simple effective": 8723, + "simple effective method": 10460, + "conduct thorough ablation": 2037, + "thorough ablation studies": 11456, + "generation text generation": 4585, + "task aims generate": 11114, + "number training data": 7957, + "number trainable parameters": 7955, + "challenges propose novel": 1490, + "pretrained transformer gpt": 8767, + "generation remains open": 4574, + "remains open question": 9661, + "convolutional neural networks": 2221, + "experiments demonstrate method": 3775, + "multimodal foundation model": 7629, + "results demonstrate potential": 9895, + "code pretrained models": 1727, + "using automatic human": 11938, + "language model text": 5804, + "experimental results proposed": 3754, + "models code available": 7279, + "recently gained significant": 9496, + "language processing task": 5970, + "solve problem propose": 10553, + "kullbackleibler divergence kld": 5727, + "models nlp tasks": 7477, + "shown impressive performance": 10381, + "approach generally applied": 742, + "outperform competitive baselines": 8136, + "codes data publicly": 1742, + "data publicly available": 2449, + "results demonstrate proposed": 9896, + "achieves best results": 280, + "propose novel approach": 9085, + "ablation studies demonstrate": 171, + "studies demonstrate effectiveness": 10839, + "demonstrate effectiveness approach": 2652, + "model specifically designed": 7222, + "llms medical domain": 6592, + "medical domain knowledge": 6900, + "achieved great success": 265, + "models natural language": 7472, + "learning generative pretrained": 6213, + "drawn widespread attention": 3095, + "language models vlms": 5953, + "experimental results terms": 3759, + "generating natural language": 4504, + "natural language descriptions": 7711, + "end propose novel": 3353, + "propose novel promptbased": 9093, + "architectures extensive experiments": 795, + "extensive experiments ablation": 3894, + "based large language": 1044, + "including semantic text": 5194, + "understand natural language": 11759, + "natural language navigation": 7721, + "gained significant attention": 4366, + "achieved remarkable progress": 270, + "work propose new": 12261, + "conduct thorough experiments": 2039, + "achieves superior performance": 307, + "examples different tasks": 3620, + "address gap propose": 392, + "gap propose novel": 4386, + "visual question answering": 12153, + "question answering image": 9269, + "answering image captioning": 668, + "surpasses previous methods": 11014, + "language processing models": 5963, + "llms demonstrated impressive": 6498, + "lowrank adaption lora": 6744, + "results model outperforms": 9916, + "multiturn conversation ability": 7684, + "online demo available": 8027, + "extensive experiments benchmark": 3896, + "performance demonstrating effectiveness": 8377, + "simple highly effective": 10465, + "work propose novel": 12262, + "extensive experiments public": 3905, + "limited training data": 6360, + "semantic understanding reasoning": 10246, + "concise natural language": 2004, + "better user experience": 1186, + "utilizes generative pretrained": 11994, + "foundation models large": 4225, + "abilities large language": 125, + "experiment results demonstrate": 3735, + "model medical domain": 7181, + "language models design": 5822, + "achieves sota performance": 300, + "language models paper": 5927, + "models paper introduces": 7483, + "compared human evaluation": 1848, + "llms work present": 6680, + "vision language models": 12139, + "benchmark large language": 1122, + "making large language": 6801, + "language models pretrained": 5934, + "propose novel method": 9091, + "significantly boosts performance": 10428, + "models shown promising": 7522, + "address limitations propose": 406, + "natural language description": 7710, + "paper make attempt": 8242, + "experimental results multiple": 3751, + "enhance reasoning ability": 3395, + "shown excellent performance": 10377, + "contrast large language": 2169, + "models llms emerge": 7408, + "language model zeroshot": 5805, + "novel framework called": 7922, + "llms natural language": 6596, + "datasets experimental results": 2532, + "reasoning capabilities llms": 9413, + "development large language": 2840, + "paper introduce new": 8235, + "openvocabulary object detectors": 8070, + "zeroshot reasoning ability": 12322, + "approach outperforms previous": 755, + "language models recently": 5940, + "capability large language": 1370, + "science question answering": 10149, + "demonstrate competitive performance": 2650, + "tackle challenges introduce": 11084, + "medical advice additionally": 6896, + "source code available": 10573, + "recent large language": 9466, + "instruction tuning dataset": 5416, + "covers wide range": 2290, + "codes data models": 1741, + "data image text": 2422, + "human evaluation demonstrate": 4963, + "utilization large language": 11986, + "models demonstrated remarkable": 7298, + "achieving stateoftheart performance": 318, + "address challenges propose": 390, + "extensive experiments method": 3904, + "model use tools": 7235, + "chatgpt gpt4 shown": 1570, + "shown great potential": 10379, + "language models significantly": 5946, + "models achieved remarkable": 7256, + "representative plms bert": 9739, + "multimodal understanding capability": 7645, + "achieve new stateoftheart": 255, + "consistently improves performance": 2096, + "segment model sam": 10200, + "inspired recent success": 5378, + "language models especially": 5831, + "achieves new stateoftheart": 291, + "new stateoftheart result": 7840, + "challenging address challenges": 1495, + "utilizes large language": 11996, + "llms work propose": 6681, + "achieving artificial general": 310, + "extensive experiments validate": 3907, + "experiments validate effectiveness": 3809, + "codes datasets available": 1745, + "powerful language models": 8657, + "language model chatgpt": 5781, + "single forward pass": 10484, + "advanced large language": 447, + "capabilities various nlp": 1362, + "despite great success": 2782, + "publicly available large": 9212, + "embodied artificial intelligence": 3237, + "llms key idea": 6573, + "achieved significant success": 274, + "success general domains": 10913, + "realworld medical dialogue": 9391, + "language model complete": 5782, + "dataset code models": 2484, + "code models publicly": 1724, + "methods mainly focus": 7001, + "framework based llms": 4240, + "avoid data leakage": 989, + "summarize recent progress": 10966, + "akin human language": 557, + "achieves stateoftheart performances": 303, + "stateoftheart performances multiple": 10724, + "motion prediction motion": 7591, + "range tasks including": 9322, + "step artificial general": 10746, + "gap paper proposes": 4383, + "models llms using": 7460, + "tasks code models": 11175, + "training data evaluation": 11546, + "dataset large language": 2499, + "representation learning model": 9729, + "exhibits excellent performance": 3669, + "language models make": 5920, + "scale language models": 10108, + "emergence incontext learning": 3250, + "experiments conducted various": 3770, + "existing evaluation metrics": 3687, + "propose novel llmbased": 9090, + "future research direction": 4352, + "research direction release": 9785, + "direction release code": 2939, + "chatgpt gpt4 significantly": 1571, + "chinese english data": 1625, + "generation question answering": 4570, + "generating questionanswer pairs": 4507, + "stateoftheart sota models": 10732, + "rich world knowledge": 10010, + "diverse highquality data": 3017, + "code data released": 1710, + "achieved remarkable performance": 269, + "scenarios involving multiple": 10130, + "superior performance existing": 10977, + "generation instruction following": 4538, + "large ai models": 6002, + "effectively mitigates impact": 3160, + "demonstrate superior performance": 2667, + "tasks work propose": 11303, + "generate large number": 4454, + "hallucination large language": 4793, + "performance various nlp": 8452, + "larger language models": 6124, + "future research area": 4351, + "harnessing capabilities large": 4826, + "plays significant role": 8537, + "existing methods primarily": 3699, + "generation code generation": 4523, + "code generation task": 1718, + "comprehensive experimental evaluations": 1938, + "evaluations demonstrate method": 3586, + "substantial performance improvement": 10897, + "outperforms compared stateoftheart": 8151, + "compared stateoftheart methods": 1854, + "texttoimage models dalle": 11430, + "harnesses large language": 4820, + "language model gpt": 5787, + "models llms garnered": 7420, + "applications various domains": 714, + "new stateoftheart results benchmark": 7842, + "stateoftheart results benchmark datasets": 10728, + "present simple effective method": 8724, + "conduct thorough ablation studies": 2038, + "generative pretrained transformer gpt": 4614, + "generation remains open question": 4575, + "extensive experiments demonstrate method": 3902, + "natural language processing task": 7732, + "codes data publicly available": 1743, + "experimental results demonstrate proposed": 3743, + "ablation studies demonstrate effectiveness": 172, + "models natural language processing": 7473, + "generating natural language descriptions": 4505, + "extensive experiments ablation studies": 3895, + "address gap propose novel": 393, + "visual question answering image": 12154, + "question answering image captioning": 9270, + "models llms demonstrated impressive": 7402, + "experimental results model outperforms": 3750, + "extensive experiments benchmark datasets": 3897, + "foundation models large language": 4226, + "abilities large language models": 126, + "large language models paper": 6089, + "language models paper introduces": 5928, + "models paper introduces novel": 7484, + "benchmark large language models": 1123, + "making large language models": 6802, + "based natural language instructions": 1051, + "large language models pretrained": 6092, + "end propose novel method": 3354, + "models shown promising results": 7523, + "given natural language description": 4636, + "contrast large language models": 2170, + "language models llms emerge": 5871, + "propose novel framework called": 9088, + "development large language models": 2841, + "zeroshot reasoning ability large": 12323, + "large language models recently": 6096, + "capability large language models": 1371, + "recent large language models": 9467, + "models demonstrated remarkable capabilities": 7299, + "llms chatgpt gpt4 shown": 6476, + "inspired recent success large": 5379, + "large language models especially": 6035, + "utilizes large language models": 11997, + "achieving artificial general intelligence": 311, + "extensive experiments validate effectiveness": 3908, + "large language model chatgpt": 6011, + "advanced large language models": 448, + "capabilities various nlp tasks": 1363, + "dataset code models publicly": 2485, + "code models publicly available": 1725, + "achieves stateoftheart performances multiple": 304, + "wide range tasks including": 12212, + "step artificial general intelligence": 10747, + "language models llms using": 5917, + "tuning large language model": 11695, + "dataset large language models": 2500, + "large language models make": 6085, + "future research direction release": 4353, + "research direction release code": 9786, + "based large language models": 1045, + "knowledge large language model": 5686, + "demonstrates superior performance existing": 2701, + "tasks code models available": 11176, + "hallucination large language models": 4794, + "performance various nlp tasks": 8453, + "harnessing capabilities large language": 4827, + "harnesses large language models": 4821, + "language models llms garnered": 5883, + "new stateoftheart results benchmark datasets": 7843, + "using large language models large": 11956, + "leverages large language models llms": 6288, + "capabilities large language models llms": 1345, + "visual question answering image captioning": 12155, + "language models llms demonstrated impressive": 5867, + "knowledge large language models llms": 5688, + "foundation models large language models": 4227, + "language models paper introduces novel": 5929, + "tasks large language models llms": 11238, + "benchmark large language models large": 1124, + "advances large language models llms": 470, + "contrast large language models llms": 2171, + "large language models llms emerge": 6054, + "development large language models llms": 2842, + "zeroshot reasoning ability large language": 12324, + "recent large language models llm": 9468, + "inspired recent success large language": 5380, + "recent large language models llms": 9469, + "dataset code models publicly available": 2486, + "era large language models llms": 3455, + "large language models llms using": 6083, + "future research direction release code": 4354, + "harnesses large language models llms": 4822, + "large language models llms garnered": 6064, + "python": 9224, + "section": 10188, + "classifies": 1658, + "adjustable": 429, + "accommodating": 210, + "catering": 1422, + "validated": 12013, + "tabletop": 11077, + "executing": 3649, + "longhorizon": 6711, + "robot": 10037, + "relieve": 9640, + "acquisition": 324, + "burden": 1317, + "involvement": 5575, + "simulator": 10477, + "lights": 6318, + "uncommon": 11741, + "twin": 11709, + "15k": 25, + "environmental": 3437, + "perturbations": 8484, + "violations": 12134, + "continuously": 2163, + "monitor": 7585, + "shorter": 10356, + "infeasible": 5268, + "rgb": 10002, + "achievable": 236, + "longtail": 6715, + "30": 52, + "sr": 10667, + "lifelike": 6311, + "witnessed": 12234, + "stimulating": 10758, + "reused": 9963, + "scattered": 10119, + "reusing": 9964, + "accumulation": 228, + "max": 6872, + "traverse": 11651, + "tag": 11091, + "load": 6685, + "lifting": 6313, + "humanrobot": 5015, + "deploy": 2721, + "highresolution": 4913, + "salient": 10084, + "branches": 1264, + "prioritizing": 8842, + "48": 72, + "want": 12170, + "unfamiliar": 11796, + "equivalent": 3450, + "matches": 6853, + "operate": 8072, + "knowledgedriven": 5712, + "overfitting": 8186, + "significant strides": 10420, + "llm model": 6420, + "python programs": 9225, + "planning action": 8514, + "input types": 5356, + "tasks different": 11190, + "different scenarios": 2905, + "outperformed stateoftheart": 8141, + "embodied ai": 3235, + "longhorizon tasks": 6712, + "introduce efficient": 5539, + "efficient training": 3199, + "training approach": 11540, + "closed loop": 1681, + "task extracting": 11128, + "building blocks": 1310, + "multistep reasoning": 7673, + "connecting human": 2069, + "suboptimal results": 10886, + "data acquisition": 2381, + "human involvement": 4975, + "existing open": 3704, + "methods achieve": 6975, + "tasks realworld": 11265, + "environments agents": 3439, + "diverse training": 3029, + "better generalization": 1178, + "agents focus": 500, + "propose benchmark": 9059, + "benchmark named": 1125, + "multitask setting": 7679, + "realistic scenarios": 9376, + "simulator contains": 10478, + "tasks recently": 11268, + "llms unified": 6674, + "understand execute": 11756, + "end work": 3355, + "generated chatgpt": 4474, + "propose general": 9068, + "reasoning levels": 9428, + "different llms": 2888, + "llms encode": 6517, + "work explored": 12251, + "tasks generate": 11216, + "physical world": 8491, + "llms play": 6610, + "various complex": 12055, + "success rates": 10925, + "task completion": 11119, + "task planning": 11139, + "planning large": 8517, + "successfully complete": 10931, + "generation complex": 4525, + "lack information": 5743, + "realistic world": 9377, + "dataset containing": 2491, + "action plans": 328, + "designed prompts": 2765, + "llms inference": 6567, + "results generated": 9902, + "complex environments": 1896, + "potential using": 8638, + "llm understand": 6429, + "analyze ability": 631, + "ability reason": 163, + "complex scenarios": 1903, + "systems face": 11059, + "performance limitations": 8407, + "solve problems": 10554, + "employing llm": 3289, + "closer human": 1688, + "poses challenges": 8590, + "llms great": 6554, + "environment paper": 3436, + "previous stateoftheart": 8816, + "2023 competition": 41, + "dialog history": 2858, + "state tracking": 10698, + "30 absolute": 53, + "respectively code": 9838, + "rl methods": 10025, + "methods taskspecific": 7015, + "previous approaches": 8806, + "continual knowledge": 2158, + "pretrained knowledge": 8745, + "real world": 9373, + "engineering paper": 3376, + "enhance effectiveness": 3387, + "efficacy proposed": 3182, + "tasks resulting": 11275, + "feature maps": 4012, + "scenarios challenging": 10122, + "process experiments": 8883, + "features improve": 4016, + "outperform baseline": 8133, + "develop powerful": 2825, + "approach involves": 749, + "modeling tasks": 7249, + "finetuned downstream": 4111, + "feature engineering": 4010, + "outperforming previous": 8144, + "widely adopted": 12216, + "nature human": 7753, + "leveraging large": 6299, + "abilities propose": 128, + "significant advantage": 10402, + "various applications including": 12052, + "propose benchmark named": 9060, + "like chatgpt gpt4": 6327, + "conduct comprehensive analysis": 2022, + "models llms encode": 7411, + "various complex tasks": 12056, + "experimental results generated": 3744, + "explore potential using": 3845, + "providing valuable insights": 9185, + "llms great potential": 6555, + "outperforms previous stateoftheart": 8158, + "models llms existing": 7417, + "outperform baseline methods": 8134, + "models recent advancements": 7510, + "leveraging large language": 6300, + "natural language processing paper": 7730, + "llms like chatgpt gpt4": 6583, + "natural language understanding tasks": 7744, + "language models llms encode": 5874, + "language models llms existing": 5880, + "language models recent advancements": 5939, + "leveraging large language models": 6301, + "models llms like chatgpt gpt4": 7431, + "large language models llms encode": 6057, + "using large language model llm": 11954, + "large language models llms existing": 6061, + "large language models recent advancements": 6095, + "personalize": 8477, + "instantiate": 5388, + "editor": 3129, + "trustworthiness": 11679, + "authenticity": 927, + "lastly": 6151, + "acquired": 322, + "amazon": 608, + "beauty": 1091, + "disparity": 2985, + "executed": 3648, + "rtx": 10068, + "3090": 55, + "llama7b": 6396, + "devoted": 2850, + "describing": 2738, + "widelystudied": 12221, + "fairness": 3984, + "note": 7910, + "protocol": 9135, + "groundtruth": 4762, + "simulators": 10479, + "endeavors": 3357, + "rating": 9350, + "analyzes": 636, + "2000": 36, + "humancentered": 4996, + "reliably": 9635, + "let": 6267, + "browsing": 1297, + "clicking": 1672, + "influential": 5287, + "chatting": 1613, + "profiling": 8933, + "playing": 8533, + "giving": 4645, + "orthogonal": 8122, + "prospects": 9130, + "actively": 338, + "streamline": 10791, + "restricting": 9865, + "promotion": 8984, + "platforms": 8522, + "degrade": 2627, + "profile": 8932, + "lifelong": 6312, + "received": 9444, + "ecommerce": 3116, + "workflow": 12269, + "satisfying": 10098, + "multidomain": 7607, + "card": 1396, + "diverse information": 3018, + "success various": 10926, + "offering potential": 8012, + "overcome limitations": 8180, + "meet users": 6905, + "instructions guide": 5435, + "content generation": 2137, + "showing promising": 10370, + "witnessed significant": 12235, + "recommendation methods": 9519, + "recently emergence": 9493, + "emergence chatgpt": 3246, + "conversational models": 2204, + "thoroughly investigated": 11460, + "investigated paper": 5565, + "knowledge acquired": 5650, + "unlike traditional": 11835, + "explore use": 3848, + "evaluate quality": 3514, + "provided information": 9171, + "researchers explore": 9813, + "chatgpt improve": 1574, + "performance diverse": 8381, + "learning involves": 6220, + "training tasks": 11587, + "domains limited": 3059, + "highly efficient": 4904, + "rtx 3090": 10069, + "following large": 4187, + "attracted attention": 897, + "attention research": 894, + "industry communities": 5264, + "progress large": 8945, + "models considering": 7285, + "experiments tasks": 3804, + "baselines including": 1074, + "tasks approach": 11166, + "approach sheds": 758, + "obtain accurate": 7994, + "led emergence": 6255, + "contain social": 2125, + "avoid potential": 991, + "directly use": 2952, + "novel benchmark": 7916, + "benchmark called": 1112, + "code dataset": 1711, + "powerful conversational": 8655, + "utilization chatgpt": 11984, + "evaluation protocol": 3574, + "interactive evaluation": 5496, + "llms named": 6594, + "user simulators": 11918, + "experiments publicly": 3794, + "notable improvements": 7906, + "improvements compared": 5145, + "deeper comprehension": 2612, + "new opportunities": 7828, + "opportunities paper": 8083, + "chatgpt paper": 1586, + "based different": 1029, + "paper discusses": 8220, + "opportunities improvement": 8082, + "efficiency transparency": 3189, + "generation based": 4517, + "significantly improve": 10433, + "knowledge models": 5694, + "models improve": 7355, + "generate realistic": 4464, + "based user": 1062, + "user preferences": 11915, + "challenging problem": 1504, + "human cognitive": 4959, + "achieve humanlike": 252, + "humanlike intelligence": 5008, + "autonomous agent": 959, + "playing intervention": 8534, + "models survey": 7540, + "match users": 6851, + "applications natural": 708, + "survey research": 11029, + "training inference": 11558, + "finegrained taxonomy": 4101, + "key challenges": 5628, + "finally summarize": 4079, + "discuss future": 2973, + "models novel": 7478, + "propose train": 9105, + "model evaluate": 7139, + "rl method": 10024, + "preferences particular": 8702, + "experiments largescale": 3786, + "exploring large": 3859, + "tasks demonstrating": 11188, + "demonstrating exceptional": 2704, + "framework harnesses": 4259, + "models analyze": 7260, + "leverages llm": 6289, + "understand behavior": 11754, + "comprehensive dataset": 1930, + "models provides": 7502, + "provides valuable": 9180, + "growing field": 4768, + "offer practical": 8009, + "llms utilizing": 6677, + "leveraging llms": 6302, + "generation fewshot": 4531, + "alleviate limitation": 587, + "generation llms": 4548, + "information users": 5319, + "specifically extract": 10631, + "models generating": 7342, + "highquality generated": 4910, + "experiments large": 3785, + "problem llms": 8865, + "extract useful": 3922, + "useful information": 11905, + "augmentation technique": 919, + "specifically develop": 10627, + "training dataset": 11547, + "experiments realworld": 3796, + "public dataset": 9201, + "models capability": 7272, + "ai agent": 510, + "models excel": 7319, + "leveraging extensive": 6296, + "despite ability": 2778, + "tasks providing": 11263, + "engaging conversations": 3372, + "llms lack": 6577, + "finetuning llms": 4133, + "task execution": 11126, + "llms experimental": 6529, + "search engines": 10177, + "data multiple": 2440, + "shared parameters": 10332, + "tasks taskspecific": 11291, + "taskspecific parameters": 11312, + "llm extract": 6410, + "trained jointly": 11534, + "achieves better": 281, + "mobile applications": 7092, + "aigenerated content aigc": 534, + "showing promising results": 10371, + "recently emergence chatgpt": 9494, + "thoroughly investigated paper": 11461, + "incontext learning involves": 5208, + "instruction following large": 5405, + "following large language": 4188, + "language model empowered": 5784, + "recent progress large": 9472, + "progress large language": 8946, + "approach sheds light": 759, + "avoid potential risks": 992, + "novel benchmark called": 7917, + "llms shown great": 6649, + "experiments publicly available": 3795, + "applications natural language": 709, + "language models novel": 5925, + "exploring large language": 3860, + "novel framework harnesses": 7923, + "language models analyze": 5811, + "provides valuable insights": 9181, + "field natural language": 4051, + "rapid development large": 9338, + "extract useful information": 3923, + "llms address issue": 6451, + "llms propose novel": 6621, + "zeroshot fewshot settings": 12315, + "data augmentation technique": 2388, + "natural language interface": 7717, + "llms experimental results": 6530, + "achieves better performance": 282, + "instruction following large language": 5406, + "following large language model": 4189, + "large language model empowered": 6012, + "recent progress large language": 9473, + "progress large language models": 8947, + "models llms shown great": 7451, + "empowered large language model": 3298, + "applications natural language processing": 710, + "large language models analyze": 6021, + "field natural language processing": 4052, + "rapid development large language": 9339, + "datasets demonstrate effectiveness proposed": 2525, + "instruction following large language model": 5407, + "recent progress large language models": 9474, + "progress large language models llms": 8948, + "language models llms shown great": 5909, + "revolutionized natural language processing tasks": 9988, + "rapid development large language models": 9340, + "influenced": 5286, + "society": 10530, + "nn": 7884, + "belief": 1106, + "status": 10741, + "biological": 1209, + "conjectures": 2064, + "pack": 8198, + "pieces": 8496, + "articulate": 811, + "knowing": 5648, + "delivers": 2634, + "trials": 11665, + "outlining": 8128, + "bots": 1250, + "accomplishing": 216, + "stimulate": 10755, + "exploratory": 3837, + "collective": 1776, + "exert": 3654, + "organized": 8116, + "worlds": 12280, + "roleplaying": 10053, + "overseeing": 8193, + "compensating": 1869, + "custom": 2368, + "unavailable": 11732, + "gathers": 4394, + "accumulates": 227, + "accomplishment": 217, + "dynamics": 3109, + "collaboratively": 1769, + "positive": 8597, + "proactive": 8851, + "ais": 554, + "hinges": 4921, + "modularity": 7577, + "selfplay": 10222, + "populationbased": 8582, + "isolated": 5585, + "delivering": 2633, + "encompasses": 3335, + "classroom": 1661, + "economics": 3118, + "journey": 5613, + "scholars": 10145, + "tasksolving": 11306, + "couples": 2281, + "tasks study": 11283, + "finetuning llm": 4132, + "experiments involving": 3784, + "ai tasks": 523, + "new research": 7837, + "understanding deep": 11769, + "does need": 3047, + "models constructed": 7287, + "intelligence large": 5472, + "perspective paper": 8481, + "problems current": 8872, + "intelligent agents": 5477, + "knowledge acquisition": 5651, + "trials errors": 11666, + "directions field": 2942, + "traditional tasks": 11523, + "enabling efficient": 3315, + "lack systematic": 5748, + "systematic research": 11048, + "possess enhanced": 8600, + "publicly released": 9213, + "datasets research": 2545, + "language large": 5773, + "llms enabled": 6515, + "ai agents": 511, + "presents challenges": 8729, + "play crucial": 8528, + "crucial role": 2331, + "baseline evaluate": 1066, + "development advanced": 2834, + "recent surge": 9482, + "applying large": 724, + "growing demand": 4767, + "finetuning specific": 4144, + "models generalization": 7338, + "stateoftheart language": 10708, + "claude primarily": 1663, + "primarily accessible": 8825, + "accessible api": 206, + "tasks inference": 11225, + "informed decisions": 5323, + "decisions empirical": 2578, + "learning potential": 6234, + "multiagent collaboration": 7605, + "agents autonomous": 498, + "spectrum tasks": 10647, + "propose multiagent": 9080, + "furthermore delve": 4327, + "discuss possible": 2975, + "negative ones": 7774, + "discuss potential": 2976, + "research current": 9781, + "high degree": 4871, + "facilitating seamless": 3962, + "evaluations conducted": 3584, + "average improvement": 982, + "inspire future": 5370, + "research focus": 9792, + "limited knowledge": 6353, + "significantly human": 10432, + "human learning": 4979, + "vast amounts": 12104, + "humanlevel intelligence": 5003, + "present comprehensive": 8715, + "perspective specifically": 8482, + "propose unified": 9108, + "science engineering": 10147, + "agents based": 499, + "present challenges": 8714, + "repository relevant": 9722, + "interaction framework": 5488, + "emulate human": 3306, + "human behaviors": 4953, + "cognitive architecture": 1756, + "address present": 408, + "model contains": 7127, + "experiments indicate": 3782, + "settings open": 10319, + "open source": 8035, + "incomplete information": 5199, + "language communication": 5760, + "parameters llms": 8297, + "language modelbased": 5806, + "agents handling": 501, + "language knowledge": 5772, + "tool use": 11493, + "crucial component": 2328, + "methods providing": 7006, + "offering valuable": 8013, + "researchers field": 9814, + "simple tasks": 10467, + "innovative framework": 5345, + "generating multiple": 4502, + "plans agents": 8519, + "various benchmarks": 12054, + "generates coherent": 4493, + "solutions existing": 10546, + "new perspectives": 7831, + "tackling complex": 11090, + "project available": 8955, + "results various tasks": 9938, + "fewshot zeroshot learning": 4042, + "general intelligence large": 4408, + "intelligence large language": 5473, + "lack systematic research": 5749, + "natural language large": 7718, + "language large language": 5774, + "models llms enabled": 7410, + "play crucial role": 8529, + "applying large language": 725, + "stateoftheart language models": 10709, + "claude primarily accessible": 1664, + "primarily accessible api": 8826, + "accessible api calls": 207, + "inspire future research": 5371, + "demonstrated remarkable potential": 2691, + "paper present comprehensive": 8245, + "present comprehensive survey": 8716, + "future directions field": 4349, + "settings open source": 10320, + "natural language communication": 7709, + "tuning parameters llms": 11701, + "offering valuable insights": 8014, + "general intelligence large language": 4409, + "intelligence large language models": 5474, + "natural language large language": 7719, + "language large language models": 5775, + "language models llms enabled": 5873, + "applying large language models": 726, + "stateoftheart language models like": 10710, + "claude primarily accessible api": 1665, + "primarily accessible api calls": 8827, + "llms demonstrated remarkable potential": 6501, + "paper present comprehensive survey": 8246, + "general intelligence large language models": 4410, + "natural language large language models": 7720, + "language large language models llms": 5776, + "large language models llms enabled": 6056, + "claude primarily accessible api calls": 1666, + "models llms demonstrated remarkable potential": 7405, + "generation large language models llms": 4545, + "distributed": 3004, + "minutes": 7059, + "dota": 3073, + "champions": 1508, + "2019": 38, + "enormous": 3410, + "combinations": 1779, + "pool": 8566, + "mastered": 6849, + "treesearch": 11660, + "skillfully": 10497, + "actor": 341, + "biologically": 1210, + "conjunction": 2065, + "deterministic": 2820, + "plausibility": 8523, + "tradeoff": 11513, + "solvers": 10557, + "ushered": 11934, + "drawbacks": 3090, + "introduction": 5552, + "understood": 11788, + "decreasing": 2592, + "stochastic": 10759, + "casts": 1411, + "theorem": 11443, + "fit": 4157, + "damage": 2379, + "determining": 2819, + "optimally": 8091, + "bounded": 1256, + "satisfied": 10095, + "cumulative": 2341, + "transition": 11630, + "parameterized": 8290, + "interacts": 5501, + "compound": 1920, + "clipping": 1678, + "exceeds": 3625, + "envision": 3440, + "reuse": 9962, + "replay": 9710, + "harm": 4813, + "uniform": 11809, + "remedy": 9689, + "475": 71, + "cpu": 2291, + "srl": 10668, + "libraries": 6305, + "deepmind": 2616, + "dataflows": 2474, + "unifies": 11808, + "optimizations": 8097, + "massively": 6848, + "reproduces": 9748, + "5x": 85, + "a100": 120, + "inherently": 5328, + "connection": 2074, + "major challenge": 6784, + "given black": 4631, + "black box": 1216, + "learning specifically": 6242, + "learning deep": 6203, + "control tasks": 2187, + "state representation": 10697, + "policy gradient": 8559, + "current approaches": 2346, + "approaches tackling": 776, + "new generation": 7821, + "approach introduces": 748, + "performance theoretically": 8439, + "theoretically prove": 11447, + "human players": 4982, + "handle complex": 4801, + "challenges current": 1478, + "survey recent": 11028, + "real time": 9372, + "field ai": 4045, + "key problem": 5635, + "value function": 12024, + "especially complex": 3472, + "different popular": 2897, + "leading efficient": 6171, + "efficient learning": 3196, + "sequence modeling": 10283, + "gpt series": 4670, + "power modern": 8650, + "unlike prior": 11834, + "benchmarks results": 1145, + "compared strong": 1855, + "successfully applied": 10930, + "joint probability": 5609, + "making better": 6795, + "better use": 1184, + "perform experiments": 8353, + "hybrid model": 5029, + "model improves": 7161, + "better balance": 1175, + "learning algorithms": 6190, + "paper envision": 8222, + "information transfer": 5318, + "need attention": 7764, + "learning basic": 6195, + "paper analyze": 8211, + "propose uniform": 9109, + "scalable training": 10105, + "remedy issue": 9690, + "experiments results": 3798, + "hybrid methods": 5028, + "research recent": 9805, + "serving rich": 10306, + "methods achieving": 6976, + "remarkable improvement": 9675, + "demonstrating superior": 2705, + "training single": 11582, + "process massive": 8889, + "data train": 2464, + "largescale training": 6145, + "implementation details": 5088, + "single machine": 10485, + "speedup compared": 10653, + "design choices": 2745, + "academic community": 192, + "llm framework": 6411, + "design framework": 2749, + "llms potentially": 6613, + "encounter difficulties": 3338, + "tasks common": 11178, + "approach mitigating": 752, + "significant computational": 10407, + "information generated": 5299, + "compared strong baselines": 1856, + "making better use": 6796, + "high success rates": 4877, + "remedy issue propose": 9691, + "research recent years": 9806, + "paper present novel": 8247, + "significant computational resources": 10408, + "sphere": 10655, + "traditionally": 11524, + "handful": 4799, + "harmonized": 4816, + "scrutinize": 10166, + "designers": 2768, + "plm": 8541, + "car": 1395, + "relevancy": 9626, + "delta": 2635, + "55": 81, + "differs": 2920, + "record": 9530, + "briefly": 1280, + "vice": 12125, + "versa": 12117, + "comparably": 1828, + "half": 4787, + "resulted": 9873, + "utmost": 12004, + "determination": 2816, + "associations": 864, + "ingredients": 5326, + "accelerating": 197, + "formulas": 4207, + "emission": 3262, + "screen": 10162, + "40000": 66, + "disciplines": 2956, + "validates": 12014, + "18": 32, + "humidity": 5024, + "root": 10058, + "rmse": 10028, + "literature survey": 6381, + "proven beneficial": 9145, + "advance artificial": 441, + "models applied": 7261, + "finetuning pretrained": 4140, + "effective finetuning": 3140, + "finetuning approaches": 4121, + "transformerbased models": 11623, + "approaches directly": 768, + "tuning techniques": 11703, + "adapt downstream": 348, + "tasks effectively": 11192, + "improve generalization": 5125, + "processing related": 8910, + "vice versa": 12126, + "performs comparably": 8468, + "substantial progress": 10898, + "requirements paper": 9763, + "challenge introduce": 1470, + "comprehensive instruction": 1943, + "aims improve": 550, + "experiments llms": 3787, + "enhancing large": 3406, + "improve interpretability": 5127, + "better accomplish": 1173, + "challenge conversational": 1469, + "knowledge enhancement": 5668, + "recent advancement": 9449, + "models openais": 7480, + "gpt4 demonstrates": 4694, + "dataset achieving": 2476, + "possibility leveraging": 8604, + "human supervision": 4988, + "text recently": 11412, + "field nlp": 4053, + "utilize llms": 11989, + "exploration llms": 3835, + "specific prompt": 10617, + "multiple downstream": 7655, + "prediction tasks": 8693, + "human intelligence": 4972, + "advance artificial intelligence": 442, + "bridge gap paper": 1273, + "finetuning pretrained models": 4141, + "remarkable performance gains": 9677, + "large pretrained model": 6116, + "models generalization ability": 7339, + "improve generalization ability": 5126, + "language processing related": 5969, + "address challenge introduce": 387, + "recent advancement large": 9450, + "llms revolutionized field": 6638, + "revolutionized field nlp": 9984, + "multiple downstream tasks": 7656, + "downstream tasks experimental": 3079, + "based pretrained language model": 1056, + "natural language processing related": 7731, + "recent advancement large language": 9451, + "models llms revolutionized field": 7445, + "downstream tasks experimental results": 3080, + "recent advancement large language models": 9452, + "language models llms revolutionized field": 5904, + "program": 8936, + "acceptance": 200, + "partially": 8307, + "apart": 681, + "triggered": 11669, + "percentage": 8345, + "reorder": 9697, + "candidates": 1336, + "lyra": 6750, + "reduction": 9549, + "ignoring": 5052, + "functional": 4311, + "adds": 419, + "discrimination": 2966, + "repair": 9698, + "codex": 1750, + "fix": 4159, + "desirable": 2771, + "derive": 2731, + "repairing": 9699, + "detects": 2815, + "erroneous": 3458, + "repairs": 9700, + "40": 64, + "fatal": 4004, + "deduction": 2594, + "bugs": 1300, + "looking": 6720, + "trick": 11667, + "requirement": 9761, + "87": 107, + "93": 113, + "42": 67, + "app": 686, + "52": 79, + "122": 12, + "equivalence": 3449, + "estimates": 3486, + "builtin": 1316, + "prompttuning": 9044, + "checking": 1615, + "nl": 7855, + "847": 104, + "120": 11, + "come": 1786, + "debugging": 2564, + "motivation": 7595, + "participants": 8308, + "70": 95, + "33": 59, + "humanllm": 5011, + "edited": 3125, + "dl": 3038, + "bad": 1011, + "bottleneck": 1251, + "fulfill": 4297, + "facts": 3971, + "adequate": 421, + "nuances": 7947, + "differential": 2918, + "subtle": 10907, + "versions": 12121, + "inferring": 5282, + "31": 57, + "resemble": 9818, + "maximum": 6875, + "cots": 2272, + "intuitively": 5556, + "assurance": 870, + "heavy": 4844, + "iterating": 5602, + "actionable": 330, + "71": 96, + "36": 61, + "specifications": 10642, + "algorithmic": 562, + "tracing": 11510, + "provenance": 9147, + "scrutiny": 10167, + "bit": 1214, + "strings": 10802, + "respecting": 9833, + "preserved": 8737, + "guaranteeing": 4773, + "begins": 1095, + "manager": 6808, + "corrects": 2245, + "severity": 10326, + "compiling": 1882, + "manipulate": 6810, + "offloading": 8020, + "bringing": 1283, + "spent": 10654, + "repetitive": 9703, + "away": 1001, + "burgeoning": 1318, + "strength": 10792, + "promptingbased": 9026, + "toolaugmented": 11494, + "rest": 9861, + "coarsetofine": 1697, + "paves": 8330, + "day": 2557, + "advocate": 485, + "connects": 2076, + "experienced": 3731, + "company": 1815, + "plagiarism": 8510, + "detrimental": 2821, + "elaborating": 3208, + "ethically": 3496, + "emphasis": 3268, + "reusable": 9961, + "chatbased": 1528, + "humanauthored": 4995, + "chatgptgenerated": 1607, + "secure": 10190, + "devising": 2849, + "attribution": 908, + "rephrase": 9704, + "unannotated": 11731, + "corrupted": 2255, + "trains": 11590, + "expansions": 3723, + "continue": 2159, + "grow": 4765, + "preparation": 8709, + "summarizes": 10967, + "wireless": 12233, + "nuanced": 7946, + "consultation": 2119, + "started": 10691, + "engages": 3370, + "breaking": 1267, + "validating": 12015, + "resolution": 9819, + "alleviates": 590, + "unveils": 11865, + "hints": 4922, + "characterize": 1519, + "mitigated": 7073, + "runtime": 10076, + "uncovered": 11743, + "eda": 3120, + "67b": 92, + "plugins": 8549, + "trust": 11678, + "concrete": 2010, + "stack": 10672, + "overflow": 8187, + "decade": 2566, + "chatgpt4": 1605, + "chatgpt35": 1603, + "evident": 3599, + "programaided": 8937, + "backbones": 1003, + "consumed": 2120, + "llmintegrated": 6439, + "attackers": 880, + "smart": 10520, + "blockchain": 1229, + "week": 12194, + "hour": 4942, + "62": 88, + "applicationspecific": 715, + "decide": 2570, + "30k": 56, + "reproducing": 9749, + "compatibility": 1866, + "mitigation": 7077, + "unintended": 11814, + "gpt35turbo": 4686, + "minimizes": 7052, + "formidable": 4201, + "granularities": 4727, + "sampled": 10087, + "multiperspective": 7648, + "selfconsistency": 10217, + "frequent": 4286, + "generation benchmark": 4518, + "datasets significant": 2549, + "programming language": 8939, + "methods support": 7014, + "multiple models": 7657, + "tasks introduce": 11227, + "models best": 7268, + "various models": 12078, + "exact matching": 3609, + "provides new": 9177, + "current mainstream": 2354, + "time paper": 11474, + "different previous": 2901, + "optimal model": 8089, + "automatically generating": 955, + "computational linguistics": 1973, + "software engineering": 10536, + "approaches model": 774, + "models largescale": 7376, + "programs paper": 8942, + "experiments code": 3767, + "tasks demonstrate": 11186, + "comparing stateoftheart": 1861, + "programs programs": 8943, + "automatically generated": 954, + "fix patterns": 4160, + "data future": 2414, + "testing repairing": 11378, + "unstructured text": 11854, + "blackbox settings": 1222, + "set novel": 10310, + "additionally framework": 382, + "public benchmark": 9200, + "leverage existing": 6275, + "zeroshot setting": 12326, + "models important": 7354, + "model robustness": 7210, + "widely applied": 12217, + "consists components": 2101, + "original input": 8119, + "generation learning": 4546, + "important research": 5102, + "generation different": 4529, + "pretraining finetuning": 8781, + "finetuning paradigm": 4138, + "academia industry": 190, + "existing benchmarks": 3680, + "proposed including": 9118, + "assess models": 836, + "models compared": 7280, + "assess performance": 837, + "approaches proposed": 775, + "trained scratch": 11537, + "efficiency model": 3187, + "making difficult": 6799, + "existing deep": 3683, + "surpassing stateoftheart": 11019, + "stateoftheart baseline": 10704, + "respectively approach": 9837, + "trained models": 11536, + "research paper": 9802, + "debugging techniques": 2565, + "critical issue": 2311, + "existing techniques": 3712, + "results existing": 9901, + "propose automated": 9057, + "test prompts": 11368, + "prompts large": 9033, + "models automatically": 7262, + "efficient accurate": 3191, + "empirical analysis": 3274, + "desired task": 2775, + "make choice": 6790, + "typically trained": 11723, + "trained large": 11535, + "ability make": 155, + "tasks average": 11167, + "llms complex": 6480, + "tasks challenging": 11170, + "challenging involving": 1498, + "generates responses": 4494, + "responses following": 9851, + "controllable generation": 2190, + "gap humans": 4378, + "humans llms": 5019, + "utilization llms": 11987, + "study prompt": 10862, + "learning program": 6237, + "learning dl": 6206, + "far satisfactory": 3999, + "models fewshot": 7329, + "long time": 6708, + "used pretraining": 11903, + "pretraining process": 8794, + "pretraining experiments": 8780, + "light future": 6316, + "oracle detect": 8106, + "chatgpt stateoftheart": 1599, + "study shows": 10869, + "shows chatgpt": 10391, + "possible reason": 8606, + "evaluate approach": 3502, + "models encounter": 7313, + "using tools": 11976, + "method using": 6969, + "model automatically": 7109, + "relatively small": 9616, + "current best": 2348, + "poor accuracy": 8568, + "llms improve": 6560, + "perform extensive": 8354, + "directly generating": 2949, + "llms approach": 6458, + "parameter sizes": 8281, + "superior accuracy": 10973, + "evaluating improving": 3524, + "exhibit low": 3659, + "work shown": 12266, + "user study": 11919, + "study systematically": 10870, + "systematically investigate": 11054, + "issues including": 5594, + "chatgpt resemble": 1593, + "chatgpt promising": 1589, + "demonstrates effectiveness": 2694, + "fundamental aspect": 4317, + "analysis provides": 625, + "cot prompting": 2268, + "language reasoning": 5974, + "designed natural": 2763, + "propose structured": 9104, + "compared cot": 1844, + "generation apply": 4515, + "prompting llms": 9017, + "substantial improvements": 10893, + "evaluation platform": 3573, + "llm era": 6407, + "little work": 6384, + "evaluating capability": 3522, + "benchmark based": 1111, + "provide better": 9151, + "facilitate development": 3953, + "daily life": 2377, + "growing using": 4770, + "generating humanlike": 4501, + "need effective": 7766, + "chatgpt natural": 1580, + "approaches based": 767, + "metrics chatgpt": 7024, + "llms serve": 6642, + "solve issue": 10551, + "contexts introduce": 2153, + "outperforms sota": 8159, + "summarization techniques": 10963, + "chatgpt popular": 1588, + "attracted wide": 899, + "wide attention": 12207, + "engineering community": 3375, + "specifically explore": 10630, + "chatgpt generate": 1566, + "metrics including": 7028, + "significantly worse": 10452, + "findings outline": 4093, + "hardware design": 4811, + "design large": 2750, + "chatgpt exhibited": 1558, + "shows great": 10392, + "potential hardware": 8626, + "described natural": 2735, + "bias problem": 1188, + "code prompts": 1728, + "results pretrained": 9922, + "examples potentially": 3621, + "llms proficient": 6618, + "data flow": 2412, + "data processing": 2443, + "user requests": 11917, + "language task": 5977, + "automated evaluation": 934, + "wide margin": 12208, + "novel evaluation": 7920, + "thinking capabilities": 11452, + "human problemsolving": 4985, + "problemsolving abilities": 8877, + "framework large": 4264, + "generation pretrained": 4562, + "data various": 2468, + "various methods": 12077, + "retrieved knowledge": 9955, + "empirical experiments": 3276, + "baselines significant": 1075, + "promptingbased methods": 9027, + "advanced models": 450, + "models realworld": 7508, + "fully evaluate": 4304, + "able achieve": 174, + "impressive results": 5116, + "results complex": 9887, + "new way": 7847, + "billions data": 1203, + "sources end": 10579, + "raw data": 9360, + "privacy data": 8845, + "key elements": 5630, + "ethical principles": 3495, + "matrix multiplication": 6870, + "applied classification": 717, + "model models": 7184, + "exceptional performance": 3637, + "llms substantial": 6664, + "emergence foundation": 3247, + "chatbots chatgpt": 1532, + "ai services": 521, + "apis like": 685, + "propose concept": 9062, + "ai chain": 514, + "chains prompt": 1466, + "feature set": 4013, + "ablation experiments": 168, + "extensive dataset": 3887, + "binary classification": 1207, + "translation task": 11641, + "given query": 4638, + "requires large": 9767, + "does rely": 3048, + "modeling task": 7248, + "new pretraining": 7833, + "content gaps": 2135, + "unsupervised baselines": 11856, + "baselines significantly": 1076, + "compared supervised": 1857, + "transformerbased large": 11619, + "llms applications": 6456, + "development process": 2843, + "llms perspectives": 6609, + "garnered significant": 4389, + "studies demonstrated": 10840, + "demonstrated ability": 2671, + "role llms": 10052, + "signal processing": 10397, + "researchers developers": 9812, + "solve certain": 10548, + "llms generalization": 6544, + "decisionmaking processes": 2576, + "advancements deep": 458, + "remarkable efficacy": 9674, + "potential vulnerabilities": 8640, + "llms realm": 6626, + "zeroshot approaches": 12311, + "enabling language": 3316, + "example prompts": 3617, + "human annotations": 4950, + "exact match": 3608, + "using examples": 11944, + "influence effectiveness": 5285, + "language time": 5982, + "programs contain": 8941, + "experiments suggest": 3803, + "current limitations": 2352, + "complex set": 1904, + "diverse requirements": 3025, + "compared gpt4": 1846, + "models parameterefficient": 7486, + "models frequently": 7336, + "demand extensive": 2638, + "llama base": 6388, + "parameters limited": 8296, + "experiments provide": 3792, + "components including": 1914, + "input representation": 5354, + "performance tasks": 8436, + "generation reasoning": 4571, + "chatgpt extensively": 1561, + "research application": 9774, + "effectively handle": 3154, + "related literature": 9602, + "tasks hoping": 11219, + "help researchers": 4849, + "researchers better": 9810, + "reveal performance": 9967, + "llms various": 6678, + "received considerable": 9445, + "considerable attention": 2083, + "characteristics llms": 1518, + "study performance": 10859, + "different prompt": 2902, + "multiround dialogue": 7669, + "generation systems": 4580, + "instructions code": 5431, + "despite advancements": 2779, + "general texttotext": 4419, + "novel technique": 7935, + "stack overflow": 10673, + "chatgpt enhancing": 1554, + "survey participants": 11027, + "presents indepth": 8732, + "chatgpt35 chatgpt4": 1604, + "improve chatgpt": 5121, + "chatgpt models": 1579, + "effective methods": 3143, + "complex reasoning": 1901, + "understood llms": 11789, + "reasoning code": 9417, + "approach code": 734, + "new programming": 7834, + "primarily focused": 8828, + "models backbones": 7264, + "build models": 1305, + "investigate performance": 5563, + "fewshot scenarios": 4038, + "indicate model": 5242, + "different backbones": 2876, + "demonstrate better": 2647, + "dataset fewshot": 2497, + "provide new": 9159, + "datasets respectively": 2546, + "transformerbased pretrained": 11624, + "results code": 9885, + "generation existing": 4530, + "better decoding": 1176, + "allowing llms": 595, + "years large": 12293, + "great challenge": 4747, + "specifically focusing": 10632, + "bleu scores": 1226, + "insights potential": 5367, + "generated models": 4483, + "lines code": 6367, + "generating code": 4496, + "approach efficiently": 738, + "efficiently effectively": 3202, + "studies investigated": 10843, + "error propagation": 3463, + "approach new": 753, + "tasks application": 11165, + "prediction accuracy": 8690, + "furthermore explore": 4333, + "adaptability various": 351, + "higher levels": 4882, + "llms automatic": 6463, + "models play": 7490, + "framework conduct": 4244, + "findings reveal": 4096, + "low level": 6730, + "gpt35turbo gpt4": 4687, + "tasks experiments": 11206, + "outperforms models": 8156, + "change model": 1511, + "results automatic": 9879, + "assist llms": 854, + "llms achieves": 6448, + "formidable challenge": 4202, + "challenge llms": 1471, + "multiple outputs": 7660, + "multiple perspectives": 7661, + "multiple diverse": 7653, + "evaluation code": 3547, + "specific generation": 10612, + "tasks stateoftheart": 11279, + "addressed current": 414, + "models generalize": 7340, + "language tasks paper": 5979, + "results proposed method": 9925, + "improve quality generated": 5136, + "language models largescale": 5847, + "language model finetuning": 5786, + "code generation tasks": 1719, + "generation tasks demonstrate": 4583, + "code generation models": 1716, + "pretraining finetuning paradigm": 8782, + "paper propose benchmark": 8252, + "models trained scratch": 7546, + "deep learning based": 2598, + "prompts large language": 9034, + "language models automatically": 5812, + "deep learning dl": 2599, + "language models fewshot": 5834, + "shed light future": 10339, + "method using chatgpt": 6970, + "code generation process": 1717, + "language models code": 5818, + "llms chatgpt shown": 6477, + "chainofthought cot prompting": 1460, + "natural language reasoning": 7736, + "designed natural language": 2764, + "generation paper propose": 4559, + "chatgpt natural language": 1581, + "llms shown remarkable": 6654, + "attracted wide attention": 900, + "software engineering community": 10537, + "hardware design large": 4812, + "like chatgpt exhibited": 6326, + "described natural language": 2736, + "framework large language": 4265, + "language models realworld": 5937, + "demonstrated exceptional performance": 2674, + "emergence foundation models": 3248, + "machine translation task": 6761, + "language modeling task": 5808, + "paper provides comprehensive": 8263, + "transformerbased large language": 11620, + "llms garnered significant": 6542, + "garnered significant attention": 4390, + "recent advancements deep": 9454, + "advancements deep learning": 459, + "enabling language models": 3317, + "factors influence effectiveness": 3970, + "study provides valuable": 10866, + "superior performance compared": 10976, + "language models parameterefficient": 5930, + "publicly available datasets": 9211, + "llama base model": 6389, + "tasks text generation": 11293, + "tasks code generation": 11174, + "help researchers better": 4850, + "received considerable attention": 9446, + "propose novel technique": 9094, + "results demonstrate effectiveness": 9893, + "gap paper presents": 4382, + "paper presents indepth": 8249, + "reasoning capabilities large": 9410, + "complex reasoning tasks": 1902, + "proposed approach code": 9113, + "new programming language": 7835, + "pretrained models backbones": 8764, + "llms different sizes": 6506, + "recent years large": 9486, + "years large language": 12294, + "paper conduct empirical": 8216, + "conduct empirical study": 2025, + "llms shown promising": 6653, + "study propose novel": 10864, + "enhancing large language": 3407, + "adequately addressed current": 424, + "experimental results proposed method": 3755, + "prompts large language models": 9035, + "large language models fewshot": 6038, + "models llms chatgpt shown": 7396, + "chatgpt natural language understanding": 1582, + "large language models code": 6025, + "models llms shown remarkable": 7456, + "llms like chatgpt exhibited": 6582, + "framework large language models": 4266, + "large language models realworld": 6093, + "masked language modeling task": 6840, + "transformerbased large language models": 11621, + "models llms garnered significant": 7421, + "llms garnered significant attention": 6543, + "recent advancements deep learning": 9455, + "study provides valuable insights": 10867, + "large language models parameterefficient": 6090, + "reasoning capabilities large language": 9411, + "recent years large language": 9487, + "years large language models": 12295, + "models llms shown promising": 7455, + "extensive experimental results demonstrate": 3892, + "enhancing large language models": 3408, + "language models llms chatgpt shown": 5861, + "based large language models llms": 1046, + "language models llms shown remarkable": 5914, + "models llms like chatgpt exhibited": 7430, + "transformerbased large language models llms": 11622, + "language models llms garnered significant": 5884, + "models llms garnered significant attention": 7422, + "reasoning capabilities large language models": 9412, + "recent years large language models": 9488, + "language models llms shown promising": 5913, + "recorded": 9531, + "generaldomain": 4420, + "llamabased": 6398, + "national": 7705, + "simplified": 10470, + "distilled": 2995, + "rlaif": 10026, + "reinforced": 9590, + "drastic": 3086, + "check": 1614, + "span": 10583, + "resourceconstrained": 9824, + "qlora": 9231, + "nvidia": 7965, + "truthful": 11682, + "probing": 8858, + "eliminate": 3219, + "potent": 8614, + "ner": 7781, + "rapid growth": 9342, + "particular propose": 8311, + "fuse multiple": 4340, + "finetuning chinese": 4122, + "explicitly trained": 3828, + "training deploying": 11549, + "llama model": 6390, + "biomedical domain": 1213, + "llamabased model": 6399, + "knowledge paper": 5695, + "process adapting": 8882, + "knowledge injection": 5680, + "dataset encompasses": 2496, + "comprising total": 1959, + "various public": 12089, + "13 billion": 14, + "knowledge enhanced": 5667, + "model generative": 7154, + "paper evaluate": 8223, + "knowledge enabling": 5665, + "learning using": 6248, + "effective retrieval": 3145, + "background knowledge": 1007, + "guide inference": 4781, + "questions answered": 9289, + "average score": 985, + "chatgpt serve": 1595, + "benchmark chinese": 1113, + "llms ability": 6441, + "answer given": 658, + "generating rationales": 4508, + "qa datasets": 9227, + "limitations current": 6342, + "current llms": 2353, + "reasoning experiment": 9420, + "different preferences": 2898, + "presents significant": 8734, + "safety trustworthiness": 10083, + "attention work": 895, + "bring following": 1282, + "learning ai": 6187, + "ai feedback": 515, + "evaluation scheme": 3577, + "manual metrics": 6822, + "teacher model": 11318, + "modern llms": 7570, + "gpt4 struggle": 4700, + "struggle issues": 10826, + "issues regarding": 5599, + "framework using": 4278, + "extraction tasks": 3932, + "short text": 10351, + "resourceconstrained scenarios": 9825, + "models exhibited": 7321, + "exhibited exceptional": 3662, + "tasks leveraging": 11241, + "introduce comprehensive": 5537, + "datasets employ": 2527, + "scenarios extensive": 10127, + "traditional chinese": 11517, + "research domain": 9788, + "require llms": 9756, + "tasks benchmark": 11169, + "finetuning training": 4152, + "proposed benchmark": 9115, + "leverages structured": 6292, + "bases llms": 1085, + "compared vanilla": 1859, + "offer new": 8008, + "adaptation llms": 357, + "studies focused": 10842, + "work introduces": 12255, + "llms tend": 6668, + "pretraining phase": 8793, + "interactive scenarios": 5499, + "performance nlp": 8416, + "recognition ner": 9512, + "span extraction": 10584, + "llms including chatgpt": 6563, + "language model specifically": 5803, + "foundation language model": 4220, + "generative pretraining model": 4619, + "exceptional performance various": 3638, + "simple effective retrieval": 10461, + "different llms different": 2889, + "learning ai feedback": 6188, + "automatic manual metrics": 942, + "evaluation human evaluation": 3558, + "struggle issues regarding": 10827, + "experimental results method": 3748, + "information extraction tasks": 5298, + "models exhibited exceptional": 7322, + "exhibited exceptional performance": 3663, + "comprehensive evaluation framework": 1934, + "chatgpt shown remarkable": 1597, + "llms automatic evaluation": 6464, + "improve llms performance": 5129, + "pose potential risks": 8586, + "knowledge bases llms": 5659, + "experimental results llms": 3747, + "performance nlp tasks": 8417, + "entity recognition ner": 3428, + "capabilities natural language understanding": 1353, + "models like chatgpt demonstrated": 7382, + "demonstrated exceptional performance various": 2675, + "exceptional performance various natural": 3639, + "models exhibited exceptional performance": 7323, + "experimental results demonstrate effectiveness": 3741, + "named entity recognition ner": 7695, + "remarkable capabilities natural language understanding": 9672, + "demonstrated exceptional performance various natural": 2676, + "exceptional performance various natural language": 3640, + "research large language models llms": 9799, + "generative large language models llms": 4600, + "application large language models llms": 699, + "inspiring": 5384, + "entityrelation": 3432, + "triple": 11671, + "invariance": 5559, + "provably": 9140, + "schemas": 10141, + "validity": 12017, + "unlocked": 11838, + "instructive": 5447, + "revisiting": 9978, + "spans": 10587, + "push": 9221, + "flant5": 4162, + "recast": 9443, + "wellaligned": 12199, + "codestyle": 1748, + "occurrence": 8005, + "toolkit": 11495, + "entitycentric": 3431, + "toolkits": 11496, + "823": 103, + "secondary": 10186, + "bottlenecks": 1252, + "university": 11823, + "text challenging": 11382, + "data labeling": 2428, + "explore promptbased": 3846, + "methods work": 7020, + "directly prompting": 2950, + "learning algorithm": 6189, + "fundamental task": 4319, + "involves identifying": 5577, + "extracting information": 3926, + "tasks simple": 11277, + "used complex": 11898, + "conducted series": 2045, + "text paper": 11406, + "relations directly": 9606, + "directly extracted": 2947, + "unified text": 11804, + "fields natural": 4056, + "require specialized": 9758, + "professional knowledge": 8928, + "languages knowledge": 5997, + "firstly propose": 4156, + "propose generative": 9069, + "framework generative": 4258, + "models unlocked": 7552, + "unlocked strong": 11839, + "f1 score": 3943, + "uniformly model": 11811, + "enhance fewshot": 3388, + "fewshot performance": 4035, + "achieve performance": 258, + "nlp task": 7871, + "standard supervised": 10684, + "sota results": 10569, + "pretrained massive": 8761, + "learning ability": 6185, + "tasks particular": 11255, + "tasks experiment": 11202, + "seven benchmarks": 10324, + "outperforms finetuning": 8155, + "models specially": 7530, + "capabilities paper": 1354, + "existing toolkits": 3713, + "efficiency stability": 3188, + "semantic parsing": 10238, + "subtasks approach": 10906, + "architecture different": 791, + "downstream nlp": 3076, + "tasks parameter": 11254, + "aim explore": 538, + "popular large": 8573, + "generate prompts": 4463, + "directly prompting llms": 2951, + "models limited resources": 7386, + "foundation models like": 4228, + "fundamental task natural": 4320, + "text challenging task": 11383, + "relations directly extracted": 9607, + "fields natural language": 4057, + "information extraction large": 5295, + "extraction large language": 3930, + "language models unlocked": 5949, + "models unlocked strong": 7553, + "performance paper propose": 8421, + "demonstrate method achieves": 2660, + "comparable performance bert": 1824, + "text paper propose": 11407, + "various downstream nlp": 12062, + "downstream nlp tasks": 3077, + "language models zeroshot": 5956, + "popular large language": 8574, + "foundation models like chatgpt": 4229, + "demonstrated remarkable performance various": 2690, + "remarkable performance various tasks": 9681, + "fundamental task natural language": 4321, + "fields natural language processing": 4058, + "information extraction large language": 5296, + "extraction large language models": 3931, + "large language models unlocked": 6099, + "language models unlocked strong": 5950, + "experimental results demonstrate method": 3742, + "various downstream nlp tasks": 12063, + "large language models zeroshot": 6103, + "popular large language model": 8575, + "fundamental task natural language processing": 4322, + "information extraction large language models": 5297, + "large language models unlocked strong": 6100, + "malicious": 6804, + "dissemination": 2989, + "expose": 3866, + "did": 2870, + "say": 10100, + "violation": 12133, + "uncovers": 11745, + "llama13b": 6394, + "ecosystem": 3119, + "primitive": 8831, + "inevitable": 5267, + "chance": 1509, + "extreme": 3938, + "lifecycle": 6310, + "regulations": 9589, + "theft": 11442, + "topk": 11505, + "compromise": 1960, + "impacting": 5082, + "replacements": 9708, + "semanticlevel": 10251, + "bypass": 1320, + "payloads": 8334, + "arabic": 785, + "hate": 4828, + "formal": 4196, + "analyzer": 635, + "desktop": 2776, + "missed": 7064, + "discussing": 2980, + "ahead": 507, + "explaining": 3820, + "experimentally": 3760, + "conflicting": 2058, + "right": 10011, + "upper": 11871, + "bound": 1253, + "adaptivity": 367, + "assessment chinese": 846, + "assessment benchmark": 845, + "generated responses": 4485, + "llms strong": 6662, + "openai gpt": 8037, + "test llms": 11367, + "task automatically": 11115, + "popular llms": 8576, + "llms empirical": 6514, + "llms brought": 6470, + "brought significant": 1296, + "widespread deployment": 12228, + "conduct preliminary": 2034, + "mainstream llms": 6776, + "chatgpt capable": 1541, + "llms raises": 6625, + "raises concerns": 9309, + "knowledge domains": 5663, + "evaluate capabilities": 3503, + "challenging benchmark": 1496, + "encourage llms": 3343, + "like previous": 6334, + "llms accurately": 6443, + "study investigate": 10856, + "require model": 9757, + "methods benchmarking": 6977, + "types datasets": 11718, + "best performance": 1167, + "tasks requiring": 11274, + "evaluating text": 3536, + "models considerable": 7284, + "compromise models": 1961, + "tasks previous": 11258, + "previous benchmarks": 8807, + "robustness paper": 10049, + "introduce latent": 5541, + "instruction embedding": 5402, + "harmful content": 4815, + "content consequently": 2132, + "provide technical": 9167, + "languages english": 5995, + "multiple choice": 7651, + "llms increasing": 6566, + "essential task": 3479, + "performance advantage": 8362, + "significant room": 10419, + "foster development": 4215, + "evaluated language": 3519, + "cases addition": 1408, + "years witnessed": 12297, + "wide variety": 12214, + "benchmarks evaluation": 1138, + "lack interpretability": 5744, + "propose possible": 9096, + "systems compared": 11058, + "information realworld": 5312, + "end establish": 3348, + "experiments seven": 3800, + "detailed instructions": 2795, + "invalid responses": 5558, + "llms specific": 6659, + "upper bound": 11872, + "popular llms chatgpt": 8577, + "models llms brought": 7392, + "llms brought significant": 6471, + "deep learning models": 2600, + "recent years witnessed": 9489, + "baseline methods including": 1068, + "methods including large": 6994, + "language models llms brought": 5857, + "models llms brought significant": 7393, + "large language models multiple": 6087, + "methods including large language": 6995, + "large language models llms brought": 6049, + "language models llms brought significant": 5858, + "methods including large language models": 6996, + "sum": 10955, + "222": 45, + "simulates": 10474, + "exemplars": 3653, + "pruning": 9194, + "textdavinci003": 11420, + "92": 111, + "rectify": 9538, + "federated": 4019, + "asked": 822, + "crowdsourced": 2324, + "factuality": 3977, + "postediting": 8609, + "varies": 12038, + "langauge": 5757, + "initiative": 5337, + "condensed": 2013, + "clearly": 1671, + "activations": 334, + "clarification": 1644, + "initialize": 5335, + "hotpotqa": 4936, + "choosing": 1639, + "parallelly": 8278, + "composing": 1918, + "approximating": 782, + "outlines": 8127, + "triggers": 11670, + "debate": 2562, + "stance": 10679, + "chainofknowledge": 1456, + "cok": 1764, + "controlling": 2194, + "reallife": 9382, + "acceptable": 199, + "constant": 2105, + "setup": 10321, + "promoting": 8983, + "kbqa": 5622, + "webqsp": 12189, + "categorizing": 1419, + "cumbersome": 2340, + "mathematics": 6867, + "socratic": 10531, + "structuring": 10823, + "peer": 8335, + "triplet": 11673, + "llama2": 6395, + "115": 9, + "protoqa": 9136, + "512": 78, + "adjustment": 431, + "data existing": 2408, + "t5 bart": 11070, + "demonstrated stateoftheart": 2692, + "multiple benchmarks": 7650, + "prompting cot": 9010, + "tasks gpt3": 11218, + "requires manual": 9768, + "systems propose": 11065, + "different existing": 2881, + "effectively utilize": 3165, + "gap compared": 4377, + "prompting chainofthought": 9008, + "models increasing": 7362, + "scale large": 10109, + "cot reasoning": 2270, + "purpose propose": 9218, + "propose solution": 9102, + "challenges realworld": 1491, + "labeled training": 5731, + "creates barriers": 2301, + "general tasks": 4418, + "selects optimal": 10213, + "optimal combination": 8088, + "models knowledge": 7369, + "zeroshot commonsense": 12313, + "models experiments": 7324, + "experiments commonsense": 3768, + "ability methods": 156, + "new prompting": 7836, + "correct answers": 2235, + "used guide": 11900, + "encouraging results": 3345, + "llms experiments": 6531, + "difficulty introduce": 2925, + "questions accompanied": 9288, + "chainofthought reasoning": 1464, + "focuses typical": 4180, + "propose improve": 9071, + "methods significantly": 7011, + "answering task": 675, + "task finetuning": 11129, + "smaller models": 10515, + "additionally introduce": 383, + "question answer": 9266, + "tasks tackle": 11287, + "llms despite": 6502, + "proposed prompting": 9123, + "margin comparable": 6830, + "performance varies": 8443, + "varies substantially": 12039, + "significantly reduces": 10449, + "approach solving": 764, + "approach construct": 736, + "based collected": 1027, + "conducted types": 2046, + "current popular": 2358, + "additional training": 380, + "reduces number": 9547, + "achieves remarkable": 296, + "zeroshot methods": 12318, + "comparable gpt35": 1820, + "conversational systems": 2205, + "impressive capabilities": 5109, + "work conduct": 12249, + "challenges extensive": 1480, + "practical application": 8663, + "capability tackle": 1373, + "llms obtain": 6601, + "small models": 10510, + "higher training": 4885, + "multiturn conversations": 7685, + "shown effectiveness": 10375, + "tasks achieving": 11161, + "model selection": 7214, + "best worlds": 1171, + "model reasoning": 7208, + "approach shows": 760, + "models problem": 7497, + "shed new": 10340, + "new light": 7825, + "rationales answers": 9356, + "process prompting": 8895, + "making convenient": 6798, + "showcases impressive": 10364, + "robustness evaluation": 10046, + "perform significantly": 8357, + "leveraging incontext": 6297, + "new knowledge": 7823, + "approximating different": 783, + "paper outlines": 8243, + "common effective": 1793, + "model accuracy": 7100, + "experimental outcomes": 3738, + "available github": 974, + "llms nlp": 6599, + "framework generating": 4257, + "experiments widelyused": 3813, + "divergent thinking": 3010, + "performance general": 8390, + "framework multiple": 4269, + "framework extensive": 4250, + "extensive analyses": 3884, + "improving large": 5158, + "answers based": 678, + "new approach": 7807, + "augmenting llms": 925, + "memory large": 6914, + "conventional neural": 2198, + "paper seek": 8266, + "synthetic dataset": 11045, + "chainofknowledge cok": 1457, + "answering complex": 665, + "analysis model": 624, + "datasets tend": 2552, + "development language": 2837, + "ability humans": 145, + "ability language": 147, + "order explore": 8108, + "humans language": 5018, + "ability paper": 159, + "human performance": 4981, + "proven effective": 9146, + "aims provide": 552, + "health counseling": 4836, + "strategies tailored": 10782, + "humanlike responses": 5009, + "manual evaluations": 6820, + "tasks exploring": 11208, + "generate answers": 4440, + "sota methods": 10566, + "pretraining data": 8774, + "improve accuracy": 5119, + "llms evaluation": 6522, + "integrate information": 5456, + "llms knowledgeintensive": 6575, + "knowledgeintensive question": 5716, + "tasks kbqa": 11228, + "outperforms vanilla": 8164, + "advantages proposed": 476, + "potential limitations": 8631, + "data generating": 2417, + "positive negative": 8598, + "negative responses": 7775, + "involving gpt4": 5580, + "journey ahead": 5614, + "augmentation large": 914, + "multiple sources": 7662, + "improving model": 5161, + "augmentation method": 917, + "nlu nlg": 7880, + "reasoning language": 9422, + "challenging issue": 1499, + "llms approaches": 6459, + "causal language": 1424, + "underscore effectiveness": 11751, + "effectiveness generality": 3171, + "applicable different": 692, + "task gap": 11130, + "training explore": 11554, + "explore possibility": 3843, + "statistical information": 10739, + "potential unified": 8637, + "finetuned language": 4112, + "extra knowledge": 3919, + "results popular": 9920, + "llms significant": 6657, + "slightly better": 10500, + "training code": 11541, + "models good": 7344, + "traditional finetuning": 11518, + "models tailored": 7541, + "previous sota": 8814, + "nlp community": 7863, + "llms present": 6615, + "overall accuracy": 8172, + "achieved stateoftheart": 275, + "focus llms": 4177, + "heavily rely": 4843, + "applied different": 718, + "largescale models": 6139, + "inference training": 5277, + "empirical evaluations": 3275, + "alignment tasks": 581, + "prompting cot prompting": 9011, + "scale large language": 10110, + "prompting chainofthought cot": 9009, + "chainofthought cot reasoning": 1461, + "significantly improves performance": 10437, + "improves performance llms": 5151, + "different tasks paper": 2910, + "labeled training data": 5732, + "language models knowledge": 5843, + "using chatgpt gpt4": 11940, + "language models performance": 5932, + "shown remarkable performance": 10387, + "question answering task": 9274, + "new stateoftheart performance": 7839, + "performance varies substantially": 8444, + "novel method called": 7927, + "achieves remarkable performance": 297, + "performance comparable gpt35": 8373, + "tackle issues propose": 11086, + "approach shows significant": 761, + "shed new light": 10341, + "models llms nlp": 7436, + "llms nlp tasks": 6600, + "performance general language": 8391, + "general language tasks": 4413, + "improving large language": 5159, + "memory large language": 6915, + "method improve performance": 6954, + "development language models": 2838, + "ability language models": 148, + "mental health counseling": 6921, + "tackle challenge propose": 11082, + "previous stateoftheart methods": 8817, + "propose novel evaluation": 9086, + "augmentation large language": 915, + "neural language models": 7800, + "nlu nlg tasks": 7881, + "reasoning language models": 9423, + "causal language models": 1425, + "finetuned language model": 4113, + "experimental results popular": 3752, + "results popular benchmarks": 9921, + "previous sota models": 8815, + "models llms present": 7438, + "stateoftheart sota performance": 10733, + "achieves new stateoftheart results": 293, + "llms shown remarkable performance": 6655, + "shown remarkable performance various": 10388, + "achieves new stateoftheart performance": 292, + "propose novel method called": 9092, + "achieve new stateoftheart results": 256, + "large language models incontext": 6041, + "language models llms nlp": 5895, + "performance general language tasks": 8392, + "memory large language models": 6916, + "natural language processing models": 7725, + "augmentation large language models": 916, + "experimental results popular benchmarks": 3753, + "language models llms present": 5897, + "performance large language models llms": 8406, + "models llms shown remarkable performance": 7457, + "llms shown remarkable performance various": 6656, + "prompting large language models llms": 9016, + "large language models llms nlp": 6071, + "large language models llms present": 6073, + "semisupervised": 10255, + "inherits": 5331, + "cope": 2225, + "kb": 5621, + "analogous": 613, + "longform": 6710, + "facto": 3966, + "searches": 10182, + "sparql": 10590, + "judged": 5616, + "published": 9214, + "alleviated": 589, + "passage": 8315, + "oriented": 8117, + "triples": 11672, + "knowledgegrounded": 5714, + "untrained": 11862, + "kbs": 5623, + "store": 10762, + "literal": 6378, + "perfect": 8348, + "knowledge recent": 5700, + "improve downstream": 5123, + "models finetuning": 7333, + "demo video": 2643, + "text corpora": 11386, + "highresource languages": 4915, + "languages experiments": 5996, + "comparable improved": 1822, + "performance knowledge": 8401, + "especially lowresource": 3473, + "maintaining performance": 6780, + "shared task": 10333, + "error analysis": 3460, + "explore various": 3849, + "code scripts": 1735, + "base kb": 1022, + "large lms": 6105, + "successfully enables": 10933, + "aims answering": 543, + "supporting facts": 11003, + "search engine": 10176, + "finetune pretrained": 4108, + "models imitate": 7353, + "humanwritten ones": 5023, + "using search": 11970, + "models dynamic": 7306, + "relevant knowledge": 9629, + "knowledge sources": 5703, + "natural sentences": 7749, + "models real": 7507, + "llama7b model": 6397, + "supervised data": 10985, + "finetune model": 4107, + "llms applying": 6457, + "stored parameters": 10764, + "document retrieval": 3042, + "nonenglish languages": 7892, + "largest chinese": 6148, + "smaller pretrained": 10518, + "limitations researchers": 6347, + "inspired existing": 5374, + "models specific": 7531, + "bases kbs": 1080, + "various knowledge": 12071, + "user demands": 11909, + "vanilla llms": 12027, + "llms framework": 6539, + "llms limitations": 6587, + "benchmarks proposed": 1142, + "knowledge statements": 5704, + "neural knowledge": 7798, + "questions options": 9297, + "compared baselines": 1840, + "improve downstream nlp": 5124, + "language models finetuning": 5836, + "knowledge base kb": 5653, + "using search engine": 11971, + "significantly outperforms previous": 10446, + "language models dynamic": 5826, + "knowledge stored parameters": 5706, + "model paper propose": 7194, + "knowledge bases kbs": 5655, + "large language models dynamic": 6032, + "large language models knowledge": 6042, + "attacked": 879, + "keyphrases": 5637, + "penalize": 8338, + "releasing": 9624, + "positional": 8595, + "v1": 12008, + "normalized": 7903, + "plmbased": 8542, + "frustratingly": 4295, + "lengths": 6264, + "concatenate": 1987, + "keyphrase": 5636, + "exposure": 3868, + "familiar": 3996, + "solution use": 10544, + "sentence structures": 10264, + "generating high": 4498, + "important information": 5101, + "accordingly propose": 221, + "communication model": 1808, + "build model": 1304, + "existing metrics": 3701, + "input context": 5347, + "deep understanding": 2608, + "generation multiple": 4555, + "respectively compared": 9839, + "llms new": 6598, + "propose model": 9079, + "promptbased fewshot": 9003, + "nlp systems": 7870, + "pairs generated": 8204, + "gpt3 shown": 4683, + "methods far": 6988, + "massive knowledge": 6847, + "learning experimental": 6207, + "method surpasses": 6967, + "datasets achieves": 2513, + "gained increasing": 4362, + "datasets tasks": 2551, + "including classification": 5177, + "assess ability": 834, + "learning applying": 6191, + "previous pretrained": 8810, + "methods finetuned": 6989, + "task nlp": 11137, + "random sampling": 9313, + "challenging large": 1500, + "knowledge keywords": 5681, + "networks used": 7796, + "models current": 7292, + "poses challenge": 8589, + "shown strong": 10389, + "explore new": 3841, + "models capacity": 7273, + "general scenarios": 4416, + "specific datasets": 10608, + "sampled negative": 10088, + "frustratingly simple": 4296, + "leveraging knowledge": 6298, + "build new": 1306, + "llms construct": 6486, + "llms provides": 6623, + "mainstream datasets": 6775, + "strategy called": 10785, + "generating high quality": 4499, + "models bert gpt2": 7267, + "correlation human judgments": 2249, + "promptbased fewshot learning": 9004, + "learning experimental results": 6208, + "gained increasing attention": 4363, + "models datasets tasks": 7295, + "nlp tasks including": 7876, + "tasks including classification": 11222, + "results various natural": 9935, + "assess ability llms": 835, + "challenging large language": 1501, + "substantial improvements compared": 10894, + "existing knowledge bases": 3690, + "models llms construct": 7397, + "results various natural language": 9936, + "challenging large language models": 1502, + "language models llms construct": 5862, + "results various natural language processing": 9937, + "slow": 10504, + "resourcerich": 9827, + "nmt": 7883, + "scheduled": 10138, + "pretrains": 8800, + "unchanged": 11736, + "heuristics": 4862, + "inheritance": 5330, + "figure": 4059, + "periods": 8473, + "convey": 2216, + "tokenlevel": 11488, + "titan": 11483, + "continues": 2161, + "kl": 5645, + "recurrent": 9539, + "dozens": 3084, + "slm": 10502, + "generations": 4590, + "houlsby": 4938, + "xsum": 12291, + "weather": 12187, + "describes": 2737, + "exceed": 3623, + "posttraining": 8613, + "fundamentally": 4323, + "usual": 11977, + "retaining": 9939, + "observing": 7990, + "causing": 1433, + "smoothly": 10522, + "incorrectly": 5222, + "predicts": 8695, + "fullparameter": 4299, + "lowcost": 6737, + "sacrificing": 10078, + "usable": 11881, + "130b": 16, + "converting": 2212, + "elusive": 3226, + "translators": 11648, + "inferences": 5278, + "loads": 6686, + "revolution": 9979, + "multitude": 7681, + "asymmetric": 873, + "wider": 12224, + "variance": 12031, + "mmlu": 7090, + "freedom": 4282, + "int4": 5454, + "selector": 10210, + "averages": 986, + "inference speed": 5275, + "largescale unlabeled": 6146, + "comparative experiments": 1831, + "analysis shows": 627, + "new pretrained": 7832, + "best practice": 1168, + "parameters available": 8292, + "classical text": 1650, + "tasks story": 11280, + "dialogue generation": 2862, + "performance terms": 8437, + "model effective": 7136, + "recent pretrained": 9470, + "training corpus": 11542, + "effectively transfer": 3162, + "advanced knowledge": 445, + "pretraining largescale": 8788, + "model scratch": 7213, + "information different": 5292, + "gpt2 improved": 4674, + "models proposed": 7500, + "gpt2 paper": 4676, + "models improving": 7357, + "different modules": 2895, + "modeling representation": 7247, + "desired attributes": 2773, + "computational overhead": 1974, + "collect largescale": 1771, + "experiments demonstrated": 3777, + "thoroughly analyze": 11459, + "potential solution": 8635, + "classification accuracy": 1652, + "variety tasks": 12048, + "roberta models": 10034, + "houlsby et": 4939, + "al 2019": 560, + "task dataset": 11123, + "directions improving": 2943, + "size training": 10493, + "negative impact": 7773, + "topic coverage": 11503, + "improvement especially": 5143, + "work leverage": 12256, + "tasks addition": 11162, + "method investigate": 6955, + "finetuning strategies": 4147, + "accuracy drop": 231, + "tasks taskoriented": 11290, + "inference efficiency": 5271, + "recently garnered": 9497, + "attention academia": 889, + "model including": 7162, + "including limited": 5184, + "make challenging": 6789, + "industrial communities": 5261, + "comprehensive understanding": 1949, + "models furthermore": 7337, + "llms necessitates": 6597, + "conduct comparative": 2020, + "different training": 2911, + "predict response": 8686, + "instruction datasets": 5401, + "stateoftheart sentence": 10730, + "tens thousands": 11353, + "specific challenges": 10607, + "dialogue models": 2864, + "data availability": 2389, + "promising technique": 8978, + "classification models": 1653, + "exposure bias": 3869, + "learning bias": 6197, + "performance training": 8440, + "promising solution": 8977, + "solution achieve": 10542, + "generation present": 4561, + "new models": 7827, + "fullparameter finetuning": 4300, + "investigate impact": 5561, + "differences observed": 2873, + "data propose": 2445, + "identify potential": 5047, + "multilevel large": 7614, + "past years": 8319, + "specific models": 10615, + "models remarkably": 7514, + "efficient finetuning": 3195, + "low rank": 6733, + "rank adaptation": 9325, + "models scaling": 7520, + "130b parameters": 17, + "using single": 11973, + "stateoftheart deep": 10706, + "detection language": 2805, + "remains elusive": 9654, + "language learners": 5777, + "models scale": 7519, + "finetuning instruction": 4127, + "survey paper": 11026, + "100 languages": 5, + "systems paper": 11064, + "highlight current": 4891, + "low performance": 6731, + "capabilities wide": 1365, + "models aimed": 7258, + "foundational model": 4232, + "models era": 7316, + "opensourced llms": 8066, + "bloom llama": 1232, + "learning llms": 6224, + "tasks data": 11185, + "build endtoend": 1302, + "advanced llms": 449, + "llms research": 6635, + "performance analysis": 8363, + "results comprehensive": 9888, + "heavily depends": 4841, + "wider range": 12225, + "unsupervised methods": 11859, + "boosts model": 1244, + "applying approach": 723, + "parameters demonstrated": 8293, + "average accuracy": 981, + "method requires": 6964, + "scenarios code": 10124, + "llms foundation": 6538, + "models foundational": 7335, + "text generation paper": 11396, + "effectively transfer knowledge": 3163, + "plms bert gpt": 8546, + "generation pretrained language": 4563, + "train model scratch": 11529, + "codes publicly available": 1747, + "language models improving": 5840, + "houlsby et al": 4940, + "et al 2019": 3491, + "room improvement especially": 10057, + "automatic human evaluations": 940, + "large pretrained language": 6114, + "understanding tasks including": 11786, + "training inference efficiency": 11559, + "model pretrained language": 7202, + "quality evaluation shows": 9241, + "paper aims provide": 8210, + "stateoftheart performance various": 10722, + "performance various downstream": 8447, + "models llms necessitates": 7435, + "finetuning large pretrained": 4131, + "realworld datasets demonstrate": 9389, + "performance differences observed": 8379, + "multilevel large language": 7615, + "language models remarkably": 5941, + "low rank adaptation": 6734, + "rank adaptation lora": 9326, + "language models scaling": 5943, + "models perform tasks": 7488, + "highlight current limitations": 4892, + "capabilities wide range": 1366, + "language models era": 5830, + "billions parameters demonstrated": 1205, + "demonstrated impressive capabilities": 2680, + "wide range applications": 12210, + "generation pretrained language models": 4564, + "pretrained language models achieved": 8750, + "houlsby et al 2019": 4941, + "range natural language processing": 9320, + "language model pretrained language": 5799, + "model pretrained language models": 7203, + "large pretrained language models": 6115, + "pretrained language models llms": 8752, + "achieving stateoftheart performance various": 319, + "language models llms necessitates": 5894, + "low rank adaptation lora": 6735, + "large language models despite": 6029, + "language model pretrained language models": 5800, + "model pretrained language models plms": 7204, + "large language models llms necessitates": 6070, + "multireference": 7667, + "posts": 8612, + "monolingual": 7587, + "educated": 3131, + "breadth": 1265, + "customer": 2369, + "resultant": 9872, + "resolving": 9821, + "multigranularity": 7609, + "model examples": 7141, + "existing stateoftheart": 3709, + "comprehensive empirical": 1931, + "large conversational": 6004, + "real life": 9371, + "summarization systems": 10961, + "keywords topics": 5641, + "generation developed": 4528, + "model introduce": 7168, + "models public": 7503, + "chinese pretrained": 1632, + "context pretrained": 2146, + "brings significant": 1286, + "capacity fewshot": 1381, + "conversational ai": 2203, + "different knowledge": 2885, + "various lowresource": 12075, + "various topics": 12096, + "comprehensive human": 1942, + "source language": 10576, + "scenarios number": 10132, + "tasks present": 11257, + "use external": 11886, + "opensource model": 8063, + "discussed impact": 2978, + "humanlike characteristics": 5006, + "data alleviate": 2383, + "context code": 2140, + "models responses": 7516, + "bert model": 1155, + "content detection": 2134, + "design training objectives": 2758, + "largest chinese pretrained": 6149, + "language models shown": 5944, + "alignment different languages": 575, + "achieve competitive performance": 245, + "models llms explore": 7418, + "largescale pretrained language models": 6143, + "language models llms explore": 5881, + "large language models llms explore": 6062, + "polish": 8564, + "robertabased": 10035, + "served": 10298, + "scientists": 10154, + "hc3": 4831, + "substitute": 10901, + "fake news": 3987, + "great importance": 4749, + "benchmark future": 1120, + "certain language": 1445, + "information social": 5315, + "piece text": 8495, + "tasks known": 11229, + "known llms": 5722, + "llms served": 6643, + "served highquality": 10299, + "chinese benchmark": 1621, + "results compared": 9886, + "findings offer": 4091, + "work step": 12267, + "human chatgpt": 4955, + "comparison corpus": 1864, + "chatgpt gained": 1564, + "robertabased detector": 10036, + "llms substitute": 6665, + "variety tasks including": 12049, + "information social media": 5316, + "tasks known llms": 11230, + "known llms served": 5723, + "llms served highquality": 6644, + "findings offer new": 4092, + "human chatgpt comparison": 4956, + "chatgpt comparison corpus": 1544, + "chatgpt gained significant": 1565, + "tasks known llms served": 11231, + "known llms served highquality": 5724, + "human chatgpt comparison corpus": 4957, + "tasks known llms served highquality": 11232, + "generalpurposed": 4438, + "meanings": 6880, + "drew": 3096, + "cultural": 2337, + "journals": 5612, + "models traditional": 7544, + "traditional machine": 11519, + "characteristics language": 1517, + "english prompts": 3383, + "using human": 11947, + "specifically evaluate": 10629, + "elicit llms": 3216, + "stateoftheart finetuned": 10707, + "gpt model": 4668, + "strategies pretrained": 10778, + "tasks evaluated": 11197, + "using existing": 11945, + "scientific research": 10153, + "provide valuable insights": 9169, + "comprehensive empirical study": 1932, + "finetuning strategies pretrained": 4148, + "strategies pretrained language": 10779, + "finetuning strategies pretrained language": 4149, + "strategies pretrained language models": 10780, + "finetuning strategies pretrained language models": 4150, + "strategies pretrained language models plms": 10781, + "intralingual": 5529, + "quantity": 9256, + "ceval": 1446, + "middle": 7034, + "chineseoriented": 1635, + "gaokao": 4375, + "lessons": 6266, + "knowledge employ": 5664, + "chat models": 1527, + "ceval hard": 1447, + "mt systems": 7603, + "chinese gaokao": 1626, + "evaluation data": 3548, + "task largescale": 11133, + "llms particular": 6605, + "chinese pretrained language": 1633, + "evaluation data specifically": 3549, + "chinese pretrained language model": 1634 + } + } +} \ No newline at end of file