pradachan's picture
Upload folder using huggingface_hub
f71c233 verified
[
{
"Name": "adaptive_block_size",
"Title": "Adaptive Block Size: Dynamic Context Window Adjustment for Efficient Training",
"Experiment": "Modify the model to dynamically adjust its block size during training, starting with a smaller block size and gradually increasing it. This could potentially lead to faster initial training and better long-range dependency learning.",
"Interestingness": 6,
"Feasibility": 4,
"Novelty": 4,
"novel": true
},
{
"Name": "layerwise_learning_rates",
"Title": "Layer-wise Learning Rate Adaptation: Optimizing Training Dynamics in Transformer Models",
"Experiment": "Implement layer-wise learning rates, where each transformer layer has its own learning rate. Modify the configure_optimizers function to assign different learning rates to different layers, with deeper layers having lower learning rates. Compare the training dynamics, convergence speed, and final performance with the baseline model.",
"Interestingness": 4,
"Feasibility": 6,
"Novelty": 2,
"novel": true
},
{
"Name": "vi_t_pruning_benchmark_execution",
"Title": "Bayesian Scaffolded Sparsity: A Primitifal Approach to ViTF Compression against Alternate Mechanisms",
"Experiment": [
"Design efficient techniques that use techniques like hard reset for preventing over compression",
"Regular monitor & report of weights' magnitudes & norms as early detection"
],
"Interestingness": 8,
"Feasibility": 9,
"Novelty": 7,
"novel": true
},
{
"Name": "In-depth_SEqEembhead_expansion_evaluation",
"Title": "Publishing comprehensive analysis on extensive gridsearch experimentation: Sequence Length vs Embedding Layer Dimensions & Attention Heads for T5 Models \u2013 Benchmarked Results across Various NLP standards & Practical Implications including Stability and Computation Efficiency.",
"Experiment": [
"Present early-stage findings at a recognized neural processing symposia, eg NeurIPS/TACL/MAS etc."
],
"Interestingness": 8,
"Feasibility": 7,
"Novelty": 9,
"novel": true
}
]