Update README.md
Browse files
README.md
CHANGED
@@ -15,4 +15,24 @@ datasets:
|
|
15 |
### Model Sources
|
16 |
- **Dataset:** [SpaceLLaVA](https://huggingface.co/datasets/remyxai/vqasynth_spacellava)
|
17 |
- **Repository:** [VQASynth](https://github.com/remyxai/VQASynth/tree/main)
|
18 |
-
- **Paper:** [SpatialVLM](https://arxiv.org/abs/2401.12168)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
### Model Sources
|
16 |
- **Dataset:** [SpaceLLaVA](https://huggingface.co/datasets/remyxai/vqasynth_spacellava)
|
17 |
- **Repository:** [VQASynth](https://github.com/remyxai/VQASynth/tree/main)
|
18 |
+
- **Paper:** [SpatialVLM](https://arxiv.org/abs/2401.12168)
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
+
## Citation
|
23 |
+
```bibtex
|
24 |
+
@article{chen2024spatialvlm,
|
25 |
+
title = {SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities},
|
26 |
+
author = {Chen, Boyuan and Xu, Zhuo and Kirmani, Sean and Ichter, Brian and Driess, Danny and Florence, Pete and Sadigh, Dorsa and Guibas, Leonidas and Xia, Fei},
|
27 |
+
journal = {arXiv preprint arXiv:2401.12168},
|
28 |
+
year = {2024},
|
29 |
+
url = {https://arxiv.org/abs/2401.12168},
|
30 |
+
}
|
31 |
+
|
32 |
+
@article{xiao2023florence,
|
33 |
+
title={Florence-2: Advancing a unified representation for a variety of vision tasks},
|
34 |
+
author={Xiao, Bin and Wu, Haiping and Xu, Weijian and Dai, Xiyang and Hu, Houdong and Lu, Yumao and Zeng, Michael and Liu, Ce and Yuan, Lu},
|
35 |
+
journal={arXiv preprint arXiv:2311.06242},
|
36 |
+
year={2023}
|
37 |
+
}
|
38 |
+
```
|