% arXiv preprint (arXiv:2411.10440, primary class cs.CV).
% The model name is brace-protected in the title so that styles which
% sentence-case titles do not lowercase it.
@misc{xu2024llavacotletvisionlanguage,
  title         = {{LLaVA-CoT}: Let Vision Language Models Reason Step-by-Step},
  author        = {Xu, Guowei and Jin, Peng and Li, Hao and Song, Yibing and Sun, Lichao and Yuan, Li},
  year          = {2024},
  eprint        = {2411.10440},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2411.10440},
}