% arXiv preprint 2411.10440 (cs.CV): LLaVA-CoT.
% The model name is brace-protected in the title so that sentence-casing
% bibliography styles do not lowercase it.
@misc{xu2024llavacotletvisionlanguage,
  author        = {Xu, Guowei and Jin, Peng and Li, Hao and Song, Yibing and Sun, Lichao and Yuan, Li},
  title         = {{LLaVA-CoT}: Let Vision Language Models Reason Step-by-Step},
  year          = {2024},
  eprint        = {2411.10440},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  url           = {https://arxiv.org/abs/2411.10440},
}