% li2024visual: arXiv preprint (no journal publication recorded here).
% The arXiv identifier lives in eprint/archiveprefix rather than in a fake
% journal field, so styles can format and link it themselves.
@misc{li2024visual,
  author        = {Li, Xuchen and Hu, Shiyu and Feng, Xiaokun and Zhang, Dailing and Wu, Meiqi and Zhang, Jing and Huang, Kaiqi},
  title         = {Visual Language Tracking with Multi-modal Interaction: A Robust Benchmark},
  year          = {2024},
  eprint        = {2409.08887},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2409.08887},
}