% Li et al. (2024): arXiv preprint 2409.08887 (no journal venue recorded).
% The arXiv identifier lives in the eprint fields instead of `journal`, so that
% eprint-aware styles can format and link it; the title is kept in Title Case
% so styles remain free to apply their own casing.
@misc{li2024visual,
  author        = {Li, Xuchen and Hu, Shiyu and Feng, Xiaokun and Zhang, Dailing and Wu, Meiqi and Zhang, Jing and Huang, Kaiqi},
  title         = {Visual Language Tracking with Multi-modal Interaction: A Robust Benchmark},
  year          = {2024},
  eprint        = {2409.08887},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2409.08887},
}