% Conference paper (NAACL-HLT 2018 proceedings), cited as `naacl2018wang`.
% NOTE(review): `address` appears to record the conference location rather than
% the publisher's city; kept as given -- confirm against the target style.
% NOTE(review): no `pages` or `doi` in this record -- add them from the
% publisher's official record if available.
@inproceedings{naacl2018wang,
  author    = {Wang, Xin and Wang, Yuan-Fang and Wang, William Yang},
  title     = {Watch, Listen, and Describe: Globally and Locally Aligned Cross-Modal Attentions for Video Captioning},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2018},
  address   = {New Orleans, LA, USA},
  publisher = {Association for Computational Linguistics},
}