% Wang, Wang, and Wang (2018): NAACL-HLT conference paper on video captioning
% with globally and locally aligned cross-modal attentions.
% NOTE(review): pages and doi are not recorded here; add them once verified
% against the published proceedings.
@inproceedings{naacl2018wang,
  author    = {Wang, Xin and Wang, Yuan-Fang and Wang, William Yang},
  title     = {Watch, Listen, and Describe: Globally and Locally Aligned Cross-Modal Attentions for Video Captioning},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2018},
  address   = {New Orleans, LA, USA},
  publisher = {Association for Computational Linguistics},
}