% PLDI 2022 conference paper describing the Poe system (see abstract).
% address = publisher city; location = conference city (ACM-style export fields).
@inproceedings{pldi22-poe,
  author    = {Chen, Yanju and Yan, Xifeng and Feng, Yu},
  title     = {Visualization Question Answering Using Introspective Program Synthesis},
  booktitle = {Proceedings of the 43rd {ACM} {SIGPLAN} International Conference on Programming Language Design and Implementation},
  series    = {PLDI 2022},
  pages     = {137--151},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {San Diego, CA, USA},
  year      = {2022},
  keywords  = {Program Synthesis, Natural Language Processing, Visualization, Machine Learning},
  abstract  = {While data visualization plays a crucial role in gaining insights from data, generating answers over complex visualizations from natural language questions is far from an easy task. Mainstream approaches reduce data visualization queries to a semantic parsing problem, which either relies on expensive-to-annotate supervised training data that pairs natural language questions with logical forms, or weakly supervised models that incorporate a larger corpus but fail on long-tailed queries without explanations. This paper aims to answer data visualization queries by automatically synthesizing the corresponding program from natural language. At the core of our technique is an abstract synthesis engine that is bootstrapped by an off-the-shelf weakly supervised model and an optimal synthesis algorithm guided by triangle alignment constraints, which represent consistency among natural language, visualization, and the synthesized program. Starting with a few tentative answers obtained from an off-the-shelf statistical model, our approach first involves an abstract synthesizer that generates a set of sketches that are consistent with the answers. Then we design an instance of optimal synthesis to complete one of the candidate sketches by satisfying common type constraints and maximizing the consistency among three parties, i.e., natural language, the visualization, and the candidate program. We implement the proposed idea in a system called Poe that can answer visualization queries from natural language. Our method is fully automated and does not require users to know the underlying schema of the visualizations. 
We evaluate Poe on 629 visualization queries and our experiment shows that Poe outperforms state-of-the-arts by improving the accuracy from 44\% to 59\%.},
}