% Conference paper describing Poe, a system that answers natural-language
% questions over data visualizations via program synthesis (PLDI 2022).
% Fields are grouped as: who/what, venue, publication details, then
% descriptive metadata (keywords, abstract).
% NOTE(review): both `address` and `location` are set. Here `address` looks
% like the publisher's city and `location` the conference city (ACM export
% convention -- confirm). Under biblatex, `address` is an alias of
% `location`, so the two would collide; verify against the target toolchain.
@inproceedings{pldi22-poe,
  author    = {Chen, Yanju and Yan, Xifeng and Feng, Yu},
  title     = {Visualization Question Answering Using Introspective Program
               Synthesis},
  booktitle = {Proceedings of the 43rd {ACM} {SIGPLAN} International Conference
               on Programming Language Design and Implementation},
  series    = {PLDI 2022},
  year      = {2022},
  pages     = {137--151},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {San Diego, CA, USA},
  keywords  = {Program Synthesis, Natural Language Processing, Visualization,
               Machine Learning},
  abstract  = {While data visualization plays a crucial role in gaining
               insights from data, generating answers over complex
               visualizations from natural language questions is far from an
               easy task. Mainstream approaches reduce data visualization
               queries to a semantic parsing problem, which either relies on
               expensive-to-annotate supervised training data that pairs
               natural language questions with logical forms, or weakly
               supervised models that incorporate a larger corpus but fail on
               long-tailed queries without explanations. This paper aims to
               answer data visualization queries by automatically synthesizing
               the corresponding program from natural language. At the core of
               our technique is an abstract synthesis engine that is
               bootstrapped by an off-the-shelf weakly supervised model and an
               optimal synthesis algorithm guided by triangle alignment
               constraints, which represent consistency among natural language,
               visualization, and the synthesized program. Starting with a few
               tentative answers obtained from an off-the-shelf statistical
               model, our approach first involves an abstract synthesizer that
               generates a set of sketches that are consistent with the
               answers. Then we design an instance of optimal synthesis to
               complete one of the candidate sketches by satisfying common type
               constraints and maximizing the consistency among three parties,
               i.e., natural language, the visualization, and the candidate
               program. We implement the proposed idea in a system called Poe
               that can answer visualization queries from natural language. Our
               method is fully automated and does not require users to know the
               underlying schema of the visualizations. We evaluate Poe on 629
               visualization queries and our experiment shows that Poe
               outperforms state-of-the-arts by improving the accuracy from
               44\% to 59\%.},
}