% Concord: deduction-guided reinforcement learning for program synthesis
% (paper in "Computer Aided Verification", 2020).
@INPROCEEDINGS{cav20-concord,
  title     = "Program Synthesis Using {Deduction-Guided} Reinforcement
               Learning",
  booktitle = "Computer Aided Verification",
  author    = "Chen, Yanju and Wang, Chenglong and Bastani, Osbert and Dillig,
               Isil and Feng, Yu",
  editor    = "Lahiri, Shuvendu K and Wang, Chao",
  abstract  = "In this paper, we present a new program synthesis algorithm
               based on reinforcement learning. Given an initial policy (i.e.
               statistical model) trained off-line, our method uses this policy
               to guide its search and gradually improves it by leveraging
               feedback obtained from a deductive reasoning engine.
               Specifically, we formulate program synthesis as a reinforcement
               learning problem and propose a new variant of the policy
               gradient algorithm that can incorporate feedback from a
               deduction engine into the underlying statistical model. The
               benefit of this approach is two-fold: First, it combines the
               power of deductive and statistical reasoning in a unified
               framework. Second, it leverages deduction not only to prune the
               search space but also to guide search. We have implemented the
               proposed approach in a tool called Concord and experimentally
               evaluate it on synthesis tasks studied in prior work. Our
               comparison against several baselines and two existing synthesis
               tools shows the advantages of our proposed approach. In
               particular, Concord solves 15\% more benchmarks compared to Neo,
               a state-of-the-art synthesis tool, while improving synthesis
               time by 8.71$\times$ on benchmarks that can be solved by both
               tools.",
  publisher = "Springer International Publishing",
  pages     = "587--610",
  year      =  2020,
  address   = "Cham"
}