% Workshop paper. The title is stored in Title Case without whole-title braces
% so the bibliography style controls casing; the month uses the predefined macro.
% NOTE(review): month is carried over as September from the original entry;
% confirm it matches the actual workshop date.
@inproceedings{gross2025demystifying,
  author    = {Gross, Megan and Kaya, Yigitcan and Kruegel, Christopher and Vigna, Giovanni},
  title     = {Demystifying Cipher-Following in Large Language Models via Activation Analysis},
  booktitle = {Proceedings of the {NeurIPS} Workshop on Mechanistic Interpretability},
  address   = {San Diego, CA},
  month     = sep,
  year      = {2025},
}