% Conference paper (Findings of EMNLP 2025). Acronyms are brace-protected so
% sentence-casing bibliography styles do not lowercase them.
@inproceedings{kim2025challenging,
  author    = {Kim, Sungwon and Khashabi, Daniel},
  title     = {Challenging the Evaluator: {LLM} Sycophancy Under User Rebuttal},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  year      = {2025},
  month     = nov,
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {22461--22478},
  isbn      = {979-8-89176-335-7},
  url       = {https://aclanthology.org/2025.findings-emnlp.1222/},
}
arXiv
BiomedSQL: Text-to-SQL for Scientific Reasoning on Biomedical Knowledge Bases
Mathew J. Koretsky, Maya Willey, Adi Asija, and 8 more authors
% arXiv preprint. The arXiv identifier is also given in eprint/archiveprefix
% for eprint-aware styles; journal is kept so classic styles still print the venue.
@article{koretsky2025biomedsql-arxiv,
  author        = {Koretsky, Mathew J. and Willey, Maya and Asija, Adi and
                   Bianchi, Owen and Alvarado, Chelsea X. and Nayak, Tanay and
                   Kuznetsov, Nicole and Kim, Sungwon and Nalls, Mike A. and
                   Khashabi, Daniel and Faghri, Faraz},
  title         = {{BiomedSQL}: {Text-to-SQL} for Scientific Reasoning on Biomedical Knowledge Bases},
  journal       = {arXiv preprint arXiv:2505.20321},
  year          = {2025},
  month         = may,
  eprint        = {2505.20321},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2505.20321},
  url           = {https://arxiv.org/abs/2505.20321},
}
bioRxiv
CARDBiomedBench: A Benchmark for Evaluating Large Language Model Performance in Biomedical Research
Owen Bianchi, Maya Willey, Chelsea X. Alvarado, and 21 more authors
% bioRxiv preprint. The benchmark name is brace-protected so sentence-casing
% bibliography styles keep its capitalisation.
@article{bianchi2025cardbiomedbench,
  author  = {Bianchi, Owen and Willey, Maya and Alvarado, Chelsea X. and
             Danek, Ben and Khani, Mahsa and Kuznetsov, Nicole and
             Dadu, Aanya and Shah, Saniya and Koretsky, Mathew J. and
             Makarious, Mary B. and Weller, Claire and Levine, Kelsey S. and
             Kim, Sungwon and Jarreau, Paul and Vitale, Domenico and
             Marsan, Emilie and Iwaki, Hiroto and Leonard, Heather and
             Bandres-Ciga, Sara and Singleton, Andrew B. and Nalls, Mike A. and
             Mokhtari, Soheil and Khashabi, Daniel and Faghri, Faraz},
  title   = {{CARDBiomedBench}: A Benchmark for Evaluating Large Language Model Performance in Biomedical Research},
  journal = {bioRxiv},
  year    = {2025},
  month   = jan,
  note    = {Preprint},
  doi     = {10.1101/2025.01.15.633272},
  pmid    = {39868292},
  pmcid   = {PMC11760394},
  url     = {https://www.biorxiv.org/content/10.1101/2025.01.15.633272v1},
}