Hi! I'm a second-year Ph.D. student in the Department of Electrical and Computer Engineering at Carnegie Mellon University, where I am fortunate to be advised by Prof. Yuejie Chi. Previously, I spent one year at Peking University in a data science master’s program. Before that, I received my B.S. in Mathematics (Honors Program) from Xi'an Jiaotong University. Here is my CV.
I have eclectic interests in machine learning, deep learning, reinforcement learning, optimization, and game theory. Specifically, I am interested in developing sample-efficient and computationally efficient algorithms for fundamental machine learning problems.
-
Exploration from a Primal-Dual Lens: Value-Incentivized Actor-Critic Methods for Sample-Efficient Online RL
Preprint, 2025
Tong Yang, Bo Dai, Lin Xiao, Yuejie Chi
[PDF]
[BibTex]
@misc{yang2025explorationprimalduallensvalueincentivized,
  author        = {Yang, Tong and Dai, Bo and Xiao, Lin and Chi, Yuejie},
  title         = {Exploration from a Primal-Dual Lens: Value-Incentivized Actor-Critic Methods for Sample-Efficient Online {RL}},
  year          = {2025},
  eprint        = {2506.22401},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2506.22401},
}
-
Incentivize without Bonus: Provably Efficient Model-based Online Multi-agent RL for Markov Games
ICML 2025
Tong Yang, Bo Dai, Lin Xiao, Yuejie Chi
[PDF]
[BibTex]
@misc{yang2025incentivize,
  author        = {Yang, Tong and Dai, Bo and Xiao, Lin and Chi, Yuejie},
  title         = {Incentivize without Bonus: Provably Efficient Model-based Online Multi-agent {RL} for {Markov} Games},
  year          = {2025},
  eprint        = {2502.09780},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2502.09780},
}
-
Faster WIND: Accelerating Iterative Best-of-N Distillation for LLM Alignment
AISTATS 2025
Tong Yang, Jincheng Mei, Hanjun Dai, Zixin Wen, Shicong Cen, Dale Schuurmans, Yuejie Chi, Bo Dai
[PDF]
[BibTex]
@misc{yang2024faster,
  author        = {Yang, Tong and Mei, Jincheng and Dai, Hanjun and Wen, Zixin and Cen, Shicong and Schuurmans, Dale and Chi, Yuejie and Dai, Bo},
  title         = {Faster {WIND}: Accelerating Iterative Best-of-{$N$} Distillation for {LLM} Alignment},
  year          = {2024},
  eprint        = {2410.20727},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2410.20727},
}
-
In-Context Learning with Representations: Contextual Generalization of Trained Transformers
NeurIPS 2024
Tong Yang, Yu Huang, Yingbin Liang, Yuejie Chi
[PDF]
[BibTex]
@misc{yang2024context,
  author        = {Yang, Tong and Huang, Yu and Liang, Yingbin and Chi, Yuejie},
  title         = {In-Context Learning with Representations: Contextual Generalization of Trained Transformers},
  year          = {2024},
  eprint        = {2408.10147},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2408.10147},
}
-
Federated Natural Policy Gradient and Actor Critic Methods for Multi-task Reinforcement Learning
NeurIPS 2024
Tong Yang, Shicong Cen, Yuting Wei, Yuxin Chen, Yuejie Chi
[PDF]
[BibTex]
@misc{yang2023federated,
  author        = {Yang, Tong and Cen, Shicong and Wei, Yuting and Chen, Yuxin and Chi, Yuejie},
  title         = {Federated Natural Policy Gradient and Actor Critic Methods for Multi-task Reinforcement Learning},
  year          = {2023},
  eprint        = {2311.00201},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2311.00201},
}
-
A Primal-Dual Approach to Solving Variational Inequalities with General Constraints
ICLR 2024
Tatjana Chavdarova*, Tong Yang*, Matteo Pagliardini, Michael I. Jordan (*equal contribution, order is alphabetical.)
[PDF]
[Poster (OPT@NeurIPS '22)]
[BibTex]
@misc{chavdarova2022acvi,
  author        = {Chavdarova, Tatjana and Yang, Tong and Pagliardini, Matteo and Jordan, Michael I.},
  title         = {A Primal-Dual Approach to Solving Variational Inequalities with General Constraints},
  year          = {2022},
  eprint        = {2210.15659},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2210.15659},
}
-
Solving Constrained Variational Inequalities via an Interior Point Method
ICLR 2023 Spotlight!
Tong Yang*, Michael I. Jordan*, Tatjana Chavdarova* (*equal contribution)
[PDF]
[Poster (WiML@ICML '22)]
[Code]
[BibTex]
@misc{yang2022acvi,
  author        = {Yang, Tong and Jordan, Michael I. and Chavdarova, Tatjana},
  title         = {Solving Constrained Variational Inequalities via an Interior Point Method},
  year          = {2022},
  eprint        = {2206.10575},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2206.10575},
}
-
Optimization for Amortized Inverse Problems
ICML 2023
Tianci Liu*, Tong Yang*, Quan Zhang, Qi Lei (*equal contribution)
[PDF]
[BibTex]
@inproceedings{liu2023optimization,
  author    = {Liu, Tianci and Yang, Tong and Zhang, Quan and Lei, Qi},
  title     = {Optimization for Amortized Inverse Problems},
  booktitle = {International Conference on Machine Learning},
  pages     = {22289--22319},
  year      = {2023},
  publisher = {PMLR},
}
-
Value-Incentivized Preference Optimization: A Unified Approach to Online and Offline RLHF
ICLR 2025
Shicong Cen, Jincheng Mei, Katayoon Goshvadi, Hanjun Dai, Tong Yang, Sherry Yang, Dale Schuurmans, Yuejie Chi, Bo Dai
[PDF]
[BibTex]
@misc{cen2024value,
  author        = {Cen, Shicong and Mei, Jincheng and Goshvadi, Katayoon and Dai, Hanjun and Yang, Tong and Yang, Sherry and Schuurmans, Dale and Chi, Yuejie and Dai, Bo},
  title         = {Value-Incentivized Preference Optimization: A Unified Approach to Online and Offline {RLHF}},
  year          = {2024},
  eprint        = {2405.19320},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2405.19320},
}