Hi! I'm a third-year Ph.D. student in the Department of Electrical and Computer Engineering at Carnegie Mellon University. I am fortunate to be advised by Prof. Yuejie Chi. Previously, I spent one year at Peking University in a data science master's program. Before that, I received my B.S. in Mathematics (Honors Program) from Xi'an Jiaotong University. Here is my CV.
I'm interested in machine learning, deep learning, reinforcement learning, optimization, and game theory. Specifically, I am interested in developing sample-efficient and computationally efficient algorithms for fundamental machine learning problems.
- 
Exploration from a Primal-Dual Lens: Value-Incentivized Actor-Critic Methods for Sample-Efficient Online RL
 NeurIPS 2025
 Tong Yang, Bo Dai, Lin Xiao, Yuejie Chi
 [PDF] 
[BibTex]
% arXiv preprint entry; the acronym RL is brace-protected so that
% sentence-casing bibliography styles keep it uppercase.
@article{yang2025exploration,
  title   = {Exploration from a Primal-Dual Lens: Value-Incentivized Actor-Critic Methods for Sample-Efficient Online {RL}},
  author  = {Yang, Tong and Dai, Bo and Xiao, Lin and Chi, Yuejie},
  journal = {arXiv preprint arXiv:2506.22401},
  year    = {2025}
}
 
- 
Multi-head Transformers Provably Learn Symbolic Multi-step Reasoning via Gradient Descent
 NeurIPS 2025
 Tong Yang, Yu Huang, Yingbin Liang, Yuejie Chi
 [PDF] 
[BibTex]
% arXiv preprint entry (identifier carried in the eprint fields).
@misc{yang2025multiheadtransformersprovablylearn,
  author        = {Yang, Tong and Huang, Yu and Liang, Yingbin and Chi, Yuejie},
  title         = {Multi-head Transformers Provably Learn Symbolic Multi-step Reasoning via Gradient Descent},
  year          = {2025},
  archivePrefix = {arXiv},
  eprint        = {2508.08222},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2508.08222}
}
 
- 
Incentivize without Bonus: Provably Efficient Model-based Online Multi-agent RL for Markov Games
 ICML 2025
 Tong Yang, Bo Dai, Lin Xiao, Yuejie Chi
 [PDF] 
[BibTex]
% arXiv preprint entry; title stored in Title Case, with the acronym RL and
% the proper noun Markov brace-protected against sentence-casing styles.
@article{yang2025incentivize,
  title   = {Incentivize without Bonus: Provably Efficient Model-based Online Multi-agent {RL} for {Markov} Games},
  author  = {Yang, Tong and Dai, Bo and Xiao, Lin and Chi, Yuejie},
  journal = {arXiv preprint arXiv:2502.09780},
  year    = {2025}
}
 
- 
Faster WIND: Accelerating Iterative Best-of-N Distillation for LLM Alignment
 AISTATS 2025
 Tong Yang, Jincheng Mei, Hanjun Dai, Zixin Wen, Shicong Cen, Dale Schuurmans, Yuejie Chi, Bo Dai
 [PDF] 
[BibTex]
% arXiv preprint entry; the acronyms WIND and LLM and the math-mode N are
% brace-protected so bibliography styles do not recase them.
@article{yang2024faster,
  title   = {Faster {WIND}: Accelerating Iterative {Best-of-$N$} Distillation for {LLM} Alignment},
  author  = {Yang, Tong and Mei, Jincheng and Dai, Hanjun and Wen, Zixin and Cen, Shicong and Schuurmans, Dale and Chi, Yuejie and Dai, Bo},
  journal = {arXiv preprint arXiv:2410.20727},
  year    = {2024}
}
 
- 
In-Context Learning with Representations: Contextual Generalization of Trained Transformers
 NeurIPS 2024
 Tong Yang, Yu Huang, Yingbin Liang, Yuejie Chi
 [PDF] 
[BibTex]
% arXiv preprint entry; title stored in Title Case (styles can downcase it,
% but cannot restore capitals that were dropped in the database).
@article{yang2024context,
  title   = {In-Context Learning with Representations: Contextual Generalization of Trained Transformers},
  author  = {Yang, Tong and Huang, Yu and Liang, Yingbin and Chi, Yuejie},
  journal = {arXiv preprint arXiv:2408.10147},
  year    = {2024}
}
 
- 
Federated Natural Policy Gradient and Actor Critic Methods for Multi-task Reinforcement Learning
 NeurIPS 2024
 Tong Yang, Shicong Cen, Yuting Wei, Yuxin Chen, Yuejie Chi
 [PDF] 
[BibTex]
% arXiv preprint entry; title matches the publication listing, which
% includes the actor-critic methods. Year is kept as in the original entry.
@article{yang2023federated,
  title   = {Federated Natural Policy Gradient and Actor Critic Methods for Multi-task Reinforcement Learning},
  author  = {Yang, Tong and Cen, Shicong and Wei, Yuting and Chen, Yuxin and Chi, Yuejie},
  journal = {arXiv preprint arXiv:2311.00201},
  year    = {2023}
}
 
- 
A Primal-Dual Approach to Solving Variational Inequalities with General Constraints 
 ICLR 2024
 Tatjana Chavdarova*, Tong Yang*, Matteo Pagliardini, Michael I. Jordan (*equal contribution, order is alphabetical.)
 [PDF] 
[Poster (OPT@NeurIPS '22)]
[BibTex]
% arXiv preprint entry; title and author order follow the publication
% listing for this paper. Year is kept as in the original entry.
@article{chavdarova2022acvi,
  title   = {A Primal-Dual Approach to Solving Variational Inequalities with General Constraints},
  author  = {Chavdarova, Tatjana and Yang, Tong and Pagliardini, Matteo and Jordan, Michael I.},
  journal = {arXiv preprint arXiv:2210.15659},
  year    = {2022}
}
 
- 
Solving Constrained Variational Inequalities via an Interior Point Method
 ICLR 2023 Spotlight!
 Tong Yang*, Michael I. Jordan*, Tatjana Chavdarova* (*equal contribution)
 [PDF]
[Poster (WiML@ICML '22)]
[Code]
[BibTex]
% arXiv entry for the interior-point method paper.
@article{yang2022acvi,
  author  = {Yang, Tong and Jordan, Michael I. and Chavdarova, Tatjana},
  title   = {Solving Constrained Variational Inequalities via an Interior Point Method},
  journal = {ArXiv:2206.10575},
  year    = {2022}
}
 
- 
Optimization for Amortized Inverse Problems
 ICML 2023
 Tianci Liu*, Tong Yang*, Quan Zhang, Qi Lei (*equal contribution)
 [PDF]
[BibTex]
% Conference paper entry; PMLR is recorded as the publisher (the
% organization field denotes a sponsor), and the title is in Title Case.
@inproceedings{liu2023optimization,
  title     = {Optimization for Amortized Inverse Problems},
  author    = {Liu, Tianci and Yang, Tong and Zhang, Quan and Lei, Qi},
  booktitle = {International Conference on Machine Learning},
  pages     = {22289--22319},
  year      = {2023},
  publisher = {PMLR}
}
 
- 
Value-Incentivized Preference Optimization: A Unified Approach to Online and Offline RLHF
 ICLR 2025
 Shicong Cen, Jincheng Mei, Katayoon Goshvadi, Hanjun Dai, Tong Yang, Sherry Yang, Dale Schuurmans, Yuejie Chi, Bo Dai
 [PDF]
[BibTex]
% arXiv preprint entry; the acronym RLHF is brace-protected so that
% sentence-casing bibliography styles keep it uppercase.
@article{cen2024value,
  title   = {Value-Incentivized Preference Optimization: A Unified Approach to Online and Offline {RLHF}},
  author  = {Cen, Shicong and Mei, Jincheng and Goshvadi, Katayoon and Dai, Hanjun and Yang, Tong and Yang, Sherry and Schuurmans, Dale and Chi, Yuejie and Dai, Bo},
  journal = {arXiv preprint arXiv:2405.19320},
  year    = {2024}
}