PyTorch implementation of "Quantize-Sample-and-Verify: LLM Acceleration via Adaptive Edge-Cloud Speculative Decoding".
% arXiv preprint: the identifier lives in eprint/archivePrefix (not in a journal field); LLM is braced so styles keep its capitals.
@misc{qs_speculative,
  author        = {Zhang, Guangyi and Cai, Yunlong and Yu, Guanding and Popovski, Petar and Simeone, Osvaldo},
  title         = {Quantize-Sample-and-Verify: {LLM} Acceleration via Adaptive Edge-Cloud Speculative Decoding},
  year          = {2025},
  eprint        = {2507.00605},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2507.00605},
}