% Conference paper (INTERSPEECH 2010) on HMM-based conversational speech synthesis.
% The proceedings title is stored once, in booktitle. Do not copy it into series:
% styles that print series would then render the proceedings title twice.
% Author names use the unambiguous "Last, First" form.
@inproceedings{9214ad8dec454ba49fd93896cf0b542d,
  author    = {Koriyama, Tomoki and Nose, Takashi and Kobayashi, Takao},
  title     = {Conversational spontaneous speech synthesis using average voice model},
  booktitle = {Proceedings of the 11th Annual Conference of the International Speech Communication Association, {INTERSPEECH} 2010},
  publisher = {International Speech Communication Association},
  pages     = {853--856},
  year      = {2010},
  language  = {English},
  keywords  = {Average voice model, Conversational speech, HMM-based speech synthesis, Speaker adaptation, Spontaneous speech, Style adaptation},
  abstract  = {This paper describes conversational spontaneous speech synthesis based on hidden Markov model (HMM). To reduce the amount of data required for model training, we utilize an average-voice-based speech synthesis framework, which has been shown to be effective for synthesizing speech with arbitrary speaker's voice using a small amount of training data. We examine several kinds of average voice model using reading-style speech and/or conversation-style speech. We also examine an appropriate utterance unit for conversational speech synthesis. Experimental results show that the proposed two-stage model adaptation method improves the quality of synthetic conversational speech.},
  note      = {Funding Information: A part of this work was supported by JSPS Grant-in-Aid for Scientific Research 21300063 and 21800020.},
}