% Conference paper by Yamada, Nose and Ito from IIH-MSP 2016 (held 21-23 Nov 2016),
% published in 2017 in the Springer series "Smart Innovation, Systems and Technologies".
% NOTE(review): "address" holds a country rather than a publisher city, and the
% "publisher" name differs from the copyright holder named in "note"
% (Springer International Publishing AG) - confirm both against the volume's
% front matter. The series volume number is also missing - TODO add "volume".
@inproceedings{3de937632c8549f6aa409b4bd055851e,
  author    = {Yamada, Shuhei and Nose, Takashi and Ito, Akinori},
  title     = {A study on tailor-made speech synthesis based on deep neural networks},
  booktitle = {Advances in Intelligent Information Hiding and Multimedia Signal Processing - Proceeding of the 12th International Conference on Intelligent Information Hiding and Multimedia Signal Processing, 2016},
  editor    = {Huang, Hsiang-Cheh and Pan, Jeng-Shyang and Tsai, Pei-Wei},
  series    = {Smart Innovation, Systems and Technologies},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2017},
  pages     = {159--166},
  doi       = {10.1007/978-3-319-50209-0_20},
  isbn      = {9783319502083},
  language  = {English},
  keywords  = {Context label, DNN-based speech synthesis, F0 context, Model training, Prosody control, Unsupervised labeling},
  abstract  = {We propose ``tailor-made speech synthesis,'' the speech synthesis technique which enables users to control the synthetic speech naturally and intuitively. As a first step to realizing tailor-made speech synthesis, we introduce F0 context into speaker model training of speech synthesis based on deep neural networks (DNNs). F0 context represents relative log F0 at the mora or the accent-phrase level of training data. It allows users to control the F0 of synthetic speech steplessly on the contrary to conventional F0 context in HMM-based technique. Experiments showed that F0 context was effective to control the F0 because the F0 of synthetic voice followed the value of F0 context.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing AG 2017.; 12th International Conference on Intelligent Information Hiding and Multimedia Signal Processing, IIH-MSP 2016 ; Conference date: 21-11-2016 Through 23-11-2016},
}