diff --git a/CMakeLists.txt b/CMakeLists.txt index 82075aa4d1b..6acedb4b79c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,9 +74,11 @@ IF(RESET_INSTALL_PREFIX) ENDIF(NOT $ENV{FS2PATH} STREQUAL "") ENDIF(RESET_INSTALL_PREFIX) -IF(WIN32 OR APPLE) +IF(WIN32 OR APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Linux") OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON) -ENDIF(WIN32 OR APPLE) +ELSE() + OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" OFF) +ENDIF() IF (WIN32) OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON) @@ -227,9 +229,7 @@ include(package) include(doxygen) # Print used options to log -IF(WIN32 OR APPLE) - message(STATUS "Using text to speech: ${FSO_USE_SPEECH}") -ENDIF() +message(STATUS "Using text to speech: ${FSO_USE_SPEECH}") IF (WIN32) message(STATUS "Using voice recogition: ${FSO_USE_VOICEREC}") message(STATUS "Building FRED2: ${FSO_BUILD_FRED2}") diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake index b85b5b7fe9a..c7cc6b50b4c 100644 --- a/cmake/finder/FindSpeech.cmake +++ b/cmake/finder/FindSpeech.cmake @@ -11,6 +11,8 @@ if (WIN32) endif() elseif(APPLE) # it should just work +elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # uses speech-dispatcher with dlopen else() message(SEND_ERROR "Text to Speech is not supported on this platform!") endif() diff --git a/code/cmdline/cmdline.cpp b/code/cmdline/cmdline.cpp index 7dae2532cab..e200327a118 100644 --- a/code/cmdline/cmdline.cpp +++ b/code/cmdline/cmdline.cpp @@ -1414,7 +1414,7 @@ static json_t* json_get_v1() { auto voices = speech_enumerate_voices(); for (auto& voice : voices) { - json_array_append_new(voices_array, json_string(voice.c_str())); + json_array_append_new(voices_array, json_string(voice.second.c_str())); } json_object_set_new(root, "voices", voices_array); diff --git a/code/localization/localize.cpp b/code/localization/localize.cpp index 96c0c9b992c..8c6132f4462 100644 --- a/code/localization/localize.cpp +++ b/code/localization/localize.cpp @@ -64,7 +64,7 @@ bool *Lcl_unexpected_tstring_check = nullptr; // NOTE: with map storage of XSTR strings, the indexes no longer need to be contiguous, // but internal strings should still increment XSTR_SIZE to avoid collisions. // retail XSTR_SIZE = 1570 -// #define XSTR_SIZE 1915 // This is the next available ID +// #define XSTR_SIZE 1929 // This is the next available ID // struct to allow for strings.tbl-determined x offset // offset is 0 for english, by default diff --git a/code/options/Ingame_Options.cpp b/code/options/Ingame_Options.cpp index 801245458d1..dc797ec7f01 100644 --- a/code/options/Ingame_Options.cpp +++ b/code/options/Ingame_Options.cpp @@ -4,6 +4,7 @@ #include "options/OptionsManager.h" #include "options/Option.h" +#include static std::unique_ptr OCGR; @@ -100,6 +101,7 @@ void ingame_options_init() void ingame_options_close() { + fsspeech_options_cleanup(); OCGR.reset(); } diff --git a/code/options/Option.h b/code/options/Option.h index 44032a80f1a..791f1057107 100644 --- a/code/options/Option.h +++ b/code/options/Option.h @@ -608,7 +608,7 @@ class OptionBuilder { _instance.setPreset(val.first, json_dump_string_new(_instance.getSerializer()(val.second), JSON_COMPACT | JSON_ENSURE_ASCII | JSON_ENCODE_ANY)); } - auto opt_ptr = make_shared>(_instance); + auto opt_ptr = std::make_shared>(_instance); if (std::holds_alternative>(_title)) { const auto& xstr_info = std::get>(_title); diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp index 65ef525bb3a..d8d32601671 100644 --- a/code/sound/fsspeech.cpp +++ b/code/sound/fsspeech.cpp @@ -10,7 +10,7 @@ #include "osapi/osregistry.h" #include "sound/fsspeech.h" #include "sound/speech.h" - +#include "options/Option.h" extern int Cmdline_freespace_no_sound; @@ -30,6 +30,201 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] = char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = ""; size_t Speech_buffer_len; +static bool ttsrate_change(float new_val, bool initial) +{ + if (initial) { + return false; + } + speech_set_rate(new_val); + return true; +} + +static bool ttsingame_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_INGAME] = new_val; + return true; +} + +static bool ttsmulti_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_MULTI] = new_val; + return true; +} + +static bool ttsbriefing_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = new_val; + return true; +} + +static bool ttstechroom_change(bool new_val, bool initial) +{ + if (initial) { + return false; + } + FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = new_val; + return true; +} + +static bool ttsvolume_change(float new_val, bool initial) +{ + if (initial) { + return false; + } + speech_set_volume((unsigned short) new_val); + return true; +} + +static std::pair ttsvoice_deserializer(const json_t* el) +{ + int id; + char* name = nullptr; + + json_error_t err; + if (json_unpack_ex((json_t*)el, &err, 0, "{s:i, s:s}", "id", &id, "name", &name) != 0) { + throw json_exception(err); + } + + return std::make_pair(id, name); +} + +static json_t* ttsvoice_serializer(const std::pair& value) +{ + return json_pack("{s:i, s:s}", "id", value.first, "name", value.second.c_str()); +} + +static SCP_vector> voice_list_cache; + +static SCP_vector> ttsvoice_enumerator() +{ + if(voice_list_cache.empty()) { + auto voices = speech_enumerate_voices(); + + if (voices.empty()) { + voices.emplace_back(std::make_pair(0, "No voices loaded")); + } + voice_list_cache = voices; + return voices; + } + else { + return voice_list_cache; + } +} + +static SCP_string ttsvoice_display(const std::pair& vi) +{ + return vi.second; +} + +static bool ttsvoice_change(const std::pair& new_voice, bool initial) +{ + if (initial) { + return false; + } + speech_set_voice(new_voice.first); + return true; +} + +static auto SpeechVoiceOption = options::OptionBuilder>("Speech.Voice", + std::pair{"TTS Voice", 1915}, + std::pair{"The voice used to read text", 1916}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .default_func([]() { return ttsvoice_enumerator().front(); }) // always guarantees at least 1 value + .enumerator(ttsvoice_enumerator) + .display(ttsvoice_display) + .serializer(ttsvoice_serializer) + .deserializer(ttsvoice_deserializer) + .flags({ options::OptionFlags::ForceMultiValueSelection }) + .change_listener(ttsvoice_change) + .importance(3) + .finish(); + +static auto SpeechVolumeOption = options::OptionBuilder("Speech.Volume", + std::pair{"TTS Volume", 1917}, + std::pair{"Volume used for playing TTS speech", 1918}) + .category(std::make_pair("Audio", 1826)) + .range(0.0f, 100.0f) + .default_val(100.0f) + .change_listener(ttsvolume_change) + .importance(2) + .finish(); + +static auto SpeechRateOption = options::OptionBuilder("Speech.Rate", + std::pair{"TTS Rate", 1919}, + std::pair{"Speed of the TTS voice (100 = normal)", 1920}) + .category(std::make_pair("Audio", 1826)) + .range(50.0f, 150.0f) + .default_val(100.0f) + .change_listener(ttsrate_change) + .importance(1) + .finish(); + +static auto SpeechBriefingOption = options::OptionBuilder("Speech.Briefing", + std::pair{"TTS in briefings", 1921}, + std::pair{"Enable or disable TTS in briefings", 1922}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttsbriefing_change) + .default_val(true) + .importance(0) + .finish(); + +static auto SpeechTechroomOption = options::OptionBuilder("Speech.Techroom", + std::pair{"TTS in techroom", 1923}, + std::pair{"Enable or disable TTS in techroom", 1924}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttstechroom_change) + .default_val(true) + .importance(0) + .finish(); + +static auto SpeechIngameOption = options::OptionBuilder("Speech.Ingame", + std::pair{"TTS in-game", 1925}, + std::pair{"Enable or disable TTS in-game", 1926}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttsingame_change) + .default_val(true) + .importance(0) + .finish(); + +static auto SpeechMultiOption = options::OptionBuilder("Speech.Multi", + std::pair{"TTS in multiplayer", 1927}, + std::pair{"Enable or disable TTS in multiplayer", 1928}) + .category(std::make_pair("Audio", 1826)) + .level(options::ExpertLevel::Beginner) + .change_listener(ttsmulti_change) + .default_val(true) + .importance(0) + .finish(); + +void sanitize_text(const char* input, SCP_string& output) { + output.clear(); + bool saw_dollar = false; + for (auto ch : unicode::codepoint_range(input)) { + if (ch == UNICODE_CHAR('$')) { + saw_dollar = true; + continue; + } + else if (saw_dollar) { + saw_dollar = false; + continue; + } + unicode::encode(ch, std::back_inserter(output)); + } +} + bool fsspeech_init() { if (speech_inited) { @@ -45,18 +240,33 @@ bool fsspeech_init() return false; } - // Get the settings from the registry - for(int i = 0; i < FSSPEECH_FROM_MAX; i++) { - FSSpeech_play_from[i] = - os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false; - nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false")); + if (Using_in_game_options) + { + FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = SpeechTechroomOption->getValue(); + FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue(); + FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue(); + FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue(); + speech_set_volume((unsigned short)SpeechVolumeOption->getValue()); + speech_set_voice(SpeechVoiceOption->getValue().first); + speech_set_rate(SpeechRateOption->getValue()); + } + else + { + // Get the settings from the registry + for (int i = 0; i < FSSPEECH_FROM_MAX; i++) { + FSSpeech_play_from[i] = static_cast(os_config_read_uint(nullptr, FSSpeech_play_id[i], 0)); + nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false")); + } + + int volume = os_config_read_uint(nullptr, "SpeechVolume", 100); + speech_set_volume((unsigned short)volume); + + int voice = os_config_read_uint(nullptr, "SpeechVoice", 0); + speech_set_voice(voice); + + int rate = os_config_read_uint(nullptr, "SpeechRate", 100); + speech_set_rate(static_cast(rate)); } - - int volume = os_config_read_uint(NULL, "SpeechVolume", 100); - speech_set_volume((unsigned short) volume); - - int voice = os_config_read_uint(NULL, "SpeechVoice", 0); - speech_set_voice(voice); speech_inited = 1; @@ -75,6 +285,11 @@ void fsspeech_deinit() void fsspeech_play(int type, const char *text) { + if (text == nullptr) { + nprintf(("Speech", "Not playing speech because passed text is null.\n")); + return; + } + if (!speech_inited) { nprintf(("Speech", "Aborting fsspech_play because speech_inited is false.\n")); return; @@ -90,7 +305,10 @@ void fsspeech_play(int type, const char *text) return; } - speech_play(text); + SCP_string sanitized_string; + sanitize_text(text, sanitized_string); + + speech_play(sanitized_string); } void fsspeech_stop() @@ -157,3 +375,9 @@ bool fsspeech_playing() return speech_is_speaking(); } + +void fsspeech_options_cleanup() +{ + voice_list_cache.clear(); + voice_list_cache.shrink_to_fit(); +} diff --git a/code/sound/fsspeech.h b/code/sound/fsspeech.h index 874b0c37468..cd80b3515bd 100644 --- a/code/sound/fsspeech.h +++ b/code/sound/fsspeech.h @@ -31,4 +31,7 @@ void fsspeech_play_buffer(int type); bool fsspeech_play_from(int type); bool fsspeech_playing(); +// Cleanup the voice cache after the options menu is closed +void fsspeech_options_cleanup(); + #endif // header define diff --git a/code/sound/speech.cpp b/code/sound/speech.cpp deleted file mode 100644 index 7967950ac10..00000000000 --- a/code/sound/speech.cpp +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Code created by Thomas Whittaker (RT) for a FreeSpace 2 source code project - * - * You may not sell or otherwise commercially exploit the source or things you - * created based on the source. - * -*/ - - - - - -#ifndef FS2_SPEECH -#if defined(_WIN32) || defined(__APPLE__) -#if NDEBUG - #pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" ) -#endif // NDEBUG -#endif // _WIN32 or __APPLE__ -#elif !defined(__APPLE__) // to end-of-file ... - - -#ifdef LAUNCHER -#include "stdafx.h" -#endif //LAUNCHER - -#ifdef _WIN32 - -// Since we define these ourself we need to undefine them for the sapi header -#pragma push_macro("strcpy_s") -#pragma push_macro("strncpy_s") -#pragma push_macro("strcat_s") -#pragma push_macro("memset") -#pragma push_macro("memcpy") -#undef strcpy_s -#undef strncpy_s -#undef strcat_s -#undef memset -#undef memcpy - - #include - #include - - #include - -#pragma pushpop_macro("strcpy_s") -#pragma pushpop_macro("strncpy_s") -#pragma pushpop_macro("strcat_s") -#pragma pushpop_macro("memset") -#pragma pushpop_macro("memcpy") - - ISpVoice *Voice_device; -#elif defined(SCP_UNIX) - #include -// #include - - int speech_dev = -1; -// FILE *speech_dev = NULL; -#else - #pragma error( "ERROR: Unknown platform, speech (FS2_SPEECH) is not supported" ) -#endif //_WIN32 - -#pragma warning(push) -#pragma warning(disable: 4995) -// Visual Studio complains that some functions are deprecated so this fixes that -#include -#include -#include -#pragma warning(pop) - -#include "globalincs/pstypes.h" -#include "utils/unicode.h" -#include "speech.h" - - -bool Speech_init = false; - -bool speech_init() -{ -#ifdef _WIN32 - HRESULT hr = CoCreateInstance( - CLSID_SpVoice, - NULL, - CLSCTX_ALL, - IID_ISpVoice, - (void **)&Voice_device); - - Speech_init = SUCCEEDED(hr); -#else - - speech_dev = open("/dev/speech", O_WRONLY | O_DIRECT); -// speech_dev = fopen("/dev/speech", "w"); - - if (speech_dev == -1) { -// if (speech_dev == NULL) { - mprintf(("Couldn't open '/dev/speech', turning text-to-speech off...\n")); - return false; - } - - Speech_init = true; -#endif - - nprintf(("Speech", "Speech init %s\n", Speech_init ? "succeeded!" : "failed!")); - return Speech_init; -} - -void speech_deinit() -{ - if(Speech_init == false) return; - -#ifdef _WIN32 - Voice_device->Release(); -#else - close(speech_dev); -// fclose(speech_dev); -#endif -} - -bool speech_play(const char *text) -{ - nprintf(("Speech", "Attempting to play speech string %s...\n", text)); - - if(Speech_init == false) return true; - if (text == NULL) { - nprintf(("Speech", "Not playing speech because passed text is null.\n")); - return false; - } - -#ifdef _WIN32 - SCP_string work_buffer; - - bool saw_dollar = false; - for (auto ch : unicode::codepoint_range(text)) { - if (ch == UNICODE_CHAR('$')) { - // Skip $ escape sequences which appear in briefing text - saw_dollar = true; - continue; - } else if (saw_dollar) { - saw_dollar = false; - continue; - } - - unicode::encode(ch, std::back_inserter(work_buffer)); - } - - // Determine the needed amount of data - auto num_chars = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int) work_buffer.size(), nullptr, 0); - - if (num_chars <= 0) { - // Error - return false; - } - - std::wstring wide_string; - wide_string.resize(num_chars); - - auto err = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int)work_buffer.size(), &wide_string[0], num_chars); - - if (err <= 0) { - return false; - } - - speech_stop(); - return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, NULL)); -#else - int len = strlen(text); - char Conversion_buffer[MAX_SPEECH_CHAR_LEN]; - - if(len > (MAX_SPEECH_CHAR_LEN - 1)) { - len = MAX_SPEECH_CHAR_LEN - 1; - } - - int count = 0; - for(int i = 0; i < len; i++) { - if(text[i] == '$') { - i++; - continue; - } - - Conversion_buffer[count] = text[i]; - count++; - } - - Conversion_buffer[count] = '\0'; - - if ( write(speech_dev, Conversion_buffer, count) == -1 ) - return false; -// if (fwrite(Conversion_buffer, count, 1, speech_dev)) -// fflush(speech_dev); -// else -// return false; - - return true; -#endif //_WIN32 -} - -bool speech_pause() -{ - if(Speech_init == false) return true; -#ifdef _WIN32 - return SUCCEEDED(Voice_device->Pause()); -#else - STUB_FUNCTION; - - return true; -#endif -} - -bool speech_resume() -{ - if(Speech_init == false) return true; -#ifdef _WIN32 - return SUCCEEDED(Voice_device->Resume()); -#else - STUB_FUNCTION; - - return true; -#endif -} - -bool speech_stop() -{ - if(Speech_init == false) return true; -#ifdef _WIN32 - return SUCCEEDED(Voice_device->Speak( NULL, SPF_PURGEBEFORESPEAK, NULL )); -#else - STUB_FUNCTION; - - return true; -#endif -} - -bool speech_set_volume(unsigned short volume) -{ -#ifdef _WIN32 - return SUCCEEDED(Voice_device->SetVolume(volume)); -#else - STUB_FUNCTION; - - return true; -#endif -} - -bool speech_set_voice(int voice) -{ -#ifdef _WIN32 - HRESULT hr; - CComPtr cpVoiceToken; - CComPtr cpEnum; - ULONG num_voices = 0; - - //Enumerate the available voices - hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); - - if(FAILED(hr)) return false; - - hr = cpEnum->GetCount(&num_voices); - - if(FAILED(hr)) return false; - - int count = 0; - // Obtain a list of available voice tokens, set the voice to the token, and call Speak - while (num_voices -- ) - { - cpVoiceToken.Release(); - - hr = cpEnum->Next( 1, &cpVoiceToken, NULL ); - - if(FAILED(hr)) { - return false; - } - - if(count == voice) { - return SUCCEEDED(Voice_device->SetVoice(cpVoiceToken)); - } - - count++; - } - return false; -#else - STUB_FUNCTION; - - return true; -#endif -} - -// Goober5000 -bool speech_is_speaking() -{ -#ifdef _WIN32 - HRESULT hr; - SPVOICESTATUS pStatus; - - hr = Voice_device->GetStatus(&pStatus, NULL); - if (FAILED(hr)) return false; - - return (pStatus.dwRunningState != SPRS_DONE); -#else - STUB_FUNCTION; - - return false; -#endif -} - -SCP_vector speech_enumerate_voices() -{ -#ifdef _WIN32 - HRESULT hr = CoCreateInstance( - CLSID_SpVoice, - NULL, - CLSCTX_ALL, - IID_ISpVoice, - (void **)&Voice_device); - - if (FAILED(hr)) { - return SCP_vector(); - } - - // This code is mostly copied from wxLauncher - ISpObjectTokenCategory * comTokenCategory = NULL; - IEnumSpObjectTokens * comVoices = NULL; - ULONG comVoicesCount = 0; - - // Generate enumeration of voices - hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, NULL, - CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory); - if (FAILED(hr)) { - return SCP_vector(); - } - - hr = comTokenCategory->SetId(SPCAT_VOICES, false); - if (FAILED(hr)) { - return SCP_vector(); - } - - hr = comTokenCategory->EnumTokens(NULL, NULL, &comVoices); - if (FAILED(hr)) { - return SCP_vector(); - } - - hr = comVoices->GetCount(&comVoicesCount); - if (FAILED(hr)) { - return SCP_vector(); - } - - SCP_vector voices; - while (comVoicesCount > 0) { - ISpObjectToken * comAVoice = NULL; - - comVoices->Next(1, &comAVoice, NULL); // retrieve just one - - LPWSTR id = NULL; - comAVoice->GetStringValue(NULL, &id); - - auto idlength = wcslen(id); - auto buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr); - - if (buffer_size > 0) { - SCP_string voiceName; - voiceName.resize(buffer_size); - buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr); - - voices.push_back(voiceName); - } - - CoTaskMemFree(id); - comAVoice->Release(); - comVoicesCount--; - } - - comTokenCategory->Release(); - - Voice_device->Release(); - - return voices; -#else - STUB_FUNCTION; - - return SCP_vector(); -#endif -} - -#endif // FS2_SPEECH diff --git a/code/sound/speech.h b/code/sound/speech.h index 3f731dd5a7f..07d7d9debf6 100644 --- a/code/sound/speech.h +++ b/code/sound/speech.h @@ -15,32 +15,34 @@ bool speech_init(); void speech_deinit(); -bool speech_play(const char *text); +bool speech_play(const SCP_string& text); bool speech_pause(); bool speech_resume(); bool speech_stop(); bool speech_set_volume(unsigned short volume); bool speech_set_voice(int voice); +bool speech_set_rate(float rate); bool speech_is_speaking(); -SCP_vector speech_enumerate_voices(); +SCP_vector> speech_enumerate_voices(); #else inline bool speech_init() { return false; } inline void speech_deinit() {} -inline bool speech_play(const char* /*text*/) { return false; } +inline bool speech_play(const SCP_string& /*text*/) { return false; } inline bool speech_pause() { return false; } inline bool speech_resume() { return false; } inline bool speech_stop() { return false; } inline bool speech_set_volume(unsigned short /*volume*/) { return false; } inline bool speech_set_voice(int /*voice*/) { return false; } +inline bool speech_set_rate(float /*rate*/) { return false; } inline bool speech_is_speaking() { return false; } -inline SCP_vector speech_enumerate_voices() { - return SCP_vector(); +inline SCP_vector> speech_enumerate_voices() { + return SCP_vector>(); } #endif diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp new file mode 100644 index 00000000000..e996ecf22bb --- /dev/null +++ b/code/sound/speech_linux.cpp @@ -0,0 +1,266 @@ +#ifdef FS2_SPEECH +#include +#include "globalincs/pstypes.h" +#include "utils/unicode.h" +#include "speech.h" + +// Adapted from libspeechd.h / speechd_types.h +// https://github.com/brailcom/speechd/tree/master/src/api/c + +typedef struct SPDConnection SPDConnection; + +typedef struct { + char *name; + char *language; + char *variant; +} SPDVoice; + +typedef enum { + SPD_MODE_SINGLE = 0, + SPD_MODE_THREADED = 1 +} SPDConnectionMode; + +typedef enum { + SPD_IMPORTANT = 1, + SPD_MESSAGE = 2, + SPD_TEXT = 3, + SPD_NOTIFICATION = 4, + SPD_PROGRESS = 5 +} SPDPriority; + +static void* lib_handle = nullptr; + +typedef SPDConnection* (*pfn_spd_open)(const char*, const char*, const char*, SPDConnectionMode); +typedef void (*pfn_spd_close)(SPDConnection*); +typedef int (*pfn_spd_say)(SPDConnection*, SPDPriority, const char*); +typedef int (*pfn_spd_pause)(SPDConnection*); +typedef int (*pfn_spd_resume)(SPDConnection*); +typedef int (*pfn_spd_stop)(SPDConnection*); +typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int); +typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*); +typedef int (*pfn_spd_set_voice_rate)(SPDConnection*, signed int); +typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*); +typedef void (*pfn_free_spd_voices)(SPDVoice**); + +static pfn_spd_open p_spd_open = nullptr; +static pfn_spd_close p_spd_close = nullptr; +static pfn_spd_say p_spd_say = nullptr; +static pfn_spd_pause p_spd_pause = nullptr; +static pfn_spd_resume p_spd_resume = nullptr; +static pfn_spd_stop p_spd_stop = nullptr; +static pfn_spd_set_volume p_spd_set_volume = nullptr; +static pfn_spd_set_synthesis_voice p_spd_set_synthesis_voice = nullptr; +static pfn_spd_list_synthesis_voices p_spd_list_synthesis_voices = nullptr; +static pfn_spd_set_voice_rate p_spd_set_voice_rate = nullptr; +static pfn_free_spd_voices p_free_spd_voices = nullptr; + +// Load speech-dispatcher with dlopen and load symbols +static bool ensure_speechd_lib() +{ + if (lib_handle) return true; + lib_handle = dlopen("libspeechd.so.2", RTLD_LAZY | RTLD_LOCAL); + if (!lib_handle) { + lib_handle = dlopen("libspeechd.so", RTLD_LAZY | RTLD_LOCAL); + } + + if (!lib_handle) { + mprintf(("Speech: Unable to load libspeechd.so: %s\n", dlerror())); + return false; + } + + // used symbols + p_spd_open = (pfn_spd_open) dlsym(lib_handle, "spd_open"); + p_spd_close = (pfn_spd_close) dlsym(lib_handle, "spd_close"); + p_spd_say = (pfn_spd_say) dlsym(lib_handle, "spd_say"); + p_spd_pause = (pfn_spd_pause) dlsym(lib_handle, "spd_pause"); + p_spd_resume = (pfn_spd_resume) dlsym(lib_handle, "spd_resume"); + p_spd_stop = (pfn_spd_stop) dlsym(lib_handle, "spd_stop"); + p_spd_set_volume = (pfn_spd_set_volume) dlsym(lib_handle, "spd_set_volume"); + p_spd_set_synthesis_voice = (pfn_spd_set_synthesis_voice) dlsym(lib_handle, "spd_set_synthesis_voice"); + p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices) dlsym(lib_handle, "spd_list_synthesis_voices"); + p_spd_set_voice_rate = (pfn_spd_set_voice_rate) dlsym(lib_handle, "spd_set_voice_rate"); + p_free_spd_voices = (pfn_free_spd_voices) dlsym(lib_handle, "free_spd_voices"); + + if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || !p_spd_set_volume + || !p_spd_set_voice_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) { + mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", dlerror())); + dlclose(lib_handle); + lib_handle = nullptr; + return false; + } + + return true; +} + +// Speech handling starts here +static bool Speech_init = false; +static SPDConnection* spd = nullptr; + +bool speech_init() +{ + if (Speech_init) { + return true; + } + + if (!ensure_speechd_lib()) { + return false; + } + + spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); + if (!spd) { + mprintf(("Speech: Unable to connect to speech-dispatcher\n")); + return false; + } + + Speech_init = true; + return true; +} + +void speech_deinit() +{ + if ( !Speech_init ) { + return; + } + p_spd_close(spd); + Speech_init = false; + spd = nullptr; + if (lib_handle) { + dlclose(lib_handle); + lib_handle = nullptr; + } +} + +bool speech_play(const SCP_string& text) +{ + if ( !Speech_init ) { + return false; + } + + if (text.empty()) { + nprintf(("Speech", "Not playing speech because passed text is empty.\n")); + return false; + } + + return (p_spd_say(spd, SPD_TEXT, text.c_str()) >= 0); +} + +bool speech_pause() +{ + if ( !Speech_init ) { + return false; + } + + p_spd_pause(spd); + + return true; +} + +bool speech_resume() +{ + if ( !Speech_init ) { + return false; + } + + p_spd_resume(spd); + + return true; +} + +bool speech_stop() +{ + if ( !Speech_init ) { + return false; + } + + p_spd_stop(spd); + + return true; +} + +bool speech_set_volume(unsigned short volume) +{ + if ( !Speech_init ) { + return false; + } + + p_spd_set_volume(spd, volume); + + return true; +} + +bool speech_set_voice(int voice) +{ + if ( !Speech_init ) { + return false; + } + + auto voices = speech_enumerate_voices(); + + if (voice < 0 || static_cast(voice) >= voices.size()) { + return false; + } + + p_spd_set_synthesis_voice(spd, voices[voice].second.c_str()); + + return true; +} + +bool speech_set_rate(float rate_percent) +{ + if (!Speech_init) { + return false; + } + + // 50 / +150 -> 100 = normal -> range -100 / +100 + auto rate = static_cast(rate_percent - 100.0f); + CAP(rate, -100, 100); + + p_spd_set_voice_rate(spd, rate); + return true; +} + +bool speech_is_speaking() +{ + if ( !Speech_init ) { + return false; + } + + return false; +} + +SCP_vector> speech_enumerate_voices() +{ + SCP_vector> fsoVoices; + + if (!Speech_init) { + if (!ensure_speechd_lib()) { + return fsoVoices; + } + spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE); + if (!spd) { + mprintf(("Speech: Unable to connect to speech-dispatcher\n")); + return fsoVoices; + } + } + + SPDVoice** voices = p_spd_list_synthesis_voices(spd); + + if (voices) { + for (int i = 0; voices[i] != nullptr; i++) { + fsoVoices.emplace_back(std::make_pair(i, voices[i]->name)); + } + p_free_spd_voices(voices); + } + else { + mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n")); + } + + if (!Speech_init) { + p_spd_close(spd); + spd = nullptr; + } + + return fsoVoices; +} + +#endif diff --git a/code/sound/speech.mm b/code/sound/speech_mac.mm similarity index 67% rename from code/sound/speech.mm rename to code/sound/speech_mac.mm index 0cb45534028..cb18966ca37 100644 --- a/code/sound/speech.mm +++ b/code/sound/speech_mac.mm @@ -5,11 +5,11 @@ #include "globalincs/pstypes.h" #include "utils/unicode.h" - +#include "speech.h" static NSSpeechSynthesizer *synth = nil; static bool Speech_init = false; - +static int voice_default_rate = 200; bool speech_init() { @@ -36,40 +36,20 @@ void speech_deinit() Speech_init = false; } -bool speech_play(const char *text) +bool speech_play(const SCP_string& text) { if ( !Speech_init ) { return false; } - if ( !text || !strlen(text) ) { - nprintf(("Speech", "Not playing speech because passed text is null.\n")); - return false; - } - - SCP_string work_buffer; - - bool saw_dollar = false; - for (auto ch : unicode::codepoint_range(text)) { - if (ch == UNICODE_CHAR('$')) { - // Skip $ escape sequences which appear in briefing text - saw_dollar = true; - continue; - } else if (saw_dollar) { - saw_dollar = false; - continue; - } - - unicode::encode(ch, std::back_inserter(work_buffer)); - } - - if (work_buffer.empty()) { + if (text.empty()) { + nprintf(("Speech", "Not playing speech because passed text is empty.\n")); return false; } [synth startSpeakingString: [NSString stringWithUTF8String: - work_buffer.c_str() + text.c_str() ] ]; @@ -140,9 +120,34 @@ bool speech_set_voice(int voice) [synth setVoice: [voices objectAtIndex:voice]]; + // reset voice to defaults + [synth setObject:nil forProperty:NSSpeechResetProperty error:nil]; + + // get default rate for voice + NSNumber *voiceRate = [synth objectForProperty:NSSpeechRateProperty error:nil]; + voice_default_rate = voiceRate ? [voiceRate intValue] : 200; // median normal rate as default + return true; } +bool speech_set_rate(float rate_percent) +{ + if (!Speech_init) { + return false; + } + + CAP(rate_percent, 25.0f, 300.f); + + int rate = fl2i(voice_default_rate * (rate_percent / 100.0f)); + + [synth + setObject:[NSNumber numberWithInt:rate] + forProperty:NSSpeechRateProperty error:nil + ]; + + return true; +} + bool speech_is_speaking() { if ( !Speech_init ) { @@ -152,17 +157,17 @@ bool speech_is_speaking() return [synth isSpeaking]; } -SCP_vector speech_enumerate_voices() +SCP_vector> speech_enumerate_voices() { NSArray *voices = [NSSpeechSynthesizer availableVoices]; - SCP_vector fsoVoices; + SCP_vector> fsoVoices; + int voiceID = 0; for (NSString *voiceIdentifier in voices) { NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voiceIdentifier]; NSString *name = [attributes objectForKey:NSVoiceName]; - - fsoVoices.push_back([name UTF8String]); + fsoVoices.emplace_back(std::make_pair(voiceID++, [name UTF8String])); } return fsoVoices; diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp new file mode 100644 index 00000000000..a3d723c093a --- /dev/null +++ b/code/sound/speech_win.cpp @@ -0,0 +1,266 @@ +/* + * Code created by Thomas Whittaker (RT) for a FreeSpace 2 source code project + * + * You may not sell or otherwise commercially exploit the source or things you + * created based on the source. + * +*/ +#ifndef FS2_SPEECH +#if defined(_WIN32) +#if NDEBUG + #pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" ) +#endif // NDEBUG +#endif // _WIN32 +#elif defined(_WIN32) // FS2_SPEECH + +#ifdef LAUNCHER +#include "stdafx.h" +#endif //LAUNCHER + +// Since we define these ourself we need to undefine them for the sapi header +#pragma push_macro("strcpy_s") +#pragma push_macro("strncpy_s") +#pragma push_macro("strcat_s") +#pragma push_macro("memset") +#pragma push_macro("memcpy") +#undef strcpy_s +#undef strncpy_s +#undef strcat_s +#undef memset +#undef memcpy + +#include +#include +#include + +#pragma pushpop_macro("strcpy_s") +#pragma pushpop_macro("strncpy_s") +#pragma pushpop_macro("strcat_s") +#pragma pushpop_macro("memset") +#pragma pushpop_macro("memcpy") + +ISpVoice *Voice_device; + +#pragma warning(push) +#pragma warning(disable: 4995) +// Visual Studio complains that some functions are deprecated so this fixes that +#include +#include +#include +#pragma warning(pop) +#include "globalincs/pstypes.h" +#include "utils/unicode.h" +#include "speech.h" + +bool Speech_init = false; + +bool speech_init() +{ + HRESULT hr = CoCreateInstance( + CLSID_SpVoice, + nullptr, + CLSCTX_ALL, + IID_ISpVoice, + (void **)&Voice_device); + + Speech_init = SUCCEEDED(hr); + + nprintf(("Speech", "Speech init %s\n", Speech_init ? "succeeded!" : "failed!")); + return Speech_init; +} + +void speech_deinit() +{ + if(Speech_init == false) return; + Voice_device->Release(); +} + +bool speech_play(const SCP_string& text) +{ + nprintf(("Speech", "Attempting to play speech string %s...\n", text.c_str())); + + if(Speech_init == false) return true; + + if (text.empty()) { + nprintf(("Speech", "Not playing speech because passed text is empty.\n")); + return false; + } + + // Determine the needed amount of data + auto num_chars = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), (int)text.size(), nullptr, 0); + + if (num_chars <= 0) { + // Error + return false; + } + + std::wstring wide_string; + wide_string.resize(num_chars); + + auto err = MultiByteToWideChar(CP_UTF8, 0, text.c_str(), (int)text.size(), &wide_string[0], num_chars); + + if (err <= 0) { + return false; + } + + speech_stop(); + return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, nullptr)); +} + +bool speech_pause() +{ + if(Speech_init == false) return true; + return SUCCEEDED(Voice_device->Pause()); +} + +bool speech_resume() +{ + if(Speech_init == false) return true; + return SUCCEEDED(Voice_device->Resume()); +} + +bool speech_stop() +{ + if(Speech_init == false) return true; + return SUCCEEDED(Voice_device->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr)); +} + +bool speech_set_volume(unsigned short volume) +{ + return SUCCEEDED(Voice_device->SetVolume(volume)); +} + +bool speech_set_voice(int voice) +{ + HRESULT hr; + CComPtr cpVoiceToken; + CComPtr cpEnum; + ULONG num_voices = 0; + + //Enumerate the available voices + hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum); + + if(FAILED(hr)) return false; + + hr = cpEnum->GetCount(&num_voices); + + if(FAILED(hr)) return false; + + int count = 0; + // Obtain a list of available voice tokens, set the voice to the token, and call Speak + while (num_voices -- ) + { + cpVoiceToken.Release(); + + hr = cpEnum->Next( 1, &cpVoiceToken, nullptr); + + if(FAILED(hr)) { + return false; + } + + if(count == voice) { + return SUCCEEDED(Voice_device->SetVoice(cpVoiceToken)); + } + + count++; + } + return false; +} + +bool speech_set_rate(float rate_percent) +{ + if (!Speech_init) { + return false; + } + + // 50 / +150 -> 100 = normal -> range -10 / +10 + auto rate = static_cast((rate_percent - 100.0f) * 0.1f); + if (rate < -10) { + rate = -10; + } + else if (rate > 10) { + rate = 10; + } + + return SUCCEEDED(Voice_device->SetRate(rate)); +} + +// Goober5000 +bool speech_is_speaking() +{ + HRESULT hr; + SPVOICESTATUS pStatus; + + hr = Voice_device->GetStatus(&pStatus, nullptr); + if (FAILED(hr)) return false; + + return (pStatus.dwRunningState != SPRS_DONE); +} + +SCP_vector> speech_enumerate_voices() +{ + SCP_vector> voices; + + ISpObjectTokenCategory* comTokenCategory = nullptr; + IEnumSpObjectTokens* comVoices = nullptr; + ULONG comVoicesCount = 0; + + HRESULT hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, + CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory); + + if (FAILED(hr)) { + return voices; + } + + hr = comTokenCategory->SetId(SPCAT_VOICES, false); + if (FAILED(hr)) { + comTokenCategory->Release(); + return voices; + } + + hr = comTokenCategory->EnumTokens(nullptr, nullptr, &comVoices); + if (FAILED(hr)) { + comTokenCategory->Release(); + return voices; + } + + hr = comVoices->GetCount(&comVoicesCount); + if (FAILED(hr)) { + comVoices->Release(); + comTokenCategory->Release(); + return voices; + } + + int voiceID = 0; + while (comVoicesCount > 0) { + ISpObjectToken* comAVoice = nullptr; + + comVoices->Next(1, &comAVoice, nullptr); + + LPWSTR id = nullptr; + comAVoice->GetStringValue(nullptr, &id); + + if (id) { + auto idlength = wcslen(id); + int buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr); + + if (buffer_size > 0) { + SCP_string voiceName; + voiceName.resize(buffer_size); + WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr); + voices.emplace_back(std::make_pair(voiceID++, voiceName)); + } + CoTaskMemFree(id); + } + + comAVoice->Release(); + comVoicesCount--; + } + + comVoices->Release(); + comTokenCategory->Release(); + + return voices; +} + +#endif // FS2_SPEECH \ No newline at end of file diff --git a/code/source_groups.cmake b/code/source_groups.cmake index cd55353b901..dbba52510dc 100644 --- a/code/source_groups.cmake +++ b/code/source_groups.cmake @@ -1621,16 +1621,25 @@ add_file_folder("Sound" sound/rtvoice.h sound/sound.cpp sound/sound.h - sound/speech.cpp sound/speech.h sound/voicerec.cpp sound/voicerec.h ) -if (APPLE) +if (WIN32) add_file_folder("Sound" ${file_root_sound} - sound/speech.mm + sound/speech_win.cpp + ) +elseif (APPLE) + add_file_folder("Sound" + ${file_root_sound} + sound/speech_mac.mm + ) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux") + add_file_folder("Sound" + ${file_root_sound} + sound/speech_linux.cpp ) endif()