diff --git a/CMakeLists.txt b/CMakeLists.txt
index 82075aa4d1b..6acedb4b79c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,9 +74,11 @@ IF(RESET_INSTALL_PREFIX)
 	ENDIF(NOT $ENV{FS2PATH} STREQUAL "")
 ENDIF(RESET_INSTALL_PREFIX)
 
-IF(WIN32 OR APPLE)
+IF(WIN32 OR APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
 	OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
-ENDIF(WIN32 OR APPLE)
+ELSE()
+	OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" OFF)
+ENDIF()
 
 IF (WIN32)
 	OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON)
@@ -227,9 +229,7 @@ include(package)
 include(doxygen)
 
 # Print used options to log
-IF(WIN32 OR APPLE)
-	message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
-ENDIF()
+message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
 IF (WIN32)
 	message(STATUS "Using voice recogition: ${FSO_USE_VOICEREC}")
 	message(STATUS "Building FRED2: ${FSO_BUILD_FRED2}")
diff --git a/cmake/finder/FindSpeech.cmake b/cmake/finder/FindSpeech.cmake
index b85b5b7fe9a..c7cc6b50b4c 100644
--- a/cmake/finder/FindSpeech.cmake
+++ b/cmake/finder/FindSpeech.cmake
@@ -11,6 +11,8 @@ if (WIN32)
 	endif()
 elseif(APPLE)
 	# it should just work
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+	# uses speech-dispatcher with dlopen
 else()
 	message(SEND_ERROR "Text to Speech is not supported on this platform!")
 endif()
diff --git a/code/cmdline/cmdline.cpp b/code/cmdline/cmdline.cpp
index 7dae2532cab..e200327a118 100644
--- a/code/cmdline/cmdline.cpp
+++ b/code/cmdline/cmdline.cpp
@@ -1414,7 +1414,7 @@ static json_t* json_get_v1() {
 		auto voices = speech_enumerate_voices();
 
 		for (auto& voice : voices) {
-			json_array_append_new(voices_array, json_string(voice.c_str()));
+			json_array_append_new(voices_array, json_string(voice.second.c_str()));
 		}
 
 		json_object_set_new(root, "voices", voices_array);
diff --git a/code/localization/localize.cpp b/code/localization/localize.cpp
index 96c0c9b992c..8c6132f4462 100644
--- a/code/localization/localize.cpp
+++ b/code/localization/localize.cpp
@@ -64,7 +64,7 @@ bool *Lcl_unexpected_tstring_check = nullptr;
 // NOTE: with map storage of XSTR strings, the indexes no longer need to be contiguous,
 // but internal strings should still increment XSTR_SIZE to avoid collisions.
 // retail XSTR_SIZE = 1570
-// #define XSTR_SIZE	1915 // This is the next available ID
+// #define XSTR_SIZE	1929 // This is the next available ID
 
 // struct to allow for strings.tbl-determined x offset
 // offset is 0 for english, by default
diff --git a/code/options/Ingame_Options.cpp b/code/options/Ingame_Options.cpp
index 801245458d1..dc797ec7f01 100644
--- a/code/options/Ingame_Options.cpp
+++ b/code/options/Ingame_Options.cpp
@@ -4,6 +4,7 @@
 
 #include "options/OptionsManager.h"
 #include "options/Option.h"
+#include <sound/fsspeech.h>
 
 static std::unique_ptr<OptConfigurator> OCGR;
 
@@ -100,6 +101,7 @@ void ingame_options_init()
 
 void ingame_options_close()
 {
+	fsspeech_options_cleanup(); // drop the TTS voice list cached in fsspeech.cpp
 	OCGR.reset();
 }
 
diff --git a/code/options/Option.h b/code/options/Option.h
index 44032a80f1a..791f1057107 100644
--- a/code/options/Option.h
+++ b/code/options/Option.h
@@ -608,7 +608,7 @@ class OptionBuilder {
 			_instance.setPreset(val.first, json_dump_string_new(_instance.getSerializer()(val.second),
 			                                                    JSON_COMPACT | JSON_ENSURE_ASCII | JSON_ENCODE_ANY));
 		}
-		auto opt_ptr = make_shared<Option<T>>(_instance);
+		auto opt_ptr = std::make_shared<Option<T>>(_instance);
 
 		if (std::holds_alternative<std::pair<const char*, int>>(_title)) {
 			const auto& xstr_info = std::get<std::pair<const char*, int>>(_title);
diff --git a/code/sound/fsspeech.cpp b/code/sound/fsspeech.cpp
index 65ef525bb3a..d8d32601671 100644
--- a/code/sound/fsspeech.cpp
+++ b/code/sound/fsspeech.cpp
@@ -10,7 +10,7 @@
 #include "osapi/osregistry.h"
 #include "sound/fsspeech.h"
 #include "sound/speech.h"
-
+#include "options/Option.h"
 
 extern int Cmdline_freespace_no_sound;
 
@@ -30,6 +30,201 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] =
 char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = "";
 size_t  Speech_buffer_len;
 
+// Change listener for "Speech.Rate": hands a changed rate to the speech backend.
+static bool ttsrate_change(float new_val, bool initial)
+{
+	if (!initial) { // the initial notification is not applied
+		speech_set_rate(new_val);
+	}
+	return !initial; // true only when the value was applied
+}
+
+// Change listener for "Speech.Ingame": stores the new flag in FSSpeech_play_from.
+static bool ttsingame_change(bool new_val, bool initial)
+{
+	if (!initial) { // the initial notification is not applied
+		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = new_val;
+	}
+	return !initial; // true only when the flag was stored
+}
+
+// Change listener for "Speech.Multi": stores the new flag in FSSpeech_play_from.
+static bool ttsmulti_change(bool new_val, bool initial)
+{
+	if (!initial) { // the initial notification is not applied
+		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = new_val;
+	}
+	return !initial; // true only when the flag was stored
+}
+
+// Change listener for "Speech.Briefing": stores the new flag in FSSpeech_play_from.
+static bool ttsbriefing_change(bool new_val, bool initial)
+{
+	if (!initial) { // the initial notification is not applied
+		FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = new_val;
+	}
+	return !initial; // true only when the flag was stored
+}
+
+// Change listener for "Speech.Techroom": stores the new flag in FSSpeech_play_from.
+static bool ttstechroom_change(bool new_val, bool initial)
+{
+	if (!initial) { // the initial notification is not applied
+		FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = new_val;
+	}
+	return !initial; // true only when the flag was stored
+}
+
+// Change listener for "Speech.Volume": hands a changed volume to the speech backend.
+static bool ttsvolume_change(float new_val, bool initial)
+{
+	if (!initial) { // the initial notification is not applied
+		speech_set_volume(static_cast<unsigned short>(new_val)); // fractional part is dropped
+	}
+	return !initial; // true only when the value was applied
+}
+
+// Rebuild a voice entry from a JSON object holding an integer "id" and a string "name".
+// Throws json_exception when the element does not unpack with that format.
+static std::pair<int, SCP_string> ttsvoice_deserializer(const json_t* el)
+{
+	int voice_id = 0;
+	const char* voice_name = nullptr;
+	json_error_t unpack_error;
+
+	const int rc = json_unpack_ex(const_cast<json_t*>(el), &unpack_error, 0, "{s:i, s:s}", "id", &voice_id, "name", &voice_name);
+	if (rc != 0) throw json_exception(unpack_error);
+	return std::pair<int, SCP_string>(voice_id, voice_name);
+}
+
+static json_t* ttsvoice_serializer(const std::pair<int, SCP_string>& value) // voice entry -> JSON object
+{
+	return json_pack("{s:i, s:s}", "id", value.first, "name", value.second.c_str()); // "id" = value.first, "name" = value.second
+}
+
+// Voice list remembered between enumerator calls; emptied by fsspeech_options_cleanup().
+static SCP_vector<std::pair<int, SCP_string>> voice_list_cache;
+
+// Enumerator for the voice option: returns the available voices, asking the speech
+// backend only while voice_list_cache is empty and serving the cached copy afterwards.
+static SCP_vector<std::pair<int, SCP_string>> ttsvoice_enumerator()
+{
+	if (!voice_list_cache.empty()) {
+		return voice_list_cache;
+	}
+	voice_list_cache = speech_enumerate_voices();
+	if (voice_list_cache.empty()) {
+		// placeholder entry so that the result always has at least one element
+		voice_list_cache.emplace_back(0, "No voices loaded");
+	}
+	return voice_list_cache;
+}
+
+static SCP_string ttsvoice_display(const std::pair<int, SCP_string>& vi) // display text for a voice entry
+{
+	return vi.second; // the name half of the (id, name) pair
+}
+
+// Change listener for "Speech.Voice": selects the voice by the id stored in the pair.
+static bool ttsvoice_change(const std::pair<int, SCP_string>& new_voice, bool initial)
+{
+	if (!initial) { // the initial notification is not applied
+		speech_set_voice(new_voice.first);
+	}
+	return !initial; // true only when the voice was applied
+}
+
+static auto SpeechVoiceOption = options::OptionBuilder<std::pair<int, SCP_string>>("Speech.Voice", // TTS voice selection; the value is an (id, name) pair
+	std::pair<const char*, int>{"TTS Voice", 1915}, // title text and, presumably, its XSTR id - TODO confirm
+	std::pair<const char*, int>{"The voice used to read text", 1916}) // description text with its id
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.default_func([]() { return ttsvoice_enumerator().front(); }) // always guarantees at least 1 value
+	.enumerator(ttsvoice_enumerator)
+	.display(ttsvoice_display)
+	.serializer(ttsvoice_serializer)
+	.deserializer(ttsvoice_deserializer)
+	.flags({ options::OptionFlags::ForceMultiValueSelection })
+	.change_listener(ttsvoice_change)
+	.importance(3)
+	.finish();
+
+static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume", // TTS volume, applied through ttsvolume_change
+	std::pair<const char*, int>{"TTS Volume", 1917},
+	std::pair<const char*, int>{"Volume used for playing TTS speech", 1918})
+	.category(std::make_pair("Audio", 1826))
+	.range(0.0f, 100.0f) // the listener converts the value to unsigned short for speech_set_volume
+	.default_val(100.0f)
+	.change_listener(ttsvolume_change)
+	.importance(2)
+	.finish();
+
+static auto SpeechRateOption = options::OptionBuilder<float>("Speech.Rate", // TTS speaking rate, applied through ttsrate_change
+	std::pair<const char*, int>{"TTS Rate", 1919},
+	std::pair<const char*, int>{"Speed of the TTS voice (100 = normal)", 1920})
+	.category(std::make_pair("Audio", 1826))
+	.range(50.0f, 150.0f) // percent, passed unchanged to speech_set_rate
+	.default_val(100.0f)
+	.change_listener(ttsrate_change)
+	.importance(1)
+	.finish();
+
+static auto SpeechBriefingOption = options::OptionBuilder<bool>("Speech.Briefing", // backs FSSpeech_play_from[FSSPEECH_FROM_BRIEFING]
+	std::pair<const char*, int>{"TTS in briefings", 1921},
+	std::pair<const char*, int>{"Enable or disable TTS in briefings", 1922})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsbriefing_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
+static auto SpeechTechroomOption = options::OptionBuilder<bool>("Speech.Techroom", // backs FSSpeech_play_from[FSSPEECH_FROM_TECHROOM]
+	std::pair<const char*, int>{"TTS in techroom", 1923},
+	std::pair<const char*, int>{"Enable or disable TTS in techroom", 1924})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttstechroom_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
+static auto SpeechIngameOption = options::OptionBuilder<bool>("Speech.Ingame", // backs FSSpeech_play_from[FSSPEECH_FROM_INGAME]
+	std::pair<const char*, int>{"TTS in-game", 1925},
+	std::pair<const char*, int>{"Enable or disable TTS in-game", 1926})
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsingame_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
+static auto SpeechMultiOption = options::OptionBuilder<bool>("Speech.Multi", // backs FSSpeech_play_from[FSSPEECH_FROM_MULTI]
+	std::pair<const char*, int>{"TTS in multiplayer", 1927},
+	std::pair<const char*, int>{"Enable or disable TTS in multiplayer", 1928}) // 1928 is the last id used; localize.cpp lists 1929 as next free
+	.category(std::make_pair("Audio", 1826))
+	.level(options::ExpertLevel::Beginner)
+	.change_listener(ttsmulti_change)
+	.default_val(true)
+	.importance(0)
+	.finish();
+
+// Copy `input` into `output`, dropping every '$' and the code point right after it
+// (the '$' escape sequences that appear in briefing text). File-local helper.
+static void sanitize_text(const char* input, SCP_string& output) {
+	output.clear();
+	bool skip_next = false; // set right after a '$' has been seen
+	for (auto ch : unicode::codepoint_range(input)) {
+		if (ch == UNICODE_CHAR('$')) {
+			skip_next = true;
+		} else if (skip_next) {
+			skip_next = false;
+		} else {
+			unicode::encode(ch, std::back_inserter(output));
+		}
+	}
+}
+
 bool fsspeech_init()
 {
 	if (speech_inited) {
@@ -45,18 +240,33 @@ bool fsspeech_init()
 		return false;
 	}
 
-	// Get the settings from the registry
-	for(int i = 0; i < FSSPEECH_FROM_MAX; i++) {
-		FSSpeech_play_from[i] =
-			os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false;
-		nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
+	if (Using_in_game_options) 
+	{
+		FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = SpeechTechroomOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue();
+		FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue();
+		speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
+		speech_set_voice(SpeechVoiceOption->getValue().first);
+		speech_set_rate(SpeechRateOption->getValue());
+	}
+	else 
+	{
+		// Get the settings from the registry
+		for (int i = 0; i < FSSPEECH_FROM_MAX; i++) {
+			FSSpeech_play_from[i] = static_cast<bool>(os_config_read_uint(nullptr, FSSpeech_play_id[i], 0));
+			nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
+		}
+
+		int volume = os_config_read_uint(nullptr, "SpeechVolume", 100);
+		speech_set_volume((unsigned short)volume);
+
+		int voice = os_config_read_uint(nullptr, "SpeechVoice", 0);
+		speech_set_voice(voice);
+
+		int rate = os_config_read_uint(nullptr, "SpeechRate", 100);
+		speech_set_rate(static_cast<float>(rate));
 	}
-
-	int volume = os_config_read_uint(NULL, "SpeechVolume", 100);
-	speech_set_volume((unsigned short) volume);
-
-	int voice = os_config_read_uint(NULL, "SpeechVoice", 0);
-	speech_set_voice(voice);
 
 	speech_inited = 1;
 
@@ -75,6 +285,11 @@ void fsspeech_deinit()
 
 void fsspeech_play(int type, const char *text)
 {
+	if (text == nullptr) {
+		nprintf(("Speech", "Not playing speech because passed text is null.\n"));
+		return;
+	}
+
 	if (!speech_inited) {
 		nprintf(("Speech", "Aborting fsspech_play because speech_inited is false.\n"));
 		return;
@@ -90,7 +305,10 @@ void fsspeech_play(int type, const char *text)
 		return;
 	}
 
-	speech_play(text);
+	SCP_string sanitized_string;
+	sanitize_text(text, sanitized_string);
+
+	speech_play(sanitized_string);
 }
 
 void fsspeech_stop()
@@ -157,3 +375,9 @@ bool fsspeech_playing()
 
 	return speech_is_speaking();
 }
+
+void fsspeech_options_cleanup() // empties the voice list cached for the voice option
+{
+	voice_list_cache.clear(); // the next ttsvoice_enumerator() call queries the backend again
+	voice_list_cache.shrink_to_fit(); // additionally ask the vector to give its storage back
+}
diff --git a/code/sound/fsspeech.h b/code/sound/fsspeech.h
index 874b0c37468..cd80b3515bd 100644
--- a/code/sound/fsspeech.h
+++ b/code/sound/fsspeech.h
@@ -31,4 +31,7 @@ void fsspeech_play_buffer(int type);
 bool fsspeech_play_from(int type);
 bool fsspeech_playing();
 
+// Cleanup the voice cache after the options menu is closed
+void fsspeech_options_cleanup();
+
 #endif	// header define
diff --git a/code/sound/speech.cpp b/code/sound/speech.cpp
deleted file mode 100644
index 7967950ac10..00000000000
--- a/code/sound/speech.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Code created by Thomas Whittaker (RT) for a FreeSpace 2 source code project
- *
- * You may not sell or otherwise commercially exploit the source or things you 
- * created based on the source.
- *
-*/ 
-
-
-
-
-
-#ifndef FS2_SPEECH
-#if defined(_WIN32) || defined(__APPLE__)
-#if NDEBUG
-	#pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" )
-#endif // NDEBUG
-#endif // _WIN32 or __APPLE__
-#elif !defined(__APPLE__) // to end-of-file ...
-
-
-#ifdef LAUNCHER
-#include "stdafx.h"
-#endif	//LAUNCHER
-
-#ifdef _WIN32
-
-// Since we define these ourself we need to undefine them for the sapi header
-#pragma push_macro("strcpy_s")
-#pragma push_macro("strncpy_s")
-#pragma push_macro("strcat_s")
-#pragma push_macro("memset")
-#pragma push_macro("memcpy")
-#undef strcpy_s
-#undef strncpy_s
-#undef strcat_s
-#undef memset
-#undef memcpy
-
-	#include <windows.h>
-	#include <sapi.h>
-
-	#include <sphelper.h>
-
-#pragma pushpop_macro("strcpy_s")
-#pragma pushpop_macro("strncpy_s")
-#pragma pushpop_macro("strcat_s")
-#pragma pushpop_macro("memset")
-#pragma pushpop_macro("memcpy")
-
-	ISpVoice *Voice_device;
-#elif defined(SCP_UNIX)
-	#include <fcntl.h>
-//	#include <stdio.h>
-
-	int speech_dev = -1;
-//	FILE *speech_dev = NULL;
-#else 
-	#pragma error( "ERROR: Unknown platform, speech (FS2_SPEECH) is not supported" )
-#endif	//_WIN32
-
-#pragma warning(push)
-#pragma warning(disable: 4995)
-// Visual Studio complains that some functions are deprecated so this fixes that
-#include <cstring>
-#include <cwchar>
-#include <cstdio>
-#pragma warning(pop)
-
-#include "globalincs/pstypes.h"
-#include "utils/unicode.h"
-#include "speech.h"
-
-
-bool Speech_init = false;
-
-bool speech_init()
-{
-#ifdef _WIN32
-    HRESULT hr = CoCreateInstance(
-		CLSID_SpVoice, 
-		NULL, 
-		CLSCTX_ALL, 
-		IID_ISpVoice, 
-		(void **)&Voice_device);
-
-	Speech_init = SUCCEEDED(hr);
-#else
-
-	speech_dev = open("/dev/speech", O_WRONLY | O_DIRECT);
-//	speech_dev = fopen("/dev/speech", "w");
-
-	if (speech_dev == -1) {
-//	if (speech_dev == NULL) {
-		mprintf(("Couldn't open '/dev/speech', turning text-to-speech off...\n"));
-		return false;
-	}
-
-	Speech_init = true;
-#endif
-
-	nprintf(("Speech", "Speech init %s\n", Speech_init ? "succeeded!" : "failed!"));
-	return Speech_init;
-}
-
-void speech_deinit()
-{
-	if(Speech_init == false) return;
-
-#ifdef _WIN32
-	Voice_device->Release();
-#else
-	close(speech_dev);
-//	fclose(speech_dev);
-#endif
-}
-
-bool speech_play(const char *text)
-{
-	nprintf(("Speech", "Attempting to play speech string %s...\n", text));
-
-	if(Speech_init == false) return true;
-	if (text == NULL) {
-		nprintf(("Speech", "Not playing speech because passed text is null.\n"));
-		return false;
-	}
-
-#ifdef _WIN32
-	SCP_string work_buffer;
-
-	bool saw_dollar = false;
-	for (auto ch : unicode::codepoint_range(text)) {
-		if (ch == UNICODE_CHAR('$')) {
-			// Skip $ escape sequences which appear in briefing text
-			saw_dollar = true;
-			continue;
-		} else if (saw_dollar) {
-			saw_dollar = false;
-			continue;
-		}
-
-		unicode::encode(ch, std::back_inserter(work_buffer));
-	}
-
-	// Determine the needed amount of data
-	auto num_chars = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int) work_buffer.size(), nullptr, 0);
-
-	if (num_chars <= 0) {
-		// Error
-		return false;
-	}
-
-	std::wstring wide_string;
-	wide_string.resize(num_chars);
-
-	auto err = MultiByteToWideChar(CP_UTF8, 0, work_buffer.c_str(), (int)work_buffer.size(), &wide_string[0], num_chars);
-
-	if (err <= 0) {
-		return false;
-	}
-
-	speech_stop();
-	return SUCCEEDED(Voice_device->Speak(wide_string.c_str(), SPF_ASYNC, NULL));
-#else
-	int len = strlen(text);
-	char Conversion_buffer[MAX_SPEECH_CHAR_LEN];
-
-	if(len > (MAX_SPEECH_CHAR_LEN - 1)) {
-		len = MAX_SPEECH_CHAR_LEN - 1;
-	}
-
-	int count = 0;
-	for(int i = 0; i < len; i++) {
-		if(text[i] == '$') {
-			i++;
-			continue;
-		}
-
-		Conversion_buffer[count] = text[i];
-		count++;
-	}
-
-	Conversion_buffer[count] = '\0';
-
-	if ( write(speech_dev, Conversion_buffer, count) == -1 )
-		return false;
-//	if (fwrite(Conversion_buffer, count, 1, speech_dev))
-//		fflush(speech_dev);
-//	else
-//		return false;
-
-	return true;
-#endif	//_WIN32
-}
-
-bool speech_pause()
-{
-	if(Speech_init == false) return true;
-#ifdef _WIN32
-	return SUCCEEDED(Voice_device->Pause());
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
-}
-
-bool speech_resume()
-{
-	if(Speech_init == false) return true;
-#ifdef _WIN32
-	return SUCCEEDED(Voice_device->Resume());
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
-}
-
-bool speech_stop()
-{
-	if(Speech_init == false) return true;
-#ifdef _WIN32
-    return SUCCEEDED(Voice_device->Speak( NULL, SPF_PURGEBEFORESPEAK, NULL ));
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
-}
-
-bool speech_set_volume(unsigned short volume)
-{
-#ifdef _WIN32
-    return SUCCEEDED(Voice_device->SetVolume(volume));
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
-}
-
-bool speech_set_voice(int voice)
-{
-#ifdef _WIN32
-	HRESULT                             hr;
-	CComPtr<ISpObjectToken>             cpVoiceToken;
-	CComPtr<IEnumSpObjectTokens>        cpEnum;
-	ULONG                               num_voices = 0;
-
-	//Enumerate the available voices 
-	hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
-
-	if(FAILED(hr)) return false;
-
-    hr = cpEnum->GetCount(&num_voices);
-
-	if(FAILED(hr)) return false;
-
-	int count = 0;
-	// Obtain a list of available voice tokens, set the voice to the token, and call Speak
-	while (num_voices -- )
-	{
-		cpVoiceToken.Release();
-		
-		hr = cpEnum->Next( 1, &cpVoiceToken, NULL );
-
-		if(FAILED(hr)) {
-			return false;
-		}
-
-		if(count == voice) {
-			return SUCCEEDED(Voice_device->SetVoice(cpVoiceToken));
-		}
-
-		count++;
-	}
-	return false;
-#else
-	STUB_FUNCTION;
-
-	return true;
-#endif
-}
-
-// Goober5000
-bool speech_is_speaking()
-{
-#ifdef _WIN32
-	HRESULT			hr;
-	SPVOICESTATUS	pStatus;
-
-	hr = Voice_device->GetStatus(&pStatus, NULL);
-	if (FAILED(hr)) return false;
-
-	return (pStatus.dwRunningState != SPRS_DONE);
-#else
-	STUB_FUNCTION;
-
-	return false;
-#endif
-}
-
-SCP_vector<SCP_string> speech_enumerate_voices()
-{
-#ifdef _WIN32
-	HRESULT hr = CoCreateInstance(
-		CLSID_SpVoice,
-		NULL,
-		CLSCTX_ALL,
-		IID_ISpVoice,
-		(void **)&Voice_device);
-
-	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
-	}
-
-	// This code is mostly copied from wxLauncher
-	ISpObjectTokenCategory * comTokenCategory = NULL;
-	IEnumSpObjectTokens * comVoices = NULL;
-	ULONG comVoicesCount = 0;
-
-	// Generate enumeration of voices
-	hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, NULL,
-		CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory);
-	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
-	}
-
-	hr = comTokenCategory->SetId(SPCAT_VOICES, false);
-	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
-	}
-
-	hr = comTokenCategory->EnumTokens(NULL, NULL, &comVoices);
-	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
-	}
-
-	hr = comVoices->GetCount(&comVoicesCount);
-	if (FAILED(hr)) {
-		return SCP_vector<SCP_string>();
-	}
-
-	SCP_vector<SCP_string> voices;
-	while (comVoicesCount > 0) {
-		ISpObjectToken * comAVoice = NULL;
-
-		comVoices->Next(1, &comAVoice, NULL); // retrieve just one
-
-		LPWSTR id = NULL;
-		comAVoice->GetStringValue(NULL, &id);
-
-		auto idlength = wcslen(id);
-		auto buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, nullptr, 0, nullptr, nullptr);
-
-		if (buffer_size > 0) {
-			SCP_string voiceName;
-			voiceName.resize(buffer_size);
-			buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, (int)idlength, &voiceName[0], buffer_size, nullptr, nullptr);
-
-			voices.push_back(voiceName);
-		}
-
-		CoTaskMemFree(id);
-		comAVoice->Release();
-		comVoicesCount--;
-	}
-
-	comTokenCategory->Release();
-
-	Voice_device->Release();
-
-	return voices;
-#else
-	STUB_FUNCTION;
-
-	return SCP_vector<SCP_string>();
-#endif
-}
-
-#endif // FS2_SPEECH
diff --git a/code/sound/speech.h b/code/sound/speech.h
index 3f731dd5a7f..07d7d9debf6 100644
--- a/code/sound/speech.h
+++ b/code/sound/speech.h
@@ -15,32 +15,34 @@
 
 bool speech_init();
 void speech_deinit();
-bool speech_play(const char *text);
+bool speech_play(const SCP_string& text);
 bool speech_pause();
 bool speech_resume();
 bool speech_stop();
 
 bool speech_set_volume(unsigned short volume);
 bool speech_set_voice(int voice);
+bool speech_set_rate(float rate);
 
 bool speech_is_speaking();
 
-SCP_vector<SCP_string> speech_enumerate_voices();
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices();
 
 #else
 
 inline bool speech_init() { return false; }
 inline void speech_deinit() {}
-inline bool speech_play(const char* /*text*/) { return false; }
+inline bool speech_play(const SCP_string& /*text*/) { return false; }
 inline bool speech_pause() { return false; }
 inline bool speech_resume() { return false; }
 inline bool speech_stop() { return false; }
 inline bool speech_set_volume(unsigned short /*volume*/) { return false; }
 inline bool speech_set_voice(int /*voice*/) { return false; }
+inline bool speech_set_rate(float /*rate*/) { return false; } 
 inline bool speech_is_speaking() { return false; }
 
-inline SCP_vector<SCP_string> speech_enumerate_voices() {
-	return SCP_vector<SCP_string>();
+inline SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices() {
+	return SCP_vector<std::pair<int, SCP_string>>();
 }
 
 #endif
diff --git a/code/sound/speech_linux.cpp b/code/sound/speech_linux.cpp
new file mode 100644
index 00000000000..e996ecf22bb
--- /dev/null
+++ b/code/sound/speech_linux.cpp
@@ -0,0 +1,266 @@
+#ifdef FS2_SPEECH
+#include <dlfcn.h> 
+#include "globalincs/pstypes.h"
+#include "utils/unicode.h"
+#include "speech.h"
+
+// Adapted from libspeechd.h / speechd_types.h
+// https://github.com/brailcom/speechd/tree/master/src/api/c
+
+typedef struct SPDConnection SPDConnection; // left incomplete: this file only passes pointers to it around
+
+typedef struct { // mirrors the voice record handed out by spd_list_synthesis_voices
+    char *name; // the only field this file reads
+    char *language;
+    char *variant;
+} SPDVoice;
+
+typedef enum { // last argument of spd_open
+    SPD_MODE_SINGLE = 0, // the mode this file uses
+    SPD_MODE_THREADED = 1
+} SPDConnectionMode;
+
+typedef enum { // second argument of spd_say
+    SPD_IMPORTANT   = 1,
+    SPD_MESSAGE     = 2,
+    SPD_TEXT        = 3, // the priority speech_play uses
+    SPD_NOTIFICATION = 4,
+    SPD_PROGRESS    = 5
+} SPDPriority;
+
+static void* lib_handle = nullptr; // dlopen handle for libspeechd; null while the library is not loaded
+
+typedef SPDConnection* (*pfn_spd_open)(const char*, const char*, const char*, SPDConnectionMode); // one pointer type per libspeechd function used
+typedef void (*pfn_spd_close)(SPDConnection*);
+typedef int (*pfn_spd_say)(SPDConnection*, SPDPriority, const char*);
+typedef int (*pfn_spd_pause)(SPDConnection*);
+typedef int (*pfn_spd_resume)(SPDConnection*);
+typedef int (*pfn_spd_stop)(SPDConnection*);
+typedef int (*pfn_spd_set_volume)(SPDConnection*, signed int);
+typedef int (*pfn_spd_set_synthesis_voice)(SPDConnection*, const char*);
+typedef int (*pfn_spd_set_voice_rate)(SPDConnection*, signed int);
+typedef SPDVoice** (*pfn_spd_list_synthesis_voices)(SPDConnection*);
+typedef void (*pfn_free_spd_voices)(SPDVoice**);
+
+static pfn_spd_open                		p_spd_open = nullptr; // all p_* pointers are filled in by ensure_speechd_lib()
+static pfn_spd_close                	p_spd_close = nullptr;
+static pfn_spd_say                  	p_spd_say = nullptr;
+static pfn_spd_pause                	p_spd_pause = nullptr;
+static pfn_spd_resume               	p_spd_resume = nullptr;
+static pfn_spd_stop                 	p_spd_stop = nullptr;
+static pfn_spd_set_volume           	p_spd_set_volume = nullptr;
+static pfn_spd_set_synthesis_voice  	p_spd_set_synthesis_voice = nullptr;
+static pfn_spd_list_synthesis_voices	p_spd_list_synthesis_voices = nullptr;
+static pfn_spd_set_voice_rate			p_spd_set_voice_rate = nullptr;
+static pfn_free_spd_voices 				p_free_spd_voices = nullptr;
+
+// Load speech-dispatcher's client library with dlopen and resolve every symbol used here.
+// Returns true once the library is loaded and all symbols were found; on failure
+// lib_handle is left null (a partially resolved library is closed again).
+static bool ensure_speechd_lib()
+{
+    if (lib_handle) return true;
+    lib_handle = dlopen("libspeechd.so.2", RTLD_LAZY | RTLD_LOCAL);
+    if (!lib_handle) { // fall back to the unversioned name
+		lib_handle = dlopen("libspeechd.so", RTLD_LAZY | RTLD_LOCAL);
+    }
+    if (!lib_handle) {
+        const char* err = dlerror(); // can be null; never hand a null pointer to %s
+        mprintf(("Speech: Unable to load libspeechd.so: %s\n", err ? err : "unknown error"));
+        return false;
+    }
+    // used symbols
+    p_spd_open                	= (pfn_spd_open)               		dlsym(lib_handle, "spd_open");
+    p_spd_close              	= (pfn_spd_close)              		dlsym(lib_handle, "spd_close");
+    p_spd_say                 	= (pfn_spd_say)                		dlsym(lib_handle, "spd_say");
+    p_spd_pause               	= (pfn_spd_pause)              		dlsym(lib_handle, "spd_pause");
+    p_spd_resume              	= (pfn_spd_resume)             		dlsym(lib_handle, "spd_resume");
+    p_spd_stop                	= (pfn_spd_stop)               		dlsym(lib_handle, "spd_stop");
+    p_spd_set_volume          	= (pfn_spd_set_volume)         		dlsym(lib_handle, "spd_set_volume");
+    p_spd_set_synthesis_voice 	= (pfn_spd_set_synthesis_voice)		dlsym(lib_handle, "spd_set_synthesis_voice");
+    p_spd_list_synthesis_voices = (pfn_spd_list_synthesis_voices)	dlsym(lib_handle, "spd_list_synthesis_voices");
+	p_spd_set_voice_rate		= (pfn_spd_set_voice_rate)			dlsym(lib_handle, "spd_set_voice_rate");
+    p_free_spd_voices 			= (pfn_free_spd_voices)				dlsym(lib_handle, "free_spd_voices");
+    if (!p_spd_open || !p_spd_close || !p_spd_say || !p_spd_pause || !p_spd_resume || !p_spd_stop || !p_spd_set_volume 
+		|| !p_spd_set_voice_rate || !p_spd_set_synthesis_voice || !p_spd_list_synthesis_voices || !p_free_spd_voices) {
+        const char* err = dlerror(); // null when no error is pending any more, so guard it
+        mprintf(("Speech: Unable to load one or more symbols from libspeechd.so: %s\n", err ? err : "unknown error"));
+        dlclose(lib_handle);
+        lib_handle = nullptr;
+        return false;
+    }
+    return true;
+}
+
+// Speech handling starts here
+static bool Speech_init = false;
+static SPDConnection* spd = nullptr;
+
+// Initialise text-to-speech: load libspeechd if needed and open the connection kept in `spd`.
+// Returns true when speech is (or already was) initialised, false otherwise.
+bool speech_init()
+{
+	if (!Speech_init && ensure_speechd_lib()) {
+		// connect as "freespace_open" / "main" in SPD_MODE_SINGLE
+		spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
+
+		if (spd != nullptr) {
+			Speech_init = true;
+		}
+		else {
+			mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
+		}
+	}
+
+	// false covers both a library that could not be loaded and a failed connection
+	return Speech_init;
+}
+
+// Close the speech-dispatcher connection and unload the library; no-op when not initialised.
+void speech_deinit()
+{
+	if (Speech_init) {
+		p_spd_close(spd);
+		spd = nullptr;
+		Speech_init = false;
+		if (lib_handle != nullptr) {
+			dlclose(lib_handle);
+			lib_handle = nullptr;
+		}
+	}
+}
+
+// Send `text` to speech-dispatcher with SPD_TEXT priority.
+// Returns false when speech is not initialised, the text is empty, or spd_say reports a negative result.
+bool speech_play(const SCP_string& text)
+{
+	if (!Speech_init) return false;
+
+	if (text.empty()) {
+		nprintf(("Speech", "Not playing speech because passed text is empty.\n"));
+		return false;
+	}
+	const int say_result = p_spd_say(spd, SPD_TEXT, text.c_str());
+	return say_result >= 0;
+}
+
+// Pause speech output.
+// Returns false when speech is not initialised or speech-dispatcher reports a failure.
+bool speech_pause()
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+	// spd_pause returns 0 on success; pass a failure on instead of always claiming success
+	return p_spd_pause(spd) == 0;
+}
+
+// Resume speech output that was paused.
+// Returns false when speech is not initialised or speech-dispatcher reports a failure.
+bool speech_resume()
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+	// spd_resume returns 0 on success; pass a failure on instead of always claiming success
+	return p_spd_resume(spd) == 0;
+}
+
+// Stop the speech that is currently being output.
+// Returns false when speech is not initialised or speech-dispatcher reports a failure.
+bool speech_stop()
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+	// spd_stop returns 0 on success; pass a failure on instead of always claiming success
+	return p_spd_stop(spd) == 0;
+}
+
+// Set the speech volume from FSO's 0..100 scale (values above 100 are treated as 100).
+bool speech_set_volume(unsigned short volume)
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+	// speech-dispatcher volumes run from -100 (lowest) to +100 (loudest), so rescale 0..100
+	const int spd_volume = (volume > 100 ? 100 : static_cast<int>(volume)) * 2 - 100;
+	return p_spd_set_volume(spd, spd_volume) == 0;
+}
+
+// Select the synthesis voice by its index in speech_enumerate_voices().
+// Returns false when speech is not initialised, the index is out of range,
+// or speech-dispatcher reports an error for the voice.
+bool speech_set_voice(int voice)
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+	const auto voices = speech_enumerate_voices();
+	if (voice < 0 || static_cast<size_t>(voice) >= voices.size()) {
+		return false;
+	}
+	// spd_set_synthesis_voice returns 0 on success; report failures instead of hiding them
+	return p_spd_set_synthesis_voice(spd, voices[voice].second.c_str()) == 0;
+}
+
+// Set the speaking rate from a percentage where 100 means normal speed.
+// Returns false when speech is not initialised or speech-dispatcher rejects the value.
+bool speech_set_rate(float rate_percent)
+{
+	if (!Speech_init) {
+		return false;
+	}
+	// speech-dispatcher takes -100..+100, so 100 % maps to 0; clamp while still a float
+	// so that an out-of-range value can never overflow the conversion to int
+	float shifted = rate_percent - 100.0f;
+	CAP(shifted, -100.0f, 100.0f);
+	return p_spd_set_voice_rate(spd, static_cast<signed int>(shifted)) == 0;
+}
+
+bool speech_is_speaking() // NOTE(review): looks like a placeholder - every path returns false
+{
+	if ( !Speech_init ) {
+		return false;
+	}
+
+	return false; // speech-dispatcher is not queried here, so speech in progress is never reported
+}
+
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices() // lists speech-dispatcher's synthesis voices as (index, name) pairs
+{
+	SCP_vector<std::pair<int, SCP_string>> fsoVoices; // stays empty on every failure path
+
+	if (!Speech_init) { // not initialised: open a temporary connection in the global `spd` for this call
+		if (!ensure_speechd_lib()) {
+			return fsoVoices;
+		}
+		spd = p_spd_open("freespace_open", "main", nullptr, SPD_MODE_SINGLE);
+		if (!spd) {
+			mprintf(("Speech: Unable to connect to speech-dispatcher\n"));
+			return fsoVoices;
+		}
+	}
+
+	SPDVoice** voices = p_spd_list_synthesis_voices(spd); // iterated below as a null-terminated array
+
+	if (voices) {
+		for (int i = 0; voices[i] != nullptr; i++) {
+			fsoVoices.emplace_back(std::make_pair(i, voices[i]->name)); // i is the index speech_set_voice() expects
+		}
+		p_free_spd_voices(voices); // the names were copied into fsoVoices above
+	}
+	else {
+		mprintf(("Speech: Unable to get voice list from speech-dispatcher.\n"));
+	}
+
+	if (!Speech_init) { // close the temporary connection again; the library itself stays loaded
+		p_spd_close(spd);
+		spd = nullptr;
+	}
+
+	return fsoVoices;
+}
+
+#endif
diff --git a/code/sound/speech.mm b/code/sound/speech_mac.mm
similarity index 67%
rename from code/sound/speech.mm
rename to code/sound/speech_mac.mm
index 0cb45534028..cb18966ca37 100644
--- a/code/sound/speech.mm
+++ b/code/sound/speech_mac.mm
@@ -5,11 +5,11 @@
 
 #include "globalincs/pstypes.h"
 #include "utils/unicode.h"
-
+#include "speech.h"
 
 static NSSpeechSynthesizer *synth = nil;
 static bool Speech_init = false;
-
+static int voice_default_rate = 200;
 
 bool speech_init()
 {
@@ -36,40 +36,20 @@ void speech_deinit()
 	Speech_init = false;
 }
 
-bool speech_play(const char *text)
+bool speech_play(const SCP_string& text)
 {
 	if ( !Speech_init ) {
 		return false;
 	}
 
-	if ( !text || !strlen(text) ) {
-		nprintf(("Speech", "Not playing speech because passed text is null.\n"));
-		return false;
-	}
-
-	SCP_string work_buffer;
-
-	bool saw_dollar = false;
-	for (auto ch : unicode::codepoint_range(text)) {
-		if (ch == UNICODE_CHAR('$')) {
-			// Skip $ escape sequences which appear in briefing text
-			saw_dollar = true;
-			continue;
-		} else if (saw_dollar) {
-			saw_dollar = false;
-			continue;
-		}
-
-		unicode::encode(ch, std::back_inserter(work_buffer));
-	}
-
-	if (work_buffer.empty()) {
+	if (text.empty()) {
+		nprintf(("Speech", "Not playing speech because passed text is empty.\n"));
 		return false;
 	}
 
 	[synth startSpeakingString:
 		[NSString stringWithUTF8String:
-			work_buffer.c_str()
+			text.c_str()
 		]
 	];
 
@@ -140,9 +120,34 @@ bool speech_set_voice(int voice)
 
 	[synth setVoice: [voices objectAtIndex:voice]];
 
+	// reset voice to defaults
+	[synth setObject:nil forProperty:NSSpeechResetProperty error:nil];
+
+	// get default rate for voice
+	NSNumber *voiceRate = [synth objectForProperty:NSSpeechRateProperty error:nil];
+	voice_default_rate = voiceRate ? [voiceRate intValue] : 200; // median normal rate as default
+
 	return true;
 }
 
+// Set the speaking rate as a percentage of the current voice's default rate.
+// The percentage is limited to 25..300; returns false when speech is not initialised.
+bool speech_set_rate(float rate_percent)
+{
+	if (!Speech_init) {
+		return false;
+	}
+
+	CAP(rate_percent, 25.0f, 300.f);
+	const float scale = rate_percent / 100.0f;
+	const int scaled_rate = fl2i(voice_default_rate * scale);
+	NSNumber *rateValue = [NSNumber numberWithInt:scaled_rate];
+
+	[synth setObject:rateValue forProperty:NSSpeechRateProperty error:nil];
+
+	return true;
+}
+
 bool speech_is_speaking()
 {
 	if ( !Speech_init ) {
@@ -152,17 +157,17 @@ bool speech_is_speaking()
 	return [synth isSpeaking];
 }
 
-SCP_vector<SCP_string> speech_enumerate_voices()
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices()
 {
 	NSArray *voices = [NSSpeechSynthesizer availableVoices];
 
-	SCP_vector<SCP_string> fsoVoices;
+	SCP_vector<std::pair<int, SCP_string>> fsoVoices;
 
+	int voiceID = 0;
 	for (NSString *voiceIdentifier in voices) {
 		NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
 		NSString *name = [attributes objectForKey:NSVoiceName];
-
-		fsoVoices.push_back([name UTF8String]);
+		fsoVoices.emplace_back(std::make_pair(voiceID++, [name UTF8String]));
 	}
 
 	return fsoVoices;
diff --git a/code/sound/speech_win.cpp b/code/sound/speech_win.cpp
new file mode 100644
index 00000000000..a3d723c093a
--- /dev/null
+++ b/code/sound/speech_win.cpp
@@ -0,0 +1,266 @@
+/*
+ * Code created by Thomas Whittaker (RT) for a FreeSpace 2 source code project
+ *
+ * You may not sell or otherwise commercially exploit the source or things you 
+ * created based on the source.
+ *
+*/ 
+#ifndef FS2_SPEECH
+#if defined(_WIN32)
+#if NDEBUG
+	#pragma message( "WARNING: You have not compiled speech into this build (use FS2_SPEECH)" )
+#endif // NDEBUG
+#endif // _WIN32
+#elif defined(_WIN32) // FS2_SPEECH
+
+#ifdef LAUNCHER
+#include "stdafx.h"
+#endif	//LAUNCHER
+
+// Since we define these ourselves we need to undefine them for the sapi header and restore them afterwards
+#pragma push_macro("strcpy_s")
+#pragma push_macro("strncpy_s")
+#pragma push_macro("strcat_s")
+#pragma push_macro("memset")
+#pragma push_macro("memcpy")
+#undef strcpy_s
+#undef strncpy_s
+#undef strcat_s
+#undef memset
+#undef memcpy
+
+#include <windows.h>
+#include <sapi.h>
+#include <sphelper.h>
+
+#pragma pop_macro("strcpy_s")
+#pragma pop_macro("strncpy_s")
+#pragma pop_macro("strcat_s")
+#pragma pop_macro("memset")
+#pragma pop_macro("memcpy")
+
+ISpVoice *Voice_device; // SAPI voice interface: created by speech_init(), released by speech_deinit()
+
+#pragma warning(push)
+#pragma warning(disable: 4995)
+// Visual Studio complains that some functions are deprecated so this fixes that
+#include <cstring>
+#include <cwchar>
+#include <cstdio>
+#pragma warning(pop)
+#include "globalincs/pstypes.h"
+#include "utils/unicode.h"
+#include "speech.h"
+
+bool Speech_init = false; // set by speech_init(): true when Voice_device was created successfully
+
+bool speech_init()
+{
+	// Create the SAPI voice COM object that the other speech_* functions talk to
+	const HRESULT result = CoCreateInstance(
+		CLSID_SpVoice,
+		nullptr,
+		CLSCTX_ALL,
+		IID_ISpVoice,
+		reinterpret_cast<void**>(&Voice_device));
+
+	Speech_init = SUCCEEDED(result);
+	nprintf(("Speech", "Speech init %s\n", Speech_init ? "succeeded!" : "failed!"));
+	return Speech_init;
+}
+
+void speech_deinit()
+{
+	if (Speech_init) { Voice_device->Release(); Voice_device = nullptr; }	// release the voice exactly once
+	Speech_init = false;	// repeated deinit and Speech_init-guarded speech_* calls must never touch the released voice
+}
+
+bool speech_play(const SCP_string& text)
+{
+	nprintf(("Speech", "Attempting to play speech string %s...\n", text.c_str()));
+
+	// Without a working speech device there is nothing to do; this is reported as success
+	if (!Speech_init) {
+		return true;
+	}
+
+	if (text.empty()) {
+		nprintf(("Speech", "Not playing speech because passed text is empty.\n"));
+		return false;
+	}
+
+	// Convert the UTF-8 text to a wide string: the first call only measures, the second converts
+	const char* utf8 = text.c_str();
+	const int utf8_len = (int)text.size();
+	const int wide_len = MultiByteToWideChar(CP_UTF8, 0, utf8, utf8_len, nullptr, 0);
+	if (wide_len <= 0) {
+		return false;
+	}
+
+	std::wstring utf16(wide_len, L'\0');
+	if (MultiByteToWideChar(CP_UTF8, 0, utf8, utf8_len, &utf16[0], wide_len) <= 0) {
+		return false;
+	}
+
+	// Stop whatever is currently being spoken, then start the new text asynchronously
+	speech_stop();
+	return SUCCEEDED(Voice_device->Speak(utf16.c_str(), SPF_ASYNC, nullptr));
+}
+
+bool speech_pause()
+{
+	// With no speech device there is nothing to pause, which is reported as success
+	return !Speech_init || SUCCEEDED(Voice_device->Pause());
+}
+
+bool speech_resume()
+{
+	// With no speech device there is nothing to resume, which is reported as success
+	return !Speech_init || SUCCEEDED(Voice_device->Resume());
+}
+
+bool speech_stop()
+{
+	// With no speech device there is nothing to stop; otherwise issue an empty, purging Speak request
+	return !Speech_init || SUCCEEDED(Voice_device->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr));
+}
+
+bool speech_set_volume(unsigned short volume)
+{
+	return Speech_init && SUCCEEDED(Voice_device->SetVolume(volume)); // false (not a null dereference) when speech_init() has not succeeded
+}
+
+// Selects the voice at zero-based position 'voice' in the SAPI voice enumeration; false if that is not possible.
+bool speech_set_voice(int voice)
+{
+	// Voice_device is only valid after a successful speech_init(); a negative index can never match
+	if (!Speech_init || voice < 0) {
+		return false;
+	}
+
+	CComPtr<IEnumSpObjectTokens> cpEnum;
+	ULONG num_voices = 0;
+
+	// Enumerate the available voices and find out how many there are
+	HRESULT hr = SpEnumTokens(SPCAT_VOICES, nullptr, nullptr, &cpEnum);
+	if (FAILED(hr)) {
+		return false;
+	}
+
+	hr = cpEnum->GetCount(&num_voices);
+	if (FAILED(hr)) {
+		return false;
+	}
+
+	// Walk the voice tokens in order until the requested index is reached
+	for (int index = 0; num_voices > 0; --num_voices, ++index) {
+		CComPtr<ISpObjectToken> cpVoiceToken;
+		// Next() returns S_FALSE (not a FAILED code) when it could not fetch a token
+		hr = cpEnum->Next(1, &cpVoiceToken, nullptr);
+		if (hr != S_OK || !cpVoiceToken) {
+			return false;
+		}
+		if (index == voice) {
+			return SUCCEEDED(Voice_device->SetVoice(cpVoiceToken));
+		}
+	}
+	return false;
+}
+
+bool speech_set_rate(float rate_percent)
+{
+	if (!Speech_init) {
+		return false;
+	}
+
+	// rate_percent: 100 = normal speed. The value passed to SetRate is kept within -10 .. +10,
+	// one step per 10 percentage points: 100% -> 0, 0% and below -> -10, 200% and above -> +10.
+	float steps = (rate_percent - 100.0f) * 0.1f;
+
+	// Clamp before converting: casting an out-of-range or NaN float to long is undefined behavior
+	if (steps != steps) { steps = 0.0f; }	// NaN -> normal speed
+	else if (steps < -10.0f) { steps = -10.0f; }
+	else if (steps > 10.0f) { steps = 10.0f; }
+
+	return SUCCEEDED(Voice_device->SetRate(static_cast<long>(steps)));
+}
+
+// Goober5000
+bool speech_is_speaking()
+{
+	// Without an initialized voice nothing can be speaking, and Voice_device must not be touched
+	if (!Speech_init) return false;
+
+	SPVOICESTATUS status;
+	if (FAILED(Voice_device->GetStatus(&status, nullptr))) return false;
+
+	return (status.dwRunningState != SPRS_DONE);	// any state other than "done" counts as speaking
+}
+
+// Lists the installed SAPI voices as (id, UTF-8 name) pairs, with ids counting up from 0.
+// Returns whatever has been collected so far (possibly nothing) as soon as a SAPI call fails.
+SCP_vector<std::pair<int, SCP_string>> speech_enumerate_voices()
+{
+	SCP_vector<std::pair<int, SCP_string>> voices;
+
+	// CComPtr releases each COM interface automatically on every return path
+	CComPtr<ISpObjectTokenCategory> comTokenCategory;
+	CComPtr<IEnumSpObjectTokens> comVoices;
+	ULONG comVoicesCount = 0;
+
+	HRESULT hr = ::CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr,
+		CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (LPVOID*)&comTokenCategory);
+	if (FAILED(hr)) {
+		return voices;
+	}
+
+	hr = comTokenCategory->SetId(SPCAT_VOICES, false);
+	if (FAILED(hr)) {
+		return voices;
+	}
+
+	hr = comTokenCategory->EnumTokens(nullptr, nullptr, &comVoices);
+	if (FAILED(hr)) {
+		return voices;
+	}
+
+	hr = comVoices->GetCount(&comVoicesCount);
+	if (FAILED(hr)) {
+		return voices;
+	}
+
+	int voiceID = 0;
+	for (; comVoicesCount > 0; --comVoicesCount) {
+		CComPtr<ISpObjectToken> comAVoice;
+
+		// Next() returns S_FALSE (not a FAILED code) and leaves the token null when it could
+		// not fetch one, so test for S_OK and a usable pointer before dereferencing it
+		hr = comVoices->Next(1, &comAVoice, nullptr);
+		if (hr != S_OK || !comAVoice) {
+			break;
+		}
+
+		// The token's default string value is used as the voice name
+		LPWSTR id = nullptr;
+		comAVoice->GetStringValue(nullptr, &id);
+
+		if (id) {
+			// Convert the wide string to UTF-8: measure first, then convert into a buffer of that size
+			const int idlength = (int)wcslen(id);
+			const int buffer_size = WideCharToMultiByte(CP_UTF8, 0, id, idlength, nullptr, 0, nullptr, nullptr);
+
+			if (buffer_size > 0) {
+				SCP_string voiceName;
+				voiceName.resize(buffer_size);
+				WideCharToMultiByte(CP_UTF8, 0, id, idlength, &voiceName[0], buffer_size, nullptr, nullptr);
+				voices.emplace_back(std::make_pair(voiceID++, voiceName));
+			}
+			// the string returned by GetStringValue is ours to free
+			CoTaskMemFree(id);
+		}
+	}
+
+	return voices;
+}
+
+#endif // FS2_SPEECH
\ No newline at end of file
diff --git a/code/source_groups.cmake b/code/source_groups.cmake
index cd55353b901..dbba52510dc 100644
--- a/code/source_groups.cmake
+++ b/code/source_groups.cmake
@@ -1621,16 +1621,25 @@ add_file_folder("Sound"
 	sound/rtvoice.h
 	sound/sound.cpp
 	sound/sound.h
-	sound/speech.cpp
 	sound/speech.h
 	sound/voicerec.cpp
 	sound/voicerec.h
 )
 
-if (APPLE)
+if (WIN32)
 	add_file_folder("Sound"
 		${file_root_sound}
-		sound/speech.mm
+		sound/speech_win.cpp
+	)
+elseif (APPLE)
+	add_file_folder("Sound"
+		${file_root_sound}
+		sound/speech_mac.mm
+	)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+	add_file_folder("Sound"
+		${file_root_sound}
+		sound/speech_linux.cpp
 	)
 endif()