<?xml version="1.0"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-19T07:11:26Z</responseDate><request verb="GetRecord" metadataPrefix="oai_dc">https://keep.lib.asu.edu/oai/request</request><GetRecord><record><header><identifier>oai:keep.lib.asu.edu:node-201024</identifier><datestamp>2025-05-14T23:46:44Z</datestamp><setSpec>oai_pmh:all</setSpec><setSpec>oai_pmh:repo_items</setSpec></header><metadata><oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>201024</dc:identifier>
          <dc:identifier>https://hdl.handle.net/2286/R.2.N.201024</dc:identifier>
                  <dc:rights>http://rightsstatements.org/vocab/InC/1.0/</dc:rights>
          <dc:rights>http://creativecommons.org/licenses/by-nc-sa/4.0</dc:rights>
                  <dc:date>2025-05</dc:date>
                  <dc:format>29 pages</dc:format>
                  <dc:contributor>Jhaj, Baaz</dc:contributor>
          <dc:contributor>Ramani, Krishna</dc:contributor>
          <dc:contributor>Hsu, Jeffrey</dc:contributor>
          <dc:contributor>Osburn, Steven</dc:contributor>
          <dc:contributor>Zhu, Haolin</dc:contributor>
          <dc:contributor>Barrett, The Honors College</dc:contributor>
          <dc:contributor>Computer Science and Engineering Program</dc:contributor>
                  <dc:description>This thesis presents Translatica, a modular speech-to-speech translation (S2ST) system that preserves both linguistic meaning and the speaker’s vocal identity across languages. Alongside developing a working prototype, this work surveys the landscape of S2ST methods and motivates the choice of a modular architecture over direct approaches, emphasizing flexibility, interpretability, and voice fidelity. The system combines state-of-the-art tools in transcription, translation, and voice synthesis to enable expressive, speaker-preserving dubbing of prerecorded videos. Through implementation and evaluation, the thesis explores the trade-offs between accuracy, latency, and control, demonstrating how modular design enables customization for diverse use cases. Future work includes real-time translation, enhanced speaker tracking, and applications in education and live media.</dc:description>
                  <dc:subject>Speech-to-Speech Translation</dc:subject>
          <dc:subject>Voice Cloning</dc:subject>
          <dc:subject>Speaker Preservation</dc:subject>
          <dc:subject>Modular AI Systems</dc:subject>
          <dc:subject>Neural Voice Synthesis</dc:subject>
          <dc:subject>Human-Centered Machine Translation</dc:subject>
                  <dc:title>Translatica: A Survey and Implementation Study on Speech-to-Speech Translation and Voice Synthesis with Speaker Preservation</dc:title></oai_dc:dc></metadata></record></GetRecord></OAI-PMH>
