<?xml version="1.0"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"><responseDate>2026-05-23T17:50:54Z</responseDate><request verb="GetRecord" metadataPrefix="oai_dc">https://keep.lib.asu.edu/oai/request</request><GetRecord><record><header><identifier>oai:keep.lib.asu.edu:node-161694</identifier><datestamp>2024-12-23T18:01:48Z</datestamp><setSpec>oai_pmh:all</setSpec><setSpec>oai_pmh:repo_items</setSpec></header><metadata><oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>161694</dc:identifier>
          <dc:identifier>https://hdl.handle.net/2286/R.2.N.161694</dc:identifier>
                  <dc:rights>http://rightsstatements.org/vocab/InC/1.0/</dc:rights>
          <dc:rights>All Rights Reserved</dc:rights>
                  <dc:date>2021</dc:date>
                  <dc:format>38 pages</dc:format>
                  <dc:type>Masters Thesis</dc:type>
          <dc:type>Academic theses</dc:type>
          <dc:type>Text</dc:type>
                  <dc:language>eng</dc:language>
                  <dc:contributor>Agarwal, Nikhil</dc:contributor>
          <dc:contributor>Ben Amor, Heni</dc:contributor>
          <dc:contributor>Phielipp, Mariano</dc:contributor>
          <dc:contributor>DV, Hemanth</dc:contributor>
          <dc:contributor>Arizona State University</dc:contributor>
                  <dc:description>Partial requirement for: M.S., Arizona State University, 2021</dc:description>
          <dc:description>Field of study: Computer Science</dc:description>
          <dc:description>This work explores combining state-of-the-art model-based reinforcement learning (MBRL) algorithms focused on learning complex policies with large state-spaces and augmenting them with a distributional reward perspective on reinforcement learning (RL) algorithms. Distributional RL provides a probabilistic reward formulation as opposed to the classic RL formulation which models the estimation of this distributional return. These probabilistic reward formulations help the agent choose highly risk-averse actions, which in turn makes the learning more stable. To evaluate this idea, I experiment in simulation on complex high-dimensional environments when subjected to different noisy conditions.</dc:description>
                  <dc:subject>Computer Science</dc:subject>
          <dc:subject>Deep learning</dc:subject>
          <dc:subject>distributed reinforcement learning</dc:subject>
          <dc:subject>Model Predictive Control</dc:subject>
          <dc:subject>model-based reinforcement learning</dc:subject>
          <dc:subject>Reinforcement Learning</dc:subject>
                  <dc:title>Learning Policies for Model-Based Reinforcement Learning Using Distributed Reward Formulation</dc:title></oai_dc:dc></metadata></record></GetRecord></OAI-PMH>
