mlpack  3.0.4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
nadam_update.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_OPTIMIZERS_ADAM_NADAM_UPDATE_HPP
14 #define MLPACK_CORE_OPTIMIZERS_ADAM_NADAM_UPDATE_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 
18 namespace mlpack {
19 namespace optimization {
20 
38 {
39  public:
49  NadamUpdate(const double epsilon = 1e-8,
50  const double beta1 = 0.9,
51  const double beta2 = 0.99,
52  const double scheduleDecay = 4e-3) :
53  epsilon(epsilon),
54  beta1(beta1),
55  beta2(beta2),
56  scheduleDecay(scheduleDecay),
57  iteration(0),
58  cumBeta1(1)
59  {
60  // Nothing to do.
61  }
62 
70  void Initialize(const size_t rows, const size_t cols)
71  {
72  m = arma::zeros<arma::mat>(rows, cols);
73  v = arma::zeros<arma::mat>(rows, cols);
74  }
75 
83  void Update(arma::mat& iterate,
84  const double stepSize,
85  const arma::mat& gradient)
86  {
87  // Increment the iteration counter variable.
88  ++iteration;
89 
90  // And update the iterate.
91  m *= beta1;
92  m += (1 - beta1) * gradient;
93 
94  v *= beta2;
95  v += (1 - beta2) * gradient % gradient;
96 
97  double beta1T = beta1 * (1 - (0.5 *
98  std::pow(0.96, iteration * scheduleDecay)));
99 
100  double beta1T1 = beta1 * (1 - (0.5 *
101  std::pow(0.96, (iteration + 1) * scheduleDecay)));
102 
103  cumBeta1 *= beta1T;
104 
105  const double biasCorrection1 = 1.0 - cumBeta1;
106 
107  const double biasCorrection2 = 1.0 - std::pow(beta2, iteration);
108 
109  const double biasCorrection3 = 1.0 - (cumBeta1 * beta1T1);
110 
111  /* Note :- arma::sqrt(v) + epsilon * sqrt(biasCorrection2) is approximated
112  * as arma::sqrt(v) + epsilon
113  */
114  iterate -= (stepSize * (((1 - beta1T) / biasCorrection1) * gradient
115  + (beta1T1 / biasCorrection3) * m) * sqrt(biasCorrection2))
116  / (arma::sqrt(v) + epsilon);
117  }
118 
120  double Epsilon() const { return epsilon; }
122  double& Epsilon() { return epsilon; }
123 
125  double CumBeta1() const { return cumBeta1; }
127  double& CumBeta1() { return cumBeta1; }
128 
130  double Beta1() const { return beta1; }
132  double& Beta1() { return beta1; }
133 
135  double Beta2() const { return beta2; }
137  double& Beta2() { return beta2; }
138 
140  double ScheduleDecay() const { return scheduleDecay; }
142  double& ScheduleDecay() { return scheduleDecay; }
143 
144  private:
145  // The epsilon value added to the denominator of the update to avoid division by zero.
146  double epsilon;
147 
148  // The smoothing parameter.
149  double beta1;
150 
151  // The second moment coefficient.
152  double beta2;
153 
154  // The exponential moving average of gradient values.
155  arma::mat m;
156 
157  // The exponential moving average of squared gradient values.
158  arma::mat v;
159 
160  // The decay parameter for decay coefficients
161  double scheduleDecay;
162 
163  // The number of iterations.
164  double iteration;
165 
166  // The cumulative product of decay coefficients
167  double cumBeta1;
168 };
169 
170 } // namespace optimization
171 } // namespace mlpack
172 
173 #endif
double ScheduleDecay() const
Get the decay parameter for decay coefficients.
The core includes that mlpack expects; standard C++ includes and Armadillo.
double & ScheduleDecay()
Modify the decay parameter for decay coefficients.
double & CumBeta1()
Modify the value of the cumulative product of decay coefficients.
double & Beta1()
Modify the smoothing parameter.
double CumBeta1() const
Get the value of the cumulative product of decay coefficients.
double & Epsilon()
Modify the epsilon value added to the denominator of the update.
Nadam is an optimizer that combines the Adam and NAG optimization strategies.
void Initialize(const size_t rows, const size_t cols)
The Initialize() method is called by the optimizer before the start of the iteration update process...
double & Beta2()
Modify the second moment coefficient.
void Update(arma::mat &iterate, const double stepSize, const arma::mat &gradient)
Update step for Nadam.
double Beta1() const
Get the smoothing parameter.
NadamUpdate(const double epsilon=1e-8, const double beta1=0.9, const double beta2=0.99, const double scheduleDecay=4e-3)
Construct the Nadam update policy with the given parameters.
double Epsilon() const
Get the epsilon value added to the denominator of the update.
double Beta2() const
Get the second moment coefficient.