function Y = riemannian_conjugate_gradient(A, B, Y0, max_iter, tol)
% Riemannian Conjugate Gradient Method for the Orthogonal Procrustes Problem
%   minimize f(Y) = 0.5 * ||A*Y - B||_F^2  subject to  Y'*Y = I
% Input:
% A: m x n matrix
% B: m x p matrix
% Y0: n x p initial guess with orthonormal columns (on the Stiefel manifold, Y0'*Y0 = I)
% max_iter: maximum number of iterations
% tol: tolerance for the stopping criterion (Riemannian gradient norm)
% Output:
% Y: n x p matrix with orthonormal columns minimizing f(Y)
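%
% Example (illustrative sketch; the data, sizes, and tolerance below are
% arbitrary placeholder choices, not values prescribed by this solver):
%   rng(0); m = 50; n = 20; p = 5;
%   A = randn(m, n); B = randn(m, p);
%   [Y0, ~] = qr(randn(n, p), 0);  % random feasible start, Y0'*Y0 = I
%   Y = riemannian_conjugate_gradient(A, B, Y0, 500, 1e-6);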
% Initialize
Y = Y0;
grad_prev = []; % Previous Riemannian gradient (stored at the end of each iteration)
P = []; % Search direction (set inside the loop)
iter = 0;
while iter < max_iter
% Compute the Euclidean gradient of f(Y) = 0.5*||A*Y - B||_F^2
G = A' * (A * Y - B);
% Riemannian gradient: orthogonal projection of G onto the tangent space
% of the Stiefel manifold at Y (embedded metric), G - Y*sym(Y'*G)
YtG = Y' * G;
grad_F = G - Y * ((YtG + YtG') / 2);
% Compute objective value and gradient norm
F = 0.5 * norm(A * Y - B, 'fro')^2;
grad_norm = norm(grad_F, 'fro');
% Print iteration details for debugging
fprintf('Iter %d: F = %.6f, ||grad_F|| = %.6f\n', iter, F, grad_norm);
% Check stopping criterion (Riemannian gradient norm)
if grad_norm < tol
fprintf('Converged after %d iterations.\n', iter);
break;
end
% Update search direction using Riemannian conjugate gradient
if iter == 0
P = -grad_F; % First iteration: steepest descent
else
% Transport the previous gradient and search direction to the tangent
% space at the current Y (projection onto the tangent space is a
% valid vector transport on the Stiefel manifold)
Ytg = Y' * grad_prev;
grad_prev_t = grad_prev - Y * ((Ytg + Ytg') / 2);
YtP = Y' * P;
P_t = P - Y * ((YtP + YtP') / 2);
% Riemannian version of the Polak-Ribière parameter
beta = trace(grad_F' * (grad_F - grad_prev_t)) / trace(grad_prev' * grad_prev);
beta = max(0, beta); % PR+ rule: restart by forcing beta to be non-negative
P = -grad_F + beta * P_t; % Update search direction
% Fall back to steepest descent if P fails to be a descent direction
if trace(grad_F' * P) >= 0
P = -grad_F;
end
end
% Backtracking line search for a step size satisfying the Armijo condition
% (the backtracking loop already confines alpha to (0, 1])
alpha = line_search(Y, P, A, B);
% Update Y using the retraction (QR-based retraction)
Y = retract(Y, P, alpha);
% Store the current Riemannian gradient for the next conjugacy update
grad_prev = grad_F;
iter = iter + 1;
end
if iter == max_iter
fprintf('Reached maximum iterations (%d).\n', max_iter);
end
% Verify that the Riemannian gradient is small at the returned point
G = A' * (A * Y - B);
YtG = Y' * G;
grad_norm_final = norm(G - Y * ((YtG + YtG') / 2), 'fro');
fprintf('Final Riemannian gradient norm: %.6f\n', grad_norm_final);
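% Added diagnostic (optional): the QR retraction keeps the iterates on the
% Stiefel manifold, so Y'*Y should equal the identity up to rounding error
fprintf('Feasibility ||Y''*Y - I||_F: %.2e\n', norm(Y' * Y - eye(size(Y, 2)), 'fro'));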
end
function alpha = line_search(Y, P, A, B)
% Backtracking line search for the step size alpha (Armijo condition only)
alpha = 1; % Initial step size
rho = 0.5; % Reduction factor
c = 1e-4; % Armijo constant
max_line_search_iters = 20;
F = @(Z) 0.5 * norm(A * Z - B, 'fro')^2; % Objective function
F0 = F(Y); % Objective at the current point, computed once
G = A' * (A * Y - B); % Euclidean gradient
YtG = Y' * G;
grad_F = G - Y * ((YtG + YtG') / 2); % Riemannian gradient
directional_derivative = trace(grad_F' * P); % Negative for a descent direction
% Armijo backtracking: shrink alpha until sufficient decrease holds
for i = 1:max_line_search_iters
Y_new = retract(Y, P, alpha);
if F(Y_new) <= F0 + c * alpha * directional_derivative
break; % Armijo condition satisfied
end
alpha = rho * alpha; % Reduce step size
end
end
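% The sufficient-decrease test above, written out:
%   f(R_Y(alpha*P)) <= f(Y) + c * alpha * <grad f(Y), P>_F
% where R_Y is the retraction and <.,.>_F is the Frobenius inner product.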
function Y_new = retract(Y, P, alpha)
% Retraction onto the Stiefel manifold using QR decomposition
[Q, R] = qr(Y + alpha * P, 0); % Thin QR decomposition
% Normalize signs so R has a positive diagonal; this makes the factor Q
% unique and guarantees that retract(Y, P, 0) returns Y itself
D = sign(diag(R));
D(D == 0) = 1;
Y_new = Q * diag(D);
end
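% An SVD-based (polar) retraction is a common alternative to the QR
% retraction; the local function below is an illustrative sketch and is
% not called by the solver above.
function Y_new = retract_polar(Y, P, alpha) %#ok<DEFNU>
% Polar retraction: the closest point on the Stiefel manifold to
% Y + alpha*P, given by U*V' from the thin SVD U*S*V'
[U, ~, V] = svd(Y + alpha * P, 'econ');
Y_new = U * V';
end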