function Y = riemannian_conjugate_gradient(A, B, Y0, max_iter, tol)
    % Riemannian Conjugate Gradient Method for the Orthogonal Procrustes Problem
    %   min_Y 0.5*||A*Y - B||_F^2  subject to  Y'*Y = I  (Y on the Stiefel manifold)
    % Input:
    %   A: m x n matrix
    %   B: m x p matrix
    %   Y0: n x p initial guess with orthonormal columns (Y0'*Y0 = I)
    %   max_iter: maximum number of iterations
    %   tol: tolerance for the stopping criterion (Riemannian gradient norm)
    % Output:
    %   Y: n x p matrix with orthonormal columns, a (local) minimizer of 0.5*||A*Y - B||_F^2
    % Initialize
    Y = Y0;
    G = A' * (A * Y - B); % Euclidean gradient
    % Riemannian gradient: project G onto the tangent space of the Stiefel
    % manifold at Y, i.e. grad_F = G - Y*sym(Y'*G) with sym(M) = (M + M')/2
    grad_F = G - Y * ((Y' * G + G' * Y) / 2);
    P = -grad_F; % initial search direction (steepest descent)
    iter = 0;
    while iter < max_iter
        % Save the previous Riemannian gradient, then recompute at the current Y
        grad_F_prev = grad_F;
        G = A' * (A * Y - B); % Euclidean gradient
        % Project the Euclidean gradient onto the tangent space at Y
        grad_F = G - Y * ((Y' * G + G' * Y) / 2); % Riemannian gradient
        % Compute objective value and gradient norm
        F = 0.5 * norm(A * Y - B, 'fro')^2;
        grad_norm = norm(grad_F, 'fro');
        % Print iteration details for debugging
        fprintf('Iter %d: F = %.6f, ||grad_F|| = %.6f\n', iter, F, grad_norm);
        % Check stopping criterion (Riemannian gradient norm)
        if grad_norm < tol
            fprintf('Converged after %d iterations.\n', iter);
            break;
        end
        % Update the search direction (Riemannian conjugate gradient)
        if iter == 0
            P = -grad_F; % first iteration: steepest descent
        else
            % Riemannian Polak-Ribiere parameter: transport the previous
            % gradient to the current tangent space (by projection) and
            % normalize by the previous gradient norm squared
            grad_prev_t = grad_F_prev - Y * ((Y' * grad_F_prev + grad_F_prev' * Y) / 2);
            beta = trace(grad_F' * (grad_F - grad_prev_t)) / trace(grad_F_prev' * grad_F_prev);
            beta = max(0, beta); % PR+ rule: keep beta non-negative (restarts as steepest descent)
            P = -grad_F + beta * P; % update search direction
        end
        % Project the combined direction onto the tangent space at Y; this also
        % serves as a (projection-based) vector transport of the old direction
        P = P - Y * ((Y' * P + P' * Y) / 2);
        % Safeguard: fall back to steepest descent if P is not a descent direction
        if trace(grad_F' * P) >= 0
            P = -grad_F;
        end
        % Backtracking line search for the step size
        alpha = line_search(Y, P, A, B, grad_F);
        % Floor the step size (backtracking from alpha = 1 already ensures alpha <= 1)
        alpha = max(alpha, 1e-10);
        % Update Y using the QR-based retraction
        Y = retract(Y, P, alpha);
        iter = iter + 1;
    end
    if iter == max_iter
        fprintf('Reached maximum iterations (%d).\n', max_iter);
    end
    % Verify that the Riemannian gradient is close to zero at the solution
    G = A' * (A * Y - B);
    grad_F_final = G - Y * ((Y' * G + G' * Y) / 2);
    grad_norm_final = norm(grad_F_final, 'fro');
    fprintf('Final Riemannian gradient norm: %.6f\n', grad_norm_final);
end
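
% Usage sketch (illustrative, not part of the original solver): build a
% consistent problem with a known Stiefel minimizer and check that the
% returned Y stays feasible. Being a local function, call it from within
% this file or copy it out to run standalone; all dimensions are arbitrary.
function demo_riemannian_cg()
    rng(0);
    m = 20; n = 8; p = 3;
    A = randn(m, n);
    [Y_star, ~] = qr(randn(n, p), 0);  % random point on the Stiefel manifold
    B = A * Y_star;                    % right-hand side with a known minimizer
    [Y0, ~] = qr(randn(n, p), 0);      % feasible initial guess
    Y = riemannian_conjugate_gradient(A, B, Y0, 200, 1e-8);
    fprintf('Feasibility ||Y''*Y - I||_F = %.2e, residual ||A*Y - B||_F = %.2e\n', ...
        norm(Y' * Y - eye(p), 'fro'), norm(A * Y - B, 'fro'));
end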
function alpha = line_search(Y, P, A, B, grad_F)
    % Backtracking line search for the step size alpha (Armijo condition only)
    alpha = 1;   % initial step size
    rho = 0.5;   % backtracking reduction factor
    c = 1e-4;    % Armijo constant
    max_line_search_iters = 20;
    F = @(Z) 0.5 * norm(A * Z - B, 'fro')^2; % objective function
    F0 = F(Y); % current objective value, computed once
    directional_derivative = trace(grad_F' * P); % negative for a descent direction
    % Armijo backtracking
    for i = 1:max_line_search_iters
        Y_new = retract(Y, P, alpha);
        if F(Y_new) <= F0 + c * alpha * directional_derivative
            break; % Armijo condition satisfied
        end
        alpha = rho * alpha; % reduce the step size
    end
end
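
% Optional sanity check (a sketch, not called by the solver): verify that the
% analytic directional derivative trace(grad_F'*P) matches a central finite
% difference of the objective along the retracted curve t -> retract(Y, P, t).
% Assumes P is a tangent vector at Y; the name and step size are illustrative.
function err = check_directional_derivative(A, B, Y, P)
    F = @(Z) 0.5 * norm(A * Z - B, 'fro')^2;
    G = A' * (A * Y - B);
    grad_F = G - Y * ((Y' * G + G' * Y) / 2); % same projection as the solver
    analytic = trace(grad_F' * P);            % d/dt F(retract(Y, P, t)) at t = 0
    t = 1e-6;
    numeric = (F(retract(Y, P, t)) - F(retract(Y, P, -t))) / (2 * t);
    err = abs(analytic - numeric) / max(1, abs(analytic)); % relative mismatch
end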
function Y_new = retract(Y, P, alpha)
    % Retraction onto the Stiefel manifold via thin QR decomposition
    [Q, R] = qr(Y + alpha * P, 0);
    % Resolve the sign ambiguity of QR so that diag(R) > 0; this makes the
    % retraction well defined and guarantees retract(Y, P, 0) == Y
    Y_new = Q * diag(sign(sign(diag(R)) + 0.5));
end
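
% Alternative retraction (a sketch; the solver above uses the QR retraction).
% The polar retraction maps Y + alpha*P to its nearest point on the Stiefel
% manifold in Frobenius norm, via the orthogonal factor of a thin SVD.
function Y_new = retract_polar(Y, P, alpha)
    [U, ~, V] = svd(Y + alpha * P, 'econ');
    Y_new = U * V';
end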