/* nag_dggqrf (f08zec) Example Program.
 *
 * Copyright 2017 Numerical Algorithms Group.
 *
 * Mark 26.1, 2017.
 */

#include <stdio.h>
#include <nag.h>
#include <nag_stdlib.h>
#include <nagf07.h>
#include <nagf08.h>
#include <nagf16.h>

int main(void)
{
  /* Scalars */
  double alpha, beta, rnorm;
  const double zero = 0.0;
  Integer i, j, m, n, nm, p, pda, pdb, pdd, pnm, zrow;
  Integer exit_status = 0;

  /* Arrays */
  double *a = 0, *b = 0, *d = 0, *taua = 0, *taub = 0, *y = 0;

  /* Nag Types */
  NagError fail;
  Nag_OrderType order;

#ifdef NAG_COLUMN_MAJOR
#define A(I, J) a[(J-1)*pda + I - 1]
#define B(I, J) b[(J-1)*pdb + I - 1]
  order = Nag_ColMajor;
#else
#define A(I, J) a[(I-1)*pda + J - 1]
#define B(I, J) b[(I-1)*pdb + J - 1]
  order = Nag_RowMajor;
#endif

  INIT_FAIL(fail);

  printf("nag_dggqrf (f08zec) Example Program Results\n\n");

  /* Skip heading in data file */
  scanf("%*[^\n]");
  scanf("%" NAG_IFMT "%" NAG_IFMT "%" NAG_IFMT "%*[^\n]", &n, &m, &p);
  if (n < 0 || m < 0 || p < 0) {
    printf("Invalid n, m or p\n");
    exit_status = 1;
    goto END;
  }

#ifdef NAG_COLUMN_MAJOR
  pda = n;
  pdb = n;
  pdd = n;
#else
  pda = m;
  pdb = p;
  pdd = 1;
#endif

  /* Allocate memory */
  if (!(a = NAG_ALLOC(n * m, double)) ||
      !(b = NAG_ALLOC(n * p, double)) ||
      !(d = NAG_ALLOC(MAX(n, m), double)) ||
      !(taua = NAG_ALLOC(MIN(m, n), double)) ||
      !(taub = NAG_ALLOC(MIN(n, p), double)) || !(y = NAG_ALLOC(p, double)))
  {
    printf("Allocation failure\n");
    exit_status = -1;
    goto END;
  }

  /* Read A, B and d from data file */
  for (i = 1; i <= n; ++i)
    for (j = 1; j <= m; ++j)
      scanf("%lf", &A(i, j));
  scanf("%*[^\n]");
  for (i = 1; i <= n; ++i)
    for (j = 1; j <= p; ++j)
      scanf("%lf", &B(i, j));
  scanf("%*[^\n]");
  for (i = 0; i < n; ++i)
    scanf("%lf", &d[i]);
  scanf("%*[^\n]");

  /* Compute the generalized QR factorization of (A,B) as
   * A = Q*(R),   B = Q*(T11 T12)*Z
   *       (0)          ( 0  T22)
   * using  nag_dggqrf (f08zec).
   */
  nag_dggqrf(order, n, m, p, a, pda, taua, b, pdb, taub, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dggqrf (f08zec).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Solve weighted least squares problem for case n > m */
  if (n <= m)
    goto END;

  nm = n - m;
  pnm = p - nm;
  /* Multiply Q^T through d = Ax + By to get 
   *     (c1) = Q^T * d = (R) * x + (T11 T12) * Z * (y1)
   *     (c2)             (0)       ( 0  T22)       (y2)
   * Compute C using nag_dormqr (f08agc).
   */
  nag_dormqr(order, Nag_LeftSide, Nag_Trans, n, 1, m, a, pda, taua, d, pdd,
             &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dormqr (f08agc).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Let Z*(y1) = (w1) and solving for w2 we have to solve the triangular sytem
   *       (y2) = (w2)
   *                     T22 * w2 = c2
   * This is done by putting c2 in y2 and backsolving to get w2 in y2.
   *
   * Copy c2 (at d[m]) into y2 using nag_dge_copy (f16qfc).
   */
  nag_dge_copy(Nag_ColMajor, Nag_NoTrans, nm, 1, &d[m], n - m, &y[pnm], nm,
               &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dge_copy (f16qfc).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }
  /* Solve T22*w2 = c2 using nag_dtrtrs (f07tec).
   * T22 is stored in a submatrix of matrix B of dimension n-m by n-m
   * with first element at B(m+1,p-(n-m)+1). y2 is stored from y[p-(n-m)].
   */
  nag_dtrtrs(order, Nag_Upper, Nag_NoTrans, Nag_NonUnitDiag, nm, 1,
             &B(m + 1, pnm + 1), pdb, &y[pnm], nm, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dtrtrs (f07tec).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* set w1 = 0 for minimum norm y. */
  nag_dload(m + p - n, zero, y, 1, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dload.\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Compute estimate of the square root of the residual sum of squares 
   * norm(y) = norm(w2) with y1 = 0 using  nag_dge_norm (f16rac).
   */
  nag_dge_norm(Nag_ColMajor, Nag_FrobeniusNorm, n - m, 1, &y[pnm],
               nm, &rnorm, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dge_norm (f16rac).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* The top half of the system remains:
   *     (c1) = Q^T * d = (R) * x + (T11 T12) * ( 0)
   *                                            (w2)
   * =>      c1 = R * x + T12 * w2
   * =>   R * x = c1    - T12 * w2;
   * 
   * first form d = c1 - T12*w2 where c1 is stored in d
   * using nag_dgemv (f16pac).
   */
  alpha = -1.0;
  beta = 1.0;
  nag_dgemv(order, Nag_NoTrans, m, nm, alpha, &B(1, pnm + 1), pdb, &y[pnm], 1,
            beta, d, 1, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dgemv (f16pac).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Next, solve R * x = d for x (in d) where R is stored in leading submatrix
   * of A in a. This gives the least squares solution x in d.
   * Using nag_dtrtrs (f07tec).
   */
  nag_dtrtrs(order, Nag_Upper, Nag_NoTrans, Nag_NonUnitDiag, m, 1, a, pda, d,
             pdd, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dtrtrs (f07tec).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Compute the minimum norm residual vector y = (Z^T)*w 
   * using nag_dormrq (f08ckc).
   */
  zrow = MAX(1, n - p + 1);
  nag_dormrq(order, Nag_LeftSide, Nag_Trans, p, 1, MIN(n, p), &B(zrow, 1),
             pdb, taub, y, pdd, &fail);
  if (fail.code != NE_NOERROR) {
    printf("Error from nag_dormrq (f08ckc).\n%s\n", fail.message);
    exit_status = 1;
    goto END;
  }

  /* Print least squares solution x */
  printf("Generalized least squares solution\n");
  for (i = 0; i < m; ++i)
    printf(" %11.4f%s", d[i], i % 7 == 6 ? "\n" : "");

  /* Print residual vector y */
  printf("\n");
  printf("\nResidual vector\n");
  for (i = 0; i < p; ++i)
    printf(" %10.2e%s", y[i], i % 7 == 6 ? "\n" : "");

  /* Print estimate of the square root of the residual sum of  squares. */
  printf("\n\nSquare root of the residual sum of squares\n");
  printf("%11.2e\n", rnorm);

END:
  NAG_FREE(a);
  NAG_FREE(b);
  NAG_FREE(d);
  NAG_FREE(taua);
  NAG_FREE(taub);
  NAG_FREE(y);

  return exit_status;
}