/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, I1.HR                           *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Apr 93                                                   *
*  Last Update : Jun 93                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : schedule0.c                                              *
*                                                                         *
*  Function    : Operations for indirect addressing of local arrays       *
*                                                                         *
*  Export : FORTRAN Interface                                             *
*                                                                         *
*  - A, P, Q, ... are conform and are local                               *
*  - B must be a local array                                              *
*  - B is indirect accessed, rank determines number of P, Q, ...          *
*  - A and B have same type                                               *
*                                                                         *
*  A = B [P]  : A[j] = B[P[j]]    for all j                               *
*                                                                         *
*  void dalib_local_getm1_ (a, a_size, a_elems,                           *
*                            b, b_N1, p, mask   )                         *
*                                                                         *
*  B [P] = A  : B[P[j]] = A[j]    for all j                               *
*                                                                         *
*  void dalib_local_setm1_ (op, b, b_N1, p,                               *
*                            a, a_size, a_elems, mask)                    *
*                                                                         *
*  A = B [P,Q]  : A[j] = B[P[j],Q[j]]    for all j                        *
*                                                                         *
*  void dalib_local_getm2_ (a, a_size, a_elems,                           *
*                            b, b_N1, b_N2, p, q, mask   )                *
*                                                                         *
*  B [P,Q] = A  : B[P[j],Q[j]] = A[j]    for all j                        *
*                                                                         *
*  void dalib_local_setm2_ (op, b, b_N1, b_N2, p, q,                      *
*                            a, a_size, a_elems, mask)                    *
*                                                                         *
**************************************************************************/

#undef DEBUG
#include "system.h"
#include <stdio.h>

/*******************************************************************
*                                                                  *
*  MASK:  A[j] = B[P[j]]    for all j with MASK(j)                 *
*                                                                  *
*  operations for different sizes of elements                      *
*                                                                  *
*******************************************************************/

void dalib_lget1_1  (a, b, p, mask, N)
unsigned char *a, *b;
int *p, *mask, N;
{ int i;
  if (mask == p)
    { for (i=0; i<N; i++)
         a[i] = b[p[i]];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) a[i] = b[p[i]];
    }
}

void dalib_lget1_4  (a, b, p, mask, N)
int *a, *b;
int *p, *mask, N;
{ int i;
  if (mask == p)
    { for (i=0; i<N; i++)
         a[i] = b[p[i]];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) a[i] = b[p[i]];
    }
}

void dalib_lget1_8  (a, b, p, mask, N)
double *a, *b;
int *p, *mask, N;
{ int i;
  if (mask == p)
    { for (i=0; i<N; i++)
       a[i] = b[p[i]];
    }
   else
    { for (i=0; i<N; i++)
       if (mask[i]) a[i] = b[p[i]];
    }
}

/*******************************************************************
*                                                                  *
*  MASK: A = B [P]  : A[j] = B[P[j]]    for all j with MASK(j)     *
*                                                                  *
*  void dalib_local_getm1_ (a, a_size, a_elems,                    *
*                           b, b_N1, p, mask   )                   *
*                                                                  *
*******************************************************************/

void dalib_local_getm1_ (a, a_size, a_elems, b, b_N1, p, mask)

int *a_size;         /* number of local elements of a and p */
int *a_elems;        /* number of elemens in a, p           */
int *b_N1;           /* number of elements of b             */
int *p, *mask;       /* p == mask if mask is not available  */
unsigned char *a, *b;

{ int i, j, bytes;
  unsigned char *a1_ptr, *b1_ptr;

  bytes = *a_size;

  if (bytes == 4)
    dalib_lget1_4 (a, b-4, p, mask, *a_elems);  /* b-4 is fiktive zero */
  else if (bytes == 8)
    dalib_lget1_8 (a, b-8, p, mask, *a_elems);  /* b-8 is fiktive zero */
  else if (bytes == 1)
    dalib_lget1_1 (a, b-1, p, mask, *a_elems);  /* b-1 is fiktive zero */
  else
    { for (i=0, a1_ptr=a ; i<*a_elems; i++, a1_ptr+=bytes)
          if ((mask == p) || (mask[i]))
            { b1_ptr = b + (p[i] - 1) * bytes;
              for (j=0; j < bytes; j++)
                 a1_ptr[j] = b1_ptr[j];
            }  /* mask[i] == true */
     }  /* end of loop for all elements */
} /* dalib_local_getm1 */

/*******************************************************************
*                                                                  *
*  MASK:  B[P[j]] = A[j]   for all j with MASK(j)                  *
*                                                                  *
*  operations for different sizes of elements                      *
*                                                                  *
*******************************************************************/

void dalib_lset1_1  (a, b, p, mask, N)
unsigned char *a, *b;
int *p, *mask, N;
{ int i;
  if (mask == p)
    { for (i=0; i<N; i++)
         b[p[i]] = a[i];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) b[p[i]] = a[i];
    }
}

void dalib_lset1_4  (a, b, p, mask, N)
int *a, *b;
int *p, *mask, N;
{ int i;
  if (mask == p)
    { for (i=0; i<N; i++)
         b[p[i]] = a[i];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) b[p[i]] = a[i];
    }
}

void dalib_lset1_8  (a, b, p, mask, N)
double *a, *b;
int *p, *mask, N;
{ int i;
  if (mask == p)
    { for (i=0; i<N; i++)
         b[p[i]] = a[i];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) b[p[i]] = a[i];
    }
}

/*******************************************************************
*                                                                  *
*  MASK: B [P] = A  : B[P[j]] = A[j]    for all j with MASK(j)     *
*                                                                  *
*  void dalib_local_setm1_ (op, b, b_N1, p,                        *
*                           a, a_size, a_elems, mask)              *
*                                                                  *
*******************************************************************/

void dalib_local_setm1_ (op, b, b_N1, p, a, a_size, a_elems, mask)

unsigned char *a, *b;
int *op;
int *a_size, *a_elems;
int *p, *mask;
int *b_N1;

{ int i, j, bytes;
  unsigned char *a_ptr, *b_ptr;

  bytes = *a_size;

  if (*op == 0)
     { if (bytes == 1)
         dalib_lset1_1  (a, b-1, p, mask, *a_elems);
       else if (bytes == 4)
         dalib_lset1_4  (a, b-4, p, mask, *a_elems);
       else if (bytes == 8)
         dalib_lset1_8  (a, b-8, p, mask, *a_elems);
       else
         { for (i=0, a_ptr = a; i<*a_elems; i++, a_ptr+=bytes )
             if ((mask == p) || (mask[i]))
               { b_ptr = b + (p[i] - 1) * bytes;
                 for (j=0; j < bytes; j++)
                       b_ptr[j] = a_ptr[j];
               }
         }
     }

  else
   
  { for (i=0, a_ptr = a; i<*a_elems; i++, a_ptr+=bytes )
      if ((mask == p) || (mask[i]))
        { b_ptr = b + (p[i] - 1) * bytes;
          switch (*op) {
          case  1 : min_ints (b_ptr, a_ptr); break;
          case  2 : min_reals (b_ptr, a_ptr); break;
          case  3 : min_doubles (b_ptr, a_ptr); break;
          case  4 : max_ints (b_ptr, a_ptr); break;
          case  5 : max_reals (b_ptr, a_ptr); break;
          case  6 : max_doubles (b_ptr, a_ptr); break;
          case  7 : add_ints (b_ptr, a_ptr); break;
          case  8 : add_reals (b_ptr, a_ptr); break;
          case  9 : add_doubles (b_ptr, a_ptr); break;
          case 10 : mult_ints (b_ptr, a_ptr); break;
          case 11 : mult_reals (b_ptr, a_ptr); break;
          case 12 : mult_doubles (b_ptr, a_ptr); break;
          case 13 : and_ints (b_ptr, a_ptr); break;
          case 14 : or_ints (b_ptr, a_ptr); break;
          case 15 : eor_ints (b_ptr, a_ptr); break;
          case 16 : and_bools (b_ptr, a_ptr); break;
          case 17 : or_bools (b_ptr, a_ptr); break;
          case 18 : neq_bools (b_ptr, a_ptr); break;
          default : printf ("illegal operation in dalib_local_set\n");
                    exit(-1);
                    break;
          } /* switch operation */
        } /* end of get b[p[i] to a[i] */
  } /* end of for */
} /* dalib_local_setm1 */

/*******************************************************************
*                                                                  *
*  MASK:  A[j] = B[P[j],Q[j]]    for all j with MASK(j)            *
*                                                                  *
*  operations for different sizes of elements                      *
*                                                                  *
*******************************************************************/

void dalib_lget2_1  (a, b, S, p, q, mask, N)
unsigned char *a, *b; 
int *p, *q, *mask, N, S;
{ int i;
  if (mask == q)
    { for (i=0; i<N; i++)
         a[i] = b[p[i]*S+q[i]];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) a[i] = b[p[i]*S+q[i]];
    }
}

void dalib_lget2_4  (a, b, S, p, q, mask, N)
int *a, *b; 
int *p, *q, *mask, N, S;
{ int i;
  if (mask == q)
    { for (i=0; i<N; i++)
         a[i] = b[p[i]*S+q[i]];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) a[i] = b[p[i]*S+q[i]];
    }
}

void dalib_lget2_8  (a, b, S, p, q, mask, N)
double *a, *b; 
int *p, *q, *mask, N, S;
{ int i;
  if (mask == q)
    { for (i=0; i<N; i++)
         a[i] = b[p[i]*S+q[i]];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) a[i] = b[p[i]*S+q[i]];
    }
}

/***********************************************************************
*                                                                      *
*  MASK: A = B [P,Q]  : A[j] = B[P[j],Q[j]]    for all j with MASK(j)  *
*                                                                      *
*  void dalib_global_getm2_ (a, a_size, a_elems,                       *
*                            b, b_N1, b_N2, p, q, mask   )             *
*                                                                      *
***********************************************************************/

void dalib_local_getm2_ (a, a_size, a_elems, b, b_N1, b_N2, p, q, mask)

int *a_size;         /* number of local elements of a and p */
int *a_elems;        /* number of elemens in a, p           */
int *b_N1, *b_N2;    /* number of elements of b             */
int *p, *q, *mask;   /* q == mask if mask is not available  */
unsigned char *a, *b;

{ int i, j, bytes, N1;
  unsigned char *a1_ptr, *b1_ptr;

  bytes = *a_size;
  N1    = *b_N1;

  if (bytes == 4)
    dalib_lget2_4 (a, b-(N1+1)*4, N1, p, q, mask, *a_elems);
  else if (bytes == 8)
    dalib_lget2_8 (a, b-(N1+1)*8, N1, p, q, mask, *a_elems);
  else if (bytes == 1)
    dalib_lget2_1 (a, b-(N1+1)*1, N1, p, q, mask, *a_elems);
  else 
    { for (i=0, a1_ptr=a ; i<*a_elems; i++, a1_ptr+= bytes)
         if ((mask == p) || (mask[i]))
           { b1_ptr = b + ( (p[i]-1) * *b_N1 + q[i]-1) * bytes;
             for (j=0; j < bytes; j++)
                a1_ptr[j] = b1_ptr[j];
           }  /* mask[i] == true */
    }
} /* dalib_local_getm2 */

/*******************************************************************
*                                                                  *
*  MASK:  B[P[j],Q[j]] = A[j]   for all j with MASK(j)             *
*                                                                  *
*  operations for different sizes of elements                      *
*                                                                  *
*******************************************************************/

void dalib_lset2_1  (a, b, S, p, q, mask, N)
unsigned char *a, *b;
int *p, *q, *mask, N, S;
{ int i;
  if (mask == q)
    { for (i=0; i<N; i++)
         b[p[i]*S+q[i]] = a[i];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) b[p[i]*S+q[i]] = a[i];
    }
}

void dalib_lset2_4  (a, b, S, p, q, mask, N)
int *a, *b;
int *p, *q, *mask, N, S;
{ int i;
  if (mask == q)
    { for (i=0; i<N; i++)
         b[p[i]*S+q[i]] = a[i];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) b[p[i]*S+q[i]] = a[i];
    }
}

void dalib_lset2_8  (a, b, S, p, q, mask, N)
double *a, *b;
int *p, *q, *mask, N, S;
{ int i;
  if (mask == q)
    { for (i=0; i<N; i++)
         b[p[i]*S+q[i]] = a[i];
    }
   else
    { for (i=0; i<N; i++)
         if (mask[i]) b[p[i]*S+q[i]] = a[i];
    }
}

/***********************************************************************
*                                                                      *
*  MASK: B [P,Q] = A  : B[P[j],Q[j]] = A[j]    for all j with MASK(j)  *
*                                                                      *
*  void dalib_global_setm2_ (op, b, b_N1, b-N2, p, q                   *
*                            a, a_size, a_elems, mask)                 *
*                                                                      *
***********************************************************************/

void dalib_local_setm2_ (op, b, b_N1, b_N2, p, q,
                              a, a_size, a_elems,   mask)
int *op;
int *a_size;        /* number of bytes for elem size of a and b */
int *a_elems;       /* number of local elements in a, p, q */
int *b_N1, *b_N2;   /* global dimensions of b */
int *p, *q, *mask;  /* q == mask if mask is not available */
unsigned char *a, *b;

{ int i, j, bytes, S;
  unsigned char *a_ptr, *b_ptr;
  int index;

  bytes = *a_size;
  S = *b_N1;

  if (*op == 0)
     { if (bytes == 1)
         dalib_lset2_1  (a, b-(S+1)*1, S, p, q, mask, *a_elems);
       else if (bytes == 4)
         dalib_lset2_4  (a, b-(S+1)*4, S, p, q, mask, *a_elems);
       else if (bytes == 8)
         dalib_lset2_8  (a, b-(S+1)*8, S, p, q, mask, *a_elems);
       else
         { for (i=0, a_ptr = a; i<*a_elems; i++, a_ptr+=bytes )
             if ((mask == p) || (mask[i]))
               { b_ptr = b + (p[i] - 1) * bytes;
                 for (j=0; j < bytes; j++)
                       b_ptr[j] = a_ptr[j];
               }
         }
      }
  else
   { for (i=0, a_ptr=a ; i<*a_elems; i++, a_ptr+=bytes)
      if ((mask == p) || (mask[i]))
        { /* compute always new indirect address in b */
          b_ptr = b + ( (p[i]-1) * S + q[i]-1) * bytes;
          switch (*op) {
          case  1 : min_ints (b_ptr, a_ptr); break;
          case  2 : min_reals (b_ptr, a_ptr); break;
          case  3 : min_doubles (b_ptr, a_ptr); break;
          case  4 : max_ints (b_ptr, a_ptr); break;
          case  5 : max_reals (b_ptr, a_ptr); break;
          case  6 : max_doubles (b_ptr, a_ptr); break;
          case  7 : add_ints (b_ptr, a_ptr); break;
          case  8 : add_reals (b_ptr, a_ptr); break;
          case  9 : add_doubles (b_ptr, a_ptr); break;
          case 10 : mult_ints (b_ptr, a_ptr); break;
          case 11 : mult_reals (b_ptr, a_ptr); break;
          case 12 : mult_doubles (b_ptr, a_ptr); break;
          case 13 : and_ints (b_ptr, a_ptr); break;
          case 14 : or_ints (b_ptr, a_ptr); break;
          case 15 : eor_ints (b_ptr, a_ptr); break;
          case 16 : and_bools (b_ptr, a_ptr); break;
          case 17 : or_bools (b_ptr, a_ptr); break;
          case 18 : neq_bools (b_ptr, a_ptr); break;
          default : printf ("illegal operation in dalib_local_set\n");
                    exit(-1);
                    break;
          } /* switch operation */
        } /* end of get b[p[i] to a[i] */
    } /* end of for loop */
} /* dalib_local_setm2 */

/*******************************************************************
*                                                                  *
*  FORTRAN INTERFACE                                               *
*                                                                  *
*    - subroutines without a mask parameter                        *
*                                                                  *
*******************************************************************/

void dalib_local_get1__ (a, a_size, a_elems, b, b_N1, p)

int *a_size;    /* size of elements of a and b in Bytes */
int *a_elems;   /* number of elements in a and p        */
int *b_N1;      /* number of elements of b */
int *p;
unsigned char *a, *b;

{ /* mask = p means no mask */

   dalib_local_getm1_ (a, a_size, a_elems, b, b_N1, p, p);
}

void dalib_local_get2__ (a, a_size, a_elems, b, b_N1, b_N2, p, q)
int *a_size;        /* number of bytes for elem size of a and b */
int *a_elems;       /* number of local elements in a, p, q */
int *b_N1, b_N2;    /* global dimensions of b */
int *p, *q;
unsigned char *a, *b;

{ /* mask = p means no mask */
   dalib_local_getm2_ (a, a_size, a_elems, b, b_N1, b_N2, p, q, q);
}

