Real scaled sparse matrix vector multiplication?

28 views
Skip to first unread message

180110...@gmail.com

unread,
Jun 12, 2017, 10:35:47 AM6/12/17
to cusp-users
 I see that there is a multiply to calculate spmv that takes a reduce and a combine:

    template <typename LinearOperator,
             
typename MatrixOrVector1,
             
typename MatrixOrVector2,
             
typename UnaryFunction,
             
typename BinaryFunction1,
             
typename BinaryFunction2>
   
void multiply(const LinearOperator&  A,
                 
const MatrixOrVector1& B,
                 
MatrixOrVector2& C,
                 
UnaryFunction  initialize,
                 
BinaryFunction1 combine,
                 
BinaryFunction2 reduce);



From the interface it seems like custom combine and reduce should be possible for any matrix/vector multiplication. But I wonder that if cusp supports to use other combine and reduce function defined in  thrust/functional.h besides multiplication and plus to calculate spmv. For example, can I use thrust::plus to replace multiplication the original combine function(i.e. multiplication)?
So I have tried the following example:
   


       
#include <cusp/array1d.h>
       
#include <cusp/array2d.h>
       
#include <cusp/functional.h>
       
#include <cusp/multiply.h>
       
#include <cusp/print.h>
     
       
int main(void)
       
{
           
// define multiply functors
           cusp
::constant_functor<float> initialize;
           thrust
::plus<float> combine;
           thrust
::plus<float> reduce;
     
           
// initialize matrix
           cusp
::array2d<float, cusp::host_memory> A(8,8);
               
           A
(0,0) = 1; A(0,1) = 1; A(1,1) = 1; A(1,2) = 1; A(2,2) = 1;A(2,4) = 1;A(2,6) = 1;
           A
(3,3) = 1;A(3,4) = 1;A(3,5) = 1;A(4,5) = 1;A(5,5) = 1;A(5,6) = 1;
   
           cusp
::array1d<float, cusp::host_memory> x(8);
           
for(int i=0;i<8;i++)  x[i] = 1.0;
           
// allocate output vector
           cusp
::array1d<float, cusp::host_memory> y(8);
           
for(int i=0;i<8;i++)  y[i] = 1.0;
           
// compute y = A * x
           cusp
::multiply(A, x, y, initialize, combine, reduce);
     
           
// print y
           cusp
::print(A);
           cusp
::print(x);
           cusp
::print(y);
     
           
return 0;
       
}


And I got below answer:

    array2d <8, 8>
            (1)         (1)         (0)         (0)         (0)         (0)         (0)         (0) 
            (0)         (1)         (1)         (0)         (0)         (0)         (0)         (0) 
            (0)         (0)         (1)         (0)         (1)         (0)         (1)         (0) 
            (0)         (0)         (0)         (1)         (1)         (1)         (0)         (0) 
            (0)         (0)         (0)         (0)         (0)         (1)         (0)         (0) 
            (0)         (0)         (0)         (0)         (0)         (1)         (1)         (0) 
            (0)         (0)         (0)         (0)         (0)         (0)         (0)         (0) 
            (0)         (0)         (0)         (0)         (0)         (0)         (0)         (0) 
    array1d <8>
            (1)
            (1)
            (1)
            (1)
            (1)
            (1)
            (1)
            (1)
    array1d <8>
            (10)
            (10)
            (11)
            (11)
            (9)
            (10)
            (8)
            (8)

The answer was wrong. But when I tested the below code:

 
  #include <cusp/array1d.h>
   
#include <cusp/array2d.h>
   
#include <cusp/functional.h>
   
#include <cusp/multiply.h>
   
#include <cusp/print.h>
   
int main(void)
   
{
       
// define multiply functors
        cusp
::constant_functor<float> initialize;
        thrust
::plus<float> combine;
        thrust
::plus<float>       reduce;
       
// initialize matrix
        cusp
::array2d<float, cusp::host_memory> A(2,2);
        A
(0,0) = 10;  A(0,1) = 20;
        A
(1,0) = 40;  A(1,1) = 50;
       
// initialize input vector
        cusp
::array1d<float, cusp::host_memory> x(2);
        x
[0] = 1;
        x
[1] = 2;
       
// allocate output vector
        cusp
::array1d<float, cusp::host_memory> y(2);
       
// compute y = A * x
        cusp
::multiply(A, x, y, initialize, combine, reduce);
       
// print y
        cusp
::print(y);
       
return 0;
   
}


I have got the right answer:

    array2d <2, 2>
            (10)         (20) 
            (40)         (50) 
    array1d <2>
            (1)
            (2)
    array1d <2>
            (33)
            (93)

It is so strange, and I do not know the reason. So, I wonder whether the cusp::multiply is real scaled or not.

Any info will help.


Thank you!

Erich






Steven Dalton

unread,
Jun 12, 2017, 10:40:27 AM6/12/17
to cusp-...@googlegroups.com
Thanks Erich,

  Can you add an example of your expected output? Cusp computes the generalized spmv product using the following scheme.

accumulator = reduce(accumulator, combine(A(i,j), x[j]));

  So for your example for row 0 I get : (1 + 1) + (1 + 1) + (0 + 1) ... six more times = 10.

Steve 

--
You received this message because you are subscribed to the Google Groups "cusp-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to cusp-users+unsubscribe@googlegroups.com.
To post to this group, send email to cusp-...@googlegroups.com.
Visit this group at https://groups.google.com/group/cusp-users.
For more options, visit https://groups.google.com/d/optout.

180110...@gmail.com

unread,
Jun 13, 2017, 2:56:17 AM6/13/17
to cusp-users
Sorry Steve,  the first example posted in the previous question is correct. But I got a wrong answer when I tested the below example whose matrix A was in coo format.
It used plus operator to combine.  
#include <cusp/csr_matrix.h>
#include <cusp/monitor.h>
#include <cusp/multiply.h>
#include <cusp/print.h>
#include <cusp/krylov/cg.h>


int main(void)
{
   
// COO format in host memory
   
int   host_I[13] = {0,0,1,1,2,2,2,3,3,3,4,5,5}; // COO row indices
   
int   host_J[13] = {0,1,1,2,2,4,6,3,4,5,5,5,6}; // COO column indices
   
int   host_V[13] = {1,1,1,1,1,1,1,1,1,1,1,1,1};
   
// x and y arrays in host memory
   
int host_x[7] = {1,1,1,1,1,1,1};
   
int host_y[6] = {0,0,0,0,0,0};


   
// allocate device memory for COO format
   
int   * device_I;
    cudaMalloc
(&device_I, 13 * sizeof(int));
   
int   * device_J;
    cudaMalloc
(&device_J, 13 * sizeof(int));
   
int * device_V;
    cudaMalloc
(&device_V, 13 * sizeof(int));


   
// allocate device memory for x and y arrays
   
int * device_x;
    cudaMalloc
(&device_x, 7 * sizeof(int));
   
int * device_y;
    cudaMalloc
(&device_y, 6 * sizeof(int));


   
// copy raw data from host to device
    cudaMemcpy
(device_I, host_I, 13 * sizeof(int),   cudaMemcpyHostToDevice);
    cudaMemcpy
(device_J, host_J, 13 * sizeof(int),   cudaMemcpyHostToDevice);
    cudaMemcpy
(device_V, host_V, 13 * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy
(device_x, host_x,  7 * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy
(device_y, host_y,  6 * sizeof(int), cudaMemcpyHostToDevice);


   
// matrices and vectors now reside on the device


   
// *NOTE* raw pointers must be wrapped with thrust::device_ptr!
    thrust
::device_ptr<int>   wrapped_device_I(device_I);
    thrust
::device_ptr<int>   wrapped_device_J(device_J);
    thrust
::device_ptr<int>   wrapped_device_V(device_V);
    thrust
::device_ptr<int>   wrapped_device_x(device_x);
    thrust
::device_ptr<int>   wrapped_device_y(device_y);


   
// use array1d_view to wrap the individual arrays
   
typedef typename cusp::array1d_view< thrust::device_ptr<int>   > DeviceIndexArrayView;
   
typedef typename cusp::array1d_view< thrust::device_ptr<int> > DeviceValueArrayView;


   
DeviceIndexArrayView row_indices   (wrapped_device_I, wrapped_device_I + 13);
   
DeviceIndexArrayView column_indices(wrapped_device_J, wrapped_device_J + 13);
   
DeviceValueArrayView values        (wrapped_device_V, wrapped_device_V + 13);
   
DeviceValueArrayView x             (wrapped_device_x, wrapped_device_x + 7);
   
DeviceValueArrayView y             (wrapped_device_y, wrapped_device_y + 6);


   
// combine the three array1d_views into a coo_matrix_view
   
typedef cusp::coo_matrix_view<DeviceIndexArrayView,
           
DeviceIndexArrayView,
           
DeviceValueArrayView> DeviceView;


   
// construct a coo_matrix_view from the array1d_views
   
DeviceView A(6, 7, 13, row_indices, column_indices, values);


    std
::cout << "\ndevice coo_matrix_view" << std::endl;
    cusp
::print(A);
    cusp
::constant_functor<int> initialize;
    thrust
::plus<int> combine;
    thrust
::plus<int> reduce;
    cusp
::multiply(A , x , y , initialize, combine, reduce);
    std
::cout << "\nx array" << std::endl;
    cusp
::print(x);

    std
::cout << "\n y array, y = A * x" << std::endl;
    cusp
::print(y);


    cudaMemcpy
(host_y, device_y,  6 * sizeof(int), cudaMemcpyDeviceToHost);


   
// free device arrays
    cudaFree
(device_I);
    cudaFree
(device_J);
    cudaFree
(device_V);
    cudaFree
(device_x);
    cudaFree
(device_y);


   
return 0;
}


I get the below answer:
device coo_matrix_view
sparse matrix <6, 7> with 13 entries
              0              0        (1)
              0              1        (1)
              1              1        (1)
              1              2        (1)
              2              2        (1)
              2              4        (1)
              2              6        (1)
              3              3        (1)
              3              4        (1)
              3              5        (1)
              4              5        (1)
              5              5        (1)
              5              6        (1)
x array
array1d <7>

        (1)
        (1)
        (1)
        (1)
        (1)
        (1)
        (1)
 y array, y = A * x
array1d <6>
        (4)
        (4)
        (6)
        (6)
        (2)
        (631)


The vector y I got is strange, I think the correct answer y should be:
[9,
9,
10,
10,
8,
9]     
So I do not whether such replacement of combine and reduce can be adapted to other sparse matrix format, like coo.  Or maybe the code I wrote above is incorrect to call multiply. 
Can you give me some help? 

Thank you
Erich

在 2017年6月12日星期一 UTC+8下午10:40:27,Steve写道:
To unsubscribe from this group and stop receiving emails from it, send an email to cusp-users+...@googlegroups.com.
Reply all
Reply to author
Forward
0 new messages