Thanks for your help. Those errors are gone, but now I face the following linker errors:
/tmp/tmpxft_00003c58_00000000-13_RecMST.o: In function `HPGMST()':
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x2c88): undefined reference to `cudppCreate'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x2d9e): undefined reference to `cudppPlan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x2dd1): undefined reference to `cudppSegmentedScan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x2de0): undefined reference to `cudppDestroyPlan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x2f4a): undefined reference to `cudppPlan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x2f73): undefined reference to `cudppScan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x2f82): undefined reference to `cudppDestroyPlan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x33b9): undefined reference to `cudppPlan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x33e2): undefined reference to `cudppScan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x33f1): undefined reference to `cudppDestroyPlan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x3766): undefined reference to `cudppPlan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x378f): undefined reference to `cudppScan'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x379e): undefined reference to `cudppDestroyPlan'
/tmp/tmpxft_00003c58_00000000-13_RecMST.o: In function `main':
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x46da): undefined reference to `cutCreateTimer'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x471b): undefined reference to `cutStartTimer'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x4771): undefined reference to `cutStopTimer'
tmpxft_00003c58_00000000-1_RecMST.cudafe1.cpp:(.text+0x47b2): undefined reference to `cutGetTimerValue'
collect2: ld returned 1 exit status
I'm using the following command to compile:
nvcc -arch compute_20 -code sm_20 -I/home/ankit/Documents/cudpp-2.0/include -I/home/ankit/NVIDIA_GPU_Computing_SDK/C/common/inc RecMST.cu
I'm new to CUDA and also to Linux; I'm using Red Hat 6. Please help.
The code was already available and I'm only trying to verify the
results, so there shouldn't be any serious problem with the code itself.
A bit of your time would be extremely helpful.
The HPGMST() function is:
// Runs one pass of the MST computation over the current graph held in the
// file-level device arrays (d_vertex, d_edge, d_weight, ...).
//
// Steps, in order: segmented min scan to pick an outgoing edge per vertex,
// successor array construction and cycle removal, marking of the selected
// edges in d_output_MST, pointer doubling on the successor array, assignment
// of new supervertex IDs, removal of self and duplicate edges, and compaction
// into new edge/vertex lists.
//
// Takes no parameters and returns nothing. It reads the globals
// no_of_vertices / no_of_edges to size every launch, and overwrites both at
// the end with the values the device wrote to d_edge_list_size and
// d_vertex_list_size.
//
// All kernels are launched on the default stream with 0 bytes of dynamic
// shared memory. The CUDPP handle created here is local to this call and is
// released before returning.
void HPGMST()
{
    // Reinitialize the ranking arrays from the host-side test copies
    // (original note: "must be orig but this also works").
    CUDA_SAFE_CALL( cudaMemcpy( d_vertex_split_rank, h_vertex_split_rank_test,
                                sizeof(unsigned long long int)*no_of_vertices,
                                cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL( cudaMemcpy( d_edge_rank, h_edge_rank_test,
                                sizeof(unsigned long long int)*no_of_edges,
                                cudaMemcpyHostToDevice));

    // Make both launch configurations needed for execution:
    // one sized by no_of_edges and one sized by no_of_vertices.
    int num_of_blocks, num_of_threads_per_block;

    SetGridThreadLen(no_of_edges, &num_of_blocks, &num_of_threads_per_block);
    dim3 grid_edgelen(num_of_blocks, 1, 1);
    dim3 threads_edgelen(num_of_threads_per_block, 1, 1);

    SetGridThreadLen(no_of_vertices, &num_of_blocks, &num_of_threads_per_block);
    dim3 grid_vertexlen(num_of_blocks, 1, 1);
    dim3 threads_vertexlen(num_of_threads_per_block, 1, 1);

    // CUDPP 2.0 library handle; passed as the first argument of every
    // cudppPlan call below and released with cudppDestroy at the end.
    CUDPPHandle theCudpp;
    cudppCreate(&theCudpp);

    // Append the Weight and Outgoing vertex into a single array,
    // 8-10 bits for weight and 20-22 bits for vertex ID.
    // Append in parallel on the device itself, call the append kernel.
    AppendKernel_1<<< grid_edgelen, threads_edgelen, 0>>>(d_segmented_min_scan_input, d_weight, d_edge, no_of_edges);

    // Create the flag needed for the segmented min scan operation;
    // a similar operation is also used at other places.
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>( d_edge_flag, no_of_edges );

    // Mark the segments for the segmented min scan.
    MakeFlag_3<<< grid_vertexlen, threads_vertexlen, 0>>>( d_edge_flag, d_vertex, no_of_vertices);

    // Perform the segmented min scan on the resulting array using d_edge_flag as segments.
    // Make the segmented min scan plan.
    cudppPlan(theCudpp, &segmentedScanPlan_min, config_segmented_min, no_of_edges, 1, 0 );
    cudppSegmentedScan(segmentedScanPlan_min, d_segmented_min_scan_output, d_segmented_min_scan_input, (const unsigned int*)d_edge_flag, no_of_edges);
    cudppDestroyPlan(segmentedScanPlan_min);

    // Make the successor array.
    MakeSucessorArray<<< grid_vertexlen, threads_vertexlen, 0>>>(d_successor, d_vertex, d_segmented_min_scan_output, no_of_vertices, no_of_edges);

    // Check and remove cycles if they exist.
    RemoveCycles<<< grid_vertexlen, threads_vertexlen, 0>>>(d_successor,no_of_vertices);

    // Scan the flag to get u at every edge, use the u to index d_vertex to get
    // the last entry in each segment. U at every edge is also useful later in
    // the algorithm.
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>( d_edge_flag, no_of_edges );
    MakeFlagForUIds<<< grid_vertexlen, threads_vertexlen, 0>>>(d_edge_flag, d_vertex,no_of_vertices);
    cudppPlan(theCudpp, &scanPlan_add, config_scan_add, no_of_edges , 1, 0);
    cudppScan(scanPlan_add, d_old_uIDs, d_edge_flag, no_of_edges);
    cudppDestroyPlan(scanPlan_add);

    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>((unsigned int*)d_pick_array, no_of_edges);

    // Fill the pick array using the above and the d_successor array.
    MakePickArray<<< grid_edgelen, threads_edgelen, 0>>>(d_pick_array,d_successor,d_vertex,d_old_uIDs,no_of_vertices,no_of_edges);

    // Mark the remaining edges in the output MST array. This is not so elegant:
    // because we do not know which edge index was selected by the segmented
    // min scan, we check each edge against the selected edges and write to the
    // output if they are the same.
    MarkOutputEdges<<< grid_edgelen, threads_edgelen, 0>>>(d_pick_array, d_segmented_min_scan_input, d_segmented_min_scan_output, d_output_MST,d_edge_mapping,no_of_edges);

    // Propagate representative vertex IDs to all vertices iteratively using
    // pointer doubling until no change occurs in the successor array.
    bool succchange;
    do
    {
        succchange=false; // if no thread changes this value, the loop stops
        CUDA_SAFE_CALL( cudaMemcpy( d_succchange, &succchange, sizeof(bool), cudaMemcpyHostToDevice));
        // Reusing Vertex Flag
        SuccToCopy<<< grid_vertexlen, threads_vertexlen, 0>>>(d_successor, d_successor_copy, no_of_vertices);
        PropagateRepresentativeID<<< grid_vertexlen, threads_vertexlen, 0>>>(d_successor, d_successor_copy, d_succchange,no_of_vertices);
        CopyToSucc<<< grid_vertexlen, threads_vertexlen, 0>>>(d_successor, d_successor_copy, no_of_vertices);
        CUDA_SAFE_CALL( cudaMemcpy( &succchange, d_succchange, sizeof(bool), cudaMemcpyDeviceToHost));
    }
    while(succchange);

    // Split the vertex ids based on supervertex ids, using the 64-bit version
    // of split. Append vertex IDs with supervertex IDs and split on the
    // supervertex IDs.
    AppendVertexIDsForSplit<<< grid_vertexlen, threads_vertexlen, 0>>>(d_vertex_split, d_successor,no_of_vertices);
    sp.split(d_vertex_split, d_vertex_split_rank, d_vertex_split_scratchmem, d_vertex_rank_scratchmem, no_of_vertices, NO_OF_BITS_TO_SPLIT_ON, 0);

    // Make the flag for assigning new vertex IDs based on difference in supervertex IDs.
    ClearArray<<< grid_vertexlen, threads_vertexlen, 0>>>( d_vertex_flag, no_of_vertices);
    MakeFlagForScan<<< grid_vertexlen, threads_vertexlen, 0>>>(d_vertex_flag, d_vertex_split, no_of_vertices);

    // Scan the newly formed flag array to assign new ids to supervertices.
    cudppPlan(theCudpp, &scanPlan_add, config_scan_add, no_of_vertices , 1, 0);
    cudppScan(scanPlan_add, d_new_supervertexIDs, d_vertex_flag, no_of_vertices);
    cudppDestroyPlan(scanPlan_add);

    // Make the new supervertex ids per vertex.
    MakeSuperVertexIDPerVertex<<< grid_vertexlen, threads_vertexlen, 0>>>(d_new_supervertexIDs, d_vertex_split, d_vertex_flag, no_of_vertices);
    CopySuperVertexIDPerVertex<<< grid_vertexlen, threads_vertexlen, 0>>>(d_new_supervertexIDs, d_vertex_flag, no_of_vertices);

    // Remove self edges from the edge-list.
    CopyEdgeArray<<< grid_edgelen, threads_edgelen, 0>>>(d_edge,d_edge_mapping_copy, no_of_edges);
    RemoveSelfEdges<<< grid_edgelen, threads_edgelen, 0>>>(d_edge, d_old_uIDs, d_new_supervertexIDs, d_vertex_split_rank, d_edge_mapping_copy, no_of_edges);
    CopyEdgeArrayBack<<< grid_edgelen, threads_edgelen, 0>>>(d_edge,d_edge_mapping_copy, no_of_edges);

    // Now, remove duplicated edges. This is not mandatory, however, it reduces
    // the edge-list size significantly. You may choose to use it once in the
    // initial iterations of the algorithm; later the edge-list size is small
    // anyway, so not much is gained by doing this in later iterations.
    // Append u, v and weight per edge in a 64-bit value.
    // Split the array using (u,v) as the key. Pick the first distinct (u,v)
    // entry as the edge, nullify others.
    // You may also replace the split with sort, but we could not find a 64-bit sort.
    AppendForDuplicateEdgeRemoval<<< grid_edgelen, threads_edgelen, 0>>>(d_appended_uvw, d_edge, d_old_uIDs, d_weight,d_new_supervertexIDs, no_of_edges);
    sp.split(d_appended_uvw, d_edge_rank, d_edge_split_scratchmem, d_edge_rank_scratchmem, no_of_edges, NO_OF_BITS_TO_SPLIT_ON_UVW, 0);

    // Pick the first distinct (u,v) combination, mark these edges and compact.
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>( d_edge_flag, no_of_edges );
    unsigned int dsize=no_of_edges; // just make sure
    CUDA_SAFE_CALL( cudaMemcpy( d_size, &dsize, sizeof(unsigned int), cudaMemcpyHostToDevice));
    MarkEdgesUV<<< grid_edgelen, threads_edgelen, 0>>>(d_edge_flag, d_appended_uvw, d_size, no_of_edges);

    // Scan the flag array to know where to write the value in the new edge and weight lists.
    cudppPlan(theCudpp, &scanPlan_add, config_scan_add, no_of_edges, 1, 0);
    cudppScan(scanPlan_add, d_old_uIDs, d_edge_flag, no_of_edges); // Just reusing the d_old_uIDs array for compacting
    cudppDestroyPlan(scanPlan_add);

    //******************************************************************************************
    // Do all clearing in a single kernel, no need to call multiple times, OK for testing only
    //******************************************************************************************
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>((unsigned int*)d_edge, no_of_edges );
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>((unsigned int*)d_weight, no_of_edges );
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>( d_edge_mapping_copy, no_of_edges);
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>( (unsigned int*)d_pick_array, no_of_edges); // Reusing the Pick Array

    // Zero the device-side counters for the new list sizes.
    int negative=0;
    CUDA_SAFE_CALL( cudaMemcpy( d_edge_list_size, &negative, sizeof( int), cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL( cudaMemcpy( d_vertex_list_size, &negative, sizeof( int), cudaMemcpyHostToDevice));

    // Compact the edge and weight lists.
    // Make a new grid for valid entries in the d_edge_flag array.
    unsigned int validsize=0;
    CUDA_SAFE_CALL( cudaMemcpy( &validsize, d_size, sizeof(unsigned int), cudaMemcpyDeviceToHost));
    SetGridThreadLen(validsize, &num_of_blocks, &num_of_threads_per_block);
    dim3 grid_validsizelen(num_of_blocks, 1, 1);
    dim3 threads_validsizelen(num_of_threads_per_block, 1, 1);

    // Reusing d_pick_array for storing the u ids.
    CompactEdgeList<<< grid_validsizelen, threads_validsizelen, 0>>>(d_edge, d_weight, d_edge_mapping,
                                                                     d_edge_mapping_copy, d_old_uIDs, d_edge_flag, d_appended_uvw,
                                                                     d_pick_array, d_edge_rank, d_size, d_edge_list_size,
                                                                     d_vertex_list_size);
    CopyEdgeMap<<< grid_edgelen, threads_edgelen, 0>>>(d_edge_mapping, d_edge_mapping_copy,no_of_edges);

    // Make the vertex list.
    // Mark flag for edge list, based on difference of u ids.
    ClearArray<<< grid_edgelen, threads_edgelen, 0>>>( d_edge_flag, no_of_edges);
    ClearArray<<< grid_vertexlen, threads_vertexlen, 0>>>((unsigned int*)d_vertex, no_of_vertices);
    MakeFlagForVertexList<<< grid_edgelen, threads_edgelen, 0>>>(d_pick_array, d_edge_flag, no_of_edges);
    MakeVertexList<<< grid_edgelen, threads_edgelen, 0>>>(d_vertex, d_pick_array, d_edge_flag, no_of_edges);

    // Read back the new list sizes into the host-side globals.
    CUDA_SAFE_CALL( cudaMemcpy( &no_of_edges, d_edge_list_size, sizeof(int), cudaMemcpyDeviceToHost));
    CUDA_SAFE_CALL( cudaMemcpy( &no_of_vertices, d_vertex_list_size, sizeof(int), cudaMemcpyDeviceToHost));

    // Release the CUDPP instance created at the top of this call; every plan
    // made from it has already been destroyed above.
    cudppDestroy(theCudpp);
}