2688 if (PPCGGen->tree)
2689 generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
2690
diff --git a/lib/CodeGen/PPCGCodeGeneration.cpp b/lib/CodeGen/PPCGCodeGeneration.cpp
index f45b9ac..ab0d57a 100644
--- a/lib/CodeGen/PPCGCodeGeneration.cpp
+++ b/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -2619,7 +2619,9 @@ public:
ScopAnnotator Annotator;
Annotator.buildAliasScopes(*S);
- Region *R = &S->getRegion();
+ Region *R_orig = &S->getRegion(), *R;
+ Region R_copy = Region(*R_orig);
+ R = &R_copy;
simplifyRegion(R, DT, LI, RI);
@@ -2662,11 +2664,10 @@ public:
/// In case a sequential kernel has more surrounding loops as any parallel
/// kernel, the SCoP is probably mostly sequential. Hence, there is no
/// point in running it on a GPU.
- if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel)
- SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
+ if (NodeBuilder.DeepestSequential > NodeBuilder.DeepestParallel
+ || !NodeBuilder.BuildSuccessful )
+ *(R_orig) = R_copy;
- if (!NodeBuilder.BuildSuccessful)
- SplitBlock->getTerminator()->setOperand(0, Builder.getFalse());
}
bool runOnScop(Scop &CurrentScop) override {
If it fails at verifyModule, you can (IIRC) pass verifyModule a raw_ostream, which it will write to (I'm debugging similar problems :) ). Having that output is nice.
Cheers,
Siddharth.
Hi Sanjay,
there are two reasons why code generation in the GPU path may fail. First,
because we write to scalar values, which is not allowed, as we do not yet
support adding synchronization statements for them. Second, because the
GPU kernel cannot be generated. I personally do not think we should
put a lot of effort into trying to recover from this (i.e., to generate GPU
code). These are supposed to be exceptional situations indicating
missing features on our side.
It seems in your experiments we fail here rather often. Is this due to
scalar writes or due to problems in the kernel PTX code generation?