diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index e388775caab..1574fd0a160 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -1193,6 +1193,28 @@ struct OptimizeInstructions
           BranchHints::flip(curr, getFunction());
         }
       }
+      // (i32.and X 1) as if-else condition => (i32.ctz X) with swapped arms,
+      // since ctz(X) == 0 iff LSB(X) == 1 (saves one instruction).
+      if (auto* binary = curr->condition->dynCast<Binary>()) {
+        if (binary->op == AndInt32) {
+          Expression* other = nullptr;
+          if (auto* c = binary->right->dynCast<Const>()) {
+            if (c->value.geti32() == 1) {
+              other = binary->left;
+            }
+          } else if (auto* c = binary->left->dynCast<Const>()) {
+            if (c->value.geti32() == 1) {
+              other = binary->right;
+            }
+          }
+          if (other) {
+            Builder builder(*getModule());
+            curr->condition = builder.makeUnary(CtzInt32, other);
+            std::swap(curr->ifTrue, curr->ifFalse);
+            BranchHints::flip(curr, getFunction());
+          }
+        }
+      }
       // Note that we do not consider metadata here. Like LLVM, we ignore
       // metadata when trying to fold code together, preferring certain
       // optimization over possible benefits of profiling data.
@@ -3114,6 +3136,24 @@ struct OptimizeInstructions
             binary->op = op;
             return binary;
           }
+          // eqz(and X 1) ==> ctz X in boolean context:
+          // both are truthy iff LSB(X) == 0, saving one instruction.
+          if (binary->op == AndInt32) {
+            Expression* other = nullptr;
+            if (auto* c = binary->right->dynCast<Const>()) {
+              if (c->value.geti32() == 1) {
+                other = binary->left;
+              }
+            } else if (auto* c = binary->left->dynCast<Const>()) {
+              if (c->value.geti32() == 1) {
+                other = binary->right;
+              }
+            }
+            if (other) {
+              Builder builder(*getModule());
+              return builder.makeUnary(CtzInt32, other);
+            }
+          }
         }
       }
       if (unary->op == EqZInt32 || unary->op == EqZInt64) {
diff --git a/test/lit/passes/optimize-instructions-lsb-if.wast b/test/lit/passes/optimize-instructions-lsb-if.wast
new file mode 100644
index 00000000000..fbd6eca3179
--- /dev/null
+++ b/test/lit/passes/optimize-instructions-lsb-if.wast
@@ -0,0 +1,79 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+;; RUN: wasm-opt %s --optimize-instructions -S -o - | filecheck %s
+
+;; Test that (if (i32.and X (i32.const 1)) T E) is optimized to
+;; (if (i32.ctz X) E T), and (br_if N V (i32.eqz (i32.and X 1))) to
+;; (br_if N V (i32.ctz X)), saving one instruction in each case.
+
+(module
+  ;; CHECK:      (func $lsb-if (param $x i32) (result i32)
+  ;; CHECK-NEXT:  (if (result i32)
+  ;; CHECK-NEXT:   (i32.ctz
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (then
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (else
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $lsb-if (param $x i32) (result i32)
+    ;; if LSB is set, return 1; else return 0
+    ;; optimizes to: if ctz(x) != 0, return 0; else return 1
+    (if (result i32)
+      (i32.and (local.get $x) (i32.const 1))
+      (then (i32.const 1))
+      (else (i32.const 0))
+    )
+  )
+
+  ;; CHECK:      (func $lsb-if-const-left (param $x i32) (result i32)
+  ;; CHECK-NEXT:  (if (result i32)
+  ;; CHECK-NEXT:   (i32.ctz
+  ;; CHECK-NEXT:    (local.get $x)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (then
+  ;; CHECK-NEXT:    (i32.const 0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (else
+  ;; CHECK-NEXT:    (i32.const 1)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $lsb-if-const-left (param $x i32) (result i32)
+    ;; same but constant on the left
+    (if (result i32)
+      (i32.and (i32.const 1) (local.get $x))
+      (then (i32.const 1))
+      (else (i32.const 0))
+    )
+  )
+
+  ;; CHECK:      (func $lsb-brif (param $x i32) (result i32)
+  ;; CHECK-NEXT:  (block $done (result i32)
+  ;; CHECK-NEXT:   (drop
+  ;; CHECK-NEXT:    (br_if $done
+  ;; CHECK-NEXT:     (i32.const 99)
+  ;; CHECK-NEXT:     (i32.ctz
+  ;; CHECK-NEXT:      (local.get $x)
+  ;; CHECK-NEXT:     )
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:   (i32.const 42)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $lsb-brif (param $x i32) (result i32)
+    ;; br_if (eqz (and X 1)) => br_if (ctz X): the typical is_skewed/is_scalar pattern
+    (block $done (result i32)
+      (drop
+        (br_if $done
+          (i32.const 99)
+          (i32.eqz (i32.and (local.get $x) (i32.const 1)))
+        )
+      )
+      (i32.const 42)
+    )
+  )
+)