diff --git a/compiler/vm.nim b/compiler/vm.nim
index a902721c9d..95f232207a 100644
--- a/compiler/vm.nim
+++ b/compiler/vm.nim
@@ -1211,6 +1211,11 @@ proc rawExecute(c: PCtx, start: int, tos: PStackFrame): TFullReg =
     of opcNarrowU:
       decodeB(rkInt)
       regs[ra].intVal = regs[ra].intVal and ((1'i64 shl rb)-1)
+    of opcSignExtend:
+      # like opcNarrowS, but no out of range possible
+      decodeB(rkInt)
+      let imm = 64 - rb
+      regs[ra].intVal = ashr(regs[ra].intVal shl imm, imm)
     of opcIsNil:
       decodeB(rkInt)
       let node = regs[rb].node
diff --git a/compiler/vmdef.nim b/compiler/vmdef.nim
index 7a2bc16072..ea41f36856 100644
--- a/compiler/vmdef.nim
+++ b/compiler/vmdef.nim
@@ -71,6 +71,7 @@ type
     opcSubStr, opcParseFloat, opcConv, opcCast,
     opcQuit,
     opcNarrowS, opcNarrowU,
+    opcSignExtend,
 
     opcAddStrCh,
     opcAddStrStr,
diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim
index d560bce1c9..2f96f60bc8 100644
--- a/compiler/vmgen.nim
+++ b/compiler/vmgen.nim
@@ -938,7 +938,14 @@ proc genMagic(c: PCtx; n: PNode; dest: var TDest; m: TMagic) =
     c.freeTemp(tmp)
     c.freeTemp(tmp2)
 
-  of mShlI: genBinaryABCnarrowU(c, n, dest, opcShlInt)
+  of mShlI:
+    genBinaryABC(c, n, dest, opcShlInt)
+    # genNarrowU modified
+    let t = skipTypes(n.typ, abstractVar-{tyTypeDesc})
+    if t.kind in {tyUInt8..tyUInt32} or (t.kind == tyUInt and t.size < 8):
+      c.gABC(n, opcNarrowU, dest, TRegister(t.size*8))
+    elif t.kind in {tyInt8..tyInt32} or (t.kind == tyInt and t.size < 8):
+      c.gABC(n, opcSignExtend, dest, TRegister(t.size*8))
   of mAshrI: genBinaryABC(c, n, dest, opcAshrInt)
   of mBitandI: genBinaryABC(c, n, dest, opcBitandInt)
   of mBitorI: genBinaryABC(c, n, dest, opcBitorInt)
diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim
index 3f213c5ea3..707cfaa750 100644
--- a/lib/pure/bitops.nim
+++ b/lib/pure/bitops.nim
@@ -32,6 +32,18 @@ const useICC_builtins = defined(icc) and useBuiltins
 const useVCC_builtins = defined(vcc) and useBuiltins
 const arch64 = sizeof(int) == 8
 
+template forwardImpl(impl, arg) {.dirty.} =
+  when sizeof(x) <= 4:
+    when x is SomeSignedInt:
+      impl(cast[uint32](x.int32))
+    else:
+      impl(x.uint32)
+  else:
+    when x is SomeSignedInt:
+      impl(cast[uint64](x.int64))
+    else:
+      impl(x.uint64)
+
 # #### Pure Nim version ####
 
 proc firstSetBit_nim(x: uint32): int {.inline, nosideeffect.} =
@@ -185,8 +197,7 @@ proc countSetBits*(x: SomeInteger): int {.inline, nosideeffect.} =
   # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
   # like GCC and MSVC
   when nimvm:
-    when sizeof(x) <= 4: result = countSetBits_nim(x.uint32)
-    else:                result = countSetBits_nim(x.uint64)
+    result = forwardImpl(countSetBits_nim, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
@@ -216,8 +227,7 @@ proc parityBits*(x: SomeInteger): int {.inline, nosideeffect.} =
   # Can be used a base if creating ASM version.
   # https://stackoverflow.com/questions/21617970/how-to-check-if-value-has-even-parity-of-bits-or-odd
   when nimvm:
-    when sizeof(x) <= 4: result = parity_impl(x.uint32)
-    else:                result = parity_impl(x.uint64)
+    result = forwardImpl(parity_impl, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = builtin_parity(x.uint32).int
@@ -235,8 +245,7 @@ proc firstSetBit*(x: SomeInteger): int {.inline, nosideeffect.} =
     when noUndefined:
       if x == 0:
         return 0
-    when sizeof(x) <= 4: result = firstSetBit_nim(x.uint32)
-    else:                result = firstSetBit_nim(x.uint64)
+    result = forwardImpl(firstSetBit_nim, x)
   else:
     when noUndefined and not useGCC_builtins:
       if x == 0:
@@ -270,8 +279,7 @@ proc fastLog2*(x: SomeInteger): int {.inline, nosideeffect.} =
     if x == 0:
       return -1
   when nimvm:
-    when sizeof(x) <= 4: result = fastlog2_nim(x.uint32)
-    else:                result = fastlog2_nim(x.uint64)
+    result = forwardImpl(fastlog2_nim, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = 31 - builtin_clz(x.uint32).int
@@ -302,8 +310,7 @@ proc countLeadingZeroBits*(x: SomeInteger): int {.inline, nosideeffect.} =
     if x == 0:
       return 0
   when nimvm:
-      when sizeof(x) <= 4: result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint32)
-      else:                result = sizeof(x)*8 - 1 - fastlog2_nim(x.uint64)
+    result = sizeof(x)*8 - 1 - forwardImpl(fastlog2_nim, x)
   else:
     when useGCC_builtins:
       when sizeof(x) <= 4: result = builtin_clz(x.uint32).int - (32 - sizeof(x)*8)
diff --git a/tests/stdlib/tbitops.nim b/tests/stdlib/tbitops.nim
index 8301256c44..1cbab48707 100644
--- a/tests/stdlib/tbitops.nim
+++ b/tests/stdlib/tbitops.nim
@@ -1,10 +1,9 @@
 discard """
-  file: "tbitops.nim"
+  nimout: "OK"
   output: "OK"
 """
 import bitops
 
-
 proc main() =
   const U8 = 0b0011_0010'u8
   const I8 = 0b0011_0010'i8
@@ -80,25 +79,6 @@ proc main() =
   doAssert( U8.rotateLeftBits(3) == 0b10010001'u8)
   doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8)
 
-  static :
-    # test bitopts at compile time with vm
-    doAssert( U8.fastLog2 == 5)
-    doAssert( I8.fastLog2 == 5)
-    doAssert( U8.countLeadingZeroBits == 2)
-    doAssert( I8.countLeadingZeroBits == 2)
-    doAssert( U8.countTrailingZeroBits == 1)
-    doAssert( I8.countTrailingZeroBits == 1)
-    doAssert( U8.firstSetBit == 2)
-    doAssert( I8.firstSetBit == 2)
-    doAssert( U8.parityBits == 1)
-    doAssert( I8.parityBits == 1)
-    doAssert( U8.countSetBits == 3)
-    doAssert( I8.countSetBits == 3)
-    doAssert( U8.rotateLeftBits(3) == 0b10010001'u8)
-    doAssert( U8.rotateRightBits(3) == 0b0100_0110'u8)
-
-
-
   template test_undefined_impl(ffunc: untyped; expected: int; is_static: bool) =
     doAssert( ffunc(0'u8) == expected)
     doAssert( ffunc(0'i8) == expected)
@@ -143,26 +123,67 @@ proc main() =
     doAssert( U64A.rotateLeftBits(64) == U64A)
     doAssert( U64A.rotateRightBits(64) == U64A)
 
-    static:    # check for undefined behavior with rotate by zero.
-      doAssert( U8.rotateLeftBits(0) == U8)
-      doAssert( U8.rotateRightBits(0) == U8)
-      doAssert( U16.rotateLeftBits(0) == U16)
-      doAssert( U16.rotateRightBits(0) == U16)
-      doAssert( U32.rotateLeftBits(0) == U32)
-      doAssert( U32.rotateRightBits(0) == U32)
-      doAssert( U64A.rotateLeftBits(0) == U64A)
-      doAssert( U64A.rotateRightBits(0) == U64A)
-
-      # check for undefined behavior with rotate by integer width.
-      doAssert( U8.rotateLeftBits(8) == U8)
-      doAssert( U8.rotateRightBits(8) == U8)
-      doAssert( U16.rotateLeftBits(16) == U16)
-      doAssert( U16.rotateRightBits(16) == U16)
-      doAssert( U32.rotateLeftBits(32) == U32)
-      doAssert( U32.rotateRightBits(32) == U32)
-      doAssert( U64A.rotateLeftBits(64) == U64A)
-      doAssert( U64A.rotateRightBits(64) == U64A)
+  block:
+    # mask operations
+    var v: uint8
+    v.setMask(0b1100_0000)
+    v.setMask(0b0000_1100)
+    doAssert(v == 0b1100_1100)
+    v.flipMask(0b0101_0101)
+    doAssert(v == 0b1001_1001)
+    v.clearMask(0b1000_1000)
+    doAssert(v == 0b0001_0001)
+    v.clearMask(0b0001_0001)
+    doAssert(v == 0b0000_0000)
+  block:
+    # single bit operations
+    var v: uint8
+    v.setBit(0)
+    doAssert v == 0x0000_0001
+    v.setBit(1)
+    doAssert v == 0b0000_0011
+    v.flipBit(7)
+    doAssert v == 0b1000_0011
+    v.clearBit(0)
+    doAssert v == 0b1000_0010
+    v.flipBit(1)
+    doAssert v == 0b1000_0000
+    doAssert v.testbit(7)
+    doAssert not v.testbit(6)
+  block:
+    # multi bit operations
+    var v: uint8
+    v.setBits(0, 1, 7)
+    doAssert v == 0b1000_0011
+    v.flipBits(2, 3)
+    doAssert v == 0b1000_1111
+    v.clearBits(7, 0, 1)
+    doAssert v == 0b0000_1100
+  block:
+    # signed
+    var v: int8
+    v.setBit(7)
+    doAssert v == -128
+  block:
+    var v: uint64
+    v.setBit(63)
+    doAssert v == 0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000'u64
 
   echo "OK"
 
+block: # not ready for vm because exception is compile error
+  try:
+    var v: uint32
+    var i = 32
+    v.setBit(i)
+    doAssert false
+  except RangeError:
+    discard
+  except:
+    doAssert false
+
+
 main()
+static:
+  # test everything on vm as well
+  main()