Finding JIT Optimizer Bugs using SMT Solvers and Fuzzing

much

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_add(x, 10)
    if a < 15:
        if x < 6:
            return 0
        return 1
    return 2

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_add(x, 10)
    if a < 15:
        return 0
    return 2

from z3 import BitVec, Implies, prove
x = BitVec('x', 64)
a = x + 10
cond1 = a < 15
cond2 = x < 6
prove(Implies(cond1, cond2))

counterexample
[x = 9223372036854775803]
def add_to_solver(self, ops, state):
    for op in ops:
        if op.type != 'v': # is it an operation with a result
            res = self.newvar(op)
        else: # or does it return void
            res = None

       # ...

        # convert arguments
        if op.numargs() == 1:
            arg0 = self.convertarg(op, 0)
        elif op.numargs() == 2:
            arg0 = self.convertarg(op, 0)
            arg1 = self.convertarg(op, 1)

        # compute results
        if opname == "int_add":
            expr = arg0 + arg1
        elif opname == "int_sub":
            expr = arg0 - arg1
        elif opname == "int_mul":
            expr = arg0 * arg1
        elif opname == "int_and":
            expr = arg0 & arg1
        elif opname == "int_or":
            expr = arg0 | arg1
        elif opname == "int_xor":
            expr = arg0 ^ arg1

        # ...  more operations, some shown below

        self.solver.add(res == expr)

def newvar(self, box, repr=None):
    # ... some logic around making the string representation
    # somewhat nicer omitted
    result = z3.BitVec(repr, LONG_BIT)
    self.box_to_z3[box] = result
    return result

def convert(self, box):
    if isinstance(box, ConstInt):
        return z3.BitVecVal(box.getint(), LONG_BIT)
    return self.box_to_z3[box]

def convertarg(self, box, arg):
    return self.convert(box.getarg(arg))

def cond(self, z3expr):
    return z3.If(z3expr, TRUEBV, FALSEBV)


def add_to_solver(self, ops, state):
        # ... start as above

        # more cases
        elif opname == "int_eq":
            expr = self.cond(arg0 == arg1)
        elif opname == "int_ne":
            expr = self.cond(arg0 != arg1)
        elif opname == "int_lt":
            expr = self.cond(arg0 < arg1)
        elif opname == "int_le":
            expr = self.cond(arg0 <= arg1)
        elif opname == "int_gt":
            expr = self.cond(arg0 > arg1)
        elif opname == "int_ge":
            expr = self.cond(arg0 >= arg1)
        elif opname == "int_is_true":
            expr = self.cond(arg0 != FALSEBV)
        elif opname == "uint_lt":
            expr = self.cond(z3.ULT(arg0, arg1))
        elif opname == "uint_le":
            expr = self.cond(z3.ULE(arg0, arg1))
        elif opname == "uint_gt":
            expr = self.cond(z3.UGT(arg0, arg1))
        elif opname == "uint_ge":
            expr = self.cond(z3.UGE(arg0, arg1))
        elif opname == "int_is_zero":
            expr = self.cond(arg0 == FALSEBV)

        # ... rest as above

def guard_to_condition(self, guard, state):
    opname = guard.getopname()
    if opname == "guard_true":
        return self.convertarg(guard, 0) == TRUEBV
    elif opname == "guard_false":
        return self.convertarg(guard, 0) == FALSEBV
    elif opname == "guard_value":
        return self.convertarg(guard, 0) == self.convertarg(guard, 1)

    # ... some more exist, shown below

def add_to_solver(self, ops, state):

        # ... more cases

        elif opname == "int_add_ovf":
            expr = arg0 + arg1
            m = z3.SignExt(LONG_BIT, arg0) + z3.SignExt(LONG_BIT, arg1)
            state.no_ovf = m == z3.SignExt(LONG_BIT, expr)
        elif opname == "int_sub_ovf":
            expr = arg0 - arg1
            m = z3.SignExt(LONG_BIT, arg0) - z3.SignExt(LONG_BIT, arg1)
            state.no_ovf = m == z3.SignExt(LONG_BIT, expr)
        elif opname == "int_mul_ovf":
            expr = arg0 * arg1
            m = z3.SignExt(LONG_BIT, arg0) * z3.SignExt(LONG_BIT, arg1)
            state.no_ovf = m == z3.SignExt(LONG_BIT, expr)

        # ...

def guard_to_condition(self, guard, state):

    # ... more cases

    elif opname == "guard_no_overflow":
        assert state.no_ovf is not None
        return state.no_ovf
    elif opname == "guard_overflow":
        assert state.no_ovf is not None
        return z3.Not(state.no_ovf)

    # ... more cases

# input                       # optimized
[i0]                          [i0]
i1 = int_add(i0, 10)          i1 = int_add(i0, 10)
i2 = int_lt(i1, 15)           i2 = int_lt(i1, 15)
guard_true(i2)                guard_true(i2)
i3 = int_lt(i0, 6)            jump(0)
guard_true(i3)
jump(0)

i1unoptimized == input_i0 + 10
i2unoptimized == If(i1unoptimized < 15, 1, 0)
i1optimized == input_i0 + 10
i2optimized == If(i1optimized < 15, 1, 0)

i1unoptimized == input_i0 + 10
i2unoptimized == If(i1unoptimized < 15, 1, 0)
i1optimized == input_i0 + 10
i2optimized == If(i1optimized < 15, 1, 0)
i1optimized == 1
i2optimized == 1

...
i3unoptimized == If(input_i0 < 6, 1, 0)

input_i0 = 9223372036854775800
i1unoptimized = 9223372036854775810
i2unoptimized = 0
i1optimized = 9223372036854775810
i2optimized = 1
i3unoptimized = 0

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_sub(0, x)
    if a < 0:
        if x > 0:
            return 0
        return 1
    return 2

import __pypy__

def wrong(x):
    a = __pypy__.intop.int_sub(0, x)
    if a < 0:
        return 0
    return 2

[i0, i1, i2, i3, i4, i5] # example values: 9, 11, -8, -95, 46, 57
i6 = int_add_ovf(i3, i0) # -86
guard_no_overflow()
i7 = int_sub(i2, -35/ci) # 27
i8 = uint_ge(i3, i5) # 1
guard_true(i8)
i9 = int_lt(i7, i8) # 0
i10 = int_mul_ovf(34/ci, i7) # 918
guard_no_overflow()
i11 = int_and(i10, 63/ci) # 22
i12 = int_rshift(i3, i11) # -1
i13 = int_is_zero(i7) # 0
i14 = int_is_true(i13) # 0
guard_false(i13)
i15 = int_lt(i8, i4) # 1
i16 = int_and(i6, i0) # 8
i17 = uint_ge(i6, -6/ci) # 0
finish()
[a, b]
c = int_add(a, b)
r = int_sub_ovf(c, b)
guard_no_ovf()
finish(r)

[a, b]
finish(a)

[a, b]
c = int_add_ovf(a, b)
guard_no_ovf()
r = int_sub(c, b)
finish(r)

[a, b]
c = int_add_ovf(a, b)
guard_no_ovf()
finish(a)

  $ conda create -c conda-forge -n my-pypy-env pypy python=3.8
  $ conda activate my-pypy-env

  $ conda config --set channel_priority strict

import pytest
from typing import Optional, Any


class Value:
    def find(self):
        raise NotImplementedError("abstract")

    def _set_forwarded(self, value):
        raise NotImplementedError("abstract")


class Operation(Value):
    def __init__(
        self, name: str, args: list[Value]
    ):
        self.name = name
        self.args = args
        self.forwarded = None
        self.info = None

    def __repr__(self):
        return (
            f"Operation({self.name}, "
            f"{self.args}, {self.forwarded}, "
            f"{self.info})"
        )

    def find(self) -> Value:
        op = self
        while isinstance(op, Operation):
            next = op.forwarded
            if next is None:
                return op
            op = next
        return op

    def arg(self, index):
        return self.args[index].find()

    def make_equal_to(self, value: Value):
        self.find()._set_forwarded(value)

    def _set_forwarded(self, value: Value):
        self.forwarded = value


class Constant(Value):
    def __init__(self, value: Any):
        self.value = value

    def __repr__(self):
        return f"Constant({self.value})"

    def find(self):
        return self

    def _set_forwarded(self, value: Value):
        assert (
            isinstance(value, Constant)
            and value.value == self.value
        )

class Block(list):
    def opbuilder(opname):
        def wraparg(arg):
            if not isinstance(arg, Value):
                arg = Constant(arg)
            return arg
        def build(self, *args):
            # construct an Operation, wrap the
            # arguments in Constants if necessary
            op = Operation(opname,
                [wraparg(arg) for arg in args])
            # add it to self, the basic block
            self.append(op)
            return op
        return build

    # a bunch of operations we support
    add = opbuilder("add")
    mul = opbuilder("mul")
    getarg = opbuilder("getarg")
    dummy = opbuilder("dummy")
    lshift = opbuilder("lshift")
    # some new one for this post
    alloc = opbuilder("alloc")
    load = opbuilder("load")
    store = opbuilder("store")
    print = opbuilder("print")

def bb_to_str(bb: Block, varprefix: str = "var"):
    def arg_to_str(arg: Value):
        if isinstance(arg, Constant):
            return str(arg.value)
        else:
            return varnames[arg]

    varnames = {}
    res = []
    for index, op in enumerate(bb):
        var = f"{varprefix}{index}"
        varnames[op] = var
        arguments = ", ".join(
            arg_to_str(op.arg(i))
                for i in range(len(op.args))
        )
        strop = f"{var} = {op.name}({arguments})"
        res.append(strop)
    return "\n".join(res)

var0 = getarg(0)
obj0 = alloc()
store(obj0, 0, var0)
var1 = load(obj0, 0)
print(var1)
def test_interpret():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(obj, 0, var0)
    var1 = bb.load(obj, 0)
    bb.print(var1)
    assert interpret(bb, 17) == 17

class Object:
    def __init__(self):
        self.contents: dict[int, Any] = {}

    def store(self, idx : int, value : Any):
        self.contents[idx] = value

    def load(self, idx : int):
        return self.contents[idx]

def get_num(op, index=1):
    assert isinstance(op.arg(index), Constant)
    return op.arg(index).value

def interpret(bb : Block, *args : tuple[Any]):
    def argval(op, i):
        arg = op.arg(i)
        if isinstance(arg, Constant):
            return arg.value
        else:
            assert isinstance(arg, Operation)
            return arg.info

    for index, op in enumerate(bb):
        if op.name == "getarg":
            res = args[get_num(op, 0)]
        elif op.name == "alloc":
            res = Object()
        elif op.name == "load":
            fieldnum = get_num(op)
            res = argval(op, 0).load(fieldnum)
        elif op.name == "store":
            obj = argval(op, 0)
            fieldnum = get_num(op)
            fieldvalue = argval(op, 2)
            obj.store(fieldnum, fieldvalue)
            # no result, only side effect
            continue
        elif op.name == "print":
            res = argval(op, 0)
            print(res)
            return res
        else:
            raise NotImplementedError(
                f"{op.name} not supported")
        op.info = res

var0 = getarg(0)
obj0 = alloc()
store(obj0, 0, var0)
var1 = load(obj0, 0)
print(var1)
var0 = getarg(0)
print(var0)
def test_remove_unused_allocation():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(obj, 0, var0)
    var1 = bb.load(obj, 0)
    bb.print(var1)
    opt_bb = optimize_alloc_removal(bb)
    # the virtual object looks like this:
    #  obj
    # ┌──────────┐
    # │ 0: var0  │
    # └──────────┘
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = print(optvar0)"""

class VirtualObject:
    def __init__(self):
        self.contents: dict[int, Value] = {}

    def store(self, idx, value):
        self.contents[idx] = value

    def load(self, idx):
        return self.contents[idx]

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            field = get_num(op)
            op.make_equal_to(info.load(field))
            continue
        if op.name == "store":
            info = op.arg(0).info
            field = get_num(op)
            info.store(field, op.arg(2))
            continue
        opt_bb.append(op)
    return opt_bb

def test_remove_two_allocations():
    bb = Block()
    var0 = bb.getarg(0)
    obj0 = bb.alloc()
    sto1 = bb.store(obj0, 0, var0)
    obj1 = bb.alloc()
    sto2 = bb.store(obj1, 0, obj0)
    var1 = bb.load(obj1, 0)
    var2 = bb.load(var1, 0)
    bb.print(var2)
    # the virtual objects look like this:
    #  obj0
    # ┌──────┐
    # │ 0: ╷ │
    # └────┼─┘
    #      │
    #      ▼
    #     obj1
    #   ┌─────────┐
    #   │ 0: var0 │
    #   └─────────┘
    # therefore
    # var1 is the same as obj0
    # var2 is the same as var0
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = print(optvar0)"""

def test_materialize():
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto = bb.store(var0, 0, obj)
    opt_bb = optimize_alloc_removal(bb)
    #  obj is virtual, without any fields
    # ┌───────┐
    # │ empty │
    # └───────┘
    # then we store a reference to obj into
    # field 0 of var0. Since var0 is not virtual,
    # obj escapes, so we have to put it back
    # into the optimized basic block
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar0, 0, optvar1)"""
    # so far, fails like this:
    # the line:
    # info.store(field, op.arg(2))
    # produces an AttributeError because info
    # is None

def materialize(opt_bb, value: Operation) -> None:
    assert not isinstance(value, Constant)
    assert isinstance(value, Operation)
    info = value.info
    assert isinstance(info, VirtualObject)
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            field = get_num(op)
            op.make_equal_to(info.load(field))
            continue
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
            else: # not virtual
                # first materialize the
                # right hand side
                materialize(opt_bb, op.arg(2))
                # then emit the store via
                # the general path below
        opt_bb.append(op)
    return opt_bb

def test_dont_materialize_twice():
    # obj is again an empty virtual object,
    # and we store it into var0 *twice*.
    # this should only materialize it once
    bb = Block()
    var0 = bb.getarg(0)
    obj = bb.alloc()
    sto0 = bb.store(var0, 0, obj)
    sto1 = bb.store(var0, 0, obj)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar0, 0, optvar1)
optvar3 = store(optvar0, 0, optvar1)"""
    # fails so far: the operations that we get
    # at the moment are:
    # optvar0 = getarg(0)
    # optvar1 = alloc()
    # optvar2 = store(optvar0, 0, optvar1)
    # optvar3 = alloc()
    # optvar4 = store(optvar0, 0, optvar3)
    # ie the object is materialized twice,
    # which is incorrect

def materialize(opt_bb, value: Operation) -> None:
    assert not isinstance(value, Constant)
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # but only once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_non_virtuals():
    # in this example we store a non-virtual var1
    # into another non-virtual var0
    # this should just lead to no optimization at
    # all
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    sto = bb.store(var0, 0, var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = store(optvar0, 0, optvar1)"""

def test_materialization_constants():
    # in this example we store the constant 17
    # into the non-virtual var0
    # again, this will not be optimized
    bb = Block()
    var0 = bb.getarg(0)
    sto = bb.store(var0, 0, 17)
    opt_bb = optimize_alloc_removal(bb)
    # the previous line fails so far, triggering
    # the assert:
    # assert not isinstance(value, Constant)
    # in materialize
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = store(optvar0, 0, 17)"""

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # but only once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_fields():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.getarg(1)
    obj = bb.alloc()
    contents0 = bb.store(obj, 0, 8)
    contents1 = bb.store(obj, 1, var1)
    sto = bb.store(var0, 0, obj)

    # the virtual obj looks like this
    #  obj
    # ┌──────┬──────────┐
    # │ 0: 8 │ 1: var1  │
    # └──────┴──────────┘
    # then it needs to be materialized
    # this is the first example where a virtual
    # object that we want to materialize has any
    # content and is not just an empty object
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = getarg(1)
optvar2 = alloc()
optvar3 = store(optvar2, 0, 8)
optvar4 = store(optvar2, 1, optvar1)
optvar5 = store(optvar0, 0, optvar2)"""
    # fails so far! the operations we get
    # at the moment are:
    # optvar0 = getarg(0)
    # optvar1 = getarg(1)
    # optvar2 = alloc()
    # optvar3 = store(optvar0, 0, optvar2)
    # which is wrong, because the store operations
    # into optvar1 got lost

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # put the content back
    for idx, val in info.contents.items():
        # re-create store operation
        opt_bb.store(value, idx, val)
    # only materialize once
    value.info = None

# optimize_alloc_removal unchanged

def test_materialize_chained_objects():
    bb = Block()
    var0 = bb.getarg(0)
    obj0 = bb.alloc()
    obj1 = bb.alloc()
    contents = bb.store(obj0, 0, obj1)
    const = bb.store(obj1, 0, 1337)
    sto = bb.store(var0, 0, obj0)
    #  obj0
    # ┌──────┐
    # │ 0: ╷ │
    # └────┼─┘
    #      │
    #      ▼
    #     obj1
    #   ┌─────────┐
    #   │ 0: 1337 │
    #   └─────────┘
    # now obj0 escapes
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = alloc()
optvar3 = store(optvar2, 0, 1337)
optvar4 = store(optvar1, 0, optvar2)
optvar5 = store(optvar0, 0, optvar1)"""
    # fails in an annoying way! the resulting
    # basic block is not in proper SSA form
    # so printing it fails. The optimized
    # block would look like this:
    # optvar0 = getarg(0)
    # optvar1 = alloc()
    # optvar3 = store(optvar1, 0, optvar2)
    # optvar4 = store(optvar0, 0, optvar1)
    # where optvar2 is an ``alloc`` Operation
    # that is not itself in the output block

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # put the content back
    for idx, val in sorted(info.contents.items()):
        # materialize recursively
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)
    # only materialize once
    value.info = None

# optimize_alloc_removal unchanged

def test_object_graph_cycles():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.store(var1, 0, var1)
    var3 = bb.store(var0, 1, var1)
    #   ┌────────┐
    #   ▼        │
    #  obj0      │
    # ┌──────┐   │
    # │ 0: ╷ │   │
    # └────┼─┘   │
    #      │     │
    #      └─────┘
    # obj0 points to itself, and then it is
    # escaped
    opt_bb = optimize_alloc_removal(bb)
    # the previous line fails with an
    # InfiniteRecursionError
    # materialize calls itself, infinitely

    # what we want is instead this output:
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = store(optvar1, 0, optvar1)
optvar3 = store(optvar0, 1, optvar1)"""

def materialize(opt_bb, value: Operation) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if info is None:
        return # already materialized
    assert value.name == "alloc"
    # put the alloc operation back into the trace
    opt_bb.append(value)
    # only materialize once
    value.info = None
    # put the content back
    for idx, val in sorted(info.contents.items()):
        # materialize recursively
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)

def test_load_non_virtual():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.load(var0, 0)
    bb.print(var1)
    # the next line fails in the line
    # op.make_equal_to(info.load(field))
    # because info is None
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = load(optvar0, 0)
optvar2 = print(optvar1)"""

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                op.make_equal_to(info.load(field))
                continue
            # otherwise not virtual, use the
            # general path below
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
            else: # not virtual
                # first materialize the
                # right hand side
                materialize(opt_bb, op.arg(2))
                # then emit the store via
                # the general path below
        opt_bb.append(op)
    return opt_bb

def test_materialize_on_other_ops():
    # materialize not just on store
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.print(var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = alloc()
optvar2 = print(optvar1)"""
    # again, the resulting basic block is not in
    # valid SSA form

# materialize is unchanged
def materialize(opt_bb, value: Value) -> None:
    if isinstance(value, Constant):
        return
    assert isinstance(value, Operation)
    info = value.info
    if not info:
        # Already materialized
        return
    assert value.name == "alloc"
    opt_bb.append(value)
    value.info = None
    for idx, val in sorted(info.contents.items()):
        materialize(opt_bb, val)
        opt_bb.store(value, idx, val)

def optimize_alloc_removal(bb):
    opt_bb = Block()
    for op in bb:
        if op.name == "alloc":
            op.info = VirtualObject()
            continue
        if op.name == "load":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                op.make_equal_to(info.load(field))
                continue
        if op.name == "store":
            info = op.arg(0).info
            if info: # virtual
                field = get_num(op)
                info.store(field, op.arg(2))
                continue
        # materialize all the arguments of
        # operations that are put into the
        # output basic block
        for arg in op.args:
            materialize(opt_bb, arg.find())
        opt_bb.append(op)
    return opt_bb

def test_sink_allocations():
    bb = Block()
    var0 = bb.getarg(0)
    var1 = bb.alloc()
    var2 = bb.store(var1, 0, 123)
    var3 = bb.store(var1, 1, 456)
    var4 = bb.load(var1, 0)
    var5 = bb.load(var1, 1)
    var6 = bb.add(var4, var5)
    var7 = bb.store(var1, 0, var6)
    var8 = bb.store(var0, 1, var1)
    opt_bb = optimize_alloc_removal(bb)
    assert bb_to_str(opt_bb, "optvar") == """\
optvar0 = getarg(0)
optvar1 = add(123, 456)
optvar2 = alloc()
optvar3 = store(optvar2, 0, optvar1)
optvar4 = store(optvar2, 1, 456)
optvar5 = store(optvar0, 1, optvar2)"""

Background: Python Integers in the PyPy JIT

Background: Bounds Analysis in PyPy's JIT

Motivation: A JIT Miscompilation

How could we have avoided this bug?

Z3 Proof of Concept

SMT Checking of the JIT Optimizer

Finding the Bug, Again

Second bug

Generating Random Traces

Bugs Found

Future Work and Conclusion

Acknowledgements

PyPy v7.3.10: release of python 2.7, 3.8, and 3.9

What is PyPy?

What else is new?

Greatest Hits

Personal Favourites

Interpreter

Version 1: Naive Attempt

Version 2: Re-Materializing Allocations

Version 3: Don't Materialize Twice

Version 4: Materialization of Constants

Version 5: Materializing Fields

Version 6: Recursive Materialization

Version 7: Dealing with Object Cycles

Version 8: Loading from non-virtual objects

Version 9 (Final): Materialize on Other Operations

Conclusion

Footnotes

Topics and goals

What is a sprint?

Location

Exact times

Registration

The PyPy blogposts

Recent Posts

Archives

Tags