PyPy JIT for Aarch64

2019-07-25 18:59 · is

char a
char b

testchar a -> L1
any
char b
end
any
L1: char c
char d
end

choice L1
char a
char b
commit
end
L1: char c
char d
end

...
        elif instruction.name == "any":
            if index >= len(inputstring):
                fail = True
            else:
                pc += 1
                index += 1

...

...
        if instruction.name == "char":
            if index >= len(inputstring):
                fail = True
            elif instruction.character == inputstring[index]:
                pc += 1
                index += 1
            else:
                fail = True
...

...
        elif instruction.name == "jmp":
            pc = instruction.goto
...

...
        elif instruction.name == "choice":
            pc += 1
            choice_points = choice_points.push_choice_point(
                instruction.goto, index, captures)
...

gc.disable
gc.collect_step
gc-disable
gc
gc.disable()
gc.collect_step()
gc.disable()
gc.disable()
with nogc():"
nogc()
"""
A pure python REPL that can parse simple math expressions
"""
while True:
    print(eval(raw_input("> ")))

$ python2 section-1-repl/main.py
> 3 + 4 * ((1.0/(2 * 3 * 4)) + (1.0/(4 * 5 * 6)) - (1.0/(6 * 7 * 8)))
3.1880952381

def repl():
    while True:
        print eval(raw_input('> '))


def entry_point(argv):
    repl()
    return 0


def target(driver, *args):
    return entry_point, None

$ rpython ./section-1-repl/targetrepl1.py
...SNIP...
[translation:ERROR] AnnotatorError: 

object with a __call__ is not RPython: <built-in function raw_input>
Processing block:
 block@18 is a <class 'rpython.flowspace.flowcontext.SpamBlock'> 
 in (target1:2)repl 
 containing the following operations: 
       v0 = simple_call((builtin_function raw_input), ('> ')) 
       v1 = simple_call((builtin_function eval), v0) 
       v2 = str(v1) 
       v3 = simple_call((function rpython_print_item), v2) 
       v4 = simple_call((function rpython_print_newline)) 

from rpython.rlib import rfile

LINE_BUFFER_LENGTH = 1024


def repl(stdin):
    while True:
        print "> ",
        line = stdin.readline(LINE_BUFFER_LENGTH)
        print line


def entry_point(argv):
    stdin, stdout, stderr = rfile.create_stdio()
    try:
        repl(stdin)
    except:
        return 0


def target(driver, *args):
    return entry_point, None

$ rpython --opt=2 section-1-repl/targetrepl2.py
...SNIP...
[Timer] Timings:
[Timer] annotate                       ---  1.2 s
[Timer] rtype_lltype                   ---  0.9 s
[Timer] backendopt_lltype              ---  0.6 s
[Timer] stackcheckinsertion_lltype     ---  0.0 s
[Timer] database_c                     --- 15.0 s
[Timer] source_c                       ---  1.6 s
[Timer] compile_c                      ---  1.9 s
[Timer] =========================================
[Timer] Total:                         --- 21.2 s

$ ./target2-c 
1 + 2
>  1 + 2

^C

from rpython.rlib import rfile

LINE_BUFFER_LENGTH = 1024


def repl(stdin, stdout):
    while True:
        stdout.write("> ")
        line = stdin.readline(LINE_BUFFER_LENGTH)
        print '"%s"' % line.strip()


def entry_point(argv):
    stdin, stdout, stderr = rfile.create_stdio()
    try:
        repl(stdin, stdout)
    except:
        pass
    return 0


def target(driver, *args):
    return entry_point, None

$ ./target3-c 
> hello this seems better
"hello this seems better"
> ^C

OP_CONSTANT
OP_RETURN
OP_NEGATE
OP_ADD
OP_SUBTRACT
OP_MULTIPLY
OP_DIVIDE

class OpCode:
    OP_CONSTANT = 0
    OP_RETURN = 1
    OP_NEGATE = 2
    OP_ADD = 3
    OP_SUBTRACT = 4
    OP_MULTIPLY = 5
    OP_DIVIDE = 6

class Chunk:
    code = None

    def __init__(self):
        self.code = []

    def write_chunk(self, byte):
        self.code.append(byte)

    def disassemble(self, name):
        print "== %s ==\n" % name
        i = 0
        while i < len(self.code):
            i = disassemble_instruction(self, i)

def leftpad_string(string, width, char=" "):
    l = len(string)
    if l > width:
        return string
    return char * (width - l) + string

def entry_point(argv):
    bytecode = Chunk()
    bytecode.write_chunk(OpCode.OP_ADD)
    bytecode.write_chunk(OpCode.OP_RETURN)
    bytecode.disassemble("hello world")
    return 0

def target(driver, *args):
    driver.exe_name = "vm1"
    return entry_point, None

$ ./vm1 
== hello world ==

0000 OP_ADD       
0001 OP_RETURN    

    def add_constant(self, value):
        self.constants.append(value)
        return len(self.constants) - 1

    bytecode = Chunk()
    constant = bytecode.add_constant(1.0)
    bytecode.write_chunk(OpCode.OP_CONSTANT)
    bytecode.write_chunk(constant)

    constant = bytecode.add_constant(2.0)
    bytecode.write_chunk(OpCode.OP_CONSTANT)
    bytecode.write_chunk(constant)

    bytecode.write_chunk(OpCode.OP_ADD)
    bytecode.write_chunk(OpCode.OP_RETURN)

    bytecode.disassemble("adding constants")

== adding constants ==

0000 OP_CONSTANT  (00)        '1'
0002 OP_CONSTANT  (01)        '2'
0004 OP_ADD       
0005 OP_RETURN

class VM(object):
    STACK_MAX_SIZE = 256
    stack = None
    stack_top = 0

    def __init__(self):
        self._reset_stack()

    def _reset_stack(self):
        self.stack = [0] * self.STACK_MAX_SIZE
        self.stack_top = 0

    def _stack_push(self, value):
        assert self.stack_top < self.STACK_MAX_SIZE
        self.stack[self.stack_top] = value
        self.stack_top += 1

    def _stack_pop(self):
        assert self.stack_top >= 0
        self.stack_top -= 1
        return self.stack[self.stack_top]

    def _print_stack(self):
        print "         ",
        if self.stack_top <= 0:
            print "[]",
        else:
            for i in range(self.stack_top):
                print "[ %s ]" % self.stack[i],
        print

    def _run(self):
        while True:
            instruction = self._read_byte()

            if instruction == OpCode.OP_RETURN:
                print "%s" % self._stack_pop()
                return InterpretResultCode.INTERPRET_OK
            elif instruction == OpCode.OP_CONSTANT:
                constant = self._read_constant()
                self._stack_push(constant)
            elif instruction == OpCode.OP_ADD:
                self._binary_op(self._stack_add)    

    def _read_byte(self):
        instruction = self.chunk.code[self.ip]
        self.ip += 1
        return instruction

    def _read_constant(self):
        constant_index = self._read_byte()
        return self.chunk.constants[constant_index]

    @specialize.arg(1)
    def _binary_op(self, operator):
        op2 = self._stack_pop()
        op1 = self._stack_pop()
        result = operator(op1, op2)
        self._stack_push(result)

    @staticmethod
    def _stack_add(op1, op2):
        return op1 + op2

    def interpret_chunk(self, chunk):
        if self.debug_trace:
            print "== VM TRACE =="
        self.chunk = chunk
        self.ip = 0
        try:
            result = self._run()
            return result
        except:
            return InterpretResultCode.INTERPRET_RUNTIME_ERROR

def entry_point(argv):
    bytecode = Chunk()
    constant = bytecode.add_constant(1)
    bytecode.write_chunk(OpCode.OP_CONSTANT)
    bytecode.write_chunk(constant)
    constant = bytecode.add_constant(2)
    bytecode.write_chunk(OpCode.OP_CONSTANT)
    bytecode.write_chunk(constant)
    bytecode.write_chunk(OpCode.OP_ADD)
    bytecode.write_chunk(OpCode.OP_RETURN)

    vm = VM()
    vm.interpret_chunk(bytecode)

    return 0

./vm3
== VM TRACE ==
          []
0000 OP_CONSTANT  (00)        '1'
          [ 1 ]
0002 OP_CONSTANT  (01)        '2'
          [ 1 ] [ 2 ]
0004 OP_ADD       
          [ 3 ]
0005 OP_RETURN    
3

class TokenTypes:
    ERROR = 0
    EOF = 1
    LEFT_PAREN = 2
    RIGHT_PAREN = 3
    MINUS = 4
    PLUS = 5
    SLASH = 6
    STAR = 7
    NUMBER = 8

class Token(object):

    def __init__(self, start, length, token_type):
        self.start = start
        self.length = length
        self.type = token_type

Token(0, 1, TokenTypes.LEFT_PAREN)
Token(2, 1, TokenTypes.NUMBER)
Token(4, 1, TokenTypes.PLUS)
Token(6, 3, TokenTypes.NUMBER)
Token(10, 1, TokenTypes.RIGHT_PAREN)

class Scanner(object):

    def __init__(self, source):
        self.source = source
        self.start = 0
        self.current = 0

    def _number(self):
        while self._peek().isdigit():
            self.advance()

        # Look for decimal point
        if self._peek() == '.' and self._peek_next().isdigit():
            self.advance()
            while self._peek().isdigit():
                self.advance()

        return self._make_token(TokenTypes.NUMBER)

    def _peek(self):
        if self._is_at_end():
            return '\0'
        return self.source[self.current]

    def _peek_next(self):
        if self._is_at_end():
            return '\0'
        return self.source[self.current+1]

    def _is_at_end(self):
        return len(self.source) == self.current

    def advance(self):
        self.current += 1
        return self.source[self.current - 1]

    def _make_token(self, token_type):
        return Token(
            start=self.start,
            length=(self.current - self.start),
            token_type=token_type
        )

    def scan_token(self):
        # skip any whitespace
        while True:
            char = self._peek()
            if char in ' \r\t\n':
                self.advance()
            break

        self.start = self.current

        if self._is_at_end():
            return self._make_token(TokenTypes.EOF)

        char = self.advance()

        if char.isdigit():
            return self._number()

        if char == '(':
            return self._make_token(TokenTypes.LEFT_PAREN)
        if char == ')':
            return self._make_token(TokenTypes.RIGHT_PAREN)
        if char == '-':
            return self._make_token(TokenTypes.MINUS)
        if char == '+':
            return self._make_token(TokenTypes.PLUS)
        if char == '/':
            return self._make_token(TokenTypes.SLASH)
        if char == '*':
            return self._make_token(TokenTypes.STAR)

        return ErrorToken("Unexpected character", self.current)

    def get_token_string(self, token):
        if isinstance(token, ErrorToken):
            return token.message
        else:
            end_loc = token.start + token.length
            assert end_loc < len(self.source)
            assert end_loc > 0
            return self.source[token.start:end_loc]

from scanner import Scanner, TokenTypes, TokenTypeToName


def entry_point(argv):

    source = "(   1   + 2.0 )"

    scanner = Scanner(source)
    t = scanner.scan_token()
    while t.type != TokenTypes.EOF and t.type != TokenTypes.ERROR:
        print TokenTypeToName[t.type],
        if t.type == TokenTypes.NUMBER:
            print "(%s)" % scanner.get_token_string(t),
        print
        t = scanner.scan_token()
    return 0

$ ./scanner1 
LEFT_PAREN
NUMBER (1)
PLUS
NUMBER (2.0)
RIGHT_PAREN

from rpython.rlib import rfile
from scanner import Scanner, TokenTypes, TokenTypeToName

LINE_BUFFER_LENGTH = 1024


def repl(stdin, stdout):
    while True:
        stdout.write("> ")
        source = stdin.readline(LINE_BUFFER_LENGTH)

        scanner = Scanner(source)
        t = scanner.scan_token()
        while t.type != TokenTypes.EOF and t.type != TokenTypes.ERROR:
            print TokenTypeToName[t.type],
            if t.type == TokenTypes.NUMBER:
                print "(%s)" % scanner.get_token_string(t),
            print
            t = scanner.scan_token()


def entry_point(argv):
    stdin, stdout, stderr = rfile.create_stdio()
    try:
        repl(stdin, stdout)
    except:
        pass
    return 0

$ ./scanner2
> (3 *4) - -3
LEFT_PAREN
NUMBER (3)
STAR
NUMBER (4)
RIGHT_PAREN
MINUS
MINUS
NUMBER (3)
> ^C

(3 + 2) - (7 * 2)

OP_CONSTANT  (00) '3.000000'
OP_CONSTANT  (01) '2.000000'
OP_ADD

OP_CONSTANT  (02) '7.000000'
OP_CONSTANT  (03) '2.000000'
OP_MULTIPLY

OP_CONSTANT  (00) '3.000000'
OP_CONSTANT  (01) '2.000000'
OP_ADD
OP_CONSTANT  (02) '7.000000'
OP_CONSTANT  (03) '2.000000'
OP_MULTIPLY
OP_SUBTRACT
OP_RETURN

class Parser(object):
    def __init__(self):
        self.had_error = False
        self.panic_mode = False
        self.current = None
        self.previous = None

class Compiler(object):

    def __init__(self, source):
        self.parser = Parser()
        self.scanner = Scanner(source)
        self.chunk = Chunk()

    def emit_byte(self, byte):
        self.current_chunk().write_chunk(byte)

rules = [
    ParseRule(None,              None,            Precedence.NONE),   # ERROR
    ParseRule(None,              None,            Precedence.NONE),   # EOF
    ParseRule(Compiler.grouping, None,            Precedence.CALL),   # LEFT_PAREN
    ParseRule(None,              None,            Precedence.NONE),   # RIGHT_PAREN
    ParseRule(Compiler.unary,    Compiler.binary, Precedence.TERM),   # MINUS
    ParseRule(None,              Compiler.binary, Precedence.TERM),   # PLUS
    ParseRule(None,              Compiler.binary, Precedence.FACTOR), # SLASH
    ParseRule(None,              Compiler.binary, Precedence.FACTOR), # STAR
    ParseRule(Compiler.number,   None,            Precedence.NONE),   # NUMBER
]

2 + 3 * 4

class Precedence(object):
    NONE = 0
    DEFAULT = 1
    TERM = 2        # + -
    FACTOR = 3      # * /
    UNARY = 4       # ! - +
    CALL = 5        # ()
    PRIMARY = 6

    def parse_precedence(self, precedence):
        # parses any expression of a given precedence level or higher
        self.advance()
        prefix_rule = self._get_rule(self.parser.previous.type).prefix
        prefix_rule(self)

    def unary(self):
        op_type = self.parser.previous.type
        # Compile the operand
        self.parse_precedence(Precedence.UNARY)
        # Emit the operator instruction
        if op_type == TokenTypes.MINUS:
            self.emit_byte(OpCode.OP_NEGATE)

from rpython.rlib import rfile
from compiler import Compiler

LINE_BUFFER_LENGTH = 1024


def entry_point(argv):
    stdin, stdout, stderr = rfile.create_stdio()

    try:
        while True:
            stdout.write("> ")
            source = stdin.readline(LINE_BUFFER_LENGTH)
            compiler = Compiler(source, debugging=True)
            compiler.compile()
    except:
        pass
    return 0

$ ./compiler1 
> (2/4 + 1/2)
== code ==

0000 OP_CONSTANT  (00) '2.000000'
0002 OP_CONSTANT  (01) '4.000000'
0004 OP_DIVIDE    
0005 OP_CONSTANT  (02) '1.000000'
0007 OP_CONSTANT  (00) '2.000000'
0009 OP_DIVIDE    
0010 OP_ADD       
0011 OP_RETURN

from rpython.rlib import rfile
from compiler import Compiler
from vm import VM

LINE_BUFFER_LENGTH = 4096


def entry_point(argv):
    stdin, stdout, stderr = rfile.create_stdio()
    vm = VM()
    try:
        while True:
            stdout.write("> ")
            source = stdin.readline(LINE_BUFFER_LENGTH)
            if source:
                compiler = Compiler(source, debugging=False)
                compiler.compile()
                vm.interpret_chunk(compiler.chunk)
    except:
        pass
    return 0


def target(driver, *args):
    driver.exe_name = "calc"
    return entry_point, None

$ ./calc 
> 2--3
== VM TRACE ==
          []
0000 OP_CONSTANT  (00) '2.000000'
          [ 2.000000 ]
0002 OP_CONSTANT  (01) '3.000000'
          [ 2.000000 ] [ 3.000000 ]
0004 OP_NEGATE    
          [ 2.000000 ] [ -3.000000 ]
0005 OP_SUBTRACT  
          [ 5.000000 ]
0006 OP_RETURN    
5.000000

$ ./calc
> 3 + 4 * ((1/(2 * 3 * 4)) + (1/(4 * 5 * 6)) - (1/(6 * 7 * 8)) + (1/(8 * 9 * 10)) - (1/(10 * 11 * 12)) + (1/(12 * 13 * 14)) - (1/(14 * 15 * 16)) + (1/(16 * 17 * 18)) - (1/(18 * 19 * 20)) + (1/(20 * 21 * 22)) - (1/(22 * 23 * 24)) + (1/(24 * 25 * 26)) - (1/(26 * 27 * 28)) + (1/(28 * 29 * 30)) - (1/(30 * 31 * 32)) + (1/(32 * 33 * 34)) - (1/(34 * 35 * 36)) + (1/(36 * 37 * 38)) - (1/(38 * 39 * 40)) + (1/(40 * 41 * 42)) - (1/(42 * 43 * 44)) + (1/(44 * 45 * 46)) - (1/(46 * 47 * 48)) + (1/(48 * 49 * 50)) - (1/(50 * 51 * 52)) + (1/(52 * 53 * 54)) - (1/(54 * 55 * 56)) + (1/(56 * 57 * 58)) - (1/(58 * 59 * 60)) + (1/(60 * 61 * 62)) - (1/(62 * 63 * 64)) + (1/(64 * 65 * 66)) - (1/(66 * 67 * 68)) + (1/(68 * 69 * 70)) - (1/(70 * 71 * 72)) + (1/(72 * 73 * 74)) - (1/(74 * 75 * 76)) + (1/(76 * 77 * 78)) - (1/(78 * 79 * 80)) + (1/(80 * 81 * 82)) - (1/(82 * 83 * 84)) + (1/(84 * 85 * 86)) - (1/(86 * 87 * 88)) + (1/(88 * 89 * 90)) - (1/(90 * 91 * 92)) + (1/(92 * 93 * 94)) - (1/(94 * 95 * 96)) + (1/(96 * 97 * 98)) - (1/(98 * 99 * 100)) + (1/(100 * 101 * 102)))

== VM TRACE ==
          []
0000 OP_CONSTANT  (00) '3.000000'
          [ 3.000000 ]
0002 OP_CONSTANT  (01) '4.000000'
...SNIP...
0598 OP_CONSTANT  (101) '102.000000'
          [ 3.000000 ] [ 4.000000 ] [ 0.047935 ] [ 1.000000 ] [ 10100.000000 ] [ 102.000000 ]
0600 OP_MULTIPLY  
          [ 3.000000 ] [ 4.000000 ] [ 0.047935 ] [ 1.000000 ] [ 1030200.000000 ]
0601 OP_DIVIDE    
          [ 3.000000 ] [ 4.000000 ] [ 0.047935 ] [ 0.000001 ]
0602 OP_ADD       
          [ 3.000000 ] [ 4.000000 ] [ 0.047936 ]
0603 OP_MULTIPLY  
          [ 3.000000 ] [ 0.191743 ]
0604 OP_ADD       
          [ 3.191743 ]
0605 OP_RETURN    
3.191743

Operator	Description
lpeg.P(string)	Matches string literally
lpeg.P(n)	Matches exactly n characters
lpeg.P(-n)	Matches at most n characters
lpeg.S(string)	Matches any character in string (Set)
lpeg.R(“xy”)	Matches any character between x and y (Range)
pattern^n	Matches at least n repetitions of pattern
pattern^-n	Matches at most n repetitions of pattern
pattern1 * pattern2	Matches pattern1 followed by pattern2
pattern1 + pattern2	Matches pattern1 or pattern2 (ordered choice)
pattern1 - pattern2	Matches pattern1 if pattern2 does not match
-pattern	Equivalent to ("" - pattern)

Operation	What it produces
lpeg.C(pattern)	the match for patten plus all captures made by pattern
lpeg.Cp()	the current position (matches the empty string)

0	1	2	3	4	5	6	7	8
"("	"5"	"1"	"."	"0"	"5"	"+"	"2"	")"
	^			^

0	1	2	3	4	5	6	7	8
"("	"5"	"1"	"."	"0"	"5"	"+"	"2"	")"
	^				^

Parsing Expression Grammars

LPeg

The LPeg Library

The VM

The Bytecode

generic character matching bytecodes

control flow bytecodes

Failure Handling, Backtracking and Choice Points

Captures

The capture object

The RPython Implementation

Benchmarking Result

Conclusion

What is PyPy?

What else is new?

What is PyPy?

What else is new?

PyPy for low-latency systems

PyPy Sprint February 4th-9th 2019 in Düsseldorf

Topics and goals

Location

Exact times

Registration

The road ahead

A REPL

A virtual machine

OpCodes

Chunks

Chunks of data

Emulation

Scanning the source

Scanner

Compiling expressions

References

Test compilation

End to end

The PyPy blogposts

Recent Posts

Archives

Tags