The Repo on GitHub

After this morning’s somewhat lackluster performance, I do a bit of off-line refactoring and now I think we’ll do a Lexicon. Can’t hurt, might help.

So, while idling away in an online chat, I moved all the auxiliary methods into the lexicon definers, like this:

    @staticmethod
    def _define_if_else_then(lex):
        def _compile_conditional(forth, word_to_compile, word_list):
            forth.compile_stack.push((word_to_compile, len(word_list) + 1))
            word_list.append(forth.find_word(word_to_compile))
            word_list.append(0)

        def _patch_the_skip(forth, expected, skip_adjustment, word_list):
            key, patch_loc = forth.compile_stack.pop()
            if key not in expected:  # make sure we're patching the construct we think we are
                raise SyntaxError(f'unmatched conditional: found {key}, expected one of {expected}')
            last_loc = len(word_list) + skip_adjustment
            word_list[patch_loc] = last_loc - patch_loc

        def _if(forth):
            _compile_conditional(forth, '*IF', forth.word_list)

        def _else(forth):
            _patch_the_skip(forth, ['*IF'], 1, forth.word_list)
            _compile_conditional(forth, '*ELSE', forth.word_list)

        def _then(forth):
            _patch_the_skip(forth, ['*IF', '*ELSE'], -1, forth.word_list)

        lex.append(PrimaryWord('IF', _if, immediate=True))
        lex.append(PrimaryWord('ELSE', _else, immediate=True))
        lex.append(PrimaryWord('THEN', _then, immediate=True))
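
To see what those three immediate words actually build, it helps to trace a small conditional by hand. The layout below follows directly from the code above; CHOOSE is just an example name:

forth.compile(': CHOOSE IF 100 ELSE 200 THEN ;')
# word list built for CHOOSE, once IF, ELSE and THEN have patched the skips:
#   [*IF, 4, *#, 100, *ELSE, 2, *#, 200]
#     0   1   2    3     4   5   6    7
# *IF pops the flag; when it is zero it skips 4, landing on the *# at
# index 6, so only 200 is pushed. When the flag is true, execution falls
# through to push 100, and *ELSE skips 2, past the false branch.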

Now, as we’ll see below unless I forget, the Forth class is fewer than 60 lines of code counting white space, plus about 150 lines of lex definers.

Except that I found a few more tiny methods lurking; hold on just a moment while I move them too. OK, that’s done. Now, we were talking about a Lexicon object. It’s really little more than a list, and it’s only referenced a few times:

class Forth:
    def __init__(self):
        self.active_words = []
        self.compile_stack = Stack()
        self.lexicon = []
        self.define_primaries()
        self.return_stack = Stack()
        self.stack = Stack()
        self.tokens = None
        self.token_index = 0
        self.word_list = None

    def define_primaries(self):
        lex = self.lexicon
        ...

    @staticmethod
    def _define_colon_semi(lex):
        def _colon(forth):
            forth.compile_stack.push((':', forth.next_token()))

        def _semi(forth):
            key, definition_name = forth.compile_stack.pop()
            word = SecondaryWord(definition_name, forth.word_list[:])
            forth.lexicon.append(word)
            forth.word_list.clear()

        lex.append(PrimaryWord(':', _colon, immediate=True))
        lex.append(PrimaryWord(';', _semi, immediate=True))

    def find_word(self, word):
        return next(filter(lambda d: d.name == word, reversed(self.lexicon)), None)
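
Just to keep the mechanics straight, here is how _colon and _semi play out on a definition like : SQUARE DUP * ; (a hand trace, assuming DUP and * are already defined):

# ':'   immediate; pushes (':', 'SQUARE') onto the compile stack
# DUP   not immediate; appended to forth.word_list
# '*'   not immediate; appended to forth.word_list
# ';'   immediate; pops the name, appends
#       SecondaryWord('SQUARE', [DUP, *]) to the lexicon,
#       and clears forth.word_list ready for the next definition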

We’ll remove the second reference by passing the lexicon into define_primaries. That will pay off a bit in a moment, I think.

class Forth:
    def __init__(self):
        self.active_words = []
        self.compile_stack = Stack()
        self.lexicon = []
        self.define_primaries(self.lexicon)
        self.return_stack = Stack()
        self.stack = Stack()
        self.tokens = None
        self.token_index = 0
        self.word_list = None

    def define_primaries(self, lex):
        self.define_stack_ops(lex)
        self.define_immediate_words(lex)
        self.define_skippers(lex)
        self.define_arithmetic(lex)
        self.define_comparators(lex)
        lex.append(PrimaryWord('SQRT', lambda f: f.stack.push(math.sqrt(f.stack.pop()))))
        lex.append(PrimaryWord('.', lambda f: print(f.stack.pop(), end=' ')))
        lex.append(PrimaryWord('CR', lambda f: print()))
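
A quick check that nothing regressed, at the top level (assuming the arithmetic definer supplies the usual * and friends):

forth = Forth()
forth.compile('16 SQRT .')                   # prints 4.0
forth.compile(': DOUBLE 2 * ;  5 DOUBLE .')  # prints 10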

Commit: tidying.

Now we create a little Lexicon class. We can rely on our existing tests to give it a workout.

class Lexicon:
    def __init__(self):
        self.lexicon = []

    def append(self, word):
        self.lexicon.append(word)

    def find_word(self, word):
        return next(filter(lambda d: d.name == word, reversed(self.lexicon)), None)
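
One behavior worth pinning down: find_word searches from the end, so a redefined word shadows any earlier one. A quick sketch of that, with a stand-in word class (anything with a name attribute will do):

class DummyWord:
    def __init__(self, name):
        self.name = name

lex = Lexicon()
old, new = DummyWord('DUP'), DummyWord('DUP')
lex.append(old)
lex.append(new)
assert lex.find_word('DUP') is new    # the latest definition wins
assert lex.find_word('NOPE') is None  # unknown words come back as None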

Then plug it into the Forth init, and redefine find_word:

class Forth:
    def __init__(self):
        self.active_words = []
        self.compile_stack = Stack()
        self.lexicon = Lexicon()
        self.define_primaries(self.lexicon)
        self.return_stack = Stack()
        self.stack = Stack()
        self.tokens = None
        self.token_index = 0
        self.word_list = None

    def find_word(self, word):
        return self.lexicon.find_word(word)

Green. Commit: New Lexicon class is in use.

Now move all the define methods over to Lexicon. This amounts to a massive edit, all very simple:

    def define_primaries(self, forth):
        self.define_stack_ops(forth)
        self.define_immediate_words(forth)
        self.define_skippers(forth)
        self.define_arithmetic(forth)
        self.define_comparators(forth)
        self.append(PrimaryWord('SQRT', lambda f: f.stack.push(math.sqrt(f.stack.pop()))))
        self.append(PrimaryWord('.', lambda f: print(f.stack.pop(), end=' ')))
        self.append(PrimaryWord('CR', lambda f: print()))

    def define_immediate_words(self, forth):
        self._define_begin_until(forth)
        self._define_colon_semi(forth)
        self._define_do_loop(forth)
        self._define_if_else_then(forth)

And so on. Only one method actually uses forth:

    def define_skippers(self, forth):
        def _active_word(forth):
            return forth.active_words[-1]

        def _next_word(forth):
            return _active_word(forth).next_word()

        def _star_loop(forth):
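            # runtime for LOOP: bump the index and, while it remains below
            # the limit, jump back to the top of the DO loop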
            beginning_of_do_loop = _next_word(forth)
            index = forth.return_stack.pop()
            limit = forth.return_stack.pop()
            index += 1
            if index < limit:
                forth.return_stack.push(limit)
                forth.return_stack.push(index)
                _active_word(forth).skip(beginning_of_do_loop)

        def _zero_branch(forth):
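            # shared runtime for *IF and *UNTIL: branch only when the flag is zero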
            branch_distance = _next_word(forth)
            if forth.stack.pop() == 0:
                _active_word(forth).skip(branch_distance)

        def _dump_stack(forth):
            forth.stack.dump(_active_word(forth).name, _active_word(forth).pc)

        self.append(PrimaryWord('*LOOP', _star_loop))
        self.append(PrimaryWord('*#', lambda f: f.stack.push(_next_word(f))))
        self.append(PrimaryWord('*IF', _zero_branch))
        self.append(PrimaryWord('*ELSE', lambda f: _active_word(f).skip(_next_word(f))))
        self.append(PrimaryWord('*UNTIL', _zero_branch))
        self.append(PrimaryWord('DUMP', _dump_stack))
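        # *DO and I are written in Forth itself, which is why this definer needs forth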
        forth.compile(': *DO SWAP >R >R ;')
        forth.compile(': I R@ ;')

That one needs it because it calls back to the compiler. We may be able to resolve that in a better way. I’ll go back and change the signatures of all the methods that don’t use the forth parameter.
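
After that change, define_primaries presumably comes out something like this (a sketch; only define_skippers keeps the parameter):

    def define_primaries(self, forth):
        self.define_stack_ops()
        self.define_immediate_words()
        self.define_skippers(forth)  # still needs forth: it compiles *DO and I
        self.define_arithmetic()
        self.define_comparators()
        self.append(PrimaryWord('SQRT', lambda f: f.stack.push(math.sqrt(f.stack.pop()))))
        self.append(PrimaryWord('.', lambda f: print(f.stack.pop(), end=' ')))
        self.append(PrimaryWord('CR', lambda f: print()))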

That’s a matter of moments. Commit: remove unused forth parameters. There’s still a bit of arranging that could be done in these definitions, mostly occasioned by the two that are compiled: the words they use must already be defined, while the words they define are used by others. We’ll look at that later, or perhaps even off-line, unless it’s interesting. What is most interesting is that the Forth class, over 200 lines long just a day or so ago, now looks like this:

class Forth:
    def __init__(self):
        self.active_words = []
        self.compile_stack = Stack()
        self.lexicon = Lexicon()
        self.lexicon.define_primaries(self)
        self.return_stack = Stack()
        self.stack = Stack()
        self.tokens = None
        self.token_index = 0
        self.word_list = None

    def next_token(self):
        if self.token_index >= len(self.tokens):
            return None
        token = self.tokens[self.token_index]
        self.token_index += 1
        return token

    def begin(self, word):
        self.active_words.append(word)

    def end(self):
        self.active_words.pop()

    def compile(self, text):
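        # erase any ( comments ) before splitting the input into tokens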
        new_text = re.sub(r'\(.*?\)', ' ', text)
        self.tokens = new_text.split()
        self.token_index = 0
        while self.token_index < len(self.tokens):
            self.compile_a_word().do(self)

    def compile_a_word(self):
        self.word_list = []
        while True:
            token = self.next_token()
            if (definition := self.find_word(token)) is not None:
                if definition.immediate:
                    definition.do(self)
                else:
                    self.word_list.append(definition)
            elif (num := self.compile_number(token)) is not None:
                self.append_number(num, self.word_list)
            else:
                raise SyntaxError(f'Syntax error: "{token}" unrecognized')
            if self.compile_stack.is_empty():
                break
        return SecondaryWord('nameless', self.word_list)

    def append_number(self, num, word_list):
        word_list.append(self.find_word('*#'))
        word_list.append(num)

    def compile_number(self, word):
        try:
            return int(word)
        except ValueError:
            return None

    def find_word(self, word):
        return self.lexicon.find_word(word)

60 lines including white space. Not too bad. We might want to clean up compile_a_word, as it looks a bit jagged. We’ll save that for another day as well, though it is easy to see a couple of extracts that would perk it right up.
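
The extracts might look something like this (a sketch only, not yet committed):

    def compile_a_word(self):
        self.word_list = []
        while True:
            self.compile_token(self.next_token())
            if self.compile_stack.is_empty():
                break
        return SecondaryWord('nameless', self.word_list)

    def compile_token(self, token):
        if (definition := self.find_word(token)) is not None:
            self.compile_word(definition)
        elif (num := self.compile_number(token)) is not None:
            self.append_number(num, self.word_list)
        else:
            raise SyntaxError(f'Syntax error: "{token}" unrecognized')

    def compile_word(self, definition):
        # immediate words execute during compilation; the rest are compiled in
        if definition.immediate:
            definition.do(self)
        else:
            self.word_list.append(definition)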

For now, we’ve got a compile that we can understand, and all the definitional stuff is off to the side. And the lexicon is in its own object as well. It’s the right thing to do, and I think it may give us an extra payoff if we move more toward a classical Forth setup. I haven’t decided how far we might go in that direction. It’s somewhat important to remember why we’re doing this.

Oh, right, it’s 90 percent for the fun, isn’t it? No problem, then.

See you next time!