Creating a Brainfuck parser, whats the best method

2019-04-30 00:03发布

I'm creating a Brainfuck parser (in a BASIC dialect) ultimately to create an interpreter but i've realise it's not as straight forward as i first thought. My problem is that i need a way to accurately parse the matching loop operators within a Brainfuck program. This is an example program:

,>,>++++++++[<------<------>>-]
<<[>[>+>+<<-]>>[<<+>>-]<<<-]
>>>++++++[<++++++++>-],<.>.

'[' = start of loop

']' = end of loop

I need to record the start and end point of each matching loop operator so i can jump around the source as needed. Some loops are alone, some are nested.

What would be the best way to parse this? I was thinking maybe move through the source file creating a 2D array (or such like) recording the start and end positions of each matching operator, but this seems like a lot of 'to'ing and fro'ing' through the source. Is this the best way to do it?

More info: Brainfuck homepage

EDIT: Sample code in any language greatly appreciated.

10条回答
Rolldiameter
2楼-- · 2019-04-30 00:32

Have you considered using a Stack data structure to record "jump points" (i.e. the location of the instruction pointer).

So basically, every time you encounter a "[" you push the current location of the instruction pointer on this stack. Whenever you encounter a "]" you reset the instruction pointer to the value that's currently on the top of the stack. When a loop is complete, you pop it off the stack.

Here is an example in C++ with 100 memory cells. The code handles nested loops recursively and although it is not refined it should illustrate the concepts..

char cells[100] = {0};   // define 100 memory cells
char* cell = cells;      // set memory pointer to first cell
char* ip = 0;            // define variable used as "instruction pointer"

void interpret(static char* program, int* stack, int sp)
{
    int tmp;
    if(ip == 0)              // if the instruction pointer hasn't been initialized
        ip = program;        //  now would be a good time

    while(*ip)               // this runs for as long as there is valid brainF**k 'code'
    {
        if(*ip == ',')
            *cell = getch();
        else if(*ip == '.')
            putch(*cell);
        else if(*ip == '>')
            cell++;
        else if(*ip == '<')
            cell--;
        else if(*ip == '+')
            *cell = *cell + 1;
        else if(*ip == '-')
            *cell = *cell - 1;
        else if(*ip == '[')
        {           
            stack[sp+1] = ip - program;
            *ip++;
            while(*cell != 0)
            {
                interpret(program, stack, sp + 1);
            }
            tmp = sp + 1;
            while((tmp >= (sp + 1)) || *ip != ']')
            {
                *ip++;
                if(*ip == '[')
                    stack[++tmp] = ip - program;
                else if(*ip == ']')
                    tmp--;
            }           
        }
        else if(*ip == ']')
        {
            ip = program + stack[sp] + 1;
            break;
        }
        *ip++;       // advance instruction
    }
}

int _tmain(int argc, _TCHAR* argv[])
{   
    int stack[100] = {0};  // use a stack of 100 levels, modeled using a simple array
    interpret(",>,>++++++++[<------<------>>-]<<[>[>+>+<<-]>>[<<+>>-]<<<-]>>>++++++[<++++++++>-],<.>.", stack, 0);
    return 0;
}

EDIT I just went over the code again and I realized there was a bug in the while loop that would 'skip' parsed loops if the value of the pointer is 0. This is where I made the change:

while((tmp >= (sp + 1)) || *ip != ']')     // the bug was tmp > (sp + 1)
{
ip++;
if(*ip == '[')
    stack[++tmp] = ip - program;
else if(*ip == ']')
    tmp--;
}

Below is an implementation of the same parser but without using recursion:

char cells[100] = {0};
void interpret(static char* program)
{
    int cnt;               // cnt is a counter that is going to be used
                           //     only when parsing 0-loops
    int stack[100] = {0};  // create a stack, 100 levels deep - modeled
                           //     using a simple array - and initialized to 0
    int sp = 0;            // sp is going to be used as a 'stack pointer'
    char* ip = program;    // ip is going to be used as instruction pointer
                           //    and it is initialized at the beginning or program
    char* cell = cells;    // cell is the pointer to the 'current' memory cell
                           //      and as such, it is initialized to the first
                           //      memory cell

    while(*ip)             // as long as ip point to 'valid code' keep going
    {
        if(*ip == ',')
            *cell = getch();
        else if(*ip == '.')
            putch(*cell);
        else if(*ip == '>')
            cell++;
        else if(*ip == '<')
            cell--;
        else if(*ip == '+')
            *cell = *cell + 1;
        else if(*ip == '-')
            *cell = *cell - 1;
        else if(*ip == '[')
        {           
            if(stack[sp] != ip - program)
                stack[++sp] = ip - program;

            *ip++;

            if(*cell != 0)
                continue;
            else
            {                   
                cnt = 1;
                while((cnt > 0) || *ip != ']')
                {
                    *ip++;
                    if(*ip == '[')
                    cnt++;
                    else if(*ip == ']')
                    cnt--;
                }
                sp--;
            }
        }else if(*ip == ']')
        {               
            ip = program + stack[sp];
            continue;
        }
        *ip++;
    }
}

int _tmain(int argc, _TCHAR* argv[])
{   
    // define our program code here..
    char *prg = ",>++++++[<-------->-],[<+>-]<.";

    interpret(prg);
    return 0;
}
查看更多
我欲成王,谁敢阻挡
3楼-- · 2019-04-30 00:33

The canonical method for parsing a context-free grammar is to use a stack. Anything else and you're working too hard and risking correctness.

You may want to use a parser generator like cup or yacc, as a lot of the dirty work is done for you, but with a language as simple as BF, it may be overkill.

查看更多
Anthone
4楼-- · 2019-04-30 00:37

And here's the same code I gave as an example earlier in C++, but ported to VB.NET. I decided to post it here since Gary mentioned he was trying to write his parser in a BASIC dialect.

Public cells(100) As Byte

Sub interpret(ByVal prog As String)
    Dim program() As Char

    program = prog.ToCharArray()  ' convert the input program into a Char array

    Dim cnt As Integer = 0        ' a counter to be used when skipping over 0-loops                                      
    Dim stack(100) As Integer     ' a simple array to be used as stack
    Dim sp As Integer = 0         ' stack pointer (current stack level)
    Dim ip As Integer = 0         ' Instruction pointer (index of current instruction)
    Dim cell As Integer = 0       ' index of current memory

    While (ip < program.Length)   ' loop over the program
        If (program(ip) = ",") Then
            cells(cell) = CByte(AscW(Console.ReadKey().KeyChar))
        ElseIf (program(ip) = ".") Then
            Console.Write("{0}", Chr(cells(cell)))
        ElseIf (program(ip) = ">") Then
            cell = cell + 1
        ElseIf (program(ip) = "<") Then
            cell = cell - 1
        ElseIf (program(ip) = "+") Then
            cells(cell) = cells(cell) + 1
        ElseIf (program(ip) = "-") Then
            cells(cell) = cells(cell) - 1
        ElseIf (program(ip) = "[") Then
            If (stack(sp) <> ip) Then
                sp = sp + 1
                stack(sp) = ip
            End If

            ip = ip + 1

            If (cells(cell) <> 0) Then
                Continue While
            Else
                cnt = 1
                While ((cnt > 0) Or (program(ip) <> "]"))
                    ip = ip + 1
                    If (program(ip) = "[") Then
                        cnt = cnt + 1
                    ElseIf (program(ip) = "]") Then
                        cnt = cnt - 1
                    End If
                End While
                sp = sp - 1
            End If
        ElseIf (program(ip) = "]") Then
            ip = stack(sp)
            Continue While
        End If
        ip = ip + 1
    End While
End Sub

Sub Main()
    ' invoke the interpreter
    interpret(",>++++++[<-------->-],[<+>-]<.")
End Sub
查看更多
疯言疯语
5楼-- · 2019-04-30 00:40

This question is a bit old, but I wanted to say that the answers here helped me decide the route to take when writing my own Brainf**k interpreter. Here's the final product:

#include <stdio.h>

char *S[9999], P[9999], T[9999],
    **s=S, *p=P, *t=T, c, x;

int main() {
    fread(p, 1, 9999, stdin);
    for (; c=*p; ++p) {
        if (c == ']') {
            if (!x)
                if (*t) p = *(s-1);
                else --s;
            else --x;
        } else if (!x) {
            if (c == '[')
                if (*t) *(s++) = p;
                else ++x;
            }

            if (c == '<') t--;
            if (c == '>') t++;
            if (c == '+') ++*t;
            if (c == '-') --*t;
            if (c == ',') *t = getchar();
            if (c == '.') putchar(*t);
        }
    }
}
查看更多
登录 后发表回答