NASM Subroutine Call Segmentation Fault

2019-09-11 11:16发布

I'm working on a school project involving NASM, and while the language makes some kind of sense to me I always end up having a problem that makes no sense.

The program I'm writing involves 1 command line argument, which must be a string of 0s, 1s, and/or 2s. If it is not, an error message is displayed and the program ends.

If there are no errors, the "suffixes" of the string are displayed in order.

Example:

"./sufsort 00100102" should produce

sorted suffixes:
00100102
00102
0100102
0102
02
100102
102
2

Right now, I'm getting a segmentation fault when I try to call my subroutine sufcmp

%include "asm_io.inc"

section .data
    arg_error_msg: db "Error, incorrect number of arguments. Only 2 arguments are allowed.", 0
    bad_char_msg: db "Error, invalid character used. Only 0, 1, or 2 are allowed.", 0
    bad_length_msg: db "Error, invalid input length. Length must be between 1 and 30.", 0
    sorted_msg: db "sorted suffixes:", 0
    ; y is an array of suffix indeces, which are sorted later by the main method
    y: dd 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30

section .bss
    argc: resd 1        ; number of command line arguments
    X: resb 31          ; array copy of the input string
    N: resd 1           ; length of the input string

section .text
    global asm_main

sufcmp:                         ; sufcmp(String Z, int i, int j)
    enter 0, 0
    pusha

    mov edx, dword [ebp+16]     ; edx = String Z
    mov esi, dword [ebp+12]     ; esi = int i
    mov edi, dword [ebp+8]      ; edi = int j

    CMP_LOOP:
        cmp byte [edx+esi], byte 0          ; if Z[i] = null, ret -1
        je CMP_NEGATIVE
        cmp byte [edx+edi], byte 0          ; if Z[j] = null, ret 1
        je CMP_POSITIVE
        mov al, byte [edx+edi]
        cmp byte [edx+esi], al  ; if Z[i] < Z[j], ret -1
        jl CMP_NEGATIVE
        cmp byte [edx+esi], al  ; if Z[i] > Z[j], ret 1
        jg CMP_POSITIVE
        inc esi                             ; increment i and j
        inc edi
        jmp CMP_LOOP                        ; repeat

    CMP_NEGATIVE:
        popa
        mov eax, dword -1
        jmp CMP_DONE

    CMP_POSITIVE:
        popa
        mov eax, dword 1
        jmp CMP_DONE

    CMP_DONE:
        leave
        ret

asm_main:                           ; sufsort(String inputString)
    enter 0, 0
    pusha

    ARG_CHECK:                      ; Check number of arguments
        mov eax, dword [ebp+8]      ; eax = # of line arguments
        cmp eax, dword 2            ; if there are just 2 line argument, skip the error
        je CHAR_CHECK
        mov eax, arg_error_msg      ; display an error message
        call print_string
        call print_nl
        jmp DONE                    ; terminate the program

    CHAR_CHECK:                     ; Check characters & get length of string
        mov ebx, dword [ebp+12]
        mov ecx, dword [ebx+4]      ; eax = input string
        mov edi, dword 0            ; edi will be the counter
        CHAR_LOOP:
            cmp byte [ecx], byte 0  ; if Z[edi] = null, end the loop
            je CHAR_LOOP_DONE
            mov al, byte [ecx]
            cmp al, '0'     ; if byte [ecx} != '0', '1', '2', complain
            je GOOD_CHAR
            cmp al, '1'
            je GOOD_CHAR
            cmp al, '2'
            je GOOD_CHAR
            BAD_CHAR:
                mov eax, bad_char_msg   ; display an error message
                call print_string
                call print_nl
                jmp DONE                ; terminate the program
            GOOD_CHAR:
                mov [X + edi], al       ; copy the character into X[edi]
            inc ecx
            inc edi
            jmp CHAR_LOOP
        CHAR_LOOP_DONE:
            mov [N], edi                ; N = length of Z
            mov [X + edi], byte 0       ; add a null character to the end of X

    LENGTH_CHECK:               ; Check the length of the input string
        cmp dword [N], 1        ; if N < 1 or N > 30, stop the program
        jl BAD_LENGTH
        cmp dword [N], 30
        jg BAD_LENGTH
        jmp SHOW_COPY           ; else, continue
        BAD_LENGTH:
            mov eax, bad_length_msg     ; display an error message
            call print_string
            call print_nl
            jmp DONE                    ; terminate the program

    SHOW_COPY:              ; output X to check if it copied properly
        mov eax, X
        call print_string
        call print_nl

    BUBBLE_SORT:                ; Bubble sort, which sorts substrings using array y
        mov esi, [N]            ; esi = i (counts from N to 0)
        mov edi, dword 1        ; edi = j (counts from 1 to i)
        BUBBLE_SORT_I:
            cmp esi, dword 0            ; if i = 0, end the outer loop
            je SORTED_SUFFIXES
            BUBBLE_SORT_J:
                cmp esi, edi            ; if i = j, end the inner loop
                je BUBBLE_SORT_J_DONE
                push dword [X]      ; call sufcmp, which takes 3 args (String Z, int i, int j)
                push dword [edi]
                push dword [esi]
                call sufcmp
                add esp, 12             ; move esp back 12 bytes, to undo the 3 pushes
                cmp eax, dword -1
                je NO_SWAP
                    mov ebx, dword [y + edi-1]          ; temp = y[j-1]
                    mov edx, dword [y + edi-1]          ; comparison temp
                    mov edx, dword [y + edi]            ; y[j-1] = y[j]
                    mov [y + edi], ebx
                NO_SWAP:
                inc edi                 ; j += 1
                jmp BUBBLE_SORT_J
            BUBBLE_SORT_J_DONE:
            dec esi             ; i -= 1
            jmp BUBBLE_SORT_I

    SORTED_SUFFIXES_T:          ; print "sorted suffixes"
        mov eax, sorted_msg
        call print_string
        call print_nl

    SORTED_SUFFIXES:
        mov esi, dword 0        ; esi = i
        PRINT_STR_LOOP:
            cmp esi, dword [N-1]            ; if i = N-1, end the outer loop
            je DONE
            mov eax, dword [X]              ; move address of X to eax
            add eax, [y + esi]              ; move eax to address of X[y[i]]
            call print_nl                   ; put each suffix on a separate line
            inc esi                         ; i += 1
            jmp PRINT_STR_LOOP

    DONE:
        popa
        leave
        ret

And I am getting this

enter image description here

I can't find anything that would cause a segmentation fault, since I don't manipulate the stack in any way besides pushing the function arguments and restoring esp after the subroutine returns.

1条回答
【Aperson】
2楼-- · 2019-09-11 12:10

Well, you will need debugger, as there are several problems in your code, and it's a bit too large for me to run it in head accurately (like 100% guarding stack/etc), so only few things I see:

In CHAR_CHECK: loop test the length during loop, so you don't overwrite .bss memory when somebody gives you too long string. You can move the length check right under CHAR_LOOP:, when edi is out of bounds, stop looping.

Also add the null character before storing N (swap those two mov lines), as N is stored right after X in memory, so with 31 (?) long input string you will overwrite N to 0 (this particularly is not exploitable, but the copy of long string may be).

jl/jg used in length check, but length is unsigned, so jb/ja would made more sense to me (not a bug, signed test >=1 && <= 30 will fail at the same time as unsigned one, just doesn't feel right if you have programming OCD).

good/bad char test - you can make it a bit shorter by doing only two tests ('0' <= char && char <= '2'), as ['0', '1', '2'] are values [48, 49, 50].

And now more serious stuff follows.

In I/J loop you don't reset J, so logic of your inner loop will be flawed.

push dword [X] I don't think this does what you think it does. The address of string is X, [X] is content of memory (chars of string). (this will make the sufcmp code to segfault early, when it will try to access "address" '0010', which is not legal.

In the swap, for example mov edx, dword [y + edi] ... you increment edi by 1, but Y is defined as array of dwords, so everywhere the indexing should be edi*4.

cmp esi, dword [N-1] ; if i = N-1 uhm, nope, it will compare esi with value at address N-1, so if [N] contains 16 and ahead of it is single zero byte, the cmp will compare esi with value 4096 (memory at N-1 is: 00 10 00 00 00, so [N] == 0x00000010 and [N-1] == 0x00001000).

mov eax, dword [X] ; move address of X to eax - no, lea would do what the comment says. mov will fetch content of at address X.

add eax, [y + esi] - again using +-1 indexing with dword array.

And you forget to call print_string, only new line is called.

You can rewrite that part as:

mov eax,[y + esi*4]   ; eax = Y[i]
lea eax,[X + eax]     ; eax = address X + Y[i]

And, as I'm cruel and tired, I kept the my biggest worry for last note.

I don't think this will work at all. Your bubble sort is iterating over original X string (well, it's not, but once you fix the argument issues with correct addresses, it will).

Every time. So you keep shuffling content of Y array according to original string in every iteration.


The most important part of my answer is the first sentence. You absolutely need debugger. If you felt like the language made some sense to you up till now, your source doesn't prove that. Actually I can see a glimpses of understanding, so you are basically right, but you would have to be total prodigy whizz kid to be able to pull this without debugger within reasonable time. I would grade you only as above-average, maybe good, but far away from prodigious premises.

If you still want to go without debugger, change technique. Don't write so much of code without compiling + running it. Do it by much much much smaller steps, and keep displaying all sort of things, to be sure your new 3 lines of code do what they should. For example if you would create empty stub for sufcmp just printing the string from pointer, it would segfault right after trying to access the string.

That would maybe give you better hint, than when almost final app code is segfaulting, so instead of hunting problem on recent 10 lines you have 50+ to reason about.


EDIT: algorithm suggestion:

Unless you really must use bubble sort, I would avoid that, and do the brute-force dumb "count" variant of sort.

i:[0,N): count[i] = countif(j:[0,N): X[j] < X[i])
i:[0,N): j:[0,N): if (i == count[j]) print X[j]

I hope you will be able to decipher it... it means that I would calculate for every suffix how many suffixes are "smaller" lexicographically, ie. full O(N2) loopy loop (which is in reality N^3, because comparing strings is another O(N) ... but who cares with N=30, even N5 would be bearable).

Then to print suffixes in correct order you simply search the count array again and again, first time for 0 smaller-count (that's the smallest one), then for 1, ... etc. Till you print all of them.

Actually you may loop through all suffixes, calculate how many are smaller, and put index of that suffix into sorted[smaller_count], so for printing you will just loop through sorted array from 0 to N-1, no searching involved.

查看更多
登录 后发表回答