I'm working on a school project involving NASM, and while the language makes some kind of sense to me I always end up having a problem that makes no sense.
The program I'm writing involves 1 command line argument, which must be a string of 0s, 1s, and/or 2s. If it is not, an error message is displayed and the program ends.
If there are no errors, the "suffixes" of the string are displayed in order.
Example:
"./sufsort 00100102" should produce
sorted suffixes:
00100102
00102
0100102
0102
02
100102
102
2
Right now, I'm getting a segmentation fault when I try to call my subroutine sufcmp
%include "asm_io.inc"
section .data
arg_error_msg: db "Error, incorrect number of arguments. Only 2 arguments are allowed.", 0
bad_char_msg: db "Error, invalid character used. Only 0, 1, or 2 are allowed.", 0
bad_length_msg: db "Error, invalid input length. Length must be between 1 and 30.", 0
sorted_msg: db "sorted suffixes:", 0
; y is an array of suffix indeces, which are sorted later by the main method
y: dd 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
section .bss
argc: resd 1 ; number of command line arguments
X: resb 31 ; array copy of the input string
N: resd 1 ; length of the input string
section .text
global asm_main
sufcmp: ; sufcmp(String Z, int i, int j)
enter 0, 0
pusha
mov edx, dword [ebp+16] ; edx = String Z
mov esi, dword [ebp+12] ; esi = int i
mov edi, dword [ebp+8] ; edi = int j
CMP_LOOP:
cmp byte [edx+esi], byte 0 ; if Z[i] = null, ret -1
je CMP_NEGATIVE
cmp byte [edx+edi], byte 0 ; if Z[j] = null, ret 1
je CMP_POSITIVE
mov al, byte [edx+edi]
cmp byte [edx+esi], al ; if Z[i] < Z[j], ret -1
jl CMP_NEGATIVE
cmp byte [edx+esi], al ; if Z[i] > Z[j], ret 1
jg CMP_POSITIVE
inc esi ; increment i and j
inc edi
jmp CMP_LOOP ; repeat
CMP_NEGATIVE:
popa
mov eax, dword -1
jmp CMP_DONE
CMP_POSITIVE:
popa
mov eax, dword 1
jmp CMP_DONE
CMP_DONE:
leave
ret
asm_main: ; sufsort(String inputString)
enter 0, 0
pusha
ARG_CHECK: ; Check number of arguments
mov eax, dword [ebp+8] ; eax = # of line arguments
cmp eax, dword 2 ; if there are just 2 line argument, skip the error
je CHAR_CHECK
mov eax, arg_error_msg ; display an error message
call print_string
call print_nl
jmp DONE ; terminate the program
CHAR_CHECK: ; Check characters & get length of string
mov ebx, dword [ebp+12]
mov ecx, dword [ebx+4] ; eax = input string
mov edi, dword 0 ; edi will be the counter
CHAR_LOOP:
cmp byte [ecx], byte 0 ; if Z[edi] = null, end the loop
je CHAR_LOOP_DONE
mov al, byte [ecx]
cmp al, '0' ; if byte [ecx} != '0', '1', '2', complain
je GOOD_CHAR
cmp al, '1'
je GOOD_CHAR
cmp al, '2'
je GOOD_CHAR
BAD_CHAR:
mov eax, bad_char_msg ; display an error message
call print_string
call print_nl
jmp DONE ; terminate the program
GOOD_CHAR:
mov [X + edi], al ; copy the character into X[edi]
inc ecx
inc edi
jmp CHAR_LOOP
CHAR_LOOP_DONE:
mov [N], edi ; N = length of Z
mov [X + edi], byte 0 ; add a null character to the end of X
LENGTH_CHECK: ; Check the length of the input string
cmp dword [N], 1 ; if N < 1 or N > 30, stop the program
jl BAD_LENGTH
cmp dword [N], 30
jg BAD_LENGTH
jmp SHOW_COPY ; else, continue
BAD_LENGTH:
mov eax, bad_length_msg ; display an error message
call print_string
call print_nl
jmp DONE ; terminate the program
SHOW_COPY: ; output X to check if it copied properly
mov eax, X
call print_string
call print_nl
BUBBLE_SORT: ; Bubble sort, which sorts substrings using array y
mov esi, [N] ; esi = i (counts from N to 0)
mov edi, dword 1 ; edi = j (counts from 1 to i)
BUBBLE_SORT_I:
cmp esi, dword 0 ; if i = 0, end the outer loop
je SORTED_SUFFIXES
BUBBLE_SORT_J:
cmp esi, edi ; if i = j, end the inner loop
je BUBBLE_SORT_J_DONE
push dword [X] ; call sufcmp, which takes 3 args (String Z, int i, int j)
push dword [edi]
push dword [esi]
call sufcmp
add esp, 12 ; move esp back 12 bytes, to undo the 3 pushes
cmp eax, dword -1
je NO_SWAP
mov ebx, dword [y + edi-1] ; temp = y[j-1]
mov edx, dword [y + edi-1] ; comparison temp
mov edx, dword [y + edi] ; y[j-1] = y[j]
mov [y + edi], ebx
NO_SWAP:
inc edi ; j += 1
jmp BUBBLE_SORT_J
BUBBLE_SORT_J_DONE:
dec esi ; i -= 1
jmp BUBBLE_SORT_I
SORTED_SUFFIXES_T: ; print "sorted suffixes"
mov eax, sorted_msg
call print_string
call print_nl
SORTED_SUFFIXES:
mov esi, dword 0 ; esi = i
PRINT_STR_LOOP:
cmp esi, dword [N-1] ; if i = N-1, end the outer loop
je DONE
mov eax, dword [X] ; move address of X to eax
add eax, [y + esi] ; move eax to address of X[y[i]]
call print_nl ; put each suffix on a separate line
inc esi ; i += 1
jmp PRINT_STR_LOOP
DONE:
popa
leave
ret
And I am getting this
I can't find anything that would cause a segmentation fault, since I don't manipulate the stack in any way besides pushing the function arguments and restoring esp after the subroutine returns.
Well, you will need debugger, as there are several problems in your code, and it's a bit too large for me to run it in head accurately (like 100% guarding stack/etc), so only few things I see:
In
CHAR_CHECK:
loop test the length during loop, so you don't overwrite.bss
memory when somebody gives you too long string. You can move the length check right underCHAR_LOOP:
, whenedi
is out of bounds, stop looping.Also add the null character before storing
N
(swap those twomov
lines), asN
is stored right afterX
in memory, so with 31 (?) long input string you will overwriteN
to0
(this particularly is not exploitable, but the copy of long string may be).jl/jg
used in length check, but length is unsigned, sojb/ja
would made more sense to me (not a bug, signed test>=1 && <= 30
will fail at the same time as unsigned one, just doesn't feel right if you have programming OCD).good/bad char test - you can make it a bit shorter by doing only two tests
('0' <= char && char <= '2')
, as['0', '1', '2']
are values[48, 49, 50]
.And now more serious stuff follows.
In I/J loop you don't reset J, so logic of your inner loop will be flawed.
push dword [X]
I don't think this does what you think it does. The address of string isX
,[X]
is content of memory (chars of string). (this will make thesufcmp
code to segfault early, when it will try to access "address"'0010'
, which is not legal.In the swap, for example
mov edx, dword [y + edi]
... you incrementedi
by 1, butY
is defined as array of dwords, so everywhere the indexing should beedi*4
.cmp esi, dword [N-1] ; if i = N-1
uhm, nope, it will compareesi
with value at addressN-1
, so if[N]
contains 16 and ahead of it is single zero byte, thecmp
will compareesi
with value4096
(memory at N-1 is:00 10 00 00 00
, so[N] == 0x00000010
and[N-1] == 0x00001000
).mov eax, dword [X] ; move address of X to eax
- no,lea
would do what the comment says.mov
will fetch content of at addressX
.add eax, [y + esi]
- again using +-1 indexing withdword
array.And you forget to call print_string, only new line is called.
You can rewrite that part as:
And, as I'm cruel and tired, I kept the my biggest worry for last note.
I don't think this will work at all. Your bubble sort is iterating over original
X
string (well, it's not, but once you fix the argument issues with correct addresses, it will).Every time. So you keep shuffling content of
Y
array according to original string in every iteration.The most important part of my answer is the first sentence. You absolutely need debugger. If you felt like the language made some sense to you up till now, your source doesn't prove that. Actually I can see a glimpses of understanding, so you are basically right, but you would have to be total prodigy whizz kid to be able to pull this without debugger within reasonable time. I would grade you only as above-average, maybe good, but far away from prodigious premises.
If you still want to go without debugger, change technique. Don't write so much of code without compiling + running it. Do it by much much much smaller steps, and keep displaying all sort of things, to be sure your new 3 lines of code do what they should. For example if you would create empty stub for
sufcmp
just printing the string from pointer, it would segfault right after trying to access the string.That would maybe give you better hint, than when almost final app code is segfaulting, so instead of hunting problem on recent 10 lines you have 50+ to reason about.
EDIT: algorithm suggestion:
Unless you really must use bubble sort, I would avoid that, and do the brute-force dumb "count" variant of sort.
I hope you will be able to decipher it... it means that I would calculate for every suffix how many suffixes are "smaller" lexicographically, ie. full O(N2) loopy loop (which is in reality N^3, because comparing strings is another O(N) ... but who cares with N=30, even N5 would be bearable).
Then to print suffixes in correct order you simply search the
count
array again and again, first time for0
smaller-count (that's the smallest one), then for1
, ... etc. Till you print all of them.Actually you may loop through all suffixes, calculate how many are smaller, and put index of that suffix into
sorted[smaller_count]
, so for printing you will just loop throughsorted
array from 0 to N-1, no searching involved.