How to use strings in emu8086

2020-01-27 08:24发布

问题:

I need help with strings in emu8086. I have initialized a string:

str1 db "0neWord"

And I have an empty string:

str2 db ?

Now I need to check all letters in str1 and copy to str2, but if the letter in str1 is 0, I need to replace it with O. If not, I need to just copy the letter.

How can I do this?

回答1:

str2 db ? is not an empty string. db stands for "define byte", and that ? means single uninitialized byte.

The db "0neWord" is an assembler convenience: it compiles into a series of bytes defined as '0', 'n', 'e', ..., 'd'. There is no such thing as a "string" type in assembler; everything is compiled into machine code, which can be viewed as a series of bytes. What "type" of data is stored in memory depends on the instructions used to access it, but in memory everything is just a series of bytes and can be viewed as such.

This is probably a good time for you to check emu8086 debugger documentation, and look at the memory at address str1 after loading the code into debugger, to see how it did compile.

So as soon as you copy the second byte from str1 to str2, you will start overwriting memory you did not expect to overwrite, because str2 reserves only a single byte.

To allocate a fixed-size memory buffer you can use, for example, str2 db 100 DUP(?). This repeats the ? definition 100 times, reserving 100 bytes of memory there; whatever is defined next in the same section will be placed at address str2+100 and beyond.


To do anything with str1 "string" you need to know:

1) its address in memory, the x86 assembler has many ways how to get that, but two most straightforward are:

  • mov <r16>,OFFSET str1 (r16 is any 16b register)
  • lea <r16>,[str1] (does the same thing in this case)

2) its size OR its structure. You didn't give the data any structure: for example, nul-terminated strings end with a byte of value 0, and the DOS int 21h, ah=9 service that displays a string expects it to be terminated with a dollar sign '$'. So you need at least the size. The assembler's EQU directive, together with the "current position" symbol, can be used to calculate the size of str1 like this:

str1 db "0neWord"    ; 7 character bytes; no terminator byte is stored after them
str1size EQU $-str1  ; "$" is the assembler's current-address counter, so $-str1 is the byte length of str1

Hm, I tried to verify this first, by reading some docs, but it's very difficult for me to find any good complete emu8086 documentation (found something like "reference", and it's completely missing description of assembler directives).

I wonder why so many people still land on this, instead of linux + nasm/similar, which are completely free, open source and documented.

So let's hope the emu8086 works like MASM/TASM and that I still recall that syntax correctly, then the above mentioned size definition should work. Otherwise consult your examples/docs.


Finally, when you have the address, the size, and a large enough target buffer (to load its address you can again use OFFSET or lea in emu8086), you can code your task, for example, in this way:

    ; pseudo code: substitute real x86 registers for the placeholders
    ; ("r16_*" stands for a 16-bit register, "r8_*" for an 8-bit register).
    ; NOTE: in 16-bit addressing only BX, SI, DI (or BP, which defaults to
    ; the stack segment) may appear inside [] as a pointer, so choose the
    ; two pointer placeholders from those registers.
    mov   r16_src,OFFSET str1       ; source pointer = address of str1
    mov   r16_dst,OFFSET str2       ; destination pointer = address of str2
    mov   r16_left,str1size         ; number of characters still to copy
copy_next_char:
    mov   r8_ch,[r16_src]           ; fetch one character from str1
    cmp   r8_ch,'0'                 ; is it the ASCII digit zero (value 48)?
    jne   store_char                ; no  -> copy it unchanged
    mov   r8_ch,'O'                 ; yes -> substitute the capital letter O
store_char:
    mov   [r16_dst],r8_ch           ; write the (possibly replaced) character
    inc   r16_src                   ; advance both pointers ...
    inc   r16_dst                   ; ... to the next character
    dec   r16_left                  ; one character fewer to go
    jnz   copy_next_char            ; repeat until the counter reaches zero
    ; the memory starting at "str2" now holds the modified copy of "str1"

    ; ... add exit instructions ...

Hmm.. turns out the "pseudo code" is full x86 code, you just have to assign real registers to the pseudo ones, and replace them everywhere in source.

I tried to put very extensive comments there (from my point of view), so you can understand every instruction used. You should look each one up in Intel's instruction reference guide, cross-reading it with whatever tutorial/lessons you have available for Assembly, until you feel you understand what a register is, what memory is, etc.

Also debug the code instruction by instruction, checking the state of CPU (register values, flags) and memory content after each instruction, to get the idea how it works.



回答2:

There are multiple ways to do that. Here are some examples:

1) With String Instruction:

; Copy str1 into str2 with LODSB, replacing every ASCII '0' by 'O',
; then print str2 through DOS (int 21h, ah=09h) and exit.
; Syntax: MASM/TASM style as used with emu8086; 16-bit real mode, DOS.
.model small

.data

        str1      db "0neWord$"        ; '$' terminates the string for int 21h / ah=09h
        str1_size equ $-str1           ; bytes in str1, INCLUDING the '$'
                                       ; (not called "size": SIZE is a MASM/TASM operator)
        str2      db str1_size dup (?) ; destination buffer, exactly as long as str1

.code

main:

        mov ax, @data
        mov ds, ax                     ; DS -> data segment (lodsb and str2[bx] use it)

        cld                            ; DF might have been set, but we want lodsb to go forwards
        lea si, str1                   ; SI = read pointer into str1
        xor bx, bx                     ; BX = write index into str2
        mov cx, str1_size              ; CX = bytes left to copy (>= 1, the '$' is counted)

     copyStr:

        lodsb                          ; AL = byte at DS:SI, then SI = SI + 1

        cmp al, '0'
        jne storeChar                  ; ordinary character: copy unchanged
        mov al, 'O'                    ; digit zero becomes the capital letter O

     storeChar:

        mov str2[bx], al
        inc bx

        loop copyStr                   ; CX = CX - 1, repeat while CX != 0

        ; The '$' was copied as the last byte of str1, so str2 is already
        ; terminated. Storing another '$' at str2[bx] here would write one
        ; byte past the end of the buffer (bx == str1_size at this point).

        mov ah, 09h                    ; DOS: print '$'-terminated string at DS:DX
        lea dx, str2
        int 21h

        mov ah, 04ch                   ; DOS: terminate program
        int 21h

end main

2) Without String Instruction:

; Copy str1 into str2 with plain indexed MOVs (no string instructions),
; replacing every ASCII '0' by 'O', then print str2 through DOS and exit.
; Syntax: MASM/TASM style as used with emu8086; 16-bit real mode, DOS.
.model small

.data

        str1      db "0neWord$"        ; '$' terminates the string for int 21h / ah=09h
        str1_size equ $-str1           ; bytes in str1, INCLUDING the '$'
        str2      db str1_size dup (?) ; "db ?" would reserve only ONE byte; the copy
                                       ; needs as many bytes as str1 occupies

.code

main:

        mov ax, @data
        mov ds, ax                     ; DS -> data segment

        mov si, 0                      ; SI = index of the first character to copy

        call copyStr

        mov ah, 09h                    ; DOS: print '$'-terminated string at DS:DX
        lea dx, str2
        int 21h

        mov ah, 04ch                   ; DOS: terminate program
        int 21h

;-----------------------------------------------------------------------
; copyStr - copy the '$'-terminated string str1 to str2, '0' -> 'O'
; In:    SI = index at which to start (0 for the whole string)
;        DS = segment holding str1 and str2
; Out:   SI = index of the '$' terminator; str2[SI] = '$'
; Clobb: BL, flags
;-----------------------------------------------------------------------
   copyStr proc

      nextChar:

           mov bl, str1[si]

           cmp bl, '$'                 ; test BEFORE storing, so a string that is
           je finish                   ; just "$" is handled without overrunning

           cmp bl, '0'
           jne storeChar               ; ordinary character: copy unchanged
           mov bl, 'O'                 ; digit zero becomes the capital letter O

        storeChar:

           mov str2[si], bl
           inc si
           jmp nextChar

      finish:

           mov str2[si], bl            ; BL holds '$' here: terminate the copy
           ret

   copyStr endp

end main

You can learn more about LODSB and the other string instructions in the x86 instruction set reference.

3) With lodsb / stosb and simplified / optimized:

; Copy str1 into str2 with LODSB / STOSB, replacing every ASCII '0' by 'O',
; then print str2 through DOS (int 21h, ah=09h) and exit.
; Syntax: MASM/TASM style as used with emu8086; 16-bit real mode, DOS.
.model small
.data
        str1      db "0neWord$"        ; '$' terminates the string for int 21h / ah=09h
        str1_size equ $-str1           ; bytes in str1, INCLUDING the '$'
                                       ; (not called "size": SIZE is a MASM/TASM operator)
        str2      db str1_size dup (?) ; destination buffer, exactly as long as str1

.code
main:
        mov   ax, @data
        mov   ds, ax       ; lodsb loads from [ds:si]
        mov   es, ax       ; stosb stores to [es:di]

        mov   si, OFFSET str1
        mov   di, OFFSET str2
        cld          ; make sure stosb/lodsb go forwards

     ; copy SI to DI, including the terminating '$'
     copyStr:           ; do {
        lodsb             ; str1 to al

        cmp   al, '0'
        je    alterChar   ; rare case is handled out of line, below the exit

        stosb             ; al to str2
        cmp   al, '$'
        jne   copyStr    ; } while(c != '$')

        mov   ah, 09h      ; print implicit-length string
        mov   dx, OFFSET str2
        int   21h

        mov   ah, 04ch     ; exit
        int   21h

     ; Out-of-line path for '0': store 'O' and go straight back to the loop.
     ; The '$' test can be skipped here because the stored byte is 'O'.
     alterChar:
        mov   al, 'O'
        stosb
        jmp   copyStr

end main