Binary representation in processor's registers

2019-08-24 17:38发布

问题:

I would like to ask how values are placed into registers. For example, suppose we want to load the number 50 into EBX (in ASCII, code 50 is the character '2'). EBX consists of 32 bits. When we put 50 into it, it will be stored in its binary representation, yes? (00000000 | 00000000 | 00000000 | 00110010). Is that right? And what happens to the bits when we place a string into a register?

回答1:

EAX holds 32 bits which Intel calls "integer". The programmer - and sometimes the assembler - decides how to interpret these bits. If you load EAX with the number 50 (not the string '50')

mov eax, 50

the assembler generates a machine instruction that loads the value so that the register holds the number 50 in binary:

00000000000000000000000000110010

Try out what the assembler does if you feed it a string:

; NASM syntax, 32-bit x86 Linux, system calls via int 0x80.
; Prints the 32 bits of EAX (loaded with the character constant 'Four')
; as ASCII '0'/'1' text followed by a line feed.

GLOBAL _start

SYS_EXIT    equ 1                   ; Linux i386 system-call numbers
SYS_WRITE   equ 4
STDOUT      equ 1                   ; File descriptor of standard output
LINEFEED    equ 10

SECTION .bss
    outstr resb 40                  ; Output buffer; 37 bytes are used (36 characters + line feed)

SECTION .text                       ; Code goes into .text; .data is not marked executable
    _start:
        mov eax, 'Four'             ; Load EAX with a string
        call int2bin                ; Convert it to a binary string in outstr; EDI = end of the text
        mov byte [edi], LINEFEED    ; Add a line feed (replaces the terminator written by int2bin)
        inc edi                     ; Increment the pointer

        mov eax, SYS_WRITE
        mov ebx, STDOUT
        mov ecx, outstr             ; Pointer to output buffer
        mov edx, edi                ; Count of bytes to send:
        sub edx, outstr             ;   EDX = EDI (offset returned from int2bin, plus line feed) - offset of output buffer
        int 0x80                    ; Call kernel

        mov eax, SYS_EXIT
        xor ebx, ebx                ; Returncode: 0 (ok)
        int 0x80                    ; Call kernel

    int2bin:    ; Writes the 32 bits of EAX as ASCII '0'/'1' text into outstr
        ; In:   EAX = value to render
        ; Out:  EDI = address of the terminating NUL in outstr
        ;       (36 characters precede it: a space in front of each group of 8 digits)
        ; Uses: EAX (shifted out, ends as 0), ECX (ends as 0), DL
        mov ecx, 32                 ; Bits still to emit
        mov edi, outstr             ; Write cursor

        .next_bit:
        test ecx, 7                 ; Remaining count a multiple of 8 -> start of a new group
        jnz .digit                  ; Inside a group: no separator
        mov byte [edi], ' '         ; Separator in front of every 8-bit group
        inc edi
        .digit:

        mov dl, '0'                 ; Start from the character '0' (MOV leaves the flags alone)
        shl eax, 1                  ; Most significant bit -> carry flag
        adc dl, 0                   ; '0' + carry gives '0' or '1'
        mov [edi], dl               ; Append the digit
        inc edi
        loop .next_bit              ; ECX -= 1, repeat while ECX != 0

        mov byte [edi], 0           ; NUL terminator (EDI is left pointing at it)
        ret

Output:

01110010 01110101 01101111 01000110

NASM stored it backwards in EAX. The ASCII code of the leftmost character ('F') is stored in the rightmost, least significant byte of EAX, the second character ('o') in the next byte to its left, and so on, so that the last character ('r') ends up in the leftmost, most significant byte. This is easier to see when those bytes are printed as ASCII characters:

; NASM syntax, 32-bit x86 Linux, system calls via int 0x80.
; Prints the four bytes of EAX (loaded with the character constant 'Four')
; as ASCII characters, most significant byte first, followed by a line feed.

GLOBAL _start

SYS_EXIT    equ 1                   ; Linux i386 system-call numbers
SYS_WRITE   equ 4
STDOUT      equ 1                   ; File descriptor of standard output
LINEFEED    equ 10

SECTION .bss
    outstr resb 40                  ; Output buffer; 5 bytes are used (4 characters + line feed)

SECTION .text                       ; Code goes into .text; .data is not marked executable
    _start:
        mov eax, 'Four'             ; Load EAX with a string
        call int2str                ; Convert it to an ASCII string in outstr; EDI = end of the text
        mov byte [edi], LINEFEED    ; Add a line feed (replaces the terminator written by int2str)
        inc edi                     ; Increment the pointer

        mov eax, SYS_WRITE
        mov ebx, STDOUT
        mov ecx, outstr             ; Pointer to output buffer
        mov edx, edi                ; Count of bytes to send:
        sub edx, outstr             ;   EDX = EDI (offset returned from int2str, plus line feed) - offset of output buffer
        int 0x80                    ; Call kernel

        mov eax, SYS_EXIT
        xor ebx, ebx                ; Returncode: 0 (ok)
        int 0x80                    ; Call kernel

    int2str:    ; Copies the four bytes of EAX into outstr, most significant byte first
        ; In:   EAX = value whose bytes are emitted as characters
        ; Out:  EDI = address of the terminating NUL in outstr (4 characters precede it)
        ; Uses: ECX (ends as 0); EAX holds its input value again after four 8-bit rotates
        mov ecx, 4                  ; Bytes still to emit
        mov edi, outstr             ; Write cursor

        .next_byte:
        rol eax, 8                  ; Rotate the current top byte down into AL
        mov byte [edi], al          ; Append it to outstr
        inc edi
        loop .next_byte             ; ECX -= 1, repeat while ECX != 0

        mov byte [edi], 0           ; NUL terminator (EDI is left pointing at it)
        ret

Output:

ruoF

Both programs above show EAX in big endian order. This is the order you are familiar with from decimal numbers: the most significant digit is on the left and the least significant digit is on the right. However, when EAX is saved to memory or disk it is written in little endian order, starting the sequence from the right with the least significant byte. Looking at the memory with a disassembler or debugger you would see 'F','o','u','r', just as if you had defined it in a .data section with db 'Four'. Therefore it makes no difference when you load a register with a string, save it to memory and call the write routine of the kernel:

; NASM syntax, 32-bit x86 Linux, system calls via int 0x80.
; Builds "Hello world!" from three 4-character constants held in registers,
; stores them to memory and writes the resulting 12 bytes to standard output.

GLOBAL _start

SYS_EXIT    equ 1                   ; Linux i386 system-call numbers
SYS_WRITE   equ 4
STDOUT      equ 1                   ; File descriptor of standard output
MSG_LEN     equ 3*4                 ; 3 DWORDs of 4 bytes each

SECTION .bss
    outstr resb 40                  ; Output buffer; the first MSG_LEN bytes are used

SECTION .text                       ; Code goes into .text; .data is not marked executable
    _start:
        mov eax, 'Hell'             ; Load EAX with the first part of the string
        mov ebx, 'o wo'             ; Load EBX with the second part
        mov ecx, 'rld!'             ; Load ECX with the third part
        mov dword [outstr], eax     ; Store the first part in outstr (little endian)
        mov dword [outstr+4], ebx   ; Append the second part
        mov dword [outstr+8], ecx   ; Append the third part

        mov eax, SYS_WRITE
        mov ebx, STDOUT
        mov ecx, outstr             ; Pointer to output buffer
        mov edx, MSG_LEN            ; Count of bytes to send
        int 0x80                    ; Call kernel

        mov eax, SYS_EXIT
        xor ebx, ebx                ; Returncode: 0 (ok)
        int 0x80                    ; Call kernel

Output:

Hello world!

Please note: This byte ordering of character constants is a design decision of the NASM developers. Other assemblers might behave differently.