Assembly program that finds second largest value i

I wrote a assembly program that finds the max value in an array, but now I would like it to find the second largest number in the array. How would I modify my program to do this?

This is the program I wrote and it does work. The program prints all the values in the array and then finds the max value of the array. Now I want it to find the second largest value.

 %include "io.mac"
.STACK 100H 

.DATA
   Numbers DW 3,4,5,2,6,0
   msg0  db "Printing values in array",0
   msg1  db "Max",0
   msg2  db "Min",0

   .CODE
        .STARTUP
    PutStr msg0
    mov dx, Numbers 
    mov si, dx ;point to array
    printValues:
    cmp word [si], 0
    je procedure
    nwln
    PutInt [si]
    add si, 2
    jmp printValues

    procedure:
    push Numbers ;push Number to stack to pass parameter by stack
    call maxMeth
    nwln
    PutStr msg1
    nwln

    PutInt ax
    nwln




    complete:
.EXIT





maxMeth:
    enter 0,0 ;save old bp and set bp to sp
    mov si, [bp+4] ;point to array 
    mov ax, [si]   ; ax holds max
    add si,2 ; Increment si to next number

;Now entering loop
max:   
    cmp word [si],0   ; checks to see if the number is 0 and if it is, then we are done.
    je finish
    cmp ax, [si]        ; ax holds the max . So if ax is greater than si, then dont assign si to ax. 
    jge increment
    jmp newMax
newMax: 
    mov ax, [si] ; Otherwise we have a new a max

increment:   
    add si, 2   ;now increment si to check the next value and jump back to the main loop.
    jmp max

finish: ;clean up.
    leave ;pop bp
    ret   ;return

I edited my program to keep track of the second max and the result I receive for the second max value is 3. I was expecting the program to output 5. Im not sure why I am getting the wrong output. Here are my edits to the program.

%include "io.mac"
.STACK 100H 

.DATA
   Numbers DW 3,4,5,2,6,0
   msg0  db "Printing values in array",0
   msg1  db "Max",0


   .CODE
        .STARTUP
    PutStr msg0
    mov dx, Numbers 
    mov si, dx ;point to array
    printValues:
    cmp word [si], 0
    je procedure
    nwln
    PutInt [si]
    add si, 2
    jmp printValues

    procedure:
    push Numbers ;push Number to stack to pass parameter by stack
    call maxMeth
    nwln
    PutStr msg1
    nwln

    PutInt ax
    nwln

   PutInt bx
    nwln


    complete:
.EXIT





maxMeth:
    enter 0,0 ;save old bp and set bp to sp
    mov si, [bp+4] ;point to array 
    mov ax, [si]   ; ax holds max
    mov bx, [si]   ; ax holds max
    add si,2 ; Increment si to next number

;Now entering loop
max:   
    cmp word [si],0   ; checks to see if the number is 0 and if it is, then we are done.
    je finish          ;equals 0, then finished
    cmp ax, [si]         
    jg testSecondMax   ;So if ax is greater than si, then dont assign si to ax and check second max
    jmp newMax         ;otherwise go to new max


newMax: 
    mov ax, [si] ; save new max
    jmp increment ; and increment

testSecondMax:
    cmp bx, [si]
    jl secondMax
    jg increment

secondMax:
    mov bx, [si]
    jmp increment

increment:  
    add si, 2   ;now increment si to check the next value and jump back to the main loop.
    jmp max 



finish: ;clean up.
    leave ;pop bp
    ret   ;return

I ended up writing code for this because it made me wonder how efficiently I could implement the loop. If you want to solve your homework yourself, don't look at my code, just the bullet points in English. Use a debugger to single-step through your code.

Here's how I would change your code:

NASM style: use local labels (like .noswap:) within functions. Indent operands to a consistent column so it doesn't look ragged. Comment your function with inputs / return value and calling convention (what registers it clobbers).
optimize out the jmp next_instruction before newMax: because it's just an expensive no-op to jump where execution was going to fall through anyway.
Unless maybe optimizing for a real 8086, don't use enter, it's slow.
Load each element being checking into a register, instead of using the same memory operand multiple times. (x86-16 has 6 integer registers other than BP/SP; use them.)
put the loop-exit conditional branch at the bottom. (Jump to there for the loop entry point if needed).
Keep a max and 2nd-max in two registers, like you're keeping max in AX.
When you find an element greater than 2nd-max, keep the highest 2 of the 3 numbers you have. i.e. maintain a 2-element queue / sorted list in 2 registers.

Untested:

; word max2Meth(word *array);
; Input: implicit-length array (terminated by a 0 element),
;        pointed to by pointer passed on the stack.  (DS segment)
; returns in ax
; clobbers: cx, dx
global max2Meth
max2Meth:
    push  bp
    mov   bp, sp     ; make a stack frame.  (ENTER is slow, don't use it)
    push  si         ; SI is call-preserved in many calling conventions.  Omit this if you want to just clobber it.

    mov   si, [bp+4] ; pointer to array 

    mov   ax, [si]   ; assume that the list is non-empty
    mov   dx, ax     ; start with max = max2 instead of putting a conditional xchg outside the loop

    jmp   .loop_entry   ; enter at the bottom, at the conditional branch
;;; ax: 2nd max
;;; dx: max

.maxloop:              ; do {
    cmp cx, ax         ; check against 2nd-max, because the common case is less than both.
    jg  .updateMaxes   ; optimize for the common case: fall through on not-found

.loop_entry:
    add  si, 2
    mov  cx, [si]      ;   c = *p++;
    test cx, cx
    jnz .maxloop       ; } while (c != 0);

.finish:
   ; 2nd-max is already in AX, just clean up and return

    pop  si
    leave    ;pop bp would be faster because SP is already pointing to the right place
    ret

; This block is part of the function, even though it's placed after the ret
.updateMaxes:
    mov  ax, cx           ; Bump the old 2nd-max unconditionally
    cmp  ax, dx
    jle  .loop_entry      ; if 2nd_max <= max, return to the loop
    xchg ax, dx           ; otherwise swap
    jmp  .loop_entry

Putting the block for a rare case out-of-line is nice, because it means the common case can just fall through with no taken branches. Often putting if/else conditions inline require a jmp somewhere just to avoid running the else part after the if. But .updateMaxes: ends up being rather elegant, IMO: it has to jump back into the loop, and we can do that either before or after swapping.

16-bit xchg is 3 uops (as expensive as 3 mov instructions), but to make that cheaper (and only do mov ax, dx / mov dx, cx) in the new-max case we'd have to make the new-2ndmax case slower. Assuming it's more likely to just update the 2nd max than to update both, I think just mov and cmp/jcc is a win there. You could make that part branchless with cmov (on a 686 CPU), which might be good. Making the whole thing branchless would give you quite a long dependency chain, and not be worth it unless the array elements were on average getting larger towards the end so you had frequent max-updates all the time (but not with a pattern, so you get branch misses).

On Intel Haswell/Skylake, the inner loop is only 4 fused-domain uops (both compare/branches can macro-fuse). Over long runs of no updates, it should run at 1 iteration per clock.

If you're optimizing for code-size over speed (e.g. for a real 8086), use ax as the temporary and lodsw instead of mov ax, [si] and add si, 2. (Keep your max in a different register).

With an implicit-length list, you can't just use scasw with 2nd-max in ax, because you need to check for 0 as well as > 2nd-max :/

As a further optimization, you could use bp instead of si, if you're using the tiny model (SS = DS), because you don't need to access the stack after loading the pointer. You can just pop bp instead of leave

Before I thought of simply entering the loop with ax = dx = first element, I was going to use this code before the loop:

    mov   ax, [si]   ; assume that the list is non-empty
    mov   dx, [si+2] ; but check if it's only 1 element long, like maxMeth does
    test  dx, dx
    jz    .finish

    add   si, 4      ; first 2 elements are loaded

    cmp   ax, dx     ; sort them so ax <= dx
    jng  .noswap
    xchg  ax, dx
 .noswap:

Another way to structure the inner loop would be like this:

.maxloop:              ; do {
    add  si, 2
    mov  cx, [si]      ;   c = *p++;

    test cx, cx
    jz .finish         ; jz instead of jnz: exit the loop on the sentinel value

    cmp cx, ax         ; check against 2nd-max, because the common case is less than both.
    jng .maxloop

;; .updateMaxes:  ;; conditionally fall through into this part 
    mov  ax, cx           ; Bump the old 2nd-max unconditionally
    cmp  ax, dx
    jle  .maxloop         ; if 2nd_max <= max, return to the loop
    xchg ax, dx           ; otherwise swap
    jmp  .maxloop

.finish:

This is probably better because we can just fall into the top of the loop to start. We get out of the loop with a jz that skips over the conditional-update stuff, so we don't have any branches that exist only to skip over code that's "in the way". (i.e. we've succeeded at laying out our code blocks efficiently.)

The only downside for some CPUs is that the test/jz and cmp/jg are back to back. Some CPUs do better when conditional branches are separated by a couple more instructions than that. (e.g. unless you get lucky on how these hit the decoders on Sandybridge, one of the two branches won't macro-fuse. But they would with the first loop.)

Reminder: Stack Overflow user contributions are licensed under cc by-sa 3.0 with attribution required, so if you copy-paste my whole code, make sure you include https://stackoverflow.com/questions/47466116/assembly-program-that-finds-second-largest-value-in-array/47467682#47467682 in a comment.