Generate unique random numbers in a loop

2019-05-15 02:10发布

问题:

OK, after few hours of insane debugging, I finally have this:

(defmacro assoc-bind (bindings expression &rest body)
  (let* ((i (gensym))
         (exp (gensym))
         (abindings
          (let ((cursor bindings) result)
            (while cursor
              (push (caar cursor) result)
              (push (cdar cursor) result)
              (setq cursor (cdr cursor)))
            (setq result (nreverse result))
            (cons (list i `(quote ,result))
                  (cons (list exp expression) result)))))
    `(let (,@abindings)
       (while ,i
         (set (car ,i) (caar ,exp))
         (setq ,i (cdr ,i))
         (set (car ,i) (cdar ,exp))
         (setq ,i (cdr ,i) ,exp (cdr ,exp)))
       ,@body)))

(let ((i 0) (limit 100) (test (make-string 100 ?-))
      bag bag-iter next-random last)
  (while (< i limit)
    ;; bag is an alist of a format of ((min . max) ...)
    (setq bag-iter bag next-random (random limit))
    (message "original-random: %d" next-random)
    (if bag-iter
        (catch 't
          (setq last nil)
          (while bag-iter
            ;; cannot use `destructuring-bind' here,
            ;; it errors if not enough conses
            (assoc-bind
                ((lower-a . upper-a) (lower-b . upper-b))
                bag-iter
              (cond
               ;; CASE 0: ============ no more conses
               ((and (null lower-b) (>= next-random upper-a))
                (cond
                 ((= next-random upper-a)
                  (if (< (1+ next-random) limit)
                      (setcdr (car bag-iter) (incf next-random))
                    (setcar (car bag-iter) (incf next-random))
                    (when (and last (= 1 (- (cdar last) next-random)))
                      (setcdr (car last) upper-a)
                      (setcdr last nil))))
                 ;; increase right
                 ((= (- next-random upper-a) 1)
                    (setcdr (car bag-iter) next-random))
                  ;; add new cons
                  (t (setcdr bag-iter
                             (list (cons next-random next-random)))))
                (message "case 0")
                (throw 't nil))
               ;; CASE 1: ============ before the first
               ((< next-random lower-a)
                (if (= (1+ next-random) lower-a)
                    (setcar (car bag-iter) next-random)
                  (if last
                      (setcdr last
                              (cons (cons next-random next-random)
                                    bag-iter))
                    (setq bag (cons (cons next-random next-random) bag))))
                (message "case 1")
                (throw 't nil))
               ;; CASE 2: ============ in the first range
               ((< next-random upper-a)
                (if (or (and (> (- next-random lower-a)
                                (- upper-a next-random))
                             (< (1+ upper-a) limit))
                        (= lower-a 0))
                    ;; modify right
                    (progn
                      (setq next-random (1+ upper-a))
                      (setcdr (car bag-iter) next-random)
                      (when (and lower-b (= (- lower-b next-random) 1))
                        ;; converge right
                        (setcdr (car bag-iter) upper-b)
                        (setcdr bag-iter (cddr bag-iter))))
                  ;; modify left
                  (setq next-random (1- lower-a))
                  (setcar (car bag-iter) next-random)
                  (when (and last (= (- next-random (cdar last)) 1))
                    ;; converge left
                    (setcdr (car last) upper-a)
                    (setcdr last (cdr bag-iter))))
                (message "case 2")
                (throw 't nil))
               ;; CASE 3: ============ in the middle
               ((< next-random lower-b)
                (cond
                 ;; increase previous
                 ((= next-random upper-a)
                  (setq next-random (1+ next-random))
                  (setcdr (car bag-iter) next-random)
                  (when (= (- lower-b next-random) 1)
                    ;; converge left, if needed
                    (setcdr (car bag-iter) upper-b)
                    (setcdr bag-iter (cddr bag-iter))))
                 ;; converge right
                 ((= (- lower-b upper-a) 1)
                  (setcdr (car bag-iter) upper-b)
                  (setcdr bag-iter (cddr bag-iter)))
                 ;; increase left
                 ((= (- next-random 1) upper-a)
                  (setcdr (car bag-iter) next-random)
                  (when (= next-random (1- lower-b))
                    (setcdr (car bag-iter) upper-b)
                    (setcdr bag-iter (cddr bag-iter))))
                 ;; decrease right
                 ((= (- lower-b next-random) 1)
                  (setcar (cadr bag-iter) next-random))
                 ;; we have room for a new cons
                 (t (setcdr bag-iter
                            (cons (cons next-random next-random)
                                  (cdr bag-iter)))))
                (message "case 3")
                (throw 't nil)))
              (setq last bag-iter bag-iter (cdr bag-iter)))))
      (setq bag (list (cons next-random next-random))))
    (message "next-random: %d" next-random)
    (message "bag: %s" bag)
    (when (char-equal (aref test next-random) ?x)
      (throw nil nil))
    (aset test next-random ?x)
    (incf i))
  (message test))

It works, but it is super ugly. When I started working on this I imagined that the function should not take more then some dozen lines of code. In good hope that my initial assumption was not so far off, I'm asking you to try to help to tidy this up.

If reading my code gives you a headache (I can absolutely understand it!) here's a description of what the above does:

Generates random numbers within given interval (starting with zero for simplicity and up to limit). Each iteration ensures that the newly generated number is unique by verifying it against the pre-recorded ranges of numbers that have already been generated. These ranges are stored in a form of alist, i.e. ((min-0 . max-0) (min-1 . max-1) ... (min-N . max-N)). After checking that the new generated random number is not within any range, that number is used, and the range is updated with the generated number. Otherwise, the number is replaced by such number, which is closer to it from either min or max of the range it is in, but it cannot exceed the limit or be negative.

Rules for updating ranges:

Given N = new random number, and the two ranges ((a . b) (c . d)) it is possible that the following changes will happen:

if N < a - 1: ((N . N) (a . b) (c . d))
if N < a + (b - a) / 2: (((1- a) . b) (c . d))
if N < b and (c - b) > 2: ((a . (1+ b)) (c . d))
if N < b and (c - b) = 2: ((a . d))
if N = c - 1: ((a . b) ((1- c) . d))
if N < c: ((a . b) (N . N) (c . d))

I hope I covered all cases.

Bonus points if you have a way to describe the time/space complexity of the algo :) Also, if you can think of another approach to the problem, or you can certainly tell that there's something wrong with uniformity of distribution in this case, do tell!

EDIT:

Too tired to test it at the moment, but here was another idea I had, just in case:

(defun pprint-bytearray
  (array &optional bigendian bits-per-byte byte-separator)
  (unless bits-per-byte (setq bits-per-byte 32))
  (unless byte-separator (setq byte-separator ","))
  (let ((result
         (with-output-to-string
           (princ "[")
           (++ (for i across array)
             (if bigendian
                 (++ (for j from 0 downto (- bits-per-byte))
                   (princ (logand 1 (lsh i j))))
               (++ (for j from (- bits-per-byte) to 0)
                 (princ (logand 1 (lsh i j)))))
             (princ byte-separator)))))
    (if (> (length result) 1)
        (aset result (1- (length result)) ?\])
      (setq result (concat result "]")))
    result))

(defun random-in-range (limit &optional bits)
  (unless bits (setq bits 31))
  (let ((i 0) (test (make-string limit ?-))
        (cache (make-vector (ceiling limit bits) 0))
        next-random searching
        left-shift right-shift)
    (while (< i limit)
      (setq next-random (random limit))
      (let* ((divisor (floor next-random bits))
             (reminder (lsh 1 (- next-random (* divisor bits)))))
        (if (= (logand (aref cache divisor) reminder) 0)
            ;; we have a good random
            (aset cache divisor (logior (aref cache divisor) reminder))
          ;; will search for closest unset bit
          (setq left-shift (1- next-random)
                right-shift (1+ next-random)
                searching t)
          (message "have collision %s" next-random)
          (while searching
            ;; step left and try again
            (when (> left-shift 0)
              (setq divisor (floor left-shift bits)
                    reminder (lsh 1 (- left-shift (* divisor bits))))
              (if (= (logand (aref cache divisor) reminder) 0)
                  (setf next-random left-shift
                        searching nil
                        (aref cache divisor)
                        (logior (aref cache divisor) reminder))
                (decf left-shift)))
            ;; step right and try again
            (when (and searching (< right-shift limit))
              (setq divisor (floor right-shift bits)
                    reminder (lsh 1 (- right-shift (* divisor bits))))
              (if (= (logand (aref cache divisor) reminder) 0)
                  (setf next-random right-shift
                        searching nil
                        (aref cache divisor)
                        (logior (aref cache divisor) reminder))
                (incf right-shift))))))
      (incf i)
      (message "cache: %s" (pprint-bytearray cache t 31 ""))
      (when (char-equal (aref test next-random) ?x)
        (throw nil next-random))
      (aset test next-random ?x)
      (message "next-random: %d" next-random))))

(random-in-range 100)

Which should reduce the memory usage by the factor of 31 (maybe it can be 32, I don't know how many bits of an int it is safe to use in eLisp, ints seem to be platform dependant).

I.e. we can divide the natural numbers in groups 31 numbers each, and within each such group there would be possible to store all of its members (or their combinations) as a single int (each one of the numbers needs only one bit to show its presence). This makes searching for the nearest unused neighbour somewhat more convoluted, but the benefit of 31 times memory reduction (and no need of dynamic allocation) looks like a good perspective...

EDIT2:

OK, I finally figured out how to do that with bit mask. Updated the code above. This can save up memory up to by the factor of 64 (I think so...) of the range, where you generate random.

回答1:

For a simpler approach, just generate a sequence of numbers in the desired interval, then shuffle them. Then, when you need a random number, just take the next one off that list.

This ensures that all the numbers in the desired interval are there once and only once, and that each random number acquired is unique and that the entire interval will be exhausted if you go through it.

These satisfy your requirements as I understand them.



回答2:

The following code is lightly tested and perhaps not the most beautiful style, but I still think it should work and is a bit simpler than yours. My algorithm could be viewed as the opposite of yours: instead of adding random numbers to the set of already chosen numbers, I start with the full set of possible integers and remove the ith from it (this is done by pick). I used the same storage as yours for sets of integers.

(defun pick (index bag)
  "Pick integer at position INDEX in the set described by BAG

BAG is of the form ((min0 . max0) (min1 . max1) ...)

The result is returned in the form: (n . new-bag)
where N is the integer picked, and NEW-BAG is the set obtained by
removing N from BAG."
  (let* ((range (car bag))   ;; The first range in the set,
         (beg (car range))   ;; of the form (beg . end)
         (end (cdr range))   ;;
         (last (- end beg))) ;; index of the last element in the range

    (if (<= index last)
        ;; We are picking an element of the first range
        (let ((n (+ beg index)))
          (cons n
                (cond
                 ;; Case of a singleton (n . n)
                 ((= last 0)
                  (rest bag))

                 ;; If we are picking the first element of the range
                 ((= index 0)
                  (cons `(,(1+ beg) . ,end) (rest bag)))

                 ;; If we are picking the last element
                 ((= index last)
                  (cons `(,beg . ,(- end 1)) (rest bag)))

                 ;; Otherwise, the range is split into two parts
                 (t
                  (concatenate 'list
                               `((,beg . ,(- n 1))
                                 (,(1+ n) . ,end))
                               (rest bag))))))

      ;; We will pick an element from a range further down the list
      ;; by recursively calling `pick' on the tail
      (let* ((rec     (pick (- index last 1) (rest bag)))
             (n       (car rec))
             (new-bag (cdr rec)))
        (cons n (cons range new-bag))))))

(defun generate (count limit)
  (let ((bag `((1 . ,limit)))
        (result nil)
        n pick-result)
    (dotimes (i count)
      (setq pick-result (pick (random (- limit i)) bag))
      (setq n   (car pick-result))
      (setq bag (cdr pick-result))
      (setq result (cons n result)))
    result))

(generate 10 100)
;; ==> (64 26 43 44 55 5 89 20 12 25)

You're probably a much better LISP coder than I am, so I'm sure you'll be able to rewrite this piece of code in a more legible way.