• Re: Slow Loop (alternatives in lisp?)

    From B. Pym@21:1/5 to Pascal Bourguignon on Mon Jun 17 23:45:07 2024
    Pascal Bourguignon wrote:

    Hello, I'm trying to imitate the behaviour of the pivot-table in excel where you take a list of items and another list of their values and
    you sum similar ones together (see toy example below). I have a list
    of 30000 items and their associated values and in excel using a pivot- table the computation is done instantaneously (less than 2 seconds)
    while the procedure I wrote in lisp will take about 12 hours !(I give
    an example of only 15 items below, this goes fast of course because
    only 15 items, but the 30,000 will take an estimate of about 12 hours;
    I never reached that far because around 5 hours I give up). Do you
    know why? Is there a way to enhance the procedure and make it as fast
    as the pivot table? Thanks


    ;; Tabulate like the pivot table.
    (time
    (let ((ls (vector "a" "b" "c" "b" "a" "f" "e" "g"
    "h" "k" "z" "k" "r" "u" "f"))
    (counter (vector 1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))
    (i 0))
    (loop while (< i (length ls)) do
    (let ((j (+ i 1)))
    (loop while (< j (length ls)) do
    (when (and (equal (elt ls i) (elt ls j))
    (not (equal (elt ls j) 'indic)))
    (incf (elt counter i) (elt counter j))
    (setf (elt ls j) 'indic
    (elt counter j) 'indic))
    (incf j)))
    (incf i))
    (values (delete 'indic ls)
    (delete 'indic counter))))

    Real time: 0.009765 sec.
    Run time: 0.012 sec.
    Space: 102408 Bytes
    #("a" "b" "c" "f" "e" "g" "h" "k" "z" "r" "u") ;
    #(15 12 8 17 3 7 9 25 3 5 7)

    Gauche Scheme

    (use srfi-13) ;; string<
    (use srfi-43) ;; vector-binary-search

    (define (string-cmp a b)
    (cond ((string< a b) -1)
    ((string= a b) 0)
    (else 1)))

    (define (do-the-pivot keys counts)
    (let* ((unique-keys
    (sort (delete-duplicates (vector->list keys)) string<))
    (pivot-keys (list->vector unique-keys))
    (pivot-counts (make-vector (vector-length pivot-keys) 0)))
    (vector-for-each
    (lambda (_ k n)
    (let ((i (vector-binary-search pivot-keys k string-cmp)))
    (vector-set! pivot-counts i
    (+ n (vector-ref pivot-counts i)))))
    keys
    counts)
    (values pivot-keys pivot-counts)))

    (do-the-pivot
    #("a" "b" "c" "b" "a" "f" "e" "g" "h" "k" "z" "k" "r" "u" "f")
    #(1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))

    ===>
    #("a" "b" "c" "e" "f" "g" "h" "k" "r" "u" "z")
    #(15 12 8 3 17 7 9 25 5 7 3)

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Kaz Kylheku@21:1/5 to B. Pym on Tue Jun 18 02:07:05 2024
    On 2024-06-17, B. Pym <No_spamming@noWhere_7073.org> wrote:
    Pascal Bourguignon wrote:

    Hello, I'm trying to imitate the behaviour of the pivot-table in excel
    where you take a list of items and another list of their values and
    you sum similar ones together (see toy example below). I have a list
    of 30000 items and their associated values and in excel using a pivot-
    table the computation is done instantaneously (less than 2 seconds)
    while the procedure I wrote in lisp will take about 12 hours !(I give
    an example of only 15 items below, this goes fast of course because
    only 15 items, but the 30,000 will take an estimate of about 12 hours;
    I never reached that far because around 5 hours I give up). Do you
    know why? Is there a way to enhance the procedure and make it as fast
    as the pivot table? Thanks


    ;; Tabulate like the pivot table.
    (time
    (let ((ls (vector "a" "b" "c" "b" "a" "f" "e" "g"
    "h" "k" "z" "k" "r" "u" "f"))
    (counter (vector 1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))
    (i 0))
    (loop while (< i (length ls)) do
    (let ((j (+ i 1)))
    (loop while (< j (length ls)) do
    (when (and (equal (elt ls i) (elt ls j))
    (not (equal (elt ls j) 'indic)))
    (incf (elt counter i) (elt counter j))
    (setf (elt ls j) 'indic
    (elt counter j) 'indic))
    (incf j)))
    (incf i))
    (values (delete 'indic ls)
    (delete 'indic counter))))

    Real time: 0.009765 sec.
    Run time: 0.012 sec.
    Space: 102408 Bytes
    #("a" "b" "c" "f" "e" "g" "h" "k" "z" "r" "u") ;
    #(15 12 8 17 3 7 9 25 3 5 7)

    Gauche Scheme

    (use srfi-13) ;; string<
    (use srfi-43) ;; vector-binary-search

    (define (string-cmp a b)
    (cond ((string< a b) -1)
    ((string= a b) 0)
    (else 1)))

    (define (do-the-pivot keys counts)
    (let* ((unique-keys
    (sort (delete-duplicates (vector->list keys)) string<))
    (pivot-keys (list->vector unique-keys))
    (pivot-counts (make-vector (vector-length pivot-keys) 0)))
    (vector-for-each
    (lambda (_ k n)
    (let ((i (vector-binary-search pivot-keys k string-cmp)))
    (vector-set! pivot-counts i
    (+ n (vector-ref pivot-counts i)))))
    keys
    counts)
    (values pivot-keys pivot-counts)))

    (do-the-pivot
    #("a" "b" "c" "b" "a" "f" "e" "g" "h" "k" "z" "k" "r" "u" "f")
    #(1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))


    #("a" "b" "c" "e" "f" "g" "h" "k" "r" "u" "z")
    #(15 12 8 3 17 7 9 25 5 7 3)

    This is the TXR Lisp interactive listener of TXR 294.
    Quit with :quit or Ctrl-D on an empty line. Ctrl-X ? for cheatsheet.
    TXR is light and portable; take it camping, or to the Bahamas.
    (defun pivot (k v)
    (flow (list k v)
    transpose
    (sort-group @1 car)
    (mapcar [juxt caar (op sum @1 cadr)])
    transpose))
    pivot
    (pivot
    #("a" "b" "c" "b" "a" "f" "e" "g" "h" "k" "z" "k" "r" "u" "f")
    #(1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))
    (("a" "b" "c" "e" "f" "g" "h" "k" "r" "u" "z") (15 12 8 3 17 7 9 25 5 7 3))

    Using group-reduce:

    (defun pivot (k v)
    (flow (list k v)
    transpose
    (group-reduce (hash) first [mapf + use second] @1 0)
    hash-pairs
    (sort @1 : first)
    transpose))
    pivot
    (pivot
    #("a" "b" "c" "b" "a" "f" "e" "g" "h" "k" "z" "k" "r" "u" "f")
    #(1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))
    (("a" "b" "c" "e" "f" "g" "h" "k" "r" "u" "z") (15 12 8 3 17 7 9 25 5 7 3))

    Nicer with group-map:

    (defun pivot (k v)
    (flow (list k v)
    transpose
    (group-map first (op sum @1 second))
    hash-pairs
    (sort @1 : first)
    transpose))
    pivot
    (pivot
    #("a" "b" "c" "b" "a" "f" "e" "g" "h" "k" "z" "k" "r" "u" "f")
    #(1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))
    (("a" "b" "c" "e" "f" "g" "h" "k" "r" "u" "z") (15 12 8 3 17 7 9 25 5 7 3))

    --
    TXR Programming Language: http://nongnu.org/txr
    Cygnal: Cygwin Native Application Library: http://kylheku.com/cygnal
    Mastodon: @[email protected]

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From B. Pym@21:1/5 to All on Mon Jul 22 18:29:50 2024
    * there is already a WHILE in Common Lisp. No need to invent a new one:

    (loop while (foo-p) do .... )

    Gauche Scheme

    (while (read) (print "True enough."))
    2
    True enough.
    #t
    True enough.
    'yes
    True enough.
    #f

    (while (read) => x (print x " is truer than you think."))
    "why"
    why is truer than you think.
    88
    88 is truer than you think.
    0
    0 is truer than you think.
    #f




    Paul Graham:

    I consider Loop one of the worst flaws in CL, and an example
    to be borne in mind by both macro writers and language designers.

    [In "ANSI Common Lisp", Graham makes the following comments:]

    The loop macro was originally designed to help inexperienced
    Lisp users write iterative code. Instead of writing Lisp code,
    you express your program in a form meant to resemble English,
    and this is then translated into Lisp. Unfortunately, loop is
    more like English than its designers ever intended: you can
    use it in simple cases without quite understanding how it
    works, but to understand it in the abstract is almost
    impossible.
    ....
    the ANSI standard does not really give a formal specification
    of its behavior.
    ....
    The first thing one notices about the loop macro is that it
    has syntax. A loop expression contains not subexpressions but
    clauses. The clauses are not delimited by parentheses;
    instead, each kind has a distinct syntax. In that, loop
    resembles traditional Algol-like languages. But the other
    distinctive feature of loop, which makes it as unlike Algol as
    Lisp, is that the order in which things happen is only
    loosely related to the order in which the clauses occur.
    ....
    For such reasons, the use of loop cannot be recommended.


    Dan Weinreb, one of the designers of Common Lisp:

    ... the problem with LOOP was that it turned out to be hard to
    predict what it would do, when you started using a lot of
    different facets of LOOP all together. This is a serious problem
    since the whole idea of LOOP was to let you use many facets
    together; if you're not doing that, LOOP is overkill.


    Barry Margolin:

    My recommendation is based on seeing many question in the past
    of the form "What happens if you use both XXX and YYY in the
    same LOOP?" The unfortunate fact is that when we were writing
    the standard we didn't have time to nail down all the possible
    interactions between different LOOP features, so many of these
    are not well specified. And even if we did get it right in
    the standard, it's likely to be difficult to find them and I
    wouldn't trust that all implementors got it right (many of
    those questions were probably from implementors, trying to
    figure out what they were supposed to do). And even if they
    all got it right, someone reading your code may not be able to
    figure it out.

    So, with all those potential problems, my feeling is that if
    you have to ask, it's probably better to use something other
    than LOOP.

    3. Loop is very powerful, granted, and many people are trying to
    argue that "you can do so much with loop that it's unreadable."
    This is not an argument.

    But it is! Because any use of LOOP has the potential to be
    unreadable, the reader must read it carefully to verify that
    it's just one of the cases that doesn't require careful
    reading!


    John Foderaro:

    I'm not trying to join a debate on loop. I just wanted to present
    the other side of [the issue so that] the intelligent people can
    then weigh the arguments on both sides.

    I'm not suggesting that loop can be fixed either by adding
    parenthesis or coming up with ways of indenting it to make it
    understandable. It's a lost cause.

    ...

    Another great example from kmp:

    === from kmp

    For example, you might think
    (loop with i = (random 100) for x from 1 to 10 do (print (list i x)))
    and
    (loop for i = (random 100) for x from 1 to 10 do (print (list i x)))
    meant the same in English, [but they don't do the same thing in loop]

    === end kmp

    loop lulls you into thinking that you understand the program since
    you understand English. Make no mistake about it, loop is its
    own language. If you use it you condem everyone who reads the
    code to also learn the loop language.

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From Kaz Kylheku@21:1/5 to B. Pym on Tue Jul 23 01:24:37 2024
    On 2024-07-22, B. Pym <[email protected]> wrote:
    * there is already a WHILE in Common Lisp. No need to invent a new one:

    (loop while (foo-p) do .... )

    Gauche Scheme

    (while (read) (print "True enough."))
    2
    True enough.
    #t
    True enough.
    'yes
    True enough.
    #f

    (while (read) => x (print x " is truer than you think."))
    [ ... ]
    Paul Graham:

    I consider Loop one of the worst flaws in CL, and an example
    to be borne in mind by both macro writers and language designers.

    But the above => cruft is exactly like LOOP clause syntax.

    It might as well be

    (while (read) with x do (print ...))

    More Lispy way:

    (whilet ((x (read)))
    ...)

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)
  • From B. Pym@21:1/5 to All on Sat Aug 17 09:30:28 2024
    (defun distribution1 (items values test)
    (let ((table (make-hash-table :test test)))
    (loop for item in items
    for value in values
    do (incf (gethash item table 0) value))
    (let ((items-list nil))
    (maphash (lambda (item sum-value)
    (push (cons item sum-value) items-list))
    table)
    (sort items-list #'> :key #'cdr))))

    An example call:

    CL-USER 58 > (distribution1 '("a" "b" "c" "b" "a" "f" "e" "g"
    "h" "k" "z" "k" "r" "u" "f")
    '(1 5 8 7 14 8 3 7 9 4 3 21 5 7 9)
    #'equal)
    (("k" . 25) ("f" . 17) ("a" . 15) ("b" . 12) ("h" . 9) ("c" . 8)
    ("g" . 7) ("u" . 7) ("r" . 5) ("e" . 3) ("z" . 3))

    newLISP

    Let's simply use an association list.

    (macro (ainc! Alist Key Value Function Deflt)
    (local (E-Message Val Func Def)
    (setq Func Function)
    (if (true? Func)
    (setq Val Value)
    (begin (setq Func +) (setq Val (or Value 1))))
    (setq Def Deflt)
    (if (= nil Def) (setq Def 0))
    (unless
    (catch
    (setf (assoc Key Alist)
    (list ($it 0) (Func Val ($it 1))))
    'E-Message)
    (setf Alist (cons (list Key (Func Val Def)) Alist)))))

    (define (distribution1 items vals)
    (let (table '())
    (dolist (it items) (ainc! table it (pop vals)))
    (sort table (fn (a b) (> (a 1) (b 1))))))

    (distribution1
    '("a" "b" "c" "b" "a" "f" "e" "g" "h" "k" "z" "k" "r" "u" "f")
    '(1 5 8 7 14 8 3 7 9 4 3 21 5 7 9))

    ===>
    (("k" 25) ("f" 17) ("a" 15) ("b" 12) ("h" 9) ("c" 8) ("g" 7)
    ("u" 7) ("r" 5) ("e" 3) ("z" 3))

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)