• Re: Tokenizer (Re: Most efficient way to read words from string.)

    From B. Pym@21:1/5 to All on Tue Aug 26 09:04:18 2025
    XPost: comp.lang.scheme

    (defun string-split (str &optional (separator #\Space))
    "Splits the string STR at each SEPARATOR character occurrence.
    The resulting substrings are collected into a list which is returned.
    A SEPARATOR at the beginning or at the end of the string STR results
    in an empty string in the first or last position of the list
    returned."
    (declare (type string str)
    (type character separator))
    (loop for start = 0 then (1+ end)
    for end = (position separator str :start 0)
    then (position separator str :start start)
    for substr = (subseq str start end)
    then (subseq str start end)
    collect substr into result
    when (null end) do (return result)
    ))


    Testing:

    * (string-split " foo bar ")

    ("" "foo" "" "" "" "" "" "bar" "")


    Gauche Scheme

    "!" is similar to "do".

    (define (tokenize str separators)
    (let ((seps (string->list separators)))
    (! (ch :in (reverse (cons (car seps) (string->list str)))
    := sep (member ch seps)
    r cons (list->string tmp) :if (and (pair? tmp) sep)
    tmp '() (if sep '() (cons ch tmp)))
    #f r)))

    (tokenize " foo; bar, baz, and ... zap" " ,;.")
    ===>
    ("foo" "bar" "baz" "and" "zap")

    Given:

    (define-syntax !-aux
    (syntax-rules (<> @ @@ + - cons cdr :in :across :along
    :if :if-else !
    :let := )
    [(_ (:let id val z ...) seen (lets ...) stuff ...)
    (!-aux (z ...) seen (lets ... (id val)) stuff ...) ]
    [(_ (:= id val z ...) stuff ...)
    (!-aux (:let id #f dummy #f (set! id val) z ...) stuff ...) ]
    [(_ (:if bool z ...) (seen ... (v i u)) stuff ...)
    (!-aux (z ...)
    (seen ... (v i (if bool u v))) stuff ...) ]
    ;;
    [(_ (:if-else bool z ...) (seen ... (a b c)(d e f)) stuff ...)
    (!-aux (:let yes #f z ...)
    (seen ... (a b (begin (set! yes bool) (if yes c a)))
    (d e (if (not yes) f d))) stuff ...) ]
    ;;
    [(_ (x :in lst z ...) seen lets bool stuff ...)
    (!-aux (:let xs lst x (and (pair? xs)(pop! xs)) <> z ...)
    seen lets (or (not x) bool) stuff ...) ]
    [(_ (x :across vec z ...) seen lets bool stuff ...)
    (!-aux (:let v vec :let i 0
    x (and (< i (vector-length v))
    (begin0 (vector-ref v i) (inc! i))) <>
    z ...)
    seen lets (or (not x) bool) stuff ...) ]
    [(_ (ch :along str z ...) seen lets bool stuff ...)
    (!-aux (:let s str :let i 0
    ch (and (< i (string-length s))
    (begin0 (string-ref s i) (inc! i))) <>
    z ...)
    seen lets (or (not ch) bool) stuff ...) ]
    [(_ (a b <> z ...) (seen ...) stuff ...)
    (!-aux (z ...) (seen ... (a b b)) stuff ...) ]
    [(_ (a b + z ...) (seen ...) stuff ...)
    (!-aux (z ...) (seen ... (a b (+ 1 a))) stuff ...) ]
    [(_ (a + n z ...) (seen ...) stuff ...)
    (!-aux (z ...) (seen ... (a 0 (+ n a))) stuff ...) ]
    [(_ (a b - z ...) (seen ...) stuff ...)
    (!-aux (z ...) (seen ... (a b (- a 1))) stuff ...) ]
    [(_ (a cons b z ...) (seen ...) stuff ...)
    (!-aux (z ...) (seen ... (a '() (cons b a))) stuff ...) ]
    [(_ (a b cdr z ...) (seen ...) stuff ...)
    (!-aux (z ...) (seen ... (a b (cdr a))) stuff ...) ]
    [(_ (a b c z ...) (seen ...) stuff ...)
    (!-aux (z ...) (seen ... (a b c)) stuff ...) ]
    [(_ (a b) (seen ...) stuff ...)
    (!-aux () (seen ... (a b)) stuff ...) ]
    [(_ (a) (seen ...) stuff ...)
    (!-aux () (seen ... (a '())) stuff ...) ]
    ;;
    [(_ () seen lets bool ! action ...)
    (!-aux () seen lets bool #t (action ...)) ]
    ;;
    [(_ () ((a b c) z ...) lets bool)
    (!-aux () ((a b c) z ...) lets bool a) ]
    [(_ () ((a b c) z ...) lets bool @)
    (!-aux () ((a b c) z ...) lets bool (reverse a)) ]
    [(_ () seen lets bool @ result stuff ...)
    (!-aux () seen lets bool (reverse result) stuff ...) ]
    ;;
    [(_ () seen lets bool @@ (what x ...) stuff ...)
    (!-aux () seen lets bool (what (reverse x) ...) stuff ...) ]
    [(_ () seen lets bool (what @ x z ...) stuff ...)
    (!-aux () seen lets bool (what (reverse x) z ...) stuff ...) ]
    [(_ () seen lets bool (what x @ y z ...) stuff ...)
    (!-aux () seen lets bool (what x (reverse y) z ...) stuff ...) ]
    ;;
    [(_ () ((a b c) z ...) lets 0 stuff ...)
    (!-aux () ((a b c) z ...) lets (= 0 a) stuff ...) ]
    [(_ () seen lets bool result stuff ...)
    (let lets (do seen (bool result) stuff ...)) ]
    ))
    (define-syntax !
    (syntax-rules ()
    [(_ specs bool stuff ...)
    (!-aux specs () () bool stuff ...) ]
    [(_ specs) (! specs #f) ]
    ))

    --
    [T]he problem is that lispniks are as cultish as any other devout group and basically fall down frothing at the mouth if they see [heterodoxy].
    --- Kenny Tilton
    The good news is, it's not Lisp that sucks, but Common Lisp. --- Paul Graham

    --- SoupGate-Win32 v1.05
    * Origin: fsxNet Usenet Gateway (21:1/5)