文字列を分割する


Tags: R6RS, 文字列, FIXME

(import (rnrs base))

;; Split STR at every occurrence of the character SPLITER.
;; Returns a list of freshly allocated strings; the separator itself is
;; dropped, and adjacent/leading/trailing separators produce empty strings,
;; so the result always contains at least one element.
(define (string-split-by-char str spliter)
  ;; Scan from right to left: `end` is the exclusive end of the field that
  ;; is currently open, so each finished field can be consed straight onto
  ;; the result in its final position without a trailing reverse.
  (let scan ((i (- (string-length str) 1))
             (end (string-length str))
             (fields '()))
    (cond
      ;; Ran off the front: whatever is left is the first field.
      ((< i 0)
       (cons (substring str 0 end) fields))
      ;; Separator found: close the field that lies to its right.
      ((char=? (string-ref str i) spliter)
       (scan (- i 1) i (cons (substring str (+ i 1) end) fields)))
      (else
       (scan (- i 1) end fields)))))

;; Split STR at every non-overlapping occurrence of the string SPLITER,
;; scanning left to right.
;; Returns a list of strings with the separators removed; leading, trailing
;; and adjacent separators produce empty strings.  When SPLITER is the empty
;; string no splitting is possible and the result is (list STR).
(define (string-split-by-string str spliter)
  (define spl (string->list spliter))
  ;; If the character list SP is a prefix of LS, return the remainder of LS
  ;; after that prefix (possibly '(), which is still a true value);
  ;; otherwise return #f.
  (define (strip-prefix ls sp)
    (cond
      ((null? sp) ls)
      ((and (pair? ls) (char=? (car ls) (car sp)))
       (strip-prefix (cdr ls) (cdr sp)))
      (else #f)))
  (if (null? spl)
    (list str)
    ;; buf holds the characters of the current field in reverse order,
    ;; ret holds the finished fields in reverse order.
    (let loop ((ls (string->list str)) (buf '()) (ret '()))
      (cond
        ((null? ls)
         (reverse (cons (list->string (reverse buf)) ret)))
        ;; The separator is tested afresh at every position.  A failed
        ;; partial match therefore consumes only ONE character, so an
        ;; occurrence starting inside that partial match is still found
        ;; (e.g. "aab" split on "ab" gives ("a" "")).
        ((strip-prefix ls spl)
         => (lambda (rest)
              (loop rest '() (cons (list->string (reverse buf)) ret))))
        (else
         (loop (cdr ls) (cons (car ls) buf) ret))))))

;; Split STR on SPLITER, which may be either a character or a string.
;; Dispatches to string-split-by-char or string-split-by-string accordingly.
;; Returns #f when SPLITER is neither a character nor a string.
(define (string-split str spliter)
  (if (char? spliter)
    (string-split-by-char str spliter)
    ;; `and` yields #f for an unsupported separator type, otherwise the
    ;; list produced by the string-based splitter.
    (and (string? spliter)
         (string-split-by-string str spliter))))

;; Character separator: each occurrence of the separator ends a field, so an
;; empty input, a lone separator, and leading/trailing separators all yield
;; empty strings in the result.
(string-split "" #\&)               ;=> ("")
(string-split "&" #\&)              ;=> ("" "")
(string-split "abc&def&ghi" #\&)    ;=> ("abc" "def" "ghi")
(string-split "&abc&def&ghi&" #\&)  ;=> ("" "abc" "def" "ghi" "")

;; String separator: the whole multi-character string acts as one separator;
;; empty fields at either end are kept here as well.
(string-split "abc123def123ghi" "123")       ;=> ("abc" "def" "ghi")
(string-split "123abc123def123ghi123" "123") ;=> ("" "abc" "def" "ghi" "")

SRFI 13 の string-tokenize を使うと、逆に、構成要素となる文字の集合を指定して文字列を抽出することができる。

(import (srfi :13)
        (srfi :14))

;; The second argument is the set of characters that make up a token; here it
;; is the complement of {#\&}, i.e. every character except the ampersand.
;; As the expected results show, the tokens are the runs of non-#\& characters,
;; and (unlike string-split above) no empty strings appear in the result.
(string-tokenize "" (char-set-complement (char-set #\&))) ; => ()
(string-tokenize "&" (char-set-complement (char-set #\&))) ; => ()
(string-tokenize "abc&def&ghi" (char-set-complement (char-set #\&)))
;; => ("abc" "def" "ghi")
(string-tokenize "&abc&def&ghi" (char-set-complement (char-set #\&)))
;; => ("abc" "def" "ghi")