;;;; Scheme Regular Expression Language Implementation -- regexp.scm

(define (r:dot) ".")

(define (r:bol) "^")

(define (r:eol) "$")

(define (r:quote string)
  (r:seq
   (call-with-output-string		; see RefMan section 14.3
    (lambda (port)
      (let ((end (string-length string)))
        (do ((i 0 (+ i 1))) ((not (< i end))) ; see RefMan 2.9
          (let ((c (string-ref string i)))
            (if (or (char=? c #\.)
                    (char=? c #\[)
                    (char=? c #\\)
                    (char=? c #\^)
                    (char=? c #\$)
		    (char=? c #\*))
                (write-char #\\ port))
            (write-char c port))))))))

(define (r:char-from char-set)		; see RefMan section 5.6
  (let ((members (char-set-members char-set)))
    (cond ((not (pair? members))
	   (r:seq))
          ((not (pair? (cdr members)))
	   (r:quote (string (car members))))
          (else
           (%char-from #f members)))))

(define (r:char-not-from char-set)
  (%char-from #t (char-set-members char-set)))

(define (%char-from negate? members)
  (let ((right? (memv #\] members))
        (caret? (memv #\^ members))
        (hyphen? (memv #\- members))
        (others
         (delete-matching-items members
                                (lambda (c)
                                  (or (char=? c #\])
                                      (char=? c #\^)
                                      (char=? c #\-))))))
    (if (and caret?
             hyphen?
             (not right?)
             (not negate?)
             (null? others))
        "[-^]"
        (string-append "["
                       (if negate? "^" "")
                       (if right? "]" "")
                       (list->string others)
                       (if caret? "^" "")
                       (if hyphen? "-" "")
                       "]"))))

;;; Means of combination for patterns

(define (r:seq . exprs)
  (string-append "\\(" (apply string-append exprs) "\\)"))

(define (r:alt . exprs)
  (if (pair? exprs)
      (apply r:seq
             (cons (car exprs)
                   (append-map (lambda (expr)
                                 (list "\\|" expr))
                               (cdr exprs))))
      (r:seq)))

(define (r:repeat min max expr)
  (if (not (exact-nonnegative-integer? min))
      (error "Min must be non-negative integer:" min))
  (if max
      (begin
        (if (not (exact-nonnegative-integer? max))
            (error "Max must be non-negative integer:" max))
        (if (not (<= min max))
            (error "Min not less than max:" min max))))
  (cond ((not max)
	 (apply r:seq
		(append (make-list min expr)
			(list expr "*"))))
	((= max min)
	 (apply r:seq (make-list min expr)))
	(else
	 (apply r:seq
		(append (make-list min expr)
			(make-list (- max min)
				   (r:seq expr "\\|")))))))

;;; The following magic allows a program in MIT/GNU Scheme to call the
;;; grep system utility, returning the list of grep output lines to
;;; the caller.  You can make similar mechanisms to call other system
;;; utilities.

(load-option 'synchronous-subprocess)


(define (r:grep expr filename)
  (r:grep-like "grep" '() expr filename))

(define (r:egrep expr filename)
  (if (eq? microcode-id/operating-system 'nt)
      (r:grep-like "grep" '("-E") expr filename)
      (r:grep-like "egrep" '() expr filename)))

(define (r:grep-like program options expr filename)
  (let ((port (open-output-string)))
    (and (= (run-synchronous-subprocess program
	      (append options
		      (list "-e" expr (->namestring filename)))
	      'output port)
            0)
	 (r:split-lines (get-output-string port)))))

(define (r:split-lines string)
  (reverse
   (let ((end (string-length string)))
     (let loop ((i 0) (lines '()))
       (if (< i end)
	   (let ((j
		  (substring-find-next-char string i end #\newline)))
	     (if j
		 (loop (+ j 1)
		       (cons (substring string i j) lines))
		 (cons (substring string i end) lines)))
	   lines)))))

#|
;;; An alternate implementation using MIT/GNU Scheme's internal
;;; regular-expression interpreter.

(define (r:grep expr filename)
  (call-with-input-file filename
    (lambda (port)
      (let loop ((lines '()))
	(let ((line (read-line port)))
	  (if (eof-object? line)
	      (reverse lines)
	      (loop (if (re-string-search-forward expr line #f)
			(cons line lines)
			lines))))))))
|#

#|
;;; For example...

;;; Note, the result of the next two requests were not in this file
;;; when the requests were made!

(pp (r:grep (r:quote "r:sex") "regexp.scm"))
("(pp (r:grep (r:quote \"r:sex\") \"regexp.scm\"))")
;Unspecified return value

(pp (r:grep (r:quote "r:seq") "regexp.scm"))
("  (r:seq"
 "\t   (r:seq))"
 "(define (r:seq . exprs)"
 "      (apply r:seq"
 "      (r:seq)))"
 "\t (apply r:seq"
 "\t (apply r:seq (make-list min expr)))"
 "\t (apply r:seq"
 "\t\t\t\t   (r:seq expr \"\\\\|\")))))))"
 "(pp (r:grep (r:quote \"r:seq\") \"regexp.scm\"))"
 "(pp (r:grep (r:seq (r:quote \"a\") (r:dot) (r:quote \"c\")) \"tests.txt\"))"
 " (r:grep (r:seq \" \""
 "    (r:seq (r:bol)")
;Unspecified return value

(pp (r:grep (r:seq (r:quote "a") (r:dot) (r:quote "c")) "tests.txt"))
("[00]. abc"
 "[01]. aac"
 "[02]. acc"
 "[03]. zzzaxcqqq"
 "[10]. catcatdogdog"
 "[12]. catcatcatdogdogdog")
;Unspecified return value

;;; And...

(pp (r:grep (r:alt (r:quote "foo") (r:quote "bar") (r:quote "baz"))
	    "tests.txt"))
("[05]. foo" "[06]. bar" "[07]. foo bar baz quux")
;Unspecified return value


(pp (r:grep (r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog")))
	    "tests.txt"))
("[09]. catdogcat"
 "[10]. catcatdogdog"
 "[11]. dogdogcatdogdog"
 "[12]. catcatcatdogdogdog"
 "[13]. acatdogdogcats"
 "[14]. ifacatdogdogs"
 "[15]. acatdogdogsme")
;Unspecified return value

(pp
 (r:grep (r:seq " "
		(r:repeat 3 5 (r:alt (r:quote "cat") (r:quote "dog")))
		(r:eol)) 
         "tests.txt"))
("[09]. catdogcat" "[10]. catcatdogdog" "[11]. dogdogcatdogdog")
;Unspecified return value

(pp
 (r:grep
  (let ((digit 
	 (r:char-from (string->char-set "0123456789"))))
    (r:seq (r:bol)
	   (r:quote "[")
	   digit
	   digit
	   (r:quote "]")
	   (r:quote ".")
	   (r:quote " ")
	   (r:char-from (char-set #\a #\b))
	   (r:repeat 3 5 (r:alt "cat" "dog"))
	   (r:char-not-from (char-set #\d #\e #\f))
	   (r:eol)))
  "tests.txt"))
("[13]. acatdogdogcats")
;Unspecified return value
|#