Escape sequence interpreter

This commit is contained in:
Daniel Ziltener 2024-09-14 14:53:27 +02:00
parent 839f439924
commit ac9973cc56
Signed by: zilti
GPG key ID: B38976E82C9DAE42
6 changed files with 95 additions and 20 deletions

View file

@ -1,4 +1,4 @@
# Created 2024-09-14 Sat 00:15 # Created 2024-09-14 Sat 14:52
#+title: SRFI-180 #+title: SRFI-180
#+author: Daniel Ziltener #+author: Daniel Ziltener
#+export_file_name: README.org #+export_file_name: README.org
@ -182,8 +182,9 @@ Daniel Ziltener
** Version History ** Version History
#+name: version-history #+name: version-history
| 1.5 | Reimplementation | | 1.5.1 | Escape sequences |
| 1.0 | Reference Implementation | | 1.5.0 | Reimplementation |
| 1.0.0 | Reference Implementation |
* License * License

View file

@ -3,7 +3,7 @@
(synopsis "A JSON parser and printer that supports JSON bigger than memory.") (synopsis "A JSON parser and printer that supports JSON bigger than memory.")
(category parsing) (category parsing)
(license "BSD") (license "BSD")
(version "1.5.0") (version "1.5.1")
(dependencies srfi-34 srfi-35 srfi-158) (dependencies srfi-34 srfi-35 srfi-158)
(test-dependencies test) (test-dependencies test)
(components (components

View file

@ -103,7 +103,7 @@
(let-values (((token next-char* new-charcount nesting-delta) (let-values (((token next-char* new-charcount nesting-delta)
((determine-reader-proc next-char) ((determine-reader-proc next-char)
json-number-of-characters next-char input-generator))) json-number-of-characters next-char input-generator)))
(if (not (eq? '() token)) (unless (null? token)
(yield token)) (yield token))
(loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount))))))))) (loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount)))))))))
@ -146,6 +146,36 @@
next-char* (+ charcount 1) 0) next-char* (+ charcount 1) 0)
(read-number (+ charcount 1) next-char* input-proc accu)))) (read-number (+ charcount 1) next-char* input-proc accu))))
(define (translate-escape char input-proc)
(case char
((#\") #\")
((#\') #\')
((#\\) #\\)
((#\n) #\newline)
((#\t) #\tab)
((#\u) (read-unicode-escape input-proc))
((#\x) (read-hex-escape input-proc))
((#\O) #\null)
((#\r) #\return)
((#\|) #\|)
((#\v) #\vtab)
((#\a) #\alarm)
((#\b) #\backspace)))
(define (read-hex-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2)) 16))))
(define (read-unicode-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc))
(pos3 (input-proc))
(pos4 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2 pos3 pos4)) 16))))
(define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f)) (define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f))
(cond (cond
(beginning? (beginning?
@ -156,10 +186,16 @@
((and (not esc?) (char=? next-char #\")) ((and (not esc?) (char=? next-char #\"))
(values (reverse-list->string accu) (values (reverse-list->string accu)
(input-proc) (+ charcount 1) 0)) (input-proc) (+ charcount 1) 0))
(else (read-string (+ charcount 1) ((and (not esc?) (char=? next-char #\\))
(input-proc) input-proc (read-string (+ charcount 1) (input-proc) input-proc #f accu #t))
#f (cons next-char accu) (else (let ((current-char (if esc?
(and (not esc?) (char=? next-char #\\)))))) (translate-escape next-char input-proc)
next-char)))
(read-string (+ charcount 1)
(input-proc) input-proc
#f
(cons current-char accu)
#f)))))
(define-record json-foldstate mode cache accumulator) (define-record json-foldstate mode cache accumulator)
@ -172,7 +208,7 @@
(cons obj (json-foldstate-accumulator foldstate))) (cons obj (json-foldstate-accumulator foldstate)))
foldstate)) foldstate))
((%object) (begin ((%object) (begin
(if (equal? '() (json-foldstate-cache foldstate)) (if (null? (json-foldstate-cache foldstate))
(begin (begin
(json-foldstate-cache-set! foldstate obj)) (json-foldstate-cache-set! foldstate obj))
(begin (begin

View file

@ -305,7 +305,7 @@ Streaming event-based JSON reader. =PORT-OR-GENERATOR= default value is the valu
(let-values (((token next-char* new-charcount nesting-delta) (let-values (((token next-char* new-charcount nesting-delta)
((determine-reader-proc next-char) ((determine-reader-proc next-char)
json-number-of-characters next-char input-generator))) json-number-of-characters next-char input-generator)))
(if (not (eq? '() token)) (unless (null? token)
(yield token)) (yield token))
(loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount))))))))) (loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount)))))))))
#+end_src #+end_src
@ -571,6 +571,36 @@ String reader
#+name: string-reader #+name: string-reader
#+begin_src scheme :tangle srfi-180.impl.scm :exports none :results silent #+begin_src scheme :tangle srfi-180.impl.scm :exports none :results silent
(define (translate-escape char input-proc)
(case char
((#\") #\")
((#\') #\')
((#\\) #\\)
((#\n) #\newline)
((#\t) #\tab)
((#\u) (read-unicode-escape input-proc))
((#\x) (read-hex-escape input-proc))
((#\O) #\null)
((#\r) #\return)
((#\|) #\|)
((#\v) #\vtab)
((#\a) #\alarm)
((#\b) #\backspace)))
(define (read-hex-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2)) 16))))
(define (read-unicode-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc))
(pos3 (input-proc))
(pos4 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2 pos3 pos4)) 16))))
(define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f)) (define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f))
(cond (cond
(beginning? (beginning?
@ -581,10 +611,16 @@ String reader
((and (not esc?) (char=? next-char #\")) ((and (not esc?) (char=? next-char #\"))
(values (reverse-list->string accu) (values (reverse-list->string accu)
(input-proc) (+ charcount 1) 0)) (input-proc) (+ charcount 1) 0))
(else (read-string (+ charcount 1) ((and (not esc?) (char=? next-char #\\))
(input-proc) input-proc (read-string (+ charcount 1) (input-proc) input-proc #f accu #t))
#f (cons next-char accu) (else (let ((current-char (if esc?
(and (not esc?) (char=? next-char #\\)))))) (translate-escape next-char input-proc)
next-char)))
(read-string (+ charcount 1)
(input-proc) input-proc
#f
(cons current-char accu)
#f)))))
#+end_src #+end_src
#+name: string-reader-test #+name: string-reader-test
@ -593,9 +629,9 @@ String reader
<<string-reader>> <<string-reader>>
<<tokenpredicates>> <<tokenpredicates>>
(test-group "String reading" (test-group "String reading"
(let ((input '(#\T #\e #\s #\t #\space #\T #\e #\\ #\s #\\ #\" #\t #\" #\space))) (let ((input '(#\T #\e #\s #\t #\space #\T #\e #\s #\\ #\" #\t #\" #\space)))
(test "String" (test "String"
'("Test Te\\s\\\"t" #\space 14) '("Test Tes\"t" #\space 13)
(let-values (((val input charcount nesting-delta) (let-values (((val input charcount nesting-delta)
(read-string 0 #\" (lambda () (let ((next (car input))) (read-string 0 #\" (lambda () (let ((next (car input)))
(set! input (cdr input)) (set! input (cdr input))
@ -635,7 +671,7 @@ Fundamental JSON iterator.
(cons obj (json-foldstate-accumulator foldstate))) (cons obj (json-foldstate-accumulator foldstate)))
foldstate)) foldstate))
((%object) (begin ((%object) (begin
(if (equal? '() (json-foldstate-cache foldstate)) (if (null? (json-foldstate-cache foldstate))
(begin (begin
(json-foldstate-cache-set! foldstate obj)) (json-foldstate-cache-set! foldstate obj))
(begin (begin
@ -963,6 +999,7 @@ Daniel Ziltener
** Version History ** Version History
#+name: version-history #+name: version-history
| 1.5.1 | Escape sequences |
| 1.5.0 | Reimplementation | | 1.5.0 | Reimplementation |
| 1.0.0 | Reference Implementation | | 1.0.0 | Reference Implementation |

View file

@ -1,5 +1,6 @@
;; -*- Scheme -*- ;; -*- Scheme -*-
(repo git "https://gitea.lyrion.ch/Chicken/srfi-180.git") (repo git "https://gitea.lyrion.ch/Chicken/srfi-180.git")
(uri targz "https://gitea.lyrion.ch/Chicken/srfi-180/archive/{egg-release}.tar.gz") (uri targz "https://gitea.lyrion.ch/Chicken/srfi-180/archive/{egg-release}.tar.gz")
(release "1.5.1") ;; Escape sequences
(release "1.5.0") ;; Reimplementation (release "1.5.0") ;; Reimplementation
(release "1.0.0") ;; Reference Implementation (release "1.0.0") ;; Reference Implementation

View file

@ -79,9 +79,9 @@
(list val input charcount))))) (list val input charcount)))))
(test-group "String reading" (test-group "String reading"
(let ((input '(#\T #\e #\s #\t #\space #\T #\e #\\ #\s #\\ #\" #\t #\" #\space))) (let ((input '(#\T #\e #\s #\t #\space #\T #\e #\s #\\ #\" #\t #\" #\space)))
(test "String" (test "String"
'("Test Te\\s\\\"t" #\space 14) '("Test Tes\"t" #\space 13)
(let-values (((val input charcount nesting-delta) (let-values (((val input charcount nesting-delta)
(read-string 0 #\" (lambda () (let ((next (car input))) (read-string 0 #\" (lambda () (let ((next (car input)))
(set! input (cdr input)) (set! input (cdr input))