Escape sequence interpreter

This commit is contained in:
Daniel Ziltener 2024-09-14 14:53:27 +02:00
parent 839f439924
commit ac9973cc56
Signed by: zilti
GPG key ID: B38976E82C9DAE42
6 changed files with 95 additions and 20 deletions

View file

@ -1,4 +1,4 @@
# Created 2024-09-14 Sat 00:15
# Created 2024-09-14 Sat 14:52
#+title: SRFI-180
#+author: Daniel Ziltener
#+export_file_name: README.org
@ -182,8 +182,9 @@ Daniel Ziltener
** Version History
#+name: version-history
| 1.5 | Reimplementation |
| 1.0 | Reference Implementation |
| 1.5.1 | Escape sequences |
| 1.5.0 | Reimplementation |
| 1.0.0 | Reference Implementation |
* License

View file

@ -3,7 +3,7 @@
(synopsis "A JSON parser and printer that supports JSON bigger than memory.")
(category parsing)
(license "BSD")
(version "1.5.0")
(version "1.5.1")
(dependencies srfi-34 srfi-35 srfi-158)
(test-dependencies test)
(components

View file

@ -103,7 +103,7 @@
(let-values (((token next-char* new-charcount nesting-delta)
((determine-reader-proc next-char)
json-number-of-characters next-char input-generator)))
(if (not (eq? '() token))
(unless (null? token)
(yield token))
(loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount)))))))))
@ -146,6 +146,36 @@
next-char* (+ charcount 1) 0)
(read-number (+ charcount 1) next-char* input-proc accu))))
(define (translate-escape char input-proc)
(case char
((#\") #\")
((#\') #\')
((#\\) #\\)
((#\n) #\newline)
((#\t) #\tab)
((#\u) (read-unicode-escape input-proc))
((#\x) (read-hex-escape input-proc))
((#\O) #\null)
((#\r) #\return)
((#\|) #\|)
((#\v) #\vtab)
((#\a) #\alarm)
((#\b) #\backspace)))
(define (read-hex-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2)) 16))))
(define (read-unicode-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc))
(pos3 (input-proc))
(pos4 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2 pos3 pos4)) 16))))
(define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f))
(cond
(beginning?
@ -156,10 +186,16 @@
((and (not esc?) (char=? next-char #\"))
(values (reverse-list->string accu)
(input-proc) (+ charcount 1) 0))
(else (read-string (+ charcount 1)
(input-proc) input-proc
#f (cons next-char accu)
(and (not esc?) (char=? next-char #\\))))))
((and (not esc?) (char=? next-char #\\))
(read-string (+ charcount 1) (input-proc) input-proc #f accu #t))
(else (let ((current-char (if esc?
(translate-escape next-char input-proc)
next-char)))
(read-string (+ charcount 1)
(input-proc) input-proc
#f
(cons current-char accu)
#f)))))
(define-record json-foldstate mode cache accumulator)
@ -172,7 +208,7 @@
(cons obj (json-foldstate-accumulator foldstate)))
foldstate))
((%object) (begin
(if (equal? '() (json-foldstate-cache foldstate))
(if (null? (json-foldstate-cache foldstate))
(begin
(json-foldstate-cache-set! foldstate obj))
(begin

View file

@ -305,7 +305,7 @@ Streaming event-based JSON reader. =PORT-OR-GENERATOR= default value is the valu
(let-values (((token next-char* new-charcount nesting-delta)
((determine-reader-proc next-char)
json-number-of-characters next-char input-generator)))
(if (not (eq? '() token))
(unless (null? token)
(yield token))
(loop next-char* (+ (or json-nesting-depth 0) nesting-delta) new-charcount)))))))))
#+end_src
@ -571,6 +571,36 @@ String reader
#+name: string-reader
#+begin_src scheme :tangle srfi-180.impl.scm :exports none :results silent
(define (translate-escape char input-proc)
(case char
((#\") #\")
((#\') #\')
((#\\) #\\)
((#\n) #\newline)
((#\t) #\tab)
((#\u) (read-unicode-escape input-proc))
((#\x) (read-hex-escape input-proc))
((#\O) #\null)
((#\r) #\return)
((#\|) #\|)
((#\v) #\vtab)
((#\a) #\alarm)
((#\b) #\backspace)))
(define (read-hex-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2)) 16))))
(define (read-unicode-escape input-proc)
(let ((pos1 (input-proc))
(pos2 (input-proc))
(pos3 (input-proc))
(pos4 (input-proc)))
(integer->char
(string->number (list->string (list pos1 pos2 pos3 pos4)) 16))))
(define (read-string charcount next-char input-proc #!optional (beginning? #t) (accu '()) (esc? #f))
(cond
(beginning?
@ -581,10 +611,16 @@ String reader
((and (not esc?) (char=? next-char #\"))
(values (reverse-list->string accu)
(input-proc) (+ charcount 1) 0))
(else (read-string (+ charcount 1)
(input-proc) input-proc
#f (cons next-char accu)
(and (not esc?) (char=? next-char #\\))))))
((and (not esc?) (char=? next-char #\\))
(read-string (+ charcount 1) (input-proc) input-proc #f accu #t))
(else (let ((current-char (if esc?
(translate-escape next-char input-proc)
next-char)))
(read-string (+ charcount 1)
(input-proc) input-proc
#f
(cons current-char accu)
#f)))))
#+end_src
#+name: string-reader-test
@ -593,9 +629,9 @@ String reader
<<string-reader>>
<<tokenpredicates>>
(test-group "String reading"
(let ((input '(#\T #\e #\s #\t #\space #\T #\e #\\ #\s #\\ #\" #\t #\" #\space)))
(let ((input '(#\T #\e #\s #\t #\space #\T #\e #\s #\\ #\" #\t #\" #\space)))
(test "String"
'("Test Te\\s\\\"t" #\space 14)
'("Test Tes\"t" #\space 13)
(let-values (((val input charcount nesting-delta)
(read-string 0 #\" (lambda () (let ((next (car input)))
(set! input (cdr input))
@ -635,7 +671,7 @@ Fundamental JSON iterator.
(cons obj (json-foldstate-accumulator foldstate)))
foldstate))
((%object) (begin
(if (equal? '() (json-foldstate-cache foldstate))
(if (null? (json-foldstate-cache foldstate))
(begin
(json-foldstate-cache-set! foldstate obj))
(begin
@ -963,6 +999,7 @@ Daniel Ziltener
** Version History
#+name: version-history
| 1.5.1 | Escape sequences |
| 1.5.0 | Reimplementation |
| 1.0.0 | Reference Implementation |

View file

@ -1,5 +1,6 @@
;; -*- Scheme -*-
(repo git "https://gitea.lyrion.ch/Chicken/srfi-180.git")
(uri targz "https://gitea.lyrion.ch/Chicken/srfi-180/archive/{egg-release}.tar.gz")
(release "1.5.1") ;; Escape sequences
(release "1.5.0") ;; Reimplementation
(release "1.0.0") ;; Reference Implementation

View file

@ -79,9 +79,9 @@
(list val input charcount)))))
(test-group "String reading"
(let ((input '(#\T #\e #\s #\t #\space #\T #\e #\\ #\s #\\ #\" #\t #\" #\space)))
(let ((input '(#\T #\e #\s #\t #\space #\T #\e #\s #\\ #\" #\t #\" #\space)))
(test "String"
'("Test Te\\s\\\"t" #\space 14)
'("Test Tes\"t" #\space 13)
(let-values (((val input charcount nesting-delta)
(read-string 0 #\" (lambda () (let ((next (car input)))
(set! input (cdr input))