;;;; C string handling procedures for R6RS Scheme
;;;; Copyright (C)  2025 Nikolaos Chatzikonstantnou
;;;;
;;;; This program is free software: you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or
;;;; (at your option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;;; Code:

(library (parser elf cstrings)
  (export
   parse-ascii-string
   parse-cstrings
   parse-cstring-bstree
   search-cstring-bstree)

  (import (rnrs base (6))
          (rnrs bytevectors (6))
          (rnrs control (6))
          (rnrs io ports (6))
          (rnrs mutable-pairs (6))
          (parser elf bstree))

  ;; Transcoder used to decode raw bytes as latin-1, one character per byte.
  ;;
  ;; The end-of-line style is explicitly `none': when it is omitted,
  ;; `make-transcoder' falls back to the platform's native style, and any
  ;; style other than `none' makes decoding rewrite CR, CRLF, NEL, ... into
  ;; a single linefeed.  That would alter the content (and possibly the
  ;; length) of decoded strings, while callers rely on byte offsets and
  ;; character indices being the same thing.
  (define ascii-transcoder
    (make-transcoder (latin-1-codec) (eol-style none)))

  (define (parse-ascii-string bv start end)
    "Decode the bytes of BV in the range [START, END) into a Scheme string.

The bytes are decoded with `ascii-transcoder' (latin-1).  When END is not
greater than START the result is the empty string."
    (let* ((len (max 0 (- end start)))
           ;; Scratch buffer holding just the requested slice of BV.
           (bytes (make-bytevector len)))
      (when (> len 0)
        (bytevector-copy! bv start bytes 0 len))
      (bytevector->string bytes ascii-transcoder)))

  (define (parse-cstrings bv start end)
    "Collect the NUL-terminated strings stored in BV between START and END.

The result is a vector of (offset . str) conses, sorted by increasing
offset, where OFFSET is the index in BV of the first byte of STR.  Bytes
that follow the last NUL byte in the range are not part of any entry."
    (let scan ((pos start)            ; byte currently being inspected
               (string-start start)   ; offset where the pending string began
               (found '()))           ; entries so far, most recent first
      (cond ((>= pos end)
             ;; Entries were accumulated newest-first; restore offset order.
             (list->vector (reverse found)))
            ((zero? (bytevector-u8-ref bv pos))
             ;; A NUL byte terminates the pending string.  The next string
             ;; starts right after it.
             (let ((entry (cons string-start
                                (parse-ascii-string bv string-start pos)))
                   (next (+ pos 1)))
               (scan next next (cons entry found))))
            (else
             (scan (+ pos 1) string-start found)))))

  (define (parse-cstring-bstree bv start end)
    "Build a binary search tree from the C strings of BV between START and END.

The (offset . str) entries returned by `parse-cstrings' are passed to
`vector->bstree' over the whole index range of that vector; the tree is
presumably balanced by that procedure (see (parser elf bstree))."
    (let ((entries (parse-cstrings bv start end)))
      (vector->bstree entries 0 (vector-length entries))))

  (define (search-cstring-bstree bstree i)
    "Return the C string that starts at byte offset I, looked up in BSTREE.

Each node of BSTREE holds an (offset . str) cons.  When I is the offset of
a node, that node's string is returned as is.  When I falls inside a
string (or on its terminating NUL byte), the suffix of that string
starting at I is returned as a fresh string.

NOTE(review): there is no check for an empty subtree here; what happens
when I lies outside every stored string depends on how (parser elf bstree)
represents missing children -- confirm against that library."
    (let* ((pair  (bstree-value bstree))
           (index (car pair))
           (str   (cdr pair))
           (len   (string-length str))
           ;; One past the last byte owned by this entry; the +1 accounts
           ;; for the NUL byte that terminates the string.
           (end   (+ 1 index len)))
      (cond ((< i index) (search-cstring-bstree (bstree-left  bstree) i))
            ((<= end i)  (search-cstring-bstree (bstree-right bstree) i))
            ((= index i) str)
            ;; R6RS `substring' takes an explicit end index; the
            ;; two-argument form is a non-portable extension.
            (else (substring str (- i index) len)))))
  )
