|
| 1 | +;;; SRFI 129 |
| 2 | +;;; Titlecase procedures |
| 3 | +;;; |
| 4 | +;;; This SRFI defines R7RS-style char-title-case?, char-titlecase, and |
| 5 | +;;; string-titlecase procedures. |
| 6 | +;;; |
| 7 | +;;; Copyright © 2015 John Cowan. All Rights Reserved. |
| 8 | +;;; |
| 9 | +;;; Permission is hereby granted, free of charge, to any person |
| 10 | +;;; obtaining a copy of this software and associated documentation |
| 11 | +;;; files (the "Software"), to deal in the Software without |
| 12 | +;;; restriction, including without limitation the rights to use, |
| 13 | +;;; copy, modify, merge, publish, distribute, sublicense, and/or |
| 14 | +;;; sell copies of the Software, and to permit persons to whom the |
| 15 | +;;; Software is furnished to do so, subject to the following |
| 16 | +;;; conditions: |
| 17 | +;;; |
| 18 | +;;; The above copyright notice and this permission notice shall be |
| 19 | +;;; included in all copies or substantial portions of the Software. |
| 20 | +;;; |
| 21 | +;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 22 | +;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| 23 | +;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 24 | +;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| 25 | +;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 26 | +;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 27 | +;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 28 | +;;; OTHER DEALINGS IN THE SOFTWARE. |
| 29 | +;;; |
| 30 | +;;; LispKit Port: |
| 31 | +;;; Copyright © 2018 Matthias Zenger. All rights reserved. |
| 32 | + |
| 33 | +(define-library (srfi 129) |
| 34 | + |
| 35 | + (export char-title-case? |
| 36 | + char-titlecase |
| 37 | + string-titlecase) |
| 38 | + |
| 39 | + (import (lispkit base)) |
| 40 | + |
| 41 | + (begin |
| 42 | + ;;;; Alists for titlecase functions |
| 43 | + |
| 44 | + ;;; Assumes that char->integer and integer->char are a subset of Unicode |
| 45 | + ;;; codepoint mappings rather than some random codes, as R5RS allows |
| 46 | + ;;; but R[67]RS do not. It may be necessary to remove some lines if |
| 47 | + ;;; the codepoints referred to don't correspond to characters present |
| 48 | + ;;; in the implementation. |
| 49 | + |
| 50 | + ;;; These maps are valid from Unicode 5.0 to at least Unicode 8.0 |
| 51 | + ;;; and are expected to be stable for the foreseeable future. |
| 52 | + |
| 53 | + ;; Alist mapping titlecase characters to themselves |
| 54 | + (define titlecase-chars '( |
| 55 | + (#x01C5 #x01C5) ; LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON |
| 56 | + (#x01C8 #x01C8) ; LATIN CAPITAL LETTER L WITH SMALL LETTER J |
| 57 | + (#x01CB #x01CB) ; LATIN CAPITAL LETTER N WITH SMALL LETTER J |
| 58 | + (#x01F2 #x01F2) ; LATIN CAPITAL LETTER D WITH SMALL LETTER Z |
| 59 | + (#x1F88 #x1F88) ; GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI |
| 60 | + (#x1F89 #x1F89) ; GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI |
| 61 | + (#x1F8A #x1F8A) ; GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
| 62 | + (#x1F8B #x1F8B) ; GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
| 63 | + (#x1F8C #x1F8C) ; GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
| 64 | + (#x1F8D #x1F8D) ; GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
| 65 | + (#x1F8E #x1F8E) ; GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
| 66 | + (#x1F8F #x1F8F) ; GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
| 67 | + (#x1F98 #x1F98) ; GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI |
| 68 | + (#x1F99 #x1F99) ; GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI |
| 69 | + (#x1F9A #x1F9A) ; GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
| 70 | + (#x1F9B #x1F9B) ; GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
| 71 | + (#x1F9C #x1F9C) ; GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
| 72 | + (#x1F9D #x1F9D) ; GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
| 73 | + (#x1F9E #x1F9E) ; GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
| 74 | + (#x1F9F #x1F9F) ; GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
| 75 | + (#x1FA8 #x1FA8) ; GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI |
| 76 | + (#x1FA9 #x1FA9) ; GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI |
| 77 | + (#x1FAA #x1FAA) ; GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI |
| 78 | + (#x1FAB #x1FAB) ; GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI |
| 79 | + (#x1FAC #x1FAC) ; GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI |
| 80 | + (#x1FAD #x1FAD) ; GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI |
| 81 | + (#x1FAE #x1FAE) ; GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI |
| 82 | + (#x1FAF #x1FAF) ; GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI |
| 83 | + (#x1FBC #x1FBC) ; GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI |
| 84 | + (#x1FCC #x1FCC) ; GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI |
| 85 | + (#x1FFC #x1FFC) ; GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI |
| 86 | + )) |
| 87 | + |
| 88 | + ;; Alist mapping characters to their single-letter titlecase equivalents |
| 89 | + ;; when those are distinct from their uppercase equivalents |
| 90 | + (define title-single-map (append titlecase-chars '( |
| 91 | + (#x01C4 #x01C5) ; LATIN CAPITAL LETTER DZ WITH CARON |
| 92 | + (#x01C6 #x01C5) ; LATIN SMALL LETTER DZ WITH CARON |
| 93 | + (#x01C7 #x01C8) ; LATIN CAPITAL LETTER LJ |
| 94 | + (#x01C9 #x01C8) ; LATIN SMALL LETTER LJ |
| 95 | + (#x01CA #x01CB) ; LATIN CAPITAL LETTER NJ |
| 96 | + (#x01CC #x01CB) ; LATIN SMALL LETTER NJ |
| 97 | + (#x01F1 #x01F2) ; LATIN CAPITAL LETTER DZ |
| 98 | + (#x01F3 #x01F2) ; LATIN SMALL LETTER DZ |
| 99 | + ))) |
| 100 | + |
| 101 | + ;; Alist mapping characters to their multiple-letter titlecase equivalents |
| 102 | + (define title-multiple-map (append title-single-map '( |
| 103 | + (#x00DF #x0053 #x0073) ; LATIN SMALL LETTER SHARP S |
| 104 | + (#xFB00 #x0046 #x0066) ; LATIN SMALL LIGATURE FF |
| 105 | + (#xFB01 #x0046 #x0069) ; LATIN SMALL LIGATURE FI |
| 106 | + (#xFB02 #x0046 #x006C) ; LATIN SMALL LIGATURE FL |
| 107 | + (#xFB03 #x0046 #x0066 #x0069) ; LATIN SMALL LIGATURE FFI |
| 108 | + (#xFB04 #x0046 #x0066 #x006C) ; LATIN SMALL LIGATURE FFL |
| 109 | + (#xFB05 #x0053 #x0074) ; LATIN SMALL LIGATURE LONG S T |
| 110 | + (#xFB06 #x0053 #x0074) ; LATIN SMALL LIGATURE ST |
| 111 | + (#x0587 #x0535 #x0582) ; ARMENIAN SMALL LIGATURE ECH YIWN |
| 112 | + (#xFB13 #x0544 #x0576) ; ARMENIAN SMALL LIGATURE MEN NOW |
| 113 | + (#xFB14 #x0544 #x0565) ; ARMENIAN SMALL LIGATURE MEN ECH |
| 114 | + (#xFB15 #x0544 #x056B) ; ARMENIAN SMALL LIGATURE MEN INI |
| 115 | + (#xFB16 #x054E #x0576) ; ARMENIAN SMALL LIGATURE VEW NOW |
| 116 | + (#xFB17 #x0544 #x056D) ; ARMENIAN SMALL LIGATURE MEN XEH |
| 117 | + (#x0149 #x02BC #x004E) ; LATIN SMALL LETTER N PRECEDED BY APOSTROPHE |
| 118 | + (#x0390 #x0399 #x0308 #x0301) ; GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS |
| 119 | + (#x03B0 #x03A5 #x0308 #x0301) ; GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS |
| 120 | + (#x01F0 #x004A #x030C) ; LATIN SMALL LETTER J WITH CARON |
| 121 | + (#x1E96 #x0048 #x0331) ; LATIN SMALL LETTER H WITH LINE BELOW |
| 122 | + (#x1E97 #x0054 #x0308) ; LATIN SMALL LETTER T WITH DIAERESIS |
| 123 | + (#x1E98 #x0057 #x030A) ; LATIN SMALL LETTER W WITH RING ABOVE |
| 124 | + (#x1E99 #x0059 #x030A) ; LATIN SMALL LETTER Y WITH RING ABOVE |
| 125 | + (#x1E9A #x0041 #x02BE) ; LATIN SMALL LETTER A WITH RIGHT HALF RING |
| 126 | + (#x1F50 #x03A5 #x0313) ; GREEK SMALL LETTER UPSILON WITH PSILI |
| 127 | + (#x1F52 #x03A5 #x0313 #x0300) ; GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA |
| 128 | + (#x1F54 #x03A5 #x0313 #x0301) ; GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA |
| 129 | + (#x1F56 #x03A5 #x0313 #x0342) ; GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI |
| 130 | + (#x1FB6 #x0391 #x0342) ; GREEK SMALL LETTER ALPHA WITH PERISPOMENI |
| 131 | + (#x1FC6 #x0397 #x0342) ; GREEK SMALL LETTER ETA WITH PERISPOMENI |
| 132 | + (#x1FD2 #x0399 #x0308 #x0300) ; GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA |
| 133 | + (#x1FD3 #x0399 #x0308 #x0301) ; GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA |
| 134 | + (#x1FD6 #x0399 #x0342) ; GREEK SMALL LETTER IOTA WITH PERISPOMENI |
| 135 | + (#x1FD7 #x0399 #x0308 #x0342) ; GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI |
| 136 | + (#x1FE2 #x03A5 #x0308 #x0300) ; GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA |
| 137 | + (#x1FE3 #x03A5 #x0308 #x0301) ; GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA |
| 138 | + (#x1FE4 #x03A1 #x0313) ; GREEK SMALL LETTER RHO WITH PSILI |
| 139 | + (#x1FE6 #x03A5 #x0342) ; GREEK SMALL LETTER UPSILON WITH PERISPOMENI |
| 140 | + (#x1FE7 #x03A5 #x0308 #x0342) ; GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI |
| 141 | + (#x1FF6 #x03A9 #x0342) ; GREEK SMALL LETTER OMEGA WITH PERISPOMENI |
| 142 | + (#x1FB2 #x1FBA #x0345) ; GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI |
| 143 | + (#x1FB4 #x0386 #x0345) ; GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI |
| 144 | + (#x1FC2 #x1FCA #x0345) ; GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI |
| 145 | + (#x1FC4 #x0389 #x0345) ; GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI |
| 146 | + (#x1FF2 #x1FFA #x0345) ; GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI |
| 147 | + (#x1FF4 #x038F #x0345) ; GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI |
| 148 | + (#x1FB7 #x0391 #x0342 #x0345) ; GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI |
| 149 | + (#x1FC7 #x0397 #x0342 #x0345) ; GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI |
| 150 | + (#x1FF7 #x03A9 #x0342 #x0345) ; GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI |
| 151 | + ))) |
| 152 | + |
| 153 | + ;; Alist mapping characters to their multiple-character lowercase equivalents |
| 154 | + (define lower-multiple-map '( |
| 155 | + (#x0130 #x0069 #x0307) ; LATIN CAPITAL LETTER I WITH DOT ABOVE |
| 156 | + )) |
| 157 | + ) |
| 158 | + |
| 159 | + (begin |
| 160 | + ;;;; Implementation of SRFI 129 titlecase functions |
| 161 | + |
| 162 | + ;; Returns #t if argument is a titlecase character, #f if not |
| 163 | + (define (char-title-case? ch) |
| 164 | + (let* ((codepoint (char->integer ch)) |
| 165 | + (result (assq codepoint titlecase-chars))) |
| 166 | + (if result #t #f))) |
| 167 | + |
| 168 | + ;; Returns the single-character titlecase mapping of argument |
| 169 | + (define (char-titlecase ch) |
| 170 | + (let* ((codepoint (char->integer ch)) |
| 171 | + (result (assq codepoint title-single-map))) |
| 172 | + (if result |
| 173 | + (integer->char (cadr result)) |
| 174 | + (char-upcase ch)))) |
| 175 | + |
| 176 | + ;; Returns #t if a character is caseless, otherwise #f |
| 177 | + (define (char-caseless? ch) |
| 178 | + (not (or (char-lower-case? ch) (char-upper-case? ch) (char-title-case? ch)))) |
| 179 | + |
| 180 | + ;; Push a list onto another list in reverse order |
| 181 | + (define (reverse-push new old) |
| 182 | + (if (null? new) |
| 183 | + old |
| 184 | + (reverse-push (cdr new) (cons (car new) old)))) |
| 185 | + ) |
| 186 | +) |
| 187 | + |
0 commit comments